From d3382e0639406e600097957bcec3bf23ea6949fe Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 17 Feb 2021 17:39:29 -0800 Subject: [PATCH 01/10] bitmask_or implementation with bitmask refactor --- .../cudf/column/column_device_view.cuh | 2 +- cpp/include/cudf/detail/null_mask.hpp | 50 +++++++ cpp/include/cudf/null_mask.hpp | 14 ++ cpp/src/bitmask/null_mask.cu | 136 +++++++++++++----- .../main/java/ai/rapids/cudf/ColumnView.java | 2 +- java/src/main/native/src/ColumnViewJni.cpp | 12 +- .../java/ai/rapids/cudf/ColumnVectorTest.java | 21 ++- 7 files changed, 199 insertions(+), 38 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 9f5c2f33aa6..23c8557f3bc 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -805,7 +805,7 @@ namespace detail { * @brief Convenience function to get offset word from a bitmask * * @see copy_offset_bitmask - * @see offset_bitmask_and + * @see offset_bitmask_binop */ __device__ inline bitmask_type get_mask_offset_word(bitmask_type const* __restrict__ source, size_type destination_word_index, diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 2f2bc91cb74..71009a32fcc 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -87,6 +87,21 @@ rmm::device_buffer copy_bitmask( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc bitmask_binop(Binop op, std::vector, std::vector const&, + * size_type, rmm::mr::device_memory_resource *) + * + * @param stream CUDA stream used for device memory operations and kernel launches + */ +template +rmm::device_buffer bitmask_binop( + Binop op, + std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + /** * @copydoc bitmask_and(std::vector, std::vector const&, size_type, * rmm::mr::device_memory_resource *) @@ -100,6 +115,18 @@ rmm::device_buffer bitmask_and( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc cudf::bitmask_binop + * + * @param[in] stream CUDA stream used for device memory operations and kernel launches. + */ +template +rmm::device_buffer bitmask_binop( + Binop op, + table_view const &view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + /** * @copydoc cudf::bitmask_and * @@ -110,6 +137,29 @@ rmm::device_buffer bitmask_and( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); +/** + * @brief Performs a merger of the specified bitmasks using the binary operator + * provided, and writes in place to destination + * + * @param op The binary operator used to combine the bitmasks + * @param dest_mask Destination to which the AND result is written + * @param masks The list of data pointers of the bitmasks to be ANDed + * @param begin_bits The bit offsets from which each mask is to be ANDed + * @param mask_size The number of bits to be ANDed in each mask + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned device_buffer + * @return rmm::device_buffer Output bitmask + */ +template +void inplace_bitmask_binop( + Binop op, + bitmask_type *dest_mask, + std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + /** * @brief Performs a bitwise AND of the specified bitmasks, * and writes in place to destination diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 5e1f0f0802e..663c42cec17 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -220,5 +220,19 @@ rmm::device_buffer bitmask_and( table_view const& view, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a bitwise OR of the bitmasks of columns of a table + * + * If any of the columns isn't nullable, it is considered all valid. + * If no column in the table is nullable, an empty bitmask is returned. + * + * @param view The table of columns + * @param mr Device memory resource used to allocate the returned device_buffer + * @return rmm::device_buffer Output bitmask + */ +rmm::device_buffer bitmask_or( + table_view const& view, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 8cdcefe9796..c0e0e27f3a6 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -316,8 +316,9 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination, } /** - * @brief Computes the bitwise AND of an array of bitmasks + * @brief Computes the merger of an array of bitmasks using a binary operator * + * @param op The binary operator used to combine the bitmasks * @param destination The bitmask to write result into * @param source Array of source mask pointers. All masks must be of same size * @param begin_bit Array of offsets into corresponding @p source masks. @@ -326,20 +327,25 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination, * @param source_size Number of bits in each mask in @p source * @param number_of_mask_words The number of words of type bitmask_type to copy */ -__global__ void offset_bitmask_and(bitmask_type *__restrict__ destination, - bitmask_type const *const *__restrict__ source, - size_type const *__restrict__ begin_bit, - size_type num_sources, - size_type source_size, - size_type number_of_mask_words) +template +__global__ void offset_bitmask_binop(Binop op, + bitmask_type *__restrict__ destination, + bitmask_type const *const *__restrict__ source, + size_type const *__restrict__ begin_bit, + size_type num_sources, + size_type source_size, + size_type number_of_mask_words) { for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x; destination_word_index < number_of_mask_words; destination_word_index += blockDim.x * gridDim.x) { - bitmask_type destination_word = ~bitmask_type{0}; // All bits 1 - for (size_type i = 0; i < num_sources; i++) { - destination_word &= detail::get_mask_offset_word( - source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size); + bitmask_type destination_word = detail::get_mask_offset_word( + source[0], destination_word_index, begin_bit[0], begin_bit[0] + source_size); + for (size_type i = 1; i < num_sources; i++) { + destination_word = + op(destination_word, + detail::get_mask_offset_word( + source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size)); } destination[destination_word_index] = destination_word; @@ -420,13 +426,14 @@ rmm::device_buffer copy_bitmask(column_view const &view, return null_mask; } -// Inplace Bitwise AND of the masks -void inplace_bitmask_and(bitmask_type *dest_mask, - std::vector const &masks, - std::vector const &begin_bits, - size_type mask_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) +template +void inplace_bitmask_binop(Binop op, + bitmask_type *dest_mask, + std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), "Invalid range."); @@ -440,7 +447,8 @@ void inplace_bitmask_and(bitmask_type *dest_mask, rmm::device_vector d_begin_bits(begin_bits); cudf::detail::grid_1d config(number_of_mask_words, 256); - offset_bitmask_and<<>>( + offset_bitmask_binop<<>>( + op, dest_mask, d_masks.data().get(), d_begin_bits.data().get(), @@ -451,23 +459,58 @@ void inplace_bitmask_and(bitmask_type *dest_mask, CHECK_CUDA(stream.value()); } -// Bitwise AND of the masks -rmm::device_buffer bitmask_and(std::vector const &masks, - std::vector const &begin_bits, - size_type mask_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) +// Inplace Bitwise AND of the masks +void inplace_bitmask_and(bitmask_type *dest_mask, + std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) +{ + inplace_bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, + dest_mask, + masks, + begin_bits, + mask_size, + stream, + mr); +} + +template +rmm::device_buffer bitmask_binop(Binop op, + std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { rmm::device_buffer dest_mask{}; auto num_bytes = bitmask_allocation_size_bytes(mask_size); dest_mask = rmm::device_buffer{num_bytes, stream, mr}; - inplace_bitmask_and( - static_cast(dest_mask.data()), masks, begin_bits, mask_size, stream, mr); + inplace_bitmask_binop( + op, static_cast(dest_mask.data()), masks, begin_bits, mask_size, stream, mr); return dest_mask; } +// Bitwise AND of the masks +rmm::device_buffer bitmask_and(std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) +{ + return bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, + masks, + begin_bits, + mask_size, + stream, + mr); +} + cudf::size_type count_set_bits(bitmask_type const *bitmask, size_type start, size_type stop, @@ -631,10 +674,12 @@ std::vector segmented_count_unset_bits(bitmask_type const *bitmask, return ret; } -// Returns the bitwise AND of the null masks of all columns in the table view -rmm::device_buffer bitmask_and(table_view const &view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) +// Returns the merged null masks of all columns in the table view +template +rmm::device_buffer bitmask_binop(Binop op, + table_view const &view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; @@ -650,12 +695,34 @@ rmm::device_buffer bitmask_and(table_view const &view, } if (masks.size() > 0) { - return cudf::detail::bitmask_and(masks, offsets, view.num_rows(), stream, mr); + return cudf::detail::bitmask_binop(op, masks, offsets, view.num_rows(), stream, mr); } return null_mask; } +// Returns the bitwise AND of the null masks of all columns in the table view +rmm::device_buffer bitmask_and(table_view const &view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) +{ + return bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, + view, + stream, + mr); +} + +rmm::device_buffer bitmask_or(table_view const &view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) +{ + return bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left | right; }, + view, + stream, + mr); +} } // namespace detail // Count non-zero bits in the specified range @@ -708,4 +775,9 @@ rmm::device_buffer bitmask_and(table_view const &view, rmm::mr::device_memory_re return detail::bitmask_and(view, rmm::cuda_stream_default, mr); } +rmm::device_buffer bitmask_or(table_view const &view, rmm::mr::device_memory_resource *mr) +{ + return detail::bitmask_or(view, rmm::cuda_stream_default, mr); +} + } // namespace cudf diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 1dce52f7105..03bc3654110 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -520,7 +520,7 @@ public final ColumnVector normalizeNANsAndZeros() { * @return the new ColumnVector with merged null mask. */ public final ColumnVector mergeAndSetValidity(BinaryOp mergeOp, ColumnView... columns) { - assert mergeOp == BinaryOp.BITWISE_AND : "Only BITWISE_AND supported right now"; + assert mergeOp == BinaryOp.BITWISE_AND || mergeOp == BinaryOp.BITWISE_OR : "Only BITWISE_AND and BITWISE_OR supported right now"; long[] columnViews = new long[columns.length]; long size = getRowCount(); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 82e71b04a2f..1cdd5ec29ef 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1305,10 +1305,16 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit cudf::table_view *input_table = new cudf::table_view(column_views); cudf::binary_operator op = static_cast(bin_op); - if(op == cudf::binary_operator::BITWISE_AND) { - copy->set_null_mask(cudf::bitmask_and(*input_table)); + switch(op) { + case cudf::binary_operator::BITWISE_AND: + copy->set_null_mask(cudf::bitmask_and(*input_table)); + break; + case cudf::binary_operator::BITWISE_OR: + copy->set_null_mask(cudf::bitmask_or(*input_table)); + break; + default: + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Unsupported merge operation", 0); } - return reinterpret_cast(copy.release()); } CATCH_STD(env, 0); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index cb1f792b99e..d83d5f97a7e 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -571,7 +571,7 @@ void testSpark32BitMurmur3HashMixed() { } @Test - void testNullReconfigureNulls() { + void testAndNullReconfigureNulls() { try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0); @@ -585,6 +585,25 @@ void testNullReconfigureNulls() { } } + @Test + void testOrNullReconfigureNulls() { + try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); + ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); + ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0); + ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); + ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0); + ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3"); + ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); + ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null); + ColumnVector noMaskResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR)) { + assertColumnsAreEqual(v0, intResultV0); + assertColumnsAreEqual(v1, intResultV0V1); + assertColumnsAreEqual(v1, intResultMulti); + assertColumnsAreEqual(stringExpected, stringResult); + assertColumnsAreEqual(v2, noMaskResult); + } + } + @Test void isNotNullTestEmptyColumn() { try (ColumnVector v = ColumnVector.fromBoxedInts(); From 3e40d93787926702a1778b01c5793dac86e5a72b Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 22 Feb 2021 19:33:16 -0800 Subject: [PATCH 02/10] switch to device_span where possible --- cpp/include/cudf/detail/null_mask.hpp | 19 +--- cpp/src/bitmask/null_mask.cu | 134 +++++++++++++++----------- 2 files changed, 83 insertions(+), 70 deletions(-) diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 71009a32fcc..f4647ad3292 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include @@ -115,18 +116,6 @@ rmm::device_buffer bitmask_and( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); -/** - * @copydoc cudf::bitmask_binop - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. - */ -template -rmm::device_buffer bitmask_binop( - Binop op, - table_view const &view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); - /** * @copydoc cudf::bitmask_and * @@ -153,9 +142,9 @@ rmm::device_buffer bitmask_and( template void inplace_bitmask_binop( Binop op, - bitmask_type *dest_mask, - std::vector const &masks, - std::vector const &begin_bits, + device_span destination, + device_span source, + device_span begin_bit, size_type mask_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index c0e0e27f3a6..6dbf5e84842 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -41,6 +42,8 @@ #include #include +using cudf::detail::device_span; + namespace cudf { size_type state_null_count(mask_state state, size_type size) { @@ -329,19 +332,17 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination, */ template __global__ void offset_bitmask_binop(Binop op, - bitmask_type *__restrict__ destination, - bitmask_type const *const *__restrict__ source, - size_type const *__restrict__ begin_bit, - size_type num_sources, - size_type source_size, - size_type number_of_mask_words) + device_span destination, + device_span source, + device_span begin_bit, + size_type source_size) { for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x; - destination_word_index < number_of_mask_words; + destination_word_index < destination.size(); destination_word_index += blockDim.x * gridDim.x) { bitmask_type destination_word = detail::get_mask_offset_word( source[0], destination_word_index, begin_bit[0], begin_bit[0] + source_size); - for (size_type i = 1; i < num_sources; i++) { + for (size_type i = 1; i < source.size(); i++) { destination_word = op(destination_word, detail::get_mask_offset_word( @@ -426,36 +427,26 @@ rmm::device_buffer copy_bitmask(column_view const &view, return null_mask; } + +// Inplace Bitwise merge of the masks template void inplace_bitmask_binop(Binop op, - bitmask_type *dest_mask, - std::vector const &masks, - std::vector const &begin_bits, + device_span dest_mask, + device_span masks, + device_span begin_bits, size_type mask_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), - "Invalid range."); CUDF_EXPECTS(mask_size > 0, "Invalid bit range."); - CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), - "Mask pointer cannot be null"); - - auto number_of_mask_words = num_bitmask_words(mask_size); - - rmm::device_vector d_masks(masks); - rmm::device_vector d_begin_bits(begin_bits); - cudf::detail::grid_1d config(number_of_mask_words, 256); + cudf::detail::grid_1d config(dest_mask.size(), 256); offset_bitmask_binop<<>>( op, dest_mask, - d_masks.data().get(), - d_begin_bits.data().get(), - d_masks.size(), - mask_size, - number_of_mask_words); - + masks, + begin_bits, + mask_size); CHECK_CUDA(stream.value()); } @@ -467,11 +458,19 @@ void inplace_bitmask_and(bitmask_type *dest_mask, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { + CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), + "Invalid range."); + CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), + "Mask pointer cannot be null"); + + rmm::device_vector d_masks(masks); + rmm::device_vector d_begin_bits(begin_bits); + inplace_bitmask_binop( [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, - dest_mask, - masks, - begin_bits, + device_span(dest_mask, num_bitmask_words(mask_size)), + device_span(d_masks.data().get(), d_masks.size()), + device_span(d_begin_bits.data().get(), d_begin_bits.size()), mask_size, stream, mr); @@ -485,12 +484,26 @@ rmm::device_buffer bitmask_binop(Binop op, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { + CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), + "Invalid range."); + CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), + "Mask pointer cannot be null"); rmm::device_buffer dest_mask{}; auto num_bytes = bitmask_allocation_size_bytes(mask_size); + rmm::device_vector d_masks(masks); + rmm::device_vector d_begin_bits(begin_bits); + dest_mask = rmm::device_buffer{num_bytes, stream, mr}; + inplace_bitmask_binop( - op, static_cast(dest_mask.data()), masks, begin_bits, mask_size, stream, mr); + op, + device_span(static_cast(dest_mask.data()), num_bitmask_words(mask_size)), + device_span(d_masks.data().get(), d_masks.size()), + device_span(d_begin_bits.data().get(), d_begin_bits.size()), + mask_size, + stream, + mr); return dest_mask; } @@ -674,12 +687,10 @@ std::vector segmented_count_unset_bits(bitmask_type const *bitmask, return ret; } -// Returns the merged null masks of all columns in the table view -template -rmm::device_buffer bitmask_binop(Binop op, - table_view const &view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) +// Returns the bitwise AND of the null masks of all columns in the table view +rmm::device_buffer bitmask_and(table_view const &view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) { CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; @@ -695,33 +706,46 @@ rmm::device_buffer bitmask_binop(Binop op, } if (masks.size() > 0) { - return cudf::detail::bitmask_binop(op, masks, offsets, view.num_rows(), stream, mr); + return cudf::detail::bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, + masks, + offsets, + view.num_rows(), + stream, + mr); } return null_mask; } -// Returns the bitwise AND of the null masks of all columns in the table view -rmm::device_buffer bitmask_and(table_view const &view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) -{ - return bitmask_binop( - [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, - view, - stream, - mr); -} - rmm::device_buffer bitmask_or(table_view const &view, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - return bitmask_binop( - [] __device__(bitmask_type left, bitmask_type right) { return left | right; }, - view, - stream, - mr); + CUDF_FUNC_RANGE(); + rmm::device_buffer null_mask{0, stream, mr}; + if (view.num_rows() == 0 or view.num_columns() == 0) { return null_mask; } + + std::vector masks; + std::vector offsets; + for (auto &&col : view) { + if (col.nullable()) { + masks.push_back(col.null_mask()); + offsets.push_back(col.offset()); + } + } + + if (static_cast(masks.size()) == view.num_columns()) { + return cudf::detail::bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left | right; }, + masks, + offsets, + view.num_rows(), + stream, + mr); + } + + return null_mask; } } // namespace detail From 26f3027d7b921310c2a90d1fd9197d2b50023604 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 22 Feb 2021 20:29:31 -0800 Subject: [PATCH 03/10] Move function definitions to cuh file --- cpp/include/cudf/detail/null_mask.cuh | 138 ++++++++++++++++++ cpp/include/cudf/detail/null_mask.hpp | 38 ----- cpp/src/bitmask/null_mask.cu | 95 +----------- .../java/ai/rapids/cudf/ColumnVectorTest.java | 3 + 4 files changed, 144 insertions(+), 130 deletions(-) create mode 100644 cpp/include/cudf/detail/null_mask.cuh diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh new file mode 100644 index 00000000000..b50712a4519 --- /dev/null +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include + +using cudf::detail::device_span; + +namespace cudf { +namespace { +/** + * @brief Computes the merger of an array of bitmasks using a binary operator + * + * @param op The binary operator used to combine the bitmasks + * @param destination The bitmask to write result into + * @param source Array of source mask pointers. All masks must be of same size + * @param begin_bit Array of offsets into corresponding @p source masks. + * Must be same size as source array + * @param num_sources Number of masks in @p source array + * @param source_size Number of bits in each mask in @p source + * @param number_of_mask_words The number of words of type bitmask_type to copy + */ +template +__global__ void offset_bitmask_binop(Binop op, + device_span destination, + device_span source, + device_span begin_bit, + size_type source_size) +{ + for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x; + destination_word_index < destination.size(); + destination_word_index += blockDim.x * gridDim.x) { + bitmask_type destination_word = detail::get_mask_offset_word( + source[0], destination_word_index, begin_bit[0], begin_bit[0] + source_size); + for (size_type i = 1; i < source.size(); i++) { + destination_word = + op(destination_word, + detail::get_mask_offset_word( + source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size)); + } + + destination[destination_word_index] = destination_word; + } +} +} // namespace +namespace detail { +/** + * @copydoc bitmask_binop(Binop op, std::vector, std::vector const&, + * size_type, rmm::mr::device_memory_resource *) + * + * @param stream CUDA stream used for device memory operations and kernel launches + */ +template +rmm::device_buffer bitmask_binop( + Binop op, + std::vector const &masks, + std::vector const &begin_bits, + size_type mask_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) +{ + CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), + "Invalid range."); + CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), + "Mask pointer cannot be null"); + rmm::device_buffer dest_mask{}; + auto num_bytes = bitmask_allocation_size_bytes(mask_size); + + rmm::device_vector d_masks(masks); + rmm::device_vector d_begin_bits(begin_bits); + + dest_mask = rmm::device_buffer{num_bytes, stream, mr}; + + inplace_bitmask_binop(op, + device_span(static_cast(dest_mask.data()), + num_bitmask_words(mask_size)), + device_span(d_masks.data().get(), d_masks.size()), + device_span(d_begin_bits.data().get(), d_begin_bits.size()), + mask_size, + stream, + mr); + + return dest_mask; +} + +/** + * @brief Performs a merger of the specified bitmasks using the binary operator + * provided, and writes in place to destination + * + * @param op The binary operator used to combine the bitmasks + * @param dest_mask Destination to which the AND result is written + * @param masks The list of data pointers of the bitmasks to be ANDed + * @param begin_bits The bit offsets from which each mask is to be ANDed + * @param mask_size The number of bits to be ANDed in each mask + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned device_buffer + * @return rmm::device_buffer Output bitmask + */ +template +void inplace_bitmask_binop( + Binop op, + device_span dest_mask, + device_span masks, + device_span begin_bits, + size_type mask_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) +{ + CUDF_EXPECTS(mask_size > 0, "Invalid bit range."); + + cudf::detail::grid_1d config(dest_mask.size(), 256); + offset_bitmask_binop<<>>( + op, dest_mask, masks, begin_bits, mask_size); + CHECK_CUDA(stream.value()); +} + +} // namespace detail + +} // namespace cudf diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index f4647ad3292..4a8edf39640 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -88,21 +88,6 @@ rmm::device_buffer copy_bitmask( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); -/** - * @copydoc bitmask_binop(Binop op, std::vector, std::vector const&, - * size_type, rmm::mr::device_memory_resource *) - * - * @param stream CUDA stream used for device memory operations and kernel launches - */ -template -rmm::device_buffer bitmask_binop( - Binop op, - std::vector const &masks, - std::vector const &begin_bits, - size_type mask_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); - /** * @copydoc bitmask_and(std::vector, std::vector const&, size_type, * rmm::mr::device_memory_resource *) @@ -126,29 +111,6 @@ rmm::device_buffer bitmask_and( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); -/** - * @brief Performs a merger of the specified bitmasks using the binary operator - * provided, and writes in place to destination - * - * @param op The binary operator used to combine the bitmasks - * @param dest_mask Destination to which the AND result is written - * @param masks The list of data pointers of the bitmasks to be ANDed - * @param begin_bits The bit offsets from which each mask is to be ANDed - * @param mask_size The number of bits to be ANDed in each mask - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned device_buffer - * @return rmm::device_buffer Output bitmask - */ -template -void inplace_bitmask_binop( - Binop op, - device_span destination, - device_span source, - device_span begin_bit, - size_type mask_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); - /** * @brief Performs a bitwise AND of the specified bitmasks, * and writes in place to destination diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 6dbf5e84842..a1cb10aaaf4 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -318,41 +319,6 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination, } } -/** - * @brief Computes the merger of an array of bitmasks using a binary operator - * - * @param op The binary operator used to combine the bitmasks - * @param destination The bitmask to write result into - * @param source Array of source mask pointers. All masks must be of same size - * @param begin_bit Array of offsets into corresponding @p source masks. - * Must be same size as source array - * @param num_sources Number of masks in @p source array - * @param source_size Number of bits in each mask in @p source - * @param number_of_mask_words The number of words of type bitmask_type to copy - */ -template -__global__ void offset_bitmask_binop(Binop op, - device_span destination, - device_span source, - device_span begin_bit, - size_type source_size) -{ - for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x; - destination_word_index < destination.size(); - destination_word_index += blockDim.x * gridDim.x) { - bitmask_type destination_word = detail::get_mask_offset_word( - source[0], destination_word_index, begin_bit[0], begin_bit[0] + source_size); - for (size_type i = 1; i < source.size(); i++) { - destination_word = - op(destination_word, - detail::get_mask_offset_word( - source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size)); - } - - destination[destination_word_index] = destination_word; - } -} - // convert [first_bit_index,last_bit_index) to // [first_word_index,last_word_index) struct to_word_index : public thrust::unary_function { @@ -427,29 +393,6 @@ rmm::device_buffer copy_bitmask(column_view const &view, return null_mask; } - -// Inplace Bitwise merge of the masks -template -void inplace_bitmask_binop(Binop op, - device_span dest_mask, - device_span masks, - device_span begin_bits, - size_type mask_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) -{ - CUDF_EXPECTS(mask_size > 0, "Invalid bit range."); - - cudf::detail::grid_1d config(dest_mask.size(), 256); - offset_bitmask_binop<<>>( - op, - dest_mask, - masks, - begin_bits, - mask_size); - CHECK_CUDA(stream.value()); -} - // Inplace Bitwise AND of the masks void inplace_bitmask_and(bitmask_type *dest_mask, std::vector const &masks, @@ -459,9 +402,9 @@ void inplace_bitmask_and(bitmask_type *dest_mask, rmm::mr::device_memory_resource *mr) { CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), - "Invalid range."); + "Invalid range."); CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), - "Mask pointer cannot be null"); + "Mask pointer cannot be null"); rmm::device_vector d_masks(masks); rmm::device_vector d_begin_bits(begin_bits); @@ -476,38 +419,6 @@ void inplace_bitmask_and(bitmask_type *dest_mask, mr); } -template -rmm::device_buffer bitmask_binop(Binop op, - std::vector const &masks, - std::vector const &begin_bits, - size_type mask_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource *mr) -{ - CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), - "Invalid range."); - CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), - "Mask pointer cannot be null"); - rmm::device_buffer dest_mask{}; - auto num_bytes = bitmask_allocation_size_bytes(mask_size); - - rmm::device_vector d_masks(masks); - rmm::device_vector d_begin_bits(begin_bits); - - dest_mask = rmm::device_buffer{num_bytes, stream, mr}; - - inplace_bitmask_binop( - op, - device_span(static_cast(dest_mask.data()), num_bitmask_words(mask_size)), - device_span(d_masks.data().get(), d_masks.size()), - device_span(d_begin_bits.data().get(), d_begin_bits.size()), - mask_size, - stream, - mr); - - return dest_mask; -} - // Bitwise AND of the masks rmm::device_buffer bitmask_and(std::vector const &masks, std::vector const &begin_bits, diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index d83d5f97a7e..253ab77142b 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -589,9 +589,11 @@ void testAndNullReconfigureNulls() { void testOrNullReconfigureNulls() { try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); + ColumnVector v2 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE); ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0); ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0); + ColumnVector intResultv0v1v2 = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1, v2); ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3"); ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null); @@ -599,6 +601,7 @@ void testOrNullReconfigureNulls() { assertColumnsAreEqual(v0, intResultV0); assertColumnsAreEqual(v1, intResultV0V1); assertColumnsAreEqual(v1, intResultMulti); + assertColumnsAreEqual(v1, intResultv0v1v2); assertColumnsAreEqual(stringExpected, stringResult); assertColumnsAreEqual(v2, noMaskResult); } From 2889a068500736d33bc54aa5f38452d32540d23c Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 23 Feb 2021 02:32:01 -0800 Subject: [PATCH 04/10] add and fix bitmask tests --- cpp/tests/bitmask/bitmask_tests.cu | 58 +++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/cpp/tests/bitmask/bitmask_tests.cu b/cpp/tests/bitmask/bitmask_tests.cu index 8afa4faa9e3..3b7b54e0316 100644 --- a/cpp/tests/bitmask/bitmask_tests.cu +++ b/cpp/tests/bitmask/bitmask_tests.cu @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include #include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include "rmm/device_buffer.hpp" #include #include @@ -413,7 +415,7 @@ TEST_F(CopyBitmaskTest, TestZeroOffset) cleanEndWord(splice_mask, begin_bit, end_bit); auto number_of_bits = end_bit - begin_bit; CUDF_TEST_EXPECT_EQUAL_BUFFERS( - gold_splice_mask.data(), splice_mask.data(), number_of_bits / CHAR_BIT); + gold_splice_mask.data(), splice_mask.data(), cudf::num_bitmask_words(number_of_bits)); } TEST_F(CopyBitmaskTest, TestNonZeroOffset) @@ -433,7 +435,7 @@ TEST_F(CopyBitmaskTest, TestNonZeroOffset) cleanEndWord(splice_mask, begin_bit, end_bit); auto number_of_bits = end_bit - begin_bit; CUDF_TEST_EXPECT_EQUAL_BUFFERS( - gold_splice_mask.data(), splice_mask.data(), number_of_bits / CHAR_BIT); + gold_splice_mask.data(), splice_mask.data(), cudf::num_bitmask_words(number_of_bits)); } TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorContiguous) @@ -468,7 +470,7 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorContiguous) rmm::device_buffer concatenated_bitmask = cudf::concatenate_masks(views); cleanEndWord(concatenated_bitmask, 0, num_elements); CUDF_TEST_EXPECT_EQUAL_BUFFERS( - concatenated_bitmask.data(), gold_mask.data(), num_elements / CHAR_BIT); + concatenated_bitmask.data(), gold_mask.data(), cudf::num_bitmask_words(num_elements)); } TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous) @@ -493,7 +495,55 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous) rmm::device_buffer concatenated_bitmask = cudf::concatenate_masks(views); cleanEndWord(concatenated_bitmask, 0, num_elements); CUDF_TEST_EXPECT_EQUAL_BUFFERS( - concatenated_bitmask.data(), gold_mask.data(), num_elements / CHAR_BIT); + concatenated_bitmask.data(), gold_mask.data(), cudf::num_bitmask_words(num_elements)); +} + +struct MergeBitmaskTest : public cudf::test::BaseFixture {}; + +TEST_F(MergeBitmaskTest, TestBitmaskAnd) +{ + cudf::test::fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {0, 1, 1, 1, 0}); + cudf::test::fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 1, 0}); + cudf::test::fixed_width_column_wrapper const bools_col3({0, 2, 1, 0, 255}); + + auto const input1 = cudf::table_view({bools_col3}); + auto const input2 = cudf::table_view({bools_col1, bools_col2}); + auto const input3 = cudf::table_view({bools_col1, bools_col2, bools_col3}); + + rmm::device_buffer result1 = cudf::bitmask_and(input1); + rmm::device_buffer result2 = cudf::bitmask_and(input2); + rmm::device_buffer result3 = cudf::bitmask_and(input3); + + auto odd_indices = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i%2; }); + auto odd = cudf::test::detail::make_null_mask(odd_indices, odd_indices + input2.num_rows()); + + EXPECT_EQ(nullptr, result1.data()); + CUDF_TEST_EXPECT_EQUAL_BUFFERS(result2.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); + CUDF_TEST_EXPECT_EQUAL_BUFFERS(result3.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); +} + +TEST_F(MergeBitmaskTest, TestBitmaskOr) +{ + cudf::test::fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {0, 0, 1, 0, 1}); + cudf::test::fixed_width_column_wrapper const bools_col3({0, 2, 1, 0, 255}); + + auto const input1 = cudf::table_view({bools_col3}); + auto const input2 = cudf::table_view({bools_col1, bools_col2}); + auto const input3 = cudf::table_view({bools_col1, bools_col2, bools_col3}); + + rmm::device_buffer result1 = cudf::bitmask_or(input1); + rmm::device_buffer result2 = cudf::bitmask_or(input2); + rmm::device_buffer result3 = cudf::bitmask_or(input3); + + auto all_but_index3 = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i!=3; }); + auto null3 = cudf::test::detail::make_null_mask(all_but_index3, all_but_index3 + input2.num_rows()); + + EXPECT_EQ(nullptr, result1.data()); + CUDF_TEST_EXPECT_EQUAL_BUFFERS(result2.data(), null3.data(), cudf::num_bitmask_words(input2.num_rows())); + EXPECT_EQ(nullptr, result3.data()); } CUDF_TEST_PROGRAM_MAIN() From 13d017a668ca20eb858f8bcd66de3442def43703 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 23 Feb 2021 02:38:51 -0800 Subject: [PATCH 05/10] fix styling --- cpp/tests/bitmask/bitmask_tests.cu | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/cpp/tests/bitmask/bitmask_tests.cu b/cpp/tests/bitmask/bitmask_tests.cu index 3b7b54e0316..32af582b549 100644 --- a/cpp/tests/bitmask/bitmask_tests.cu +++ b/cpp/tests/bitmask/bitmask_tests.cu @@ -498,7 +498,8 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous) concatenated_bitmask.data(), gold_mask.data(), cudf::num_bitmask_words(num_elements)); } -struct MergeBitmaskTest : public cudf::test::BaseFixture {}; +struct MergeBitmaskTest : public cudf::test::BaseFixture { +}; TEST_F(MergeBitmaskTest, TestBitmaskAnd) { @@ -515,12 +516,14 @@ TEST_F(MergeBitmaskTest, TestBitmaskAnd) rmm::device_buffer result3 = cudf::bitmask_and(input3); auto odd_indices = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i%2; }); + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); auto odd = cudf::test::detail::make_null_mask(odd_indices, odd_indices + input2.num_rows()); EXPECT_EQ(nullptr, result1.data()); - CUDF_TEST_EXPECT_EQUAL_BUFFERS(result2.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); - CUDF_TEST_EXPECT_EQUAL_BUFFERS(result3.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); + CUDF_TEST_EXPECT_EQUAL_BUFFERS( + result2.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); + CUDF_TEST_EXPECT_EQUAL_BUFFERS( + result3.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); } TEST_F(MergeBitmaskTest, TestBitmaskOr) @@ -538,11 +541,13 @@ TEST_F(MergeBitmaskTest, TestBitmaskOr) rmm::device_buffer result3 = cudf::bitmask_or(input3); auto all_but_index3 = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i!=3; }); - auto null3 = cudf::test::detail::make_null_mask(all_but_index3, all_but_index3 + input2.num_rows()); + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + auto null3 = + cudf::test::detail::make_null_mask(all_but_index3, all_but_index3 + input2.num_rows()); EXPECT_EQ(nullptr, result1.data()); - CUDF_TEST_EXPECT_EQUAL_BUFFERS(result2.data(), null3.data(), cudf::num_bitmask_words(input2.num_rows())); + CUDF_TEST_EXPECT_EQUAL_BUFFERS( + result2.data(), null3.data(), cudf::num_bitmask_words(input2.num_rows())); EXPECT_EQ(nullptr, result3.data()); } From 7a7f8a7789a6d96b63658c9bec237c2c8019aad7 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 24 Feb 2021 12:47:44 -0800 Subject: [PATCH 06/10] fix java tests --- java/src/main/native/src/ColumnViewJni.cpp | 1 + .../src/test/java/ai/rapids/cudf/ColumnVectorTest.java | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 1cdd5ec29ef..5b7afb36e3f 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1315,6 +1315,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit default: JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Unsupported merge operation", 0); } + return reinterpret_cast(copy.release()); } CATCH_STD(env, 0); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 253ab77142b..b11fa243a26 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -594,16 +594,16 @@ void testOrNullReconfigureNulls() { ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); ColumnVector intResultMulti = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0); ColumnVector intResultv0v1v2 = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1, v2); - ColumnVector v2 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3"); - ColumnVector stringResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); + ColumnVector v3 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3"); + ColumnVector stringResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null); - ColumnVector noMaskResult = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR)) { + ColumnVector noMaskResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR)) { assertColumnsAreEqual(v0, intResultV0); assertColumnsAreEqual(v1, intResultV0V1); assertColumnsAreEqual(v1, intResultMulti); - assertColumnsAreEqual(v1, intResultv0v1v2); + assertColumnsAreEqual(v2, intResultv0v1v2); assertColumnsAreEqual(stringExpected, stringResult); - assertColumnsAreEqual(v2, noMaskResult); + assertColumnsAreEqual(v3, noMaskResult); } } From 08b6278b678422259f6a9f0e27593d29734ee4da Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 24 Feb 2021 13:02:22 -0800 Subject: [PATCH 07/10] convert vectors to host spans and code cleanup --- cpp/include/cudf/detail/null_mask.cuh | 98 +++++++++++---------- cpp/include/cudf/detail/null_mask.hpp | 34 ++++--- cpp/include/cudf/null_mask.hpp | 2 +- cpp/src/bitmask/null_mask.cu | 29 +++--- cpp/src/structs/structs_column_factories.cu | 24 ++--- cpp/tests/bitmask/bitmask_tests.cu | 5 +- 6 files changed, 102 insertions(+), 90 deletions(-) diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index b50712a4519..22e80227808 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -26,91 +26,85 @@ using cudf::detail::device_span; namespace cudf { -namespace { +namespace detail { /** * @brief Computes the merger of an array of bitmasks using a binary operator * * @param op The binary operator used to combine the bitmasks * @param destination The bitmask to write result into * @param source Array of source mask pointers. All masks must be of same size - * @param begin_bit Array of offsets into corresponding @p source masks. - * Must be same size as source array - * @param num_sources Number of masks in @p source array - * @param source_size Number of bits in each mask in @p source - * @param number_of_mask_words The number of words of type bitmask_type to copy + * @param source_begin_bits Array of offsets into corresponding @p source masks. + * Must be same size as source array + * @param source_size_bits Number of bits in each mask in @p source */ template __global__ void offset_bitmask_binop(Binop op, device_span destination, - device_span source, - device_span begin_bit, - size_type source_size) + device_span const source, + device_span const source_begin_bits, + size_type source_size_bits) { for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x; destination_word_index < destination.size(); destination_word_index += blockDim.x * gridDim.x) { - bitmask_type destination_word = detail::get_mask_offset_word( - source[0], destination_word_index, begin_bit[0], begin_bit[0] + source_size); + bitmask_type destination_word = + detail::get_mask_offset_word(source[0], + destination_word_index, + source_begin_bits[0], + source_begin_bits[0] + source_size_bits); for (size_type i = 1; i < source.size(); i++) { destination_word = + op(destination_word, - detail::get_mask_offset_word( - source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size)); + detail::get_mask_offset_word(source[i], + destination_word_index, + source_begin_bits[i], + source_begin_bits[i] + source_size_bits)); } destination[destination_word_index] = destination_word; } } -} // namespace -namespace detail { + /** - * @copydoc bitmask_binop(Binop op, std::vector, std::vector const&, - * size_type, rmm::mr::device_memory_resource *) + * @copydoc bitmask_binop(Binop op, host_span const, host_span + * const, size_type, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches */ template rmm::device_buffer bitmask_binop( Binop op, - std::vector const &masks, - std::vector const &begin_bits, - size_type mask_size, + host_span const masks, + host_span const masks_begin_bits, + size_type mask_size_bits, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { - CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), - "Invalid range."); - CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), - "Mask pointer cannot be null"); - rmm::device_buffer dest_mask{}; - auto num_bytes = bitmask_allocation_size_bytes(mask_size); - - rmm::device_vector d_masks(masks); - rmm::device_vector d_begin_bits(begin_bits); - - dest_mask = rmm::device_buffer{num_bytes, stream, mr}; + auto dest_mask = rmm::device_buffer{bitmask_allocation_size_bytes(mask_size_bits), stream, mr}; inplace_bitmask_binop(op, device_span(static_cast(dest_mask.data()), - num_bitmask_words(mask_size)), - device_span(d_masks.data().get(), d_masks.size()), - device_span(d_begin_bits.data().get(), d_begin_bits.size()), - mask_size, + num_bitmask_words(mask_size_bits)), + masks, + masks_begin_bits, + mask_size_bits, stream, mr); + stream.synchronize(); return dest_mask; } /** - * @brief Performs a merger of the specified bitmasks using the binary operator + * @brief Performs a merge of the specified bitmasks using the binary operator * provided, and writes in place to destination * * @param op The binary operator used to combine the bitmasks - * @param dest_mask Destination to which the AND result is written - * @param masks The list of data pointers of the bitmasks to be ANDed - * @param begin_bits The bit offsets from which each mask is to be ANDed - * @param mask_size The number of bits to be ANDed in each mask + * @param dest_mask Destination to which the merged result is written + * @param masks The list of data pointers of the bitmasks to be merged + * @param masks_begin_bits The bit offsets from which each mask is to be merged + * @param mask_size_bits The number of bits to be ANDed in each mask * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer * @return rmm::device_buffer Output bitmask @@ -119,17 +113,31 @@ template void inplace_bitmask_binop( Binop op, device_span dest_mask, - device_span masks, - device_span begin_bits, - size_type mask_size, + host_span const masks, + host_span const masks_begin_bits, + size_type mask_size_bits, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) { - CUDF_EXPECTS(mask_size > 0, "Invalid bit range."); + CUDF_EXPECTS( + std::all_of(masks_begin_bits.begin(), masks_begin_bits.end(), [](auto b) { return b >= 0; }), + "Invalid range."); + CUDF_EXPECTS(mask_size_bits > 0, "Invalid bit range."); + CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), + "Mask pointer cannot be null"); + + rmm::device_uvector d_masks(masks.size(), stream, mr); + rmm::device_uvector d_begin_bits(masks_begin_bits.size(), stream, mr); + + CUDA_TRY(cudaMemcpy(d_masks.data(), masks.data(), masks.size_bytes(), cudaMemcpyHostToDevice)); + CUDA_TRY(cudaMemcpy(d_begin_bits.data(), + masks_begin_bits.data(), + masks_begin_bits.size_bytes(), + cudaMemcpyHostToDevice)); cudf::detail::grid_1d config(dest_mask.size(), 256); offset_bitmask_binop<<>>( - op, dest_mask, masks, begin_bits, mask_size); + op, dest_mask, d_masks, d_begin_bits, mask_size_bits); CHECK_CUDA(stream.value()); } diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 4a8edf39640..9840527dfc1 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,15 +89,15 @@ rmm::device_buffer copy_bitmask( rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); /** - * @copydoc bitmask_and(std::vector, std::vector const&, size_type, - * rmm::mr::device_memory_resource *) + * @copydoc bitmask_and(host_span const, host_span const, + * size_type, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches */ rmm::device_buffer bitmask_and( - std::vector const &masks, - std::vector const &begin_bits, - size_type mask_size, + host_span const masks, + host_span const masks_begin_bits, + size_type mask_size_bits, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); @@ -111,23 +111,33 @@ rmm::device_buffer bitmask_and( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); +/** + * @copydoc cudf::bitmask_or + * + * @param[in] stream CUDA stream used for device memory operations and kernel launches. + */ +rmm::device_buffer bitmask_or( + table_view const &view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); + /** * @brief Performs a bitwise AND of the specified bitmasks, * and writes in place to destination * * @param dest_mask Destination to which the AND result is written * @param masks The list of data pointers of the bitmasks to be ANDed - * @param begin_bits The bit offsets from which each mask is to be ANDed - * @param mask_size The number of bits to be ANDed in each mask + * @param masks_begin_bits The bit offsets from which each mask is to be ANDed + * @param mask_size_bits The number of bits to be ANDed in each mask * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer * @return rmm::device_buffer Output bitmask */ void inplace_bitmask_and( - bitmask_type *dest_mask, - std::vector const &masks, - std::vector const &begin_bits, - size_type mask_size, + device_span dest_mask, + host_span const masks, + host_span const masks_begin_bits, + size_type mask_size_bits, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 663c42cec17..0d4de1a9beb 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index a1cb10aaaf4..19452093ab9 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -394,34 +395,27 @@ rmm::device_buffer copy_bitmask(column_view const &view, } // Inplace Bitwise AND of the masks -void inplace_bitmask_and(bitmask_type *dest_mask, - std::vector const &masks, - std::vector const &begin_bits, +void inplace_bitmask_and(device_span dest_mask, + host_span const masks, + host_span const begin_bits, size_type mask_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), - "Invalid range."); - CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), - "Mask pointer cannot be null"); - - rmm::device_vector d_masks(masks); - rmm::device_vector d_begin_bits(begin_bits); - inplace_bitmask_binop( [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, - device_span(dest_mask, num_bitmask_words(mask_size)), - device_span(d_masks.data().get(), d_masks.size()), - device_span(d_begin_bits.data().get(), d_begin_bits.size()), + dest_mask, + masks, + begin_bits, mask_size, stream, mr); + stream.synchronize(); } // Bitwise AND of the masks -rmm::device_buffer bitmask_and(std::vector const &masks, - std::vector const &begin_bits, +rmm::device_buffer bitmask_and(host_span const masks, + host_span const begin_bits, size_type mask_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) @@ -629,6 +623,7 @@ rmm::device_buffer bitmask_and(table_view const &view, return null_mask; } +// Returns the bitwise OR of the null masks of all columns in the table view rmm::device_buffer bitmask_or(table_view const &view, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index 5f92fea76f5..2bd71767265 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,6 @@ #include #include - namespace cudf { namespace { // Helper function to superimpose validity of parent struct @@ -44,18 +43,19 @@ void superimpose_parent_nullmask(bitmask_type const* parent_null_mask, // Child should have a null mask. // `AND` the child's null mask with the parent's. - auto data_type{child.type()}; - auto num_rows{child.size()}; - auto current_child_mask = child.mutable_view().null_mask(); - cudf::detail::inplace_bitmask_and(current_child_mask, - {reinterpret_cast(parent_null_mask), - reinterpret_cast(current_child_mask)}, - {0, 0}, - child.size(), - stream, - mr); + std::vector masks{ + reinterpret_cast(parent_null_mask), + reinterpret_cast(current_child_mask)}; + std::vector begin_bits{0, 0}; + cudf::detail::inplace_bitmask_and( + detail::device_span(current_child_mask, num_bitmask_words(child.size())), + masks, + begin_bits, + child.size(), + stream, + mr); child.set_null_count(UNKNOWN_NULL_COUNT); } diff --git a/cpp/tests/bitmask/bitmask_tests.cu b/cpp/tests/bitmask/bitmask_tests.cu index 32af582b549..2f820da687e 100644 --- a/cpp/tests/bitmask/bitmask_tests.cu +++ b/cpp/tests/bitmask/bitmask_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include #include #include #include @@ -23,10 +22,10 @@ #include #include #include -#include "rmm/device_buffer.hpp" #include #include +#include struct BitmaskUtilitiesTest : public cudf::test::BaseFixture { }; From 47a0b874215c40757fdbae665eeef852451c2ef3 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 1 Mar 2021 18:50:52 -0800 Subject: [PATCH 08/10] spans with const types and async memcopy --- cpp/include/cudf/detail/null_mask.cuh | 25 ++++++++++++---------- cpp/include/cudf/detail/null_mask.hpp | 8 +++---- cpp/src/bitmask/null_mask.cu | 8 +++---- java/src/main/native/src/ColumnViewJni.cpp | 2 +- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 22e80227808..7b25080c5d0 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -40,8 +40,8 @@ namespace detail { template __global__ void offset_bitmask_binop(Binop op, device_span destination, - device_span const source, - device_span const source_begin_bits, + device_span source, + device_span source_begin_bits, size_type source_size_bits) { for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x; @@ -75,8 +75,8 @@ __global__ void offset_bitmask_binop(Binop op, template rmm::device_buffer bitmask_binop( Binop op, - host_span const masks, - host_span const masks_begin_bits, + host_span masks, + host_span masks_begin_bits, size_type mask_size_bits, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) @@ -113,8 +113,8 @@ template void inplace_bitmask_binop( Binop op, device_span dest_mask, - host_span const masks, - host_span const masks_begin_bits, + host_span masks, + host_span masks_begin_bits, size_type mask_size_bits, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) @@ -129,13 +129,16 @@ void inplace_bitmask_binop( rmm::device_uvector d_masks(masks.size(), stream, mr); rmm::device_uvector d_begin_bits(masks_begin_bits.size(), stream, mr); - CUDA_TRY(cudaMemcpy(d_masks.data(), masks.data(), masks.size_bytes(), cudaMemcpyHostToDevice)); - CUDA_TRY(cudaMemcpy(d_begin_bits.data(), - masks_begin_bits.data(), - masks_begin_bits.size_bytes(), - cudaMemcpyHostToDevice)); + CUDA_TRY(cudaMemcpyAsync( + d_masks.data(), masks.data(), masks.size_bytes(), cudaMemcpyHostToDevice, stream.value())); + CUDA_TRY(cudaMemcpyAsync(d_begin_bits.data(), + masks_begin_bits.data(), + masks_begin_bits.size_bytes(), + cudaMemcpyHostToDevice, + stream.value())); cudf::detail::grid_1d config(dest_mask.size(), 256); + stream.synchronize(); offset_bitmask_binop<<>>( op, dest_mask, d_masks, d_begin_bits, mask_size_bits); CHECK_CUDA(stream.value()); diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 9840527dfc1..b0870ef8d9a 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -95,8 +95,8 @@ rmm::device_buffer copy_bitmask( * @param stream CUDA stream used for device memory operations and kernel launches */ rmm::device_buffer bitmask_and( - host_span const masks, - host_span const masks_begin_bits, + host_span masks, + host_span masks_begin_bits, size_type mask_size_bits, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); @@ -135,8 +135,8 @@ rmm::device_buffer bitmask_or( */ void inplace_bitmask_and( device_span dest_mask, - host_span const masks, - host_span const masks_begin_bits, + host_span masks, + host_span masks_begin_bits, size_type mask_size_bits, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 19452093ab9..dc10317ecdc 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -396,8 +396,8 @@ rmm::device_buffer copy_bitmask(column_view const &view, // Inplace Bitwise AND of the masks void inplace_bitmask_and(device_span dest_mask, - host_span const masks, - host_span const begin_bits, + host_span masks, + host_span begin_bits, size_type mask_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) @@ -414,8 +414,8 @@ void inplace_bitmask_and(device_span dest_mask, } // Bitwise AND of the masks -rmm::device_buffer bitmask_and(host_span const masks, - host_span const begin_bits, +rmm::device_buffer bitmask_and(host_span masks, + host_span begin_bits, size_type mask_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 5b7afb36e3f..03d4285ad10 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1313,7 +1313,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit copy->set_null_mask(cudf::bitmask_or(*input_table)); break; default: - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Unsupported merge operation", 0); + JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS, "Unsupported merge operation", 0); } return reinterpret_cast(copy.release()); From 83d8c02a7f9e060539b91a0945f2eb5be2cc2ef3 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 2 Mar 2021 03:09:13 -0800 Subject: [PATCH 09/10] remove unecessary synchronize --- cpp/include/cudf/detail/null_mask.cuh | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 7b25080c5d0..990906a640e 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -138,7 +138,6 @@ void inplace_bitmask_binop( stream.value())); cudf::detail::grid_1d config(dest_mask.size(), 256); - stream.synchronize(); offset_bitmask_binop<<>>( op, dest_mask, d_masks, d_begin_bits, mask_size_bits); CHECK_CUDA(stream.value()); From b697b0ee2e521553d08d96402db812949ebc6ae4 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 2 Mar 2021 10:57:45 -0800 Subject: [PATCH 10/10] move sync --- cpp/include/cudf/detail/null_mask.cuh | 2 +- cpp/src/bitmask/null_mask.cu | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 990906a640e..daefa2a5ffd 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -92,7 +92,6 @@ rmm::device_buffer bitmask_binop( stream, mr); - stream.synchronize(); return dest_mask; } @@ -141,6 +140,7 @@ void inplace_bitmask_binop( offset_bitmask_binop<<>>( op, dest_mask, d_masks, d_begin_bits, mask_size_bits); CHECK_CUDA(stream.value()); + stream.synchronize(); } } // namespace detail diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index dc10317ecdc..d87bbc02fc3 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -410,7 +410,6 @@ void inplace_bitmask_and(device_span dest_mask, mask_size, stream, mr); - stream.synchronize(); } // Bitwise AND of the masks