Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bitmask_or implementation with bitmask refactor #7406

Merged
merged 10 commits into from
Mar 8, 2021
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,7 @@ namespace detail {
* @brief Convenience function to get offset word from a bitmask
*
* @see copy_offset_bitmask
* @see offset_bitmask_and
* @see offset_bitmask_binop
*/
__device__ inline bitmask_type get_mask_offset_word(bitmask_type const* __restrict__ source,
size_type destination_word_index,
Expand Down
148 changes: 148 additions & 0 deletions cpp/include/cudf/detail/null_mask.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/null_mask.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

using cudf::detail::device_span;

namespace cudf {
namespace detail {
/**
* @brief Computes the merger of an array of bitmasks using a binary operator
*
* @param op The binary operator used to combine the bitmasks
* @param destination The bitmask to write result into
* @param source Array of source mask pointers. All masks must be of same size
* @param source_begin_bits Array of offsets into corresponding @p source masks.
* Must be same size as source array
* @param source_size_bits Number of bits in each mask in @p source
*/
template <typename Binop>
__global__ void offset_bitmask_binop(Binop op,
device_span<bitmask_type> destination,
device_span<bitmask_type const *> source,
device_span<size_type const> source_begin_bits,
size_type source_size_bits)
{
for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x;
destination_word_index < destination.size();
destination_word_index += blockDim.x * gridDim.x) {
bitmask_type destination_word =
detail::get_mask_offset_word(source[0],
destination_word_index,
source_begin_bits[0],
source_begin_bits[0] + source_size_bits);
for (size_type i = 1; i < source.size(); i++) {
destination_word =

op(destination_word,
detail::get_mask_offset_word(source[i],
destination_word_index,
source_begin_bits[i],
source_begin_bits[i] + source_size_bits));
}

destination[destination_word_index] = destination_word;
}
}

/**
* @copydoc bitmask_binop(Binop op, host_span<bitmask_type const *> const, host_span<size_type>
* const, size_type, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches
*/
template <typename Binop>
rmm::device_buffer bitmask_binop(
Binop op,
host_span<bitmask_type const *> masks,
host_span<size_type const> masks_begin_bits,
size_type mask_size_bits,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource())
{
auto dest_mask = rmm::device_buffer{bitmask_allocation_size_bytes(mask_size_bits), stream, mr};

inplace_bitmask_binop(op,
device_span<bitmask_type>(static_cast<bitmask_type *>(dest_mask.data()),
num_bitmask_words(mask_size_bits)),
masks,
masks_begin_bits,
mask_size_bits,
stream,
mr);

stream.synchronize();
rwlee marked this conversation as resolved.
Show resolved Hide resolved
return dest_mask;
rwlee marked this conversation as resolved.
Show resolved Hide resolved
}

/**
* @brief Performs a merge of the specified bitmasks using the binary operator
* provided, and writes in place to destination
*
* @param op The binary operator used to combine the bitmasks
* @param dest_mask Destination to which the merged result is written
* @param masks The list of data pointers of the bitmasks to be merged
* @param masks_begin_bits The bit offsets from which each mask is to be merged
* @param mask_size_bits The number of bits to be ANDed in each mask
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return rmm::device_buffer Output bitmask
*/
template <typename Binop>
void inplace_bitmask_binop(
Binop op,
device_span<bitmask_type> dest_mask,
host_span<bitmask_type const *> masks,
host_span<size_type const> masks_begin_bits,
size_type mask_size_bits,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource())
{
CUDF_EXPECTS(
std::all_of(masks_begin_bits.begin(), masks_begin_bits.end(), [](auto b) { return b >= 0; }),
"Invalid range.");
CUDF_EXPECTS(mask_size_bits > 0, "Invalid bit range.");
CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }),
"Mask pointer cannot be null");

rmm::device_uvector<bitmask_type const *> d_masks(masks.size(), stream, mr);
rmm::device_uvector<size_type> d_begin_bits(masks_begin_bits.size(), stream, mr);

CUDA_TRY(cudaMemcpyAsync(
d_masks.data(), masks.data(), masks.size_bytes(), cudaMemcpyHostToDevice, stream.value()));
CUDA_TRY(cudaMemcpyAsync(d_begin_bits.data(),
masks_begin_bits.data(),
masks_begin_bits.size_bytes(),
cudaMemcpyHostToDevice,
stream.value()));

cudf::detail::grid_1d config(dest_mask.size(), 256);
offset_bitmask_binop<<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
op, dest_mask, d_masks, d_begin_bits, mask_size_bits);
CHECK_CUDA(stream.value());
}

} // namespace detail

} // namespace cudf
35 changes: 23 additions & 12 deletions cpp/include/cudf/detail/null_mask.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,6 +16,7 @@
#pragma once

#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand Down Expand Up @@ -88,15 +89,15 @@ rmm::device_buffer copy_bitmask(
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @copydoc bitmask_and(std::vector<bitmask_type const*>, std::vector<size_type> const&, size_type,
* rmm::mr::device_memory_resource *)
* @copydoc bitmask_and(host_span<bitmask_type const *> const, host_span<size_type> const,
* size_type, rmm::mr::device_memory_resource *)
*
* @param stream CUDA stream used for device memory operations and kernel launches
*/
rmm::device_buffer bitmask_and(
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
host_span<bitmask_type const *> masks,
host_span<size_type const> masks_begin_bits,
size_type mask_size_bits,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

Expand All @@ -110,23 +111,33 @@ rmm::device_buffer bitmask_and(
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::bitmask_or
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
rmm::device_buffer bitmask_or(
table_view const &view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

/**
* @brief Performs a bitwise AND of the specified bitmasks,
* and writes in place to destination
*
* @param dest_mask Destination to which the AND result is written
* @param masks The list of data pointers of the bitmasks to be ANDed
* @param begin_bits The bit offsets from which each mask is to be ANDed
* @param mask_size The number of bits to be ANDed in each mask
* @param masks_begin_bits The bit offsets from which each mask is to be ANDed
* @param mask_size_bits The number of bits to be ANDed in each mask
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return rmm::device_buffer Output bitmask
*/
void inplace_bitmask_and(
bitmask_type *dest_mask,
std::vector<bitmask_type const *> const &masks,
std::vector<size_type> const &begin_bits,
size_type mask_size,
device_span<bitmask_type> dest_mask,
host_span<bitmask_type const *> masks,
host_span<size_type const> masks_begin_bits,
size_type mask_size_bits,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());

Expand Down
16 changes: 15 additions & 1 deletion cpp/include/cudf/null_mask.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -220,5 +220,19 @@ rmm::device_buffer bitmask_and(
table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a bitwise OR of the bitmasks of columns of a table
*
* If any of the columns isn't nullable, it is considered all valid.
* If no column in the table is nullable, an empty bitmask is returned.
*
* @param view The table of columns
* @param mr Device memory resource used to allocate the returned device_buffer
* @return rmm::device_buffer Output bitmask
*/
rmm::device_buffer bitmask_or(
table_view const& view,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of group
} // namespace cudf
Loading