diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh
index 9f5c2f33aa6..23c8557f3bc 100644
--- a/cpp/include/cudf/column/column_device_view.cuh
+++ b/cpp/include/cudf/column/column_device_view.cuh
@@ -805,7 +805,7 @@ namespace detail {
  * @brief Convenience function to get offset word from a bitmask
  *
  * @see copy_offset_bitmask
- * @see offset_bitmask_and
+ * @see offset_bitmask_binop
  */
 __device__ inline bitmask_type get_mask_offset_word(bitmask_type const* __restrict__ source,
                                                     size_type destination_word_index,
diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh
new file mode 100644
index 00000000000..daefa2a5ffd
--- /dev/null
+++ b/cpp/include/cudf/detail/null_mask.cuh
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+using cudf::detail::device_span;
+
+namespace cudf {
+namespace detail {
+/**
+ * @brief Computes the merger of an array of bitmasks using a binary operator
+ *
+ * @param op The binary operator used to combine the bitmasks
+ * @param destination The bitmask to write result into
+ * @param source Array of source mask pointers. All masks must be of same size
+ * @param source_begin_bits Array of offsets into corresponding @p source masks.
+ *                          Must be same size as source array
+ * @param source_size_bits Number of bits in each mask in @p source
+ */
+template <typename Binop>
+__global__ void offset_bitmask_binop(Binop op,
+                                     device_span<bitmask_type> destination,
+                                     device_span<bitmask_type const *> source,
+                                     device_span<size_type const> source_begin_bits,
+                                     size_type source_size_bits)
+{
+  for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x;
+       destination_word_index < destination.size();
+       destination_word_index += blockDim.x * gridDim.x) {
+    bitmask_type destination_word =
+      detail::get_mask_offset_word(source[0],
+                                   destination_word_index,
+                                   source_begin_bits[0],
+                                   source_begin_bits[0] + source_size_bits);
+    for (size_type i = 1; i < source.size(); i++) {
+      destination_word =
+        op(destination_word,
+           detail::get_mask_offset_word(source[i],
+                                        destination_word_index,
+                                        source_begin_bits[i],
+                                        source_begin_bits[i] + source_size_bits));
+    }
+
+    destination[destination_word_index] = destination_word;
+  }
+}
+
+/**
+ * @copydoc bitmask_binop(Binop op, host_span<bitmask_type const *> const, host_span<size_type> const,
+ * size_type, rmm::mr::device_memory_resource *)
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ */
+template <typename Binop>
+rmm::device_buffer bitmask_binop(
+  Binop op,
+  host_span<bitmask_type const *> masks,
+  host_span<size_type const> masks_begin_bits,
+  size_type mask_size_bits,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource())
+{
+  auto dest_mask = rmm::device_buffer{bitmask_allocation_size_bytes(mask_size_bits), stream, mr};
+
+  inplace_bitmask_binop(op,
+                        device_span<bitmask_type>(static_cast<bitmask_type *>(dest_mask.data()),
+                                                  num_bitmask_words(mask_size_bits)),
+                        masks,
+                        masks_begin_bits,
+                        mask_size_bits,
+                        stream,
+                        mr);
+
+  return dest_mask;
+}
+
+/**
+ * @brief Performs a merge of the specified bitmasks using the binary operator
+ * provided, and writes in place to destination
+ *
+ * @param op The binary operator used to combine the bitmasks
+ * @param dest_mask Destination to which the merged result is written
+ * @param masks The list of data pointers of the bitmasks to be merged
+ * @param masks_begin_bits The bit offsets from which each mask is to be merged
+ * @param mask_size_bits The number of bits to be merged in each mask
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate temporary device memory
+ */
+template <typename Binop>
+void inplace_bitmask_binop(
+  Binop op,
+  device_span<bitmask_type> dest_mask,
+  host_span<bitmask_type const *> masks,
+  host_span<size_type const> masks_begin_bits,
+  size_type mask_size_bits,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource())
+{
+  CUDF_EXPECTS(
+    std::all_of(masks_begin_bits.begin(), masks_begin_bits.end(), [](auto b) { return b >= 0; }),
+    "Invalid range.");
+  CUDF_EXPECTS(mask_size_bits > 0, "Invalid bit range.");
+  CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }),
+               "Mask pointer cannot be null");
+
+  rmm::device_uvector<bitmask_type const *> d_masks(masks.size(), stream, mr);
+  rmm::device_uvector<size_type> d_begin_bits(masks_begin_bits.size(), stream, mr);
+
+  CUDA_TRY(cudaMemcpyAsync(
+    d_masks.data(), masks.data(), masks.size_bytes(), cudaMemcpyHostToDevice, stream.value()));
+  CUDA_TRY(cudaMemcpyAsync(d_begin_bits.data(),
+                           masks_begin_bits.data(),
+                           masks_begin_bits.size_bytes(),
+                           cudaMemcpyHostToDevice,
+                           stream.value()));
+
+  cudf::detail::grid_1d config(dest_mask.size(), 256);
+  offset_bitmask_binop<<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
+    op, dest_mask, d_masks, d_begin_bits,
+    mask_size_bits);
+  CHECK_CUDA(stream.value());
+  stream.synchronize();
+}
+
+}  // namespace detail
+
+}  // namespace cudf
diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp
index 2f2bc91cb74..b0870ef8d9a 100644
--- a/cpp/include/cudf/detail/null_mask.hpp
+++ b/cpp/include/cudf/detail/null_mask.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
 #pragma once
 
 #include
+#include
 
 #include
@@ -88,15 +89,15 @@ rmm::device_buffer copy_bitmask(
   rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());
 
 /**
- * @copydoc bitmask_and(std::vector<bitmask_type const *>, std::vector<size_type> const&, size_type,
- * rmm::mr::device_memory_resource *)
+ * @copydoc bitmask_and(host_span<bitmask_type const *> const, host_span<size_type> const,
+ * size_type, rmm::mr::device_memory_resource *)
  *
  * @param stream CUDA stream used for device memory operations and kernel launches
  */
 rmm::device_buffer bitmask_and(
-  std::vector<bitmask_type const *> const &masks,
-  std::vector<size_type> const &begin_bits,
-  size_type mask_size,
+  host_span<bitmask_type const *> masks,
+  host_span<size_type const> masks_begin_bits,
+  size_type mask_size_bits,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());
 
@@ -110,23 +111,33 @@ rmm::device_buffer bitmask_and(
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @copydoc cudf::bitmask_or
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+rmm::device_buffer bitmask_or(
+  table_view const &view,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Performs a bitwise AND of the specified bitmasks,
  * and writes in place to destination
  *
  * @param dest_mask Destination to which the AND result is written
  * @param masks The list of data pointers of the bitmasks to be ANDed
- * @param begin_bits The bit offsets from which each mask is to be ANDed
- * @param mask_size The number of bits to be ANDed in each mask
+ * @param masks_begin_bits The bit offsets from which each mask is to be ANDed
+ * @param mask_size_bits The number of bits to be ANDed in each mask
  * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource used to allocate the returned device_buffer
  * @return rmm::device_buffer Output bitmask
  */
 void inplace_bitmask_and(
-  bitmask_type *dest_mask,
-  std::vector<bitmask_type const *> const &masks,
-  std::vector<size_type> const &begin_bits,
-  size_type mask_size,
+  device_span<bitmask_type> dest_mask,
+  host_span<bitmask_type const *> masks,
+  host_span<size_type const> masks_begin_bits,
+  size_type mask_size_bits,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());
diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp
index 5e1f0f0802e..0d4de1a9beb 100644
--- a/cpp/include/cudf/null_mask.hpp
+++ b/cpp/include/cudf/null_mask.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
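The detail helpers declared above are operator-agnostic: later in this diff, `bitmask_and` and `bitmask_or` both become thin wrappers around `bitmask_binop` with a device lambda. As a rough sketch of how a further operation could reuse the same machinery (not part of this change; the name `bitmask_xor` and the exact span template arguments are assumptions on my part), a third operator would only need a different lambda:

```cpp
// Hypothetical sketch only -- mirrors how bitmask_and/bitmask_or are written in
// this PR. Assumes the span aliases used by cudf::detail at this point in time.
#include <cudf/detail/null_mask.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

namespace cudf {
namespace detail {

// XOR-merge the given bitmasks; each mask contributes mask_size_bits bits
// starting at its corresponding begin_bits offset.
rmm::device_buffer bitmask_xor(host_span<bitmask_type const *> masks,
                               host_span<size_type const> begin_bits,
                               size_type mask_size_bits,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource *mr)
{
  return bitmask_binop(
    [] __device__(bitmask_type left, bitmask_type right) { return left ^ right; },
    masks,
    begin_bits,
    mask_size_bits,
    stream,
    mr);
}

}  // namespace detail
}  // namespace cudf
```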
@@ -220,5 +220,19 @@ rmm::device_buffer bitmask_and( table_view const& view, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns a bitwise OR of the bitmasks of columns of a table + * + * If any of the columns isn't nullable, it is considered all valid. + * If no column in the table is nullable, an empty bitmask is returned. + * + * @param view The table of columns + * @param mr Device memory resource used to allocate the returned device_buffer + * @return rmm::device_buffer Output bitmask + */ +rmm::device_buffer bitmask_or( + table_view const& view, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 8cdcefe9796..d87bbc02fc3 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -23,10 +24,12 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -41,6 +44,8 @@ #include #include +using cudf::detail::device_span; + namespace cudf { size_type state_null_count(mask_state state, size_type size) { @@ -315,37 +320,6 @@ __global__ void copy_offset_bitmask(bitmask_type *__restrict__ destination, } } -/** - * @brief Computes the bitwise AND of an array of bitmasks - * - * @param destination The bitmask to write result into - * @param source Array of source mask pointers. All masks must be of same size - * @param begin_bit Array of offsets into corresponding @p source masks. 
- * Must be same size as source array - * @param num_sources Number of masks in @p source array - * @param source_size Number of bits in each mask in @p source - * @param number_of_mask_words The number of words of type bitmask_type to copy - */ -__global__ void offset_bitmask_and(bitmask_type *__restrict__ destination, - bitmask_type const *const *__restrict__ source, - size_type const *__restrict__ begin_bit, - size_type num_sources, - size_type source_size, - size_type number_of_mask_words) -{ - for (size_type destination_word_index = threadIdx.x + blockIdx.x * blockDim.x; - destination_word_index < number_of_mask_words; - destination_word_index += blockDim.x * gridDim.x) { - bitmask_type destination_word = ~bitmask_type{0}; // All bits 1 - for (size_type i = 0; i < num_sources; i++) { - destination_word &= detail::get_mask_offset_word( - source[i], destination_word_index, begin_bit[i], begin_bit[i] + source_size); - } - - destination[destination_word_index] = destination_word; - } -} - // convert [first_bit_index,last_bit_index) to // [first_word_index,last_word_index) struct to_word_index : public thrust::unary_function { @@ -421,51 +395,37 @@ rmm::device_buffer copy_bitmask(column_view const &view, } // Inplace Bitwise AND of the masks -void inplace_bitmask_and(bitmask_type *dest_mask, - std::vector const &masks, - std::vector const &begin_bits, +void inplace_bitmask_and(device_span dest_mask, + host_span masks, + host_span begin_bits, size_type mask_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - CUDF_EXPECTS(std::all_of(begin_bits.begin(), begin_bits.end(), [](auto b) { return b >= 0; }), - "Invalid range."); - CUDF_EXPECTS(mask_size > 0, "Invalid bit range."); - CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), - "Mask pointer cannot be null"); - - auto number_of_mask_words = num_bitmask_words(mask_size); - - rmm::device_vector d_masks(masks); - rmm::device_vector d_begin_bits(begin_bits); - - cudf::detail::grid_1d config(number_of_mask_words, 256); - offset_bitmask_and<<>>( + inplace_bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, dest_mask, - d_masks.data().get(), - d_begin_bits.data().get(), - d_masks.size(), + masks, + begin_bits, mask_size, - number_of_mask_words); - - CHECK_CUDA(stream.value()); + stream, + mr); } // Bitwise AND of the masks -rmm::device_buffer bitmask_and(std::vector const &masks, - std::vector const &begin_bits, +rmm::device_buffer bitmask_and(host_span masks, + host_span begin_bits, size_type mask_size, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr) { - rmm::device_buffer dest_mask{}; - auto num_bytes = bitmask_allocation_size_bytes(mask_size); - - dest_mask = rmm::device_buffer{num_bytes, stream, mr}; - inplace_bitmask_and( - static_cast(dest_mask.data()), masks, begin_bits, mask_size, stream, mr); - - return dest_mask; + return bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, + masks, + begin_bits, + mask_size, + stream, + mr); } cudf::size_type count_set_bits(bitmask_type const *bitmask, @@ -650,12 +610,48 @@ rmm::device_buffer bitmask_and(table_view const &view, } if (masks.size() > 0) { - return cudf::detail::bitmask_and(masks, offsets, view.num_rows(), stream, mr); + return cudf::detail::bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left & right; }, + masks, + offsets, + view.num_rows(), + stream, + mr); } return null_mask; } +// 
Returns the bitwise OR of the null masks of all columns in the table view +rmm::device_buffer bitmask_or(table_view const &view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource *mr) +{ + CUDF_FUNC_RANGE(); + rmm::device_buffer null_mask{0, stream, mr}; + if (view.num_rows() == 0 or view.num_columns() == 0) { return null_mask; } + + std::vector masks; + std::vector offsets; + for (auto &&col : view) { + if (col.nullable()) { + masks.push_back(col.null_mask()); + offsets.push_back(col.offset()); + } + } + + if (static_cast(masks.size()) == view.num_columns()) { + return cudf::detail::bitmask_binop( + [] __device__(bitmask_type left, bitmask_type right) { return left | right; }, + masks, + offsets, + view.num_rows(), + stream, + mr); + } + + return null_mask; +} } // namespace detail // Count non-zero bits in the specified range @@ -708,4 +704,9 @@ rmm::device_buffer bitmask_and(table_view const &view, rmm::mr::device_memory_re return detail::bitmask_and(view, rmm::cuda_stream_default, mr); } +rmm::device_buffer bitmask_or(table_view const &view, rmm::mr::device_memory_resource *mr) +{ + return detail::bitmask_or(view, rmm::cuda_stream_default, mr); +} + } // namespace cudf diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index 5f92fea76f5..2bd71767265 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,6 @@ #include #include - namespace cudf { namespace { // Helper function to superimpose validity of parent struct @@ -44,18 +43,19 @@ void superimpose_parent_nullmask(bitmask_type const* parent_null_mask, // Child should have a null mask. // `AND` the child's null mask with the parent's. - auto data_type{child.type()}; - auto num_rows{child.size()}; - auto current_child_mask = child.mutable_view().null_mask(); - cudf::detail::inplace_bitmask_and(current_child_mask, - {reinterpret_cast(parent_null_mask), - reinterpret_cast(current_child_mask)}, - {0, 0}, - child.size(), - stream, - mr); + std::vector masks{ + reinterpret_cast(parent_null_mask), + reinterpret_cast(current_child_mask)}; + std::vector begin_bits{0, 0}; + cudf::detail::inplace_bitmask_and( + detail::device_span(current_child_mask, num_bitmask_words(child.size())), + masks, + begin_bits, + child.size(), + stream, + mr); child.set_null_count(UNKNOWN_NULL_COUNT); } diff --git a/cpp/tests/bitmask/bitmask_tests.cu b/cpp/tests/bitmask/bitmask_tests.cu index 8afa4faa9e3..2f820da687e 100644 --- a/cpp/tests/bitmask/bitmask_tests.cu +++ b/cpp/tests/bitmask/bitmask_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
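The tests below exercise the asymmetry between the two public entry points: `bitmask_and` produces a mask whenever at least one column is nullable, while `bitmask_or` (per the implementation above) produces one only when every column carries a null mask; otherwise the result is an empty buffer, meaning "all rows valid". For reference, a minimal caller-side sketch (the helper function and column names are illustrative, not from this PR; the cudf calls are the ones added here):

```cpp
// Attach an "at least one input is valid" mask to `result`, assuming `a` and `b`
// have the same number of rows as `result`. Illustrative sketch only.
#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/table/table_view.hpp>

#include <memory>
#include <utility>

std::unique_ptr<cudf::column> set_any_valid_mask(cudf::column_view const &a,
                                                 cudf::column_view const &b,
                                                 std::unique_ptr<cudf::column> result)
{
  auto mask = cudf::bitmask_or(cudf::table_view{{a, b}});
  // An empty buffer means at least one input had no null mask, i.e. every row is
  // already considered valid, so there is nothing to attach.
  if (mask.size() > 0) {
    result->set_null_mask(std::move(mask), cudf::UNKNOWN_NULL_COUNT);
  }
  return result;
}
```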
@@ -25,6 +25,7 @@ #include #include +#include struct BitmaskUtilitiesTest : public cudf::test::BaseFixture { }; @@ -413,7 +414,7 @@ TEST_F(CopyBitmaskTest, TestZeroOffset) cleanEndWord(splice_mask, begin_bit, end_bit); auto number_of_bits = end_bit - begin_bit; CUDF_TEST_EXPECT_EQUAL_BUFFERS( - gold_splice_mask.data(), splice_mask.data(), number_of_bits / CHAR_BIT); + gold_splice_mask.data(), splice_mask.data(), cudf::num_bitmask_words(number_of_bits)); } TEST_F(CopyBitmaskTest, TestNonZeroOffset) @@ -433,7 +434,7 @@ TEST_F(CopyBitmaskTest, TestNonZeroOffset) cleanEndWord(splice_mask, begin_bit, end_bit); auto number_of_bits = end_bit - begin_bit; CUDF_TEST_EXPECT_EQUAL_BUFFERS( - gold_splice_mask.data(), splice_mask.data(), number_of_bits / CHAR_BIT); + gold_splice_mask.data(), splice_mask.data(), cudf::num_bitmask_words(number_of_bits)); } TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorContiguous) @@ -468,7 +469,7 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorContiguous) rmm::device_buffer concatenated_bitmask = cudf::concatenate_masks(views); cleanEndWord(concatenated_bitmask, 0, num_elements); CUDF_TEST_EXPECT_EQUAL_BUFFERS( - concatenated_bitmask.data(), gold_mask.data(), num_elements / CHAR_BIT); + concatenated_bitmask.data(), gold_mask.data(), cudf::num_bitmask_words(num_elements)); } TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous) @@ -493,7 +494,60 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous) rmm::device_buffer concatenated_bitmask = cudf::concatenate_masks(views); cleanEndWord(concatenated_bitmask, 0, num_elements); CUDF_TEST_EXPECT_EQUAL_BUFFERS( - concatenated_bitmask.data(), gold_mask.data(), num_elements / CHAR_BIT); + concatenated_bitmask.data(), gold_mask.data(), cudf::num_bitmask_words(num_elements)); +} + +struct MergeBitmaskTest : public cudf::test::BaseFixture { +}; + +TEST_F(MergeBitmaskTest, TestBitmaskAnd) +{ + cudf::test::fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {0, 1, 1, 1, 0}); + cudf::test::fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 1, 0}); + cudf::test::fixed_width_column_wrapper const bools_col3({0, 2, 1, 0, 255}); + + auto const input1 = cudf::table_view({bools_col3}); + auto const input2 = cudf::table_view({bools_col1, bools_col2}); + auto const input3 = cudf::table_view({bools_col1, bools_col2, bools_col3}); + + rmm::device_buffer result1 = cudf::bitmask_and(input1); + rmm::device_buffer result2 = cudf::bitmask_and(input2); + rmm::device_buffer result3 = cudf::bitmask_and(input3); + + auto odd_indices = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; }); + auto odd = cudf::test::detail::make_null_mask(odd_indices, odd_indices + input2.num_rows()); + + EXPECT_EQ(nullptr, result1.data()); + CUDF_TEST_EXPECT_EQUAL_BUFFERS( + result2.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); + CUDF_TEST_EXPECT_EQUAL_BUFFERS( + result3.data(), odd.data(), cudf::num_bitmask_words(input2.num_rows())); +} + +TEST_F(MergeBitmaskTest, TestBitmaskOr) +{ + cudf::test::fixed_width_column_wrapper const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1}); + cudf::test::fixed_width_column_wrapper const bools_col2({0, 2, 1, 0, 255}, {0, 0, 1, 0, 1}); + cudf::test::fixed_width_column_wrapper const bools_col3({0, 2, 1, 0, 255}); + + auto const input1 = cudf::table_view({bools_col3}); + auto const input2 = cudf::table_view({bools_col1, bools_col2}); + auto const input3 = cudf::table_view({bools_col1, bools_col2, bools_col3}); + + rmm::device_buffer result1 = 
cudf::bitmask_or(input1); + rmm::device_buffer result2 = cudf::bitmask_or(input2); + rmm::device_buffer result3 = cudf::bitmask_or(input3); + + auto all_but_index3 = + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; }); + auto null3 = + cudf::test::detail::make_null_mask(all_but_index3, all_but_index3 + input2.num_rows()); + + EXPECT_EQ(nullptr, result1.data()); + CUDF_TEST_EXPECT_EQUAL_BUFFERS( + result2.data(), null3.data(), cudf::num_bitmask_words(input2.num_rows())); + EXPECT_EQ(nullptr, result3.data()); } CUDF_TEST_PROGRAM_MAIN() diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 1dce52f7105..03bc3654110 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -520,7 +520,7 @@ public final ColumnVector normalizeNANsAndZeros() { * @return the new ColumnVector with merged null mask. */ public final ColumnVector mergeAndSetValidity(BinaryOp mergeOp, ColumnView... columns) { - assert mergeOp == BinaryOp.BITWISE_AND : "Only BITWISE_AND supported right now"; + assert mergeOp == BinaryOp.BITWISE_AND || mergeOp == BinaryOp.BITWISE_OR : "Only BITWISE_AND and BITWISE_OR supported right now"; long[] columnViews = new long[columns.length]; long size = getRowCount(); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 82e71b04a2f..03d4285ad10 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -1305,8 +1305,15 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit cudf::table_view *input_table = new cudf::table_view(column_views); cudf::binary_operator op = static_cast(bin_op); - if(op == cudf::binary_operator::BITWISE_AND) { - copy->set_null_mask(cudf::bitmask_and(*input_table)); + switch(op) { + case cudf::binary_operator::BITWISE_AND: + copy->set_null_mask(cudf::bitmask_and(*input_table)); + break; + case cudf::binary_operator::BITWISE_OR: + copy->set_null_mask(cudf::bitmask_or(*input_table)); + break; + default: + JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS, "Unsupported merge operation", 0); } return reinterpret_cast(copy.release()); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index cb1f792b99e..b11fa243a26 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -571,7 +571,7 @@ void testSpark32BitMurmur3HashMixed() { } @Test - void testNullReconfigureNulls() { + void testAndNullReconfigureNulls() { try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); ColumnVector intResult = v1.mergeAndSetValidity(BinaryOp.BITWISE_AND, v0); @@ -585,6 +585,28 @@ void testNullReconfigureNulls() { } } + @Test + void testOrNullReconfigureNulls() { + try (ColumnVector v0 = ColumnVector.fromBoxedInts(0, 100, null, null, Integer.MIN_VALUE, null); + ColumnVector v1 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, null); + ColumnVector v2 = ColumnVector.fromBoxedInts(0, 100, 1, 2, Integer.MIN_VALUE, Integer.MAX_VALUE); + ColumnVector intResultV0 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0); + ColumnVector intResultV0V1 = v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1); + ColumnVector intResultMulti = 
v1.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v0, v1, v1, v0, v1, v0);
+         ColumnVector intResultv0v1v2 = v2.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1, v2);
+         ColumnVector v3 = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", "3");
+         ColumnVector stringResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR, v0, v1);
+         ColumnVector stringExpected = ColumnVector.fromStrings("0", "100", "1", "2", "MIN_VALUE", null);
+         ColumnVector noMaskResult = v3.mergeAndSetValidity(BinaryOp.BITWISE_OR)) {
+      assertColumnsAreEqual(v0, intResultV0);
+      assertColumnsAreEqual(v1, intResultV0V1);
+      assertColumnsAreEqual(v1, intResultMulti);
+      assertColumnsAreEqual(v2, intResultv0v1v2);
+      assertColumnsAreEqual(stringExpected, stringResult);
+      assertColumnsAreEqual(v3, noMaskResult);
+    }
+  }
+
   @Test
   void isNotNullTestEmptyColumn() {
     try (ColumnVector v = ColumnVector.fromBoxedInts();
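On the Java side, the same merge is exposed through `ColumnView.mergeAndSetValidity`. A small illustrative snippet of the BITWISE_OR semantics the new test exercises (values and variable names are made up, not from this PR):

```java
// Illustrative only: with BITWISE_OR, a row stays valid if it is valid in ANY of
// the columns passed in; the data values are copied from the receiver column.
try (ColumnVector data = ColumnVector.fromBoxedInts(10, 20, 30, 40);
     ColumnVector a = ColumnVector.fromBoxedInts(1, null, 3, null);
     ColumnVector b = ColumnVector.fromBoxedInts(null, 2, 3, null);
     ColumnVector merged = data.mergeAndSetValidity(BinaryOp.BITWISE_OR, a, b)) {
  // Validity of a: {1, 0, 1, 0}; of b: {0, 1, 1, 0}; OR -> {1, 1, 1, 0},
  // so `merged` holds {10, 20, 30, null}.
}
```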