From 84f88ceb18225850835a9912a18e4c82245d5620 Mon Sep 17 00:00:00 2001
From: MithunR <mythrocks@gmail.com>
Date: Thu, 28 Apr 2022 23:45:40 -0700
Subject: [PATCH 01/23] Support purging non-empty null elements from
 LIST/STRING columns (#10701)

Fixes #10291.

With certain operations in `libcudf`, it is possible to produce `LIST` columns with `NULL` rows that are not also empty.
For instance, consider a `STRUCT` column constructed with an explicit validity buffer and a `LIST` child column:
```c++
auto const lists   = lists_column_wrapper<int32_t>{ {0,1}, {2,3}, {4,5} };
auto const structs = structs_column_wrapper{ {lists}, null_at(1) };
```
Since `structs[1] == NULL`, its `LIST` member is also deemed null. However, for efficiency, the null-ness is recorded in the `LIST`'s validity buffer, without purging the unnecessary values from its child. The `LIST` column appears as follows:
```
Validity: 101
Offsets:  [0, 2, 4, 6]
Child:    [0, 1, 2, 3, 4, 5]
```
Even though Row#1 is null, its size is `4-2 = 2`, and not `0`. (Row#1 is thus a non-empty null row.)

This commit adds a `cudf::purge_nonempty_nulls()` function that purges such rows, and reduces such columns to a more space-efficient representation, i.e.:
```
Validity: 101
Offsets:  [0, 2, 2, 4]
Child:    [0, 1, 4, 5]
```

This commit also modifies `cudf::gather()` not to produce `STRING`/`LIST` columns with "dirty" rows. Further, it adds two new functions to determine if a specified column needs such purging:
1. `cudf::may_have_nonempty_nulls()`: A fast check for the *possibility* that a column has non-empty nulls. This only checks whether the column or its descendants have null rows at all. If there are no nulls anywhere in the hierarchy, it does not need purging.
2. `cudf::has_nonempty_nulls()`: A deeper, more expensive check that categorically confirms whether non-empty null rows exist in any column in the hierarchy.

Authors:
  - MithunR (https://github.com/mythrocks)

Approvers:
  - Jake Hemstad (https://github.com/jrhemstad)
  - https://github.com/nvdbaranec
  - Jordan Jacobelli (https://github.com/Ethyling)

URL: https://github.com/rapidsai/cudf/pull/10701
---
 conda/recipes/libcudf/meta.yaml               |   1 +
 cpp/CMakeLists.txt                            |   1 +
 cpp/include/cudf/copying.hpp                  | 153 ++++++
 cpp/include/cudf/detail/copy.cuh              |  47 ++
 cpp/include/cudf/detail/copy.hpp              |  19 +-
 cpp/include/cudf/lists/detail/gather.cuh      |  45 +-
 cpp/include/cudf/strings/detail/gather.cuh    |  20 +-
 .../cudf/structs/structs_column_view.hpp      |   7 +-
 cpp/src/copying/purge_nonempty_nulls.cu       | 134 ++++++
 cpp/src/structs/structs_column_view.cpp       |   2 +
 cpp/tests/CMakeLists.txt                      |   1 +
 cpp/tests/column/factories_test.cpp           |   2 +-
 .../copying/purge_nonempty_nulls_tests.cpp    | 437 ++++++++++++++++++
 13 files changed, 847 insertions(+), 22 deletions(-)
 create mode 100644 cpp/include/cudf/detail/copy.cuh
 create mode 100644 cpp/src/copying/purge_nonempty_nulls.cu
 create mode 100644 cpp/tests/copying/purge_nonempty_nulls_tests.cpp

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 0806bb964cf..68008e13897 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -79,6 +79,7 @@ outputs:
         - test -f $PREFIX/include/cudf/detail/calendrical_month_sequence.cuh
         - test -f $PREFIX/include/cudf/detail/concatenate.hpp
         - test -f $PREFIX/include/cudf/detail/copy.hpp
+        - test -f $PREFIX/include/cudf/detail/copy.cuh
         - test -f $PREFIX/include/cudf/detail/datetime.hpp
         - test -f $PREFIX/include/cudf/detail/fill.hpp
         - test -f $PREFIX/include/cudf/detail/gather.hpp
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 15caaec9bec..cbe2811afe4 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -238,6 +238,7 @@ add_library(
   src/copying/gather.cu
   src/copying/get_element.cu
   src/copying/pack.cpp
+  src/copying/purge_nonempty_nulls.cu
   src/copying/reverse.cu
   src/copying/sample.cu
   src/copying/scatter.cu
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 2e559afef4f..8f1ad7da9b6 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -17,7 +17,10 @@
 #pragma once
 
 #include <cudf/column/column_view.hpp>
+#include <cudf/lists/lists_column_view.hpp>
 #include <cudf/scalar/scalar.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/structs/structs_column_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/types.hpp>
 
@@ -939,5 +942,155 @@ std::unique_ptr<table> sample(
   int64_t const seed                  = 0,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Checks if a column or its descendants have non-empty null rows
+ *
+ * @note This function is exact. If it returns `true`, there exist one or more
+ * non-empty null elements.
+ *
+ * A LIST or STRING column might have non-empty rows that are marked as null.
+ * A STRUCT or LIST column might have child columns that have non-empty null rows.
+ * Other types of columns are deemed incapable of having non-empty null rows.
+ * E.g. Fixed width columns have no concept of an "empty" row.
+ *
+ * @param input The column which is (and whose descendants are) to be checked for
+ * non-empty null rows.
+ * @return true If either the column or its descendants have non-empty null rows.
+ * @return false If neither the column nor its descendants have non-empty null rows.
+ */
+bool has_nonempty_nulls(column_view const& input);
+
+/**
+ * @brief Approximates if a column or its descendants *may* have non-empty null elements
+ *
+ * @note This function is approximate.
+ * - `true`: Non-empty null elements could exist
+ * - `false`: Non-empty null elements definitely do not exist
+ *
+ * False positives are possible, but false negatives are not.
+ *
+ * Compared to the exact `has_nonempty_nulls()` function, this function is typically
+ * more efficient.
+ *
+ * Complexity:
+ * - Best case: `O(count_descendants(input))`
+ * - Worst case: `O(count_descendants(input)) * m`, where `m` is the number of rows in the largest
+ * descendant
+ *
+ * @param input The column which is (and whose descendants are) to be checked for
+ * non-empty null rows
+ * @return true If either the column or its descendants have null rows
+ * @return false If neither the column nor its descendants have null rows
+ */
+bool may_have_nonempty_nulls(column_view const& input);
+
+/**
+ * @brief Copies `input`, purging any non-empty null rows in the column or its descendants
+ *
+ * LIST columns may have non-empty null rows.
+ * For example:
+ * @code{.pseudo}
+ *
+ * auto const lists   = lists_column_wrapper<int32_t>{ {0,1}, {2,3}, {4,5} }.release();
+ * cudf::detail::set_null_mask(lists->null_mask(), 1, 2, false);
+ *
+ * lists[1] is now null, but the lists child column still stores `{2,3}`.
+ * The lists column contents will be:
+ *   Validity: 101
+ *   Offsets:  [0, 2, 4, 6]
+ *   Child:    [0, 1, 2, 3, 4, 5]
+ *
+ * After purging the contents of the list's null rows, the column's contents
+ * will be:
+ *   Validity: 101
+ *   Offsets:  [0, 2, 2, 4]
+ *   Child:    [0, 1, 4, 5]
+ * @endcode
+ *
+ * The purge operation only applies directly to LIST and STRING columns, but it
+ * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
+ * may have child/descendant columns that are LIST or STRING.
+ *
+ * @param input The column whose null rows are to be checked and purged
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return std::unique_ptr<column> Column with equivalent contents to `input`, but with
+ * the contents of null rows purged
+ */
+std::unique_ptr<column> purge_nonempty_nulls(
+  lists_column_view const& input,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Copies `input`, purging any non-empty null rows in the column or its descendants
+ *
+ * STRING columns may have non-empty null rows.
+ * For example:
+ * @code{.pseudo}
+ *
+ * auto const strings = strings_column_wrapper{ "AB", "CD", "EF" }.release();
+ * cudf::detail::set_null_mask(strings->null_mask(), 1, 2, false);
+ *
+ * strings[1] is now null, but the strings column still stores `"CD"`.
+ * The strings column contents will be:
+ *   Validity: 101
+ *   Offsets:  [0, 2, 4, 6]
+ *   Child:    [A, B, C, D, E, F]
+ *
+ * After purging the contents of the strings column's null rows, the column's contents
+ * will be:
+ *   Validity: 101
+ *   Offsets:  [0, 2, 2, 4]
+ *   Child:    [A, B, E, F]
+ * @endcode
+ *
+ * The purge operation only applies directly to LIST and STRING columns, but it
+ * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
+ * may have child/descendant columns that are LIST or STRING.
+ *
+ * @param input The column whose null rows are to be checked and purged
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return std::unique_ptr<column> Column with equivalent contents to `input`, but with
+ * the contents of null rows purged
+ */
+std::unique_ptr<column> purge_nonempty_nulls(
+  strings_column_view const& input,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Copies `input`, purging any non-empty null rows in the column or its descendants
+ *
+ * STRUCT columns may have null rows, with non-empty child rows.
+ * For example:
+ * @code{.pseudo}
+ *
+ * auto const lists   = lists_column_wrapper<int32_t>{ {0,1}, {2,3}, {4,5} };
+ * auto const structs = structs_column_wrapper{ {lists}, null_at(1) };
+ *
+ * structs[1].child is now null, but the lists column still stores `{2,3}`.
+ * The lists column contents will be:
+ *   Validity: 101
+ *   Offsets:  [0, 2, 4, 6]
+ *   Child:    [0, 1, 2, 3, 4, 5]
+ *
+ * After purging the contents of the list's null rows, the column's contents
+ * will be:
+ *   Validity: 101
+ *   Offsets:  [0, 2, 2, 4]
+ *   Child:    [0, 1, 4, 5]
+ * @endcode
+ *
+ * The purge operation only applies directly to LIST and STRING columns, but it
+ * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
+ * may have child/descendant columns that are LIST or STRING.
+ *
+ * @param input The column whose null rows are to be checked and purged
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return std::unique_ptr<column> Column with equivalent contents to `input`, but with
+ * the contents of null rows purged
+ */
+std::unique_ptr<column> purge_nonempty_nulls(
+  structs_column_view const& input,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /** @} */
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/copy.cuh b/cpp/include/cudf/detail/copy.cuh
new file mode 100644
index 00000000000..773bce7131f
--- /dev/null
+++ b/cpp/include/cudf/detail/copy.cuh
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/detail/copy.hpp>
+#include <cudf/detail/gather.cuh>
+
+namespace cudf::detail {
+
+/**
+ * @copydoc cudf::purge_nonempty_nulls(structs_column_view const&, rmm::mr::device_memory_resource*)
+ *
+ * @tparam ColumnViewT View type (lists_column_view, strings_column_view, or structs_column_view)
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ */
+template <typename ColumnViewT>
+std::unique_ptr<cudf::column> purge_nonempty_nulls(ColumnViewT const& input,
+                                                   rmm::cuda_stream_view stream,
+                                                   rmm::mr::device_memory_resource* mr)
+{
+  // An identity gather (row i -> row i) rebuilds the column; the purging itself is done by
+  // the gather implementation that this call dispatches to.
+  auto const source    = input.parent();
+  auto const first_row = thrust::counting_iterator<cudf::size_type>(0);
+  auto const past_last = first_row + source.size();
+  auto const no_bounds = out_of_bounds_policy::DONT_CHECK;
+
+  auto gathered =
+    cudf::detail::gather(table_view{{source}}, first_row, past_last, no_bounds, stream, mr);
+
+  auto columns = gathered->release();
+  return std::move(columns[0]);
+}
+
+}  // namespace cudf::detail
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 50157d16876..abd14fbda89 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -299,5 +299,22 @@ std::unique_ptr<scalar> get_element(
   size_type index,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @copydoc cudf::has_nonempty_nulls
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+bool has_nonempty_nulls(column_view const& input,
+                        rmm::cuda_stream_view stream = rmm::cuda_stream_default);
+
+/**
+ * @copydoc cudf::may_have_nonempty_nulls
+ * NOTE(review): purge_nonempty_nulls.cu defines only the public overload; confirm this links.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+bool may_have_nonempty_nulls(column_view const& input,
+                             rmm::cuda_stream_view stream = rmm::cuda_stream_default);
+
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh
index c637ad041ba..7df36be2385 100644
--- a/cpp/include/cudf/lists/detail/gather.cuh
+++ b/cpp/include/cudf/lists/detail/gather.cuh
@@ -18,6 +18,7 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/get_value.cuh>
 #include <cudf/lists/lists_column_view.hpp>
+#include <cudf/utilities/bit.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
@@ -82,6 +83,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
   auto dst_offsets_c = cudf::make_fixed_width_column(
     data_type{type_id::INT32}, offset_count, mask_state::UNALLOCATED, stream, mr);
   mutable_column_view dst_offsets_v = dst_offsets_c->mutable_view();
+  auto const source_column_nullmask = source_column.null_mask();
 
   // generate the compacted outgoing offsets.
   auto count_iter = thrust::make_counting_iterator<int32_t>(0);
@@ -90,12 +92,23 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
     count_iter,
     count_iter + offset_count,
     dst_offsets_v.begin<int32_t>(),
-    [gather_map, output_count, src_offsets, src_size] __device__(int32_t index) -> int32_t {
+    [source_column_nullmask,
+     source_column_offset = source_column.offset(),
+     gather_map,
+     output_count,
+     src_offsets,
+     src_size] __device__(int32_t index) -> int32_t {
       int32_t offset_index = index < output_count ? gather_map[index] : 0;
 
       // if this is an invalid index, this will be a NULL list
       if (NullifyOutOfBounds && ((offset_index < 0) || (offset_index >= src_size))) { return 0; }
 
+      // If the source row is null, the output row size must be 0.
+      if (source_column_nullmask != nullptr &&
+          not cudf::bit_is_set(source_column_nullmask, source_column_offset + offset_index)) {
+        return 0;
+      }
+
       // the length of this list
       return src_offsets[offset_index + 1] - src_offsets[offset_index];
     },
@@ -110,15 +123,27 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
 
   // generate the base offsets
   rmm::device_uvector<int32_t> base_offsets = rmm::device_uvector<int32_t>(output_count, stream);
-  thrust::transform(rmm::exec_policy(stream),
-                    gather_map,
-                    gather_map + output_count,
-                    base_offsets.data(),
-                    [src_offsets, src_size, shift] __device__(int32_t index) {
-                      // if this is an invalid index, this will be a NULL list
-                      if (NullifyOutOfBounds && ((index < 0) || (index >= src_size))) { return 0; }
-                      return src_offsets[index] - shift;
-                    });
+  thrust::transform(
+    rmm::exec_policy(stream),
+    gather_map,
+    gather_map + output_count,
+    base_offsets.data(),
+    [source_column_nullmask,
+     source_column_offset = source_column.offset(),
+     src_offsets,
+     src_size,
+     shift] __device__(int32_t index) {
+      // if this is an invalid index, this will be a NULL list
+      if (NullifyOutOfBounds && ((index < 0) || (index >= src_size))) { return 0; }
+
+      // If the source row is null, the output row size must be 0.
+      if (source_column_nullmask != nullptr &&
+          not cudf::bit_is_set(source_column_nullmask, source_column_offset + index)) {
+        return 0;
+      }
+
+      return src_offsets[index] - shift;
+    });
 
   // Retrieve size of the resulting gather map for level N+1 (the last offset)
   size_type child_gather_map_size =
diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh
index 1b10c70d6d6..d46ab3a91a1 100644
--- a/cpp/include/cudf/strings/detail/gather.cuh
+++ b/cpp/include/cudf/strings/detail/gather.cuh
@@ -303,14 +303,17 @@ std::unique_ptr<cudf::column> gather(
     data_type{type_id::INT32}, output_count + 1, mask_state::UNALLOCATED, stream, mr);
   auto const d_out_offsets = out_offsets_column->mutable_view().template data<int32_t>();
   auto const d_in_offsets  = (strings_count > 0) ? strings.offsets_begin() : nullptr;
-  thrust::transform(rmm::exec_policy(stream),
-                    begin,
-                    end,
-                    d_out_offsets,
-                    [d_in_offsets, strings_count] __device__(size_type in_idx) {
-                      if (NullifyOutOfBounds && (in_idx < 0 || in_idx >= strings_count)) return 0;
-                      return d_in_offsets[in_idx + 1] - d_in_offsets[in_idx];
-                    });
+  auto const d_strings     = column_device_view::create(strings.parent(), stream);
+  thrust::transform(
+    rmm::exec_policy(stream),
+    begin,
+    end,
+    d_out_offsets,
+    [d_strings = *d_strings, d_in_offsets, strings_count] __device__(size_type in_idx) {
+      if (NullifyOutOfBounds && (in_idx < 0 || in_idx >= strings_count)) return 0;
+      if (not d_strings.is_valid(in_idx)) return 0;
+      return d_in_offsets[in_idx + 1] - d_in_offsets[in_idx];
+    });
 
   // check total size is not too large
   size_t const total_bytes = thrust::transform_reduce(
@@ -329,7 +332,6 @@ std::unique_ptr<cudf::column> gather(
 
   // build chars column
   cudf::device_span<int32_t const> const d_out_offsets_span(d_out_offsets, output_count + 1);
-  auto const d_strings  = column_device_view::create(strings.parent(), stream);
   auto out_chars_column = gather_chars(d_strings->begin<string_view>(),
                                        begin,
                                        end,
diff --git a/cpp/include/cudf/structs/structs_column_view.hpp b/cpp/include/cudf/structs/structs_column_view.hpp
index 329c24cfe0a..ca866d8555e 100644
--- a/cpp/include/cudf/structs/structs_column_view.hpp
+++ b/cpp/include/cudf/structs/structs_column_view.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,6 +41,11 @@ class structs_column_view : public column_view {
 
   explicit structs_column_view(column_view const& rhs);
 
+  /**
+   * @brief Returns the parent column, i.e. this structs column as a plain `column_view`.
+   */
+  [[nodiscard]] column_view parent() const;
+
   using column_view::child_begin;
   using column_view::child_end;
   using column_view::has_nulls;
diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu
new file mode 100644
index 00000000000..778d6c4df55
--- /dev/null
+++ b/cpp/src/copying/purge_nonempty_nulls.cu
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cudf/copying.hpp>
+#include <cudf/detail/copy.cuh>
+
+#include <thrust/iterator/counting_iterator.h>
+
+namespace cudf {
+namespace detail {
+
+using cudf::type_id;
+
+namespace {
+
+/// Check if columns of the given type can hold nonempty nulls (true only for STRING/LIST/STRUCT).
+bool type_may_have_nonempty_nulls(cudf::type_id const& type)
+{
+  return type == type_id::STRING || type == type_id::LIST || type == type_id::STRUCT;
+}
+
+/// Check if the (STRING/LIST) column has any null rows with non-zero length (slice-aware).
+bool has_nonempty_null_rows(cudf::column_view const& input, rmm::cuda_stream_view stream)
+{
+  if (not input.has_nulls() || input.num_children() == 0) { return false; }  // Nothing dirty.
+  // Cross-reference nullmask and offsets. The offsets child is not sliced with its parent, so
+  // start at `input.offset()` to stay aligned with the offset-aware null checks below.
+  auto const type         = input.type().id();
+  auto const offsets      = (type == type_id::STRING) ? (strings_column_view{input}).offsets()
+                                                      : (lists_column_view{input}).offsets();
+  auto const d_input      = cudf::column_device_view::create(input, stream);  // Same stream.
+  auto const first_offset = offsets.begin<size_type>() + input.offset();
+  auto const is_dirty_row = [d_input = *d_input, offsets = first_offset] __device__(
+                              size_type const& row_idx) {
+    return d_input.is_null_nocheck(row_idx) && (offsets[row_idx] != offsets[row_idx + 1]);
+  };
+  auto const row_begin = thrust::counting_iterator<cudf::size_type>(0);
+  auto const row_end   = row_begin + input.size();
+  return thrust::count_if(rmm::exec_policy(stream), row_begin, row_end, is_dirty_row) > 0;
+}
+
+}  // namespace
+
+/**
+ * @copydoc cudf::detail::has_nonempty_nulls
+ */
+bool has_nonempty_nulls(cudf::column_view const& input, rmm::cuda_stream_view stream)
+{
+  auto const id = input.type().id();
+
+  // Only STRING, LIST and STRUCT columns can hold dirty rows.
+  if (not type_may_have_nonempty_nulls(id)) { return false; }
+
+  bool const has_offsets  = (id == type_id::STRING) || (id == type_id::LIST);
+  bool const has_children = (id == type_id::STRUCT) || (id == type_id::LIST);
+
+  // Rows of variable length are "dirty" when they are null but still have non-zero length.
+  if (has_offsets && has_nonempty_null_rows(input, stream)) { return true; }
+
+  // Nested types additionally need every child checked, recursively.
+  if (not has_children) { return false; }
+
+  auto const child_is_dirty = [stream](auto const& child) {
+    return cudf::detail::has_nonempty_nulls(child, stream);
+  };
+
+  return std::any_of(input.child_begin(), input.child_end(), child_is_dirty);
+}
+}  // namespace detail
+
+/**
+ * @copydoc cudf::may_have_nonempty_nulls
+ */
+bool may_have_nonempty_nulls(column_view const& input)
+{
+  auto const id = input.type().id();
+  if (not detail::type_may_have_nonempty_nulls(id)) { return false; }
+
+  bool const has_offsets  = (id == type_id::STRING) || (id == type_id::LIST);
+  bool const has_children = (id == type_id::STRUCT) || (id == type_id::LIST);
+
+  // Any null row in a STRING/LIST column is a candidate; its length is not inspected here.
+  if (has_offsets && input.has_nulls()) { return true; }
+
+  // Otherwise the answer depends solely on the children of nested columns.
+  return has_children &&
+         std::any_of(input.child_begin(), input.child_end(), may_have_nonempty_nulls);
+}
+
+/** Public overload: forwards to the detail implementation with its default stream argument.
+ * @copydoc cudf::has_nonempty_nulls
+ */
+bool has_nonempty_nulls(column_view const& input) { return detail::has_nonempty_nulls(input); }
+
+/** Public LIST overload: runs the detail implementation on `rmm::cuda_stream_default`.
+ * @copydoc cudf::purge_nonempty_nulls(lists_column_view const&, rmm::mr::device_memory_resource*)
+ */
+std::unique_ptr<cudf::column> purge_nonempty_nulls(lists_column_view const& input,
+                                                   rmm::mr::device_memory_resource* mr)
+{
+  return detail::purge_nonempty_nulls(input, rmm::cuda_stream_default, mr);
+}
+
+/** Public STRUCT overload: runs the detail implementation on `rmm::cuda_stream_default`.
+ * @copydoc cudf::purge_nonempty_nulls(structs_column_view const&, rmm::mr::device_memory_resource*)
+ */
+std::unique_ptr<cudf::column> purge_nonempty_nulls(structs_column_view const& input,
+                                                   rmm::mr::device_memory_resource* mr)
+{
+  return detail::purge_nonempty_nulls(input, rmm::cuda_stream_default, mr);
+}
+
+/** Public STRING overload: runs the detail implementation on `rmm::cuda_stream_default`.
+ * @copydoc cudf::purge_nonempty_nulls(strings_column_view const&, rmm::mr::device_memory_resource*)
+ */
+std::unique_ptr<cudf::column> purge_nonempty_nulls(strings_column_view const& input,
+                                                   rmm::mr::device_memory_resource* mr)
+{
+  return detail::purge_nonempty_nulls(input, rmm::cuda_stream_default, mr);
+}
+
+}  // namespace cudf
diff --git a/cpp/src/structs/structs_column_view.cpp b/cpp/src/structs/structs_column_view.cpp
index 681f13386ff..7d8c8837d2d 100644
--- a/cpp/src/structs/structs_column_view.cpp
+++ b/cpp/src/structs/structs_column_view.cpp
@@ -25,6 +25,8 @@ structs_column_view::structs_column_view(column_view const& rhs) : column_view{r
   CUDF_EXPECTS(type().id() == type_id::STRUCT, "structs_column_view only supports struct columns");
 }
 
+column_view structs_column_view::parent() const { return static_cast<column_view const&>(*this); }
+
 column_view structs_column_view::get_sliced_child(int index) const
 {
   std::vector<column_view> children;
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index e016f47616b..95c54d7596e 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -250,6 +250,7 @@ ConfigureTest(
   copying/gather_tests.cpp
   copying/get_value_tests.cpp
   copying/pack_tests.cpp
+  copying/purge_nonempty_nulls_tests.cpp
   copying/sample_tests.cpp
   copying/scatter_tests.cpp
   copying/scatter_list_tests.cpp
diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp
index 4e0e70bf15c..44a79e63cd8 100644
--- a/cpp/tests/column/factories_test.cpp
+++ b/cpp/tests/column/factories_test.cpp
@@ -645,7 +645,7 @@ TYPED_TEST(ListsStructsLeafTest, FromNonNested)
                                           0,
                                           cudf::create_null_mask(2, cudf::mask_state::UNALLOCATED));
 
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col, *expected);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*col, *expected);
 }
 
 TYPED_TEST(ListsStructsLeafTest, FromNested)
diff --git a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp
new file mode 100644
index 00000000000..77fd3f66ee5
--- /dev/null
+++ b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp
@@ -0,0 +1,437 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cudf/column/column_view.hpp>
+#include <cudf/copying.hpp>
+#include <cudf/detail/gather.hpp>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/detail/null_mask.hpp>
+#include <cudf/lists/lists_column_view.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+
+namespace cudf::test {
+
+using iterators::no_nulls;
+using iterators::null_at;
+using iterators::nulls_at;
+using T             = int32_t;  // The actual type of the leaf node isn't really important.
+using values_col_t  = fixed_width_column_wrapper<T>;
+using offsets_col_t = fixed_width_column_wrapper<size_type>;
+using gather_map_t  = fixed_width_column_wrapper<size_type>;
+
+template <typename T>
+using LCW = cudf::test::lists_column_wrapper<T, int32_t>;
+
+struct PurgeNonEmptyNullsTest : public cudf::test::BaseFixture {
+  /// Helper to run gather() on a single column, and extract the single column from the result.
+  std::unique_ptr<cudf::column> gather(column_view const& input, gather_map_t const& gather_map)
+  {
+    auto gathered =
+      cudf::gather(cudf::table_view{{input}}, gather_map, out_of_bounds_policy::NULLIFY);
+    return std::move(gathered->release()[0]);
+  }
+
+  /// Verify that the result of `purge_nonempty_nulls()` is equivalent to the unpurged input,
+  /// except that the null rows are also empty.
+  template <typename ColumnViewT>
+  void test_purge(ColumnViewT const& unpurged)
+  {
+    auto const purged = cudf::purge_nonempty_nulls(unpurged);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(unpurged.parent(), *purged);
+    EXPECT_FALSE(cudf::has_nonempty_nulls(*purged));
+  }
+};
+
+// List<T>.
+TEST_F(PurgeNonEmptyNullsTest, SingleLevelList)
+{
+  auto const input = LCW<T>{{{{1, 2, 3, 4}, null_at(2)},
+                             {5},
+                             {6, 7},  // <--- Will be set to NULL. Unsanitized row.
+                             {8, 9, 10}},
+                            no_nulls()}
+                       .release();
+  EXPECT_FALSE(cudf::may_have_nonempty_nulls(*input));
+  EXPECT_FALSE(cudf::has_nonempty_nulls(*input));
+
+  // Set nullmask, post construction.
+  cudf::detail::set_null_mask(input->mutable_view().null_mask(), 2, 3, false);
+  EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
+  EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
+
+  test_purge(lists_column_view{*input});
+
+  {
+    // Selecting all rows from input, in different order.
+    auto const results           = gather(input->view(), {1, 2, 0, 3});
+    auto const results_list_view = lists_column_view(*results);
+
+    auto const expected = LCW<T>{{{5},
+                                  {},  // NULL.
+                                  {{1, 2, 3, 4}, null_at(2)},
+                                  {8, 9, 10}},
+                                 null_at(1)};
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_list_view.offsets(), offsets_col_t{0, 1, 1, 5, 8});
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_list_view.child(),
+                                   values_col_t{{5, 1, 2, 3, 4, 8, 9, 10}, null_at(3)});
+    EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results));
+    EXPECT_FALSE(cudf::has_nonempty_nulls(*results));
+  }
+  {
+    // Test when gather selects rows preceded by unsanitized rows.
+    auto const results  = gather(input->view(), {3, 100, 0});
+    auto const expected = LCW<T>{{
+                                   {8, 9, 10},
+                                   {},  // NULL.
+                                   {{1, 2, 3, 4}, null_at(2)},
+                                 },
+                                 null_at(1)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+    EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results));
+    EXPECT_FALSE(cudf::has_nonempty_nulls(*results));
+  }
+  {
+    // Test when gather selects rows followed by unsanitized rows.
+    auto const results  = gather(input->view(), {1, 100, 0});
+    auto const expected = LCW<T>{{
+                                   {5},
+                                   {},  // NULL.
+                                   {{1, 2, 3, 4}, null_at(2)},
+                                 },
+                                 null_at(1)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+    EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results));
+    EXPECT_FALSE(cudf::has_nonempty_nulls(*results));
+  }
+  {
+    // Test when gather selects unsanitized row specifically.
+    auto const results            = gather(input->view(), {2});
+    auto const results_lists_view = lists_column_view(*results);
+    auto const expected           = LCW<T>{{
+                                   LCW<T>{}  // NULL.
+                                 },
+                                 null_at(0)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.offsets(), offsets_col_t{0, 0});
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.child(), values_col_t{});
+    EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results));
+    EXPECT_FALSE(cudf::has_nonempty_nulls(*results));
+  }
+}
+
+// List<List<T>>.
+TEST_F(PurgeNonEmptyNullsTest, TwoLevelList)
+{
+  auto const input =
+    LCW<T>{
+      {{{1, 2, 3}, {4, 5, 6, 7}, {8}, {9, 1}, {2}},
+       {{11, 12}, {13, 14, 15}, {16, 17, 18}, {19}},
+       {{21}, {22, 23}, {24, 25, 26}},
+       {{31, 32}, {33, 34, 35, 36}, {}, {37, 38}},  //<--- Will be set to NULL. Unsanitized row.
+       {{41}, {42, 43}}},
+      no_nulls()}
+      .release();
+  EXPECT_FALSE(cudf::may_have_nonempty_nulls(*input));
+  EXPECT_FALSE(cudf::has_nonempty_nulls(*input));
+
+  // Set nullmask, post construction.
+  cudf::detail::set_null_mask(input->mutable_view().null_mask(), 3, 4, false);
+  EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
+  EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
+
+  test_purge(lists_column_view{*input});
+
+  {
+    // Verify that gather() output is sanitized.
+    auto const results            = gather(input->view(), {100, 3, 0, 1});
+    auto const results_lists_view = lists_column_view(*results);
+
+    auto const expected = LCW<T>{{
+                                   LCW<T>{},  // NULL, because of out of bounds.
+                                   LCW<T>{},  // NULL, because input row was null.
+                                   {{1, 2, 3}, {4, 5, 6, 7}, {8}, {9, 1}, {2}},  // i.e. input[0]
+                                   {{11, 12}, {13, 14, 15}, {16, 17, 18}, {19}}  // i.e. input[1]
+                                 },
+                                 nulls_at({0, 1})};
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.offsets(), offsets_col_t{0, 0, 0, 5, 9});
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+      results_lists_view.child(),
+      LCW<T>{
+        {1, 2, 3}, {4, 5, 6, 7}, {8}, {9, 1}, {2}, {11, 12}, {13, 14, 15}, {16, 17, 18}, {19}});
+
+    auto const child_lists_view = lists_column_view(results_lists_view.child());
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(child_lists_view.offsets(),
+                                   offsets_col_t{0, 3, 7, 8, 10, 11, 13, 16, 19, 20});
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+      child_lists_view.child(),
+      values_col_t{1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 11, 12, 13, 14, 15, 16, 17, 18, 19});
+    EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results));
+    EXPECT_FALSE(cudf::has_nonempty_nulls(*results));
+  }
+}
+
+// List<List<List<T>>>.
+TEST_F(PurgeNonEmptyNullsTest, ThreeLevelList)
+{
+  auto const input = LCW<T>{{{{{1, 2}, {3}}, {{4, 5}, {6, 7}}, {{8, 8}, {}}, {{9, 1}}, {{2, 3}}},
+                             {{{11, 12}}, {{13}, {14, 15}}, {{16, 17, 18}}, {{19, 19}, {}}},
+                             {{{21, 21}}, {{22, 23}, {}}, {{24, 25}, {26}}},
+                             {{{31, 32}, {}},
+                              {{33, 34, 35}, {36}},
+                              {},
+                              {{37, 38}}},  //<--- Will be set to NULL. Unsanitized row.
+                             {{{41, 41, 41}}, {{42, 43}}}},
+                            no_nulls()}
+                       .release();
+  EXPECT_FALSE(cudf::may_have_nonempty_nulls(*input));
+  EXPECT_FALSE(cudf::has_nonempty_nulls(*input));
+
+  // Set nullmask, post construction.
+  cudf::detail::set_null_mask(input->mutable_view().null_mask(), 3, 4, false);
+  EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
+  EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
+
+  test_purge(lists_column_view{*input});
+
+  {
+    auto const results            = gather(input->view(), {100, 3, 0, 1});
+    auto const results_lists_view = lists_column_view(*results);
+
+    auto const expected = LCW<T>{
+      {
+        LCW<T>{},  // NULL, because of out of bounds.
+        LCW<T>{},  // NULL, because input row was null.
+        {{{1, 2}, {3}}, {{4, 5}, {6, 7}}, {{8, 8}, {}}, {{9, 1}}, {{2, 3}}},  // i.e. input[0]
+        {{{11, 12}}, {{13}, {14, 15}}, {{16, 17, 18}}, {{19, 19}, {}}}        // i.e. input[1]
+      },
+      nulls_at({0, 1})};
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.offsets(), offsets_col_t{0, 0, 0, 5, 9});
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.child(),
+                                   LCW<T>{{{1, 2}, {3}},
+                                          {{4, 5}, {6, 7}},
+                                          {{8, 8}, {}},
+                                          {{9, 1}},
+                                          {{2, 3}},
+                                          {{11, 12}},
+                                          {{13}, {14, 15}},
+                                          {{16, 17, 18}},
+                                          {{19, 19}, {}}});
+    EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results));
+    EXPECT_FALSE(cudf::has_nonempty_nulls(*results));
+  }
+}
+
+// List<string>.
+TEST_F(PurgeNonEmptyNullsTest, ListOfStrings)
+{
+  using T = string_view;
+
+  auto const input = LCW<T>{{{{"1", "22", "", "4444"}, null_at(2)},
+                             {"55555"},
+                             {"666666", "7777777"},  // <--- Will be set to NULL. Unsanitized row.
+                             {"88888888", "999999999", "1010101010"},
+                             {"11", "22", "33", "44"},
+                             {"55", "66", "77", "88"}},
+                            no_nulls()}
+                       .release();
+  EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
+  EXPECT_FALSE(cudf::has_nonempty_nulls(*input));
+
+  // Set nullmask, post construction.
+  cudf::detail::set_null_mask(input->mutable_view().null_mask(), 2, 3, false);
+  EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
+  EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
+
+  test_purge(lists_column_view{*input});
+
+  {
+    // Selecting all rows from input, in different order.
+    auto const results           = gather(input->view(), {1, 2, 0, 3});
+    auto const results_list_view = lists_column_view(*results);
+
+    auto const expected = LCW<T>{{{"55555"},
+                                  {},  // NULL.
+                                  {{"1", "22", "", "4444"}, null_at(2)},
+                                  {"88888888", "999999999", "1010101010"}},
+                                 null_at(1)};
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_list_view.offsets(), offsets_col_t{0, 1, 1, 5, 8});
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+      results_list_view.child(),
+      strings_column_wrapper{
+        {"55555", "1", "22", "", "4444", "88888888", "999999999", "1010101010"}, null_at(3)});
+    EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results));
+    EXPECT_FALSE(cudf::has_nonempty_nulls(*results));
+  }
+  {
+    // Gathering from a sliced column.
+    auto const sliced = cudf::slice({input->view()}, {1, 5})[0];  // Lop off 1 row at each end.
+    EXPECT_TRUE(cudf::may_have_nonempty_nulls(sliced));
+    EXPECT_TRUE(cudf::has_nonempty_nulls(sliced));
+
+    auto const results           = gather(sliced, {1, 2, 0, 3});
+    auto const results_list_view = lists_column_view(*results);
+    auto const expected          = LCW<T>{{
+                                   {},
+                                   {"88888888", "999999999", "1010101010"},
+                                   {"55555"},
+                                   {"11", "22", "33", "44"},
+                                 },
+                                 null_at(0)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_list_view.offsets(), offsets_col_t{0, 0, 3, 4, 8});
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+      results_list_view.child(),
+      strings_column_wrapper{
+        "88888888", "999999999", "1010101010", "55555", "11", "22", "33", "44"});
+    EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results));
+    EXPECT_FALSE(cudf::has_nonempty_nulls(*results));
+  }
+}
+
+// List<string>.
+TEST_F(PurgeNonEmptyNullsTest, UnsanitizedListOfUnsanitizedStrings)
+{
+  auto strings =
+    strings_column_wrapper{
+      {"1", "22", "3", "44", "5", "66", "7", "8888", "9", "1010"},  //<--- "8888" will be
+                                                                    // unsanitized.
+      no_nulls()}
+      .release();
+  EXPECT_FALSE(cudf::may_have_nonempty_nulls(*strings));
+  EXPECT_FALSE(cudf::has_nonempty_nulls(*strings));
+
+  // Set strings nullmask, post construction.
+  set_null_mask(strings->mutable_view().null_mask(), 7, 8, false);
+  EXPECT_TRUE(cudf::may_have_nonempty_nulls(*strings));
+  EXPECT_TRUE(cudf::has_nonempty_nulls(*strings));
+
+  test_purge(strings_column_view{*strings});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    strings_column_view(*strings).offsets(), offsets_col_t{0, 1, 3, 4, 6, 7, 9, 10, 14, 15, 19}
+    // 10-14 indicates that "8888" is unsanitized.
+  );
+
+  // Construct a list column from the strings column.
+  auto const lists = make_lists_column(4,
+                                       offsets_col_t{0, 4, 5, 7, 10}.release(),
+                                       std::move(strings),
+                                       0,
+                                       detail::make_null_mask(no_nulls(), no_nulls() + 4));
+  EXPECT_TRUE(cudf::may_have_nonempty_nulls(*lists));
+  EXPECT_TRUE(cudf::has_nonempty_nulls(*lists));
+
+  // Set lists nullmask, post construction.
+  cudf::detail::set_null_mask(lists->mutable_view().null_mask(), 2, 3, false);
+  EXPECT_TRUE(cudf::may_have_nonempty_nulls(*lists));
+  EXPECT_TRUE(cudf::has_nonempty_nulls(*lists));
+
+  test_purge(lists_column_view{*lists});
+
+  // At this point,
+  // 1. {"66", "7"} will be unsanitized.
+  // 2. {"8888", "9", "1010"} will actually be {NULL, "9", "1010"}.
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    lists_column_view(*lists).offsets(),
+    offsets_col_t{0, 4, 5, 7, 10});  // 5-7 indicates that list row#2 is unsanitized.
+
+  auto const result   = gather(lists->view(), {1, 2, 0, 3});
+  auto const expected = LCW<string_view>{{{"5"},
+                                          {},  // NULL.
+                                          {"1", "22", "3", "44"},
+                                          {{"", "9", "1010"}, null_at(0)}},
+                                         null_at(1)};
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
+
+  // Ensure row#2 has been sanitized.
+  auto const results_lists_view = lists_column_view(*result);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.offsets(), offsets_col_t{0, 1, 1, 5, 8}
+                                 // 1-1 indicates that row#2 is sanitized.
+  );
+
+  // Ensure that "8888" has been sanitized, and stored as "".
+  auto const child_strings_view = strings_column_view(results_lists_view.child());
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(child_strings_view.offsets(),
+                                 offsets_col_t{0, 1, 2, 4, 5, 7, 7, 8, 12});
+  EXPECT_TRUE(cudf::may_have_nonempty_nulls(*result));
+  EXPECT_FALSE(cudf::has_nonempty_nulls(*result));
+}
+
+// Struct<List<T>>.
+TEST_F(PurgeNonEmptyNullsTest, StructOfList)
+{
+  auto const structs_input =
+    [] {
+      auto child = LCW<T>{{{{1, 2, 3, 4}, null_at(2)},
+                           {5},
+                           {6, 7},  //<--- Unsanitized row.
+                           {8, 9, 10}},
+                          no_nulls()};
+      EXPECT_FALSE(cudf::has_nonempty_nulls(child));
+      return structs_column_wrapper{{child}, null_at(2)};
+    }()
+      .release();
+
+  EXPECT_TRUE(cudf::may_have_nonempty_nulls(*structs_input));
+  EXPECT_TRUE(cudf::has_nonempty_nulls(*structs_input));
+
+  test_purge(structs_column_view{*structs_input});
+
+  // At this point, even though the structs column has a null at index 2,
+  // the child column has a non-empty list row at index 2: {6, 7}.
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(lists_column_view(structs_input->child(0)).child(),
+                                 values_col_t{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, null_at(2)});
+
+  {
+    // Test rearrange.
+    auto const gather_map      = gather_map_t{1, 2, 0, 3};
+    auto const result          = gather(structs_input->view(), gather_map);
+    auto const expected_result = [] {
+      auto child = LCW<T>{{{5},
+                           LCW<T>{},  //<--- Now, sanitized.
+                           {{1, 2, 3, 4}, null_at(2)},
+                           {8, 9, 10}},
+                          null_at(1)};
+      return structs_column_wrapper{{child}, null_at(1)};
+    }();
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected_result);
+    auto const results_child = lists_column_view(result->child(0));
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_child.offsets(), offsets_col_t{0, 1, 1, 5, 8});
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_child.child(),
+                                   values_col_t{{5, 1, 2, 3, 4, 8, 9, 10}, null_at(3)});
+    EXPECT_TRUE(cudf::may_have_nonempty_nulls(*result));
+    EXPECT_FALSE(cudf::has_nonempty_nulls(*result));
+  }
+}
+
+}  // namespace cudf::test

From 3c208a618f7f3443d021c01ad27f560a7d71e7d7 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 29 Apr 2022 09:36:29 -0400
Subject: [PATCH 02/23] Enable pydocstyle rules involving quotes (#10748)

This PR enables D30* errors for pydocstyle. It also sets up the `ignore-decorators` configuration so that future PRs involving D10* errors will treat docutils decorators appropriately. Contributes to #10711.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/10748
---
 .pre-commit-config.yaml                | 15 ++++++
 python/.flake8                         | 24 +++++-----
 python/cudf/cudf/comm/gpuarrow.py      |  4 +-
 python/cudf/cudf/core/column/string.py | 66 +++++++++++++-------------
 python/cudf/cudf/core/frame.py         |  4 +-
 python/cudf/cudf/core/series.py        |  4 +-
 6 files changed, 66 insertions(+), 51 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5f690f5f827..cd7b8aea6d7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,3 +1,5 @@
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+
 repos:
       - repo: https://github.com/PyCQA/isort
         rev: 5.6.4
@@ -56,6 +58,19 @@ repos:
         hooks:
               - id: pydocstyle
                 args: ["--config=python/.flake8"]
+                exclude: |
+                    (?x)^(
+                    ci|
+                    cpp|
+                    conda|
+                    docs|
+                    java|
+                    notebooks|
+                    python/dask_cudf|
+                    python/cudf_kafka|
+                    python/custreamz|
+                    python/cudf/cudf/tests
+                    )
       - repo: https://github.com/pre-commit/mirrors-clang-format
         rev: v11.1.0
         hooks:
diff --git a/python/.flake8 b/python/.flake8
index c645c46a216..667875030cc 100644
--- a/python/.flake8
+++ b/python/.flake8
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 [flake8]
 exclude = __init__.py
@@ -9,14 +9,14 @@ ignore =
     E203
 
 [pydocstyle]
-match = ^(.*abc\.py|.*api/types\.py|.*single_column_frame\.py|.*indexed_frame\.py)$
-# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather than include using match-dir.
-match-dir = ^(?!ci|cpp|python/dask_cudf|python/cudf_kafka|python/custreamz).*$
-# In addition to numpy style, we additionally ignore:
-add-ignore =
-    # magic methods
-    D105,
-    # no docstring in __init__
-    D107,
-    # newlines before docstrings
-    D204
+# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather
+# than include using match-dir. Note that as discussed in
+# https://stackoverflow.com/questions/65478393/how-to-filter-directories-using-the-match-dir-flag-for-pydocstyle,
+# unlike the match option above this match-dir will have no effect when
+# pydocstyle is invoked from pre-commit. Therefore this exclusion list must
+# also be maintained in the pre-commit config file.
+match-dir = ^(?!(ci|cpp|conda|docs|java|notebooks|dask_cudf|cudf_kafka|custreamz|tests)).*$
+# Allow missing docstrings for docutils
+ignore-decorators = .*(docutils|doc_apply|copy_docstring).*
+select = 
+    D30
diff --git a/python/cudf/cudf/comm/gpuarrow.py b/python/cudf/cudf/comm/gpuarrow.py
index 09b4cc5ffba..0c4d9d7f77e 100644
--- a/python/cudf/cudf/comm/gpuarrow.py
+++ b/python/cudf/cudf/comm/gpuarrow.py
@@ -119,12 +119,12 @@ def null(self):
 
     @property
     def data_raw(self):
-        "Accessor for the data buffer as a device array"
+        """Accessor for the data buffer as a device array"""
         return self._series._column.data_array_view
 
     @property
     def null_raw(self):
-        "Accessor for the null buffer as a device array"
+        """Accessor for the null buffer as a device array"""
         return self._series._column.mask_array_view
 
     def make_series(self):
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 1d836d9b759..0db7e7d9a27 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -201,7 +201,7 @@ def __getitem__(self, key):
             return self.get(key)
 
     def len(self) -> SeriesOrIndex:
-        """
+        r"""
         Computes the length of each element in the Series/Index.
 
         Returns
@@ -213,7 +213,7 @@ def len(self) -> SeriesOrIndex:
         Examples
         --------
         >>> import cudf
-        >>> s = cudf.Series(["dog", "", "\\n", None])
+        >>> s = cudf.Series(["dog", "", "\n", None])
         >>> s.str.len()
         0       3
         1       0
@@ -960,7 +960,7 @@ def replace(
         )
 
     def replace_with_backrefs(self, pat: str, repl: str) -> SeriesOrIndex:
-        """
+        r"""
         Use the ``repl`` back-ref template to create a new string
         with the extracted elements found using the ``pat`` expression.
 
@@ -980,7 +980,7 @@ def replace_with_backrefs(self, pat: str, repl: str) -> SeriesOrIndex:
         --------
         >>> import cudf
         >>> s = cudf.Series(["A543","Z756"])
-        >>> s.str.replace_with_backrefs('(\\\\d)(\\\\d)', 'V\\\\2\\\\1')
+        >>> s.str.replace_with_backrefs('(\\d)(\\d)', 'V\\2\\1')
         0    AV453
         1    ZV576
         dtype: object
@@ -1195,7 +1195,7 @@ def istimestamp(self, format: str) -> SeriesOrIndex:
         )
 
     def isfloat(self) -> SeriesOrIndex:
-        """
+        r"""
         Check whether all characters in each string form floating value.
 
         If a string has zero characters, False is returned for
@@ -1249,7 +1249,7 @@ def isfloat(self) -> SeriesOrIndex:
         4     True
         5    False
         dtype: bool
-        >>> s = cudf.Series(["this is plain text", "\\t\\n", "9.9", "9.9.9"])
+        >>> s = cudf.Series(["this is plain text", "\t\n", "9.9", "9.9.9"])
         >>> s.str.isfloat()
         0    False
         1    False
@@ -2239,7 +2239,7 @@ def get(self, i: int = 0) -> SeriesOrIndex:
         return self._return_or_inplace(libstrings.get(self._column, i))
 
     def get_json_object(self, json_path):
-        """
+        r"""
         Applies a JSONPath string to an input strings column
         where each row in the column is a valid json string
 
@@ -2258,7 +2258,7 @@ def get_json_object(self, json_path):
         >>> import cudf
         >>> s = cudf.Series(
             [
-                \\"\\"\\"
+                \"\"\"
                 {
                     "store":{
                         "book":[
@@ -2277,13 +2277,13 @@ def get_json_object(self, json_path):
                         ]
                     }
                 }
-                \\"\\"\\"
+                \"\"\"
             ])
         >>> s
-            0    {"store": {\\n        "book": [\\n        { "cat...
+            0    {"store": {\n        "book": [\n        { "cat...
             dtype: object
         >>> s.str.get_json_object("$.store.book")
-            0    [\\n        { "category": "reference",\\n       ...
+            0    [\n        { "category": "reference",\n       ...
             dtype: object
         """
 
@@ -3138,7 +3138,7 @@ def rjust(self, width: int, fillchar: str = " ") -> SeriesOrIndex:
         )
 
     def strip(self, to_strip: str = None) -> SeriesOrIndex:
-        """
+        r"""
         Remove leading and trailing characters.
 
         Strip whitespaces (including newlines) or a set of
@@ -3169,11 +3169,11 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex:
         Examples
         --------
         >>> import cudf
-        >>> s = cudf.Series(['1. Ant.  ', '2. Bee!\\n', '3. Cat?\\t', None])
+        >>> s = cudf.Series(['1. Ant.  ', '2. Bee!\n', '3. Cat?\t', None])
         >>> s
         0    1. Ant.
-        1    2. Bee!\\n
-        2    3. Cat?\\t
+        1    2. Bee!\n
+        2    3. Cat?\t
         3         <NA>
         dtype: object
         >>> s.str.strip()
@@ -3182,7 +3182,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex:
         2    3. Cat?
         3       <NA>
         dtype: object
-        >>> s.str.strip('123.!? \\n\\t')
+        >>> s.str.strip('123.!? \n\t')
         0     Ant
         1     Bee
         2     Cat
@@ -3197,7 +3197,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex:
         )
 
     def lstrip(self, to_strip: str = None) -> SeriesOrIndex:
-        """
+        r"""
         Remove leading and trailing characters.
 
         Strip whitespaces (including newlines)
@@ -3228,11 +3228,11 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex:
         Examples
         --------
         >>> import cudf
-        >>> s = cudf.Series(['1. Ant.  ', '2. Bee!\\n', '3. Cat?\\t', None])
+        >>> s = cudf.Series(['1. Ant.  ', '2. Bee!\n', '3. Cat?\t', None])
         >>> s.str.lstrip('123.')
         0     Ant.
-        1     Bee!\\n
-        2     Cat?\\t
+        1     Bee!\n
+        2     Cat?\t
         3       <NA>
         dtype: object
         """
@@ -3244,7 +3244,7 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex:
         )
 
     def rstrip(self, to_strip: str = None) -> SeriesOrIndex:
-        """
+        r"""
         Remove leading and trailing characters.
 
         Strip whitespaces (including newlines)
@@ -3277,14 +3277,14 @@ def rstrip(self, to_strip: str = None) -> SeriesOrIndex:
         Examples
         --------
         >>> import cudf
-        >>> s = cudf.Series(['1. Ant.  ', '2. Bee!\\n', '3. Cat?\\t', None])
+        >>> s = cudf.Series(['1. Ant.  ', '2. Bee!\n', '3. Cat?\t', None])
         >>> s
         0    1. Ant.
-        1    2. Bee!\\n
-        2    3. Cat?\\t
+        1    2. Bee!\n
+        2    3. Cat?\t
         3         <NA>
         dtype: object
-        >>> s.str.rstrip('.!? \\n\\t')
+        >>> s.str.rstrip('.!? \n\t')
         0    1. Ant
         1    2. Bee
         2    3. Cat
@@ -3299,7 +3299,7 @@ def rstrip(self, to_strip: str = None) -> SeriesOrIndex:
         )
 
     def wrap(self, width: int, **kwargs) -> SeriesOrIndex:
-        """
+        r"""
         Wrap long strings in the Series/Index to be formatted in
         paragraphs with length less than a given width.
 
@@ -3340,8 +3340,8 @@ def wrap(self, width: int, **kwargs) -> SeriesOrIndex:
         >>> data = ['line to be wrapped', 'another line to be wrapped']
         >>> s = cudf.Series(data)
         >>> s.str.wrap(12)
-        0             line to be\\nwrapped
-        1    another line\\nto be\\nwrapped
+        0             line to be\nwrapped
+        1    another line\nto be\nwrapped
         dtype: object
         """
         if not is_integer(width):
@@ -3575,7 +3575,7 @@ def isempty(self) -> SeriesOrIndex:
         return self._return_or_inplace((self._column == "").fillna(False))
 
     def isspace(self) -> SeriesOrIndex:
-        """
+        r"""
         Check whether all characters in each string are whitespace.
 
         This is equivalent to running the Python string method
@@ -3623,7 +3623,7 @@ def isspace(self) -> SeriesOrIndex:
         Examples
         --------
         >>> import cudf
-        >>> s = cudf.Series([' ', '\\t\\r\\n ', ''])
+        >>> s = cudf.Series([' ', '\t\r\n ', ''])
         >>> s.str.isspace()
         0     True
         1     True
@@ -4271,7 +4271,7 @@ def normalize_spaces(self) -> SeriesOrIndex:
         )
 
     def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex:
-        """
+        r"""
         Normalizes strings characters for tokenizing.
 
         This uses the normalizer that is built into the
@@ -4280,7 +4280,7 @@ def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex:
             - adding padding around punctuation (unicode category starts with
               "P") as well as certain ASCII symbols like "^" and "$"
             - adding padding around the CJK Unicode block characters
-            - changing whitespace (e.g. ``\\t``, ``\\n``, ``\\r``) to space
+            - changing whitespace (e.g. ``\t``, ``\n``, ``\r``) to space
             - removing control characters (unicode categories "Cc" and "Cf")
 
         If `do_lower_case = true`, lower-casing also removes the accents.
@@ -4303,7 +4303,7 @@ def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex:
         Examples
         --------
         >>> import cudf
-        >>> ser = cudf.Series(["héllo, \\tworld","ĂĆCĖÑTED","$99"])
+        >>> ser = cudf.Series(["héllo, \tworld","ĂĆCĖÑTED","$99"])
         >>> ser.str.normalize_characters()
         0    hello ,  world
         1          accented
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 104ed3eeb67..d0e9e6d94c1 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -3356,7 +3356,7 @@ def to_dlpack(self):
 
     @_cudf_nvtx_annotate
     def to_string(self):
-        """
+        r"""
         Convert to string
 
         cuDF uses Pandas internals for efficient string formatting.
@@ -3373,7 +3373,7 @@ def to_string(self):
         >>> df['key'] = [0, 1, 2]
         >>> df['val'] = [float(i + 10) for i in range(3)]
         >>> df.to_string()
-        '   key   val\\n0    0  10.0\\n1    1  11.0\\n2    2  12.0'
+        '   key   val\n0    0  10.0\n1    1  11.0\n2    2  12.0'
         """
         return repr(self)
 
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 4ff671509a0..d813db58d1e 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -4614,13 +4614,13 @@ def _align_indices(series_list, how="outer", allow_non_unique=False):
 
 @_cudf_nvtx_annotate
 def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
-    """Returns a boolean array where two arrays are equal within a tolerance.
+    r"""Returns a boolean array where two arrays are equal within a tolerance.
 
     Two values in ``a`` and ``b`` are  considered equal when the following
     equation is satisfied.
 
     .. math::
-       |a - b| \\le \\mathrm{atol} + \\mathrm{rtol} |b|
+       |a - b| \le \mathrm{atol} + \mathrm{rtol} |b|
 
     Parameters
     ----------

From 15e49824a8cb2a5a7ec6a6e5f273589a66f1c120 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 29 Apr 2022 10:22:10 -0500
Subject: [PATCH 03/23] Enable pydocstyle for all packages. (#10759)

Follow-up to #10748 to enable the base pydocstyle rules on all Python packages (`dask_cudf`, `cudf_kafka`, `custreamz`) and test files. Contributes to #10711, #10758.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/10759
---
 .pre-commit-config.yaml             | 6 +-----
 python/.flake8                      | 2 +-
 python/custreamz/custreamz/kafka.py | 2 +-
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index cd7b8aea6d7..46d5223f7d3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -65,11 +65,7 @@ repos:
                     conda|
                     docs|
                     java|
-                    notebooks|
-                    python/dask_cudf|
-                    python/cudf_kafka|
-                    python/custreamz|
-                    python/cudf/cudf/tests
+                    notebooks
                     )
       - repo: https://github.com/pre-commit/mirrors-clang-format
         rev: v11.1.0
diff --git a/python/.flake8 b/python/.flake8
index 667875030cc..b763c209fc1 100644
--- a/python/.flake8
+++ b/python/.flake8
@@ -15,7 +15,7 @@ ignore =
 # unlike the match option above this match-dir will have no effect when
 # pydocstyle is invoked from pre-commit. Therefore this exclusion list must
 # also be maintained in the pre-commit config file.
-match-dir = ^(?!(ci|cpp|conda|docs|java|notebooks|dask_cudf|cudf_kafka|custreamz|tests)).*$
+match-dir = ^(?!(ci|cpp|conda|docs|java|notebooks)).*$
 # Allow missing docstrings for docutils
 ignore-decorators = .*(docutils|doc_apply|copy_docstring).*
 select = 
diff --git a/python/custreamz/custreamz/kafka.py b/python/custreamz/custreamz/kafka.py
index f5d5031602f..0198757c68d 100644
--- a/python/custreamz/custreamz/kafka.py
+++ b/python/custreamz/custreamz/kafka.py
@@ -95,7 +95,7 @@ def read_gdf(
         message_format="json",
     ):
 
-        """
+        r"""
         Read messages from the underlying KafkaDatasource connection and create
         a cudf Dataframe
 

From 3c4e72e68d9406d65939b7d2fdf28b0b921840dd Mon Sep 17 00:00:00 2001
From: Devavret Makkar <devavret@users.noreply.github.com>
Date: Fri, 29 Apr 2022 21:24:12 +0530
Subject: [PATCH 04/23] Add row hasher with nested column support (#10641)

Contributes to #10186

Authors:
  - Devavret Makkar (https://github.com/devavret)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/10641
---
 cpp/benchmarks/stream_compaction/distinct.cpp |  41 +++
 cpp/include/cudf/detail/hashing.hpp           |   5 +-
 cpp/include/cudf/detail/iterator.cuh          |   8 +-
 .../cudf/detail/utilities/algorithm.cuh       |  28 ++
 cpp/include/cudf/detail/utilities/column.hpp  |  10 +-
 .../cudf/table/experimental/row_operators.cuh | 273 +++++++++++++++---
 cpp/src/hash/hashing.cu                       |  29 +-
 cpp/src/hash/murmur_hash.cu                   |  28 +-
 cpp/src/stream_compaction/distinct.cu         |  18 +-
 .../stream_compaction_common.cuh              |  22 ++
 cpp/src/table/row_operators.cu                |  60 ++--
 cpp/tests/hashing/hash_test.cpp               | 224 +++++++++++++-
 cpp/tests/reductions/list_rank_test.cpp       |   4 +-
 .../stream_compaction/distinct_tests.cpp      | 242 ++++++++++++++++
 python/cudf/cudf/tests/test_dataframe.py      |   2 +-
 15 files changed, 880 insertions(+), 114 deletions(-)
 create mode 100644 cpp/include/cudf/detail/utilities/algorithm.cuh

diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp
index 749badc715d..149c6ad7219 100644
--- a/cpp/benchmarks/stream_compaction/distinct.cpp
+++ b/cpp/benchmarks/stream_compaction/distinct.cpp
@@ -19,6 +19,7 @@
 
 #include <cudf/column/column_view.hpp>
 #include <cudf/detail/stream_compaction.hpp>
+#include <cudf/lists/list_view.cuh>
 #include <cudf/types.hpp>
 
 #include <nvbench/nvbench.cuh>
@@ -55,3 +56,43 @@ NVBENCH_BENCH_TYPES(nvbench_distinct, NVBENCH_TYPE_AXES(data_type))
   .set_name("distinct")
   .set_type_axes_names({"Type"})
   .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000});
+
+template <typename Type>
+void nvbench_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
+{
+  cudf::rmm_pool_raii pool_raii;
+
+  auto const size             = state.get_int64("ColumnSize");
+  auto const dtype            = cudf::type_to_id<Type>();
+  double const null_frequency = state.get_float64("null_frequency");
+
+  data_profile table_data_profile;
+  if (dtype == cudf::type_id::LIST) {
+    table_data_profile.set_distribution_params(dtype, distribution_id::UNIFORM, 0, 4);
+    table_data_profile.set_distribution_params(
+      cudf::type_id::INT32, distribution_id::UNIFORM, 0, 4);
+    table_data_profile.set_list_depth(1);
+  } else {
+    // We're comparing distinct() on a non-nested column to that on a list column with the same
+    // number of distinct rows. The max list size is 4 and the number of distinct values in the
+    // list's child is 5. So the number of distinct rows in the list = 1 + 5 + 5^2 + 5^3 + 5^4 = 781
+    // We want this column to also have 781 distinct values: [0, 780], inclusive like [0, 4] above.
+    table_data_profile.set_distribution_params(dtype, distribution_id::UNIFORM, 0, 780);
+  }
+  table_data_profile.set_null_frequency(null_frequency);
+
+  auto const table = create_random_table(
+    {dtype}, table_size_bytes{static_cast<size_t>(size)}, table_data_profile, 0);
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    rmm::cuda_stream_view stream_view{launch.get_stream()};
+    auto result = cudf::detail::distinct(*table, {0}, cudf::null_equality::EQUAL, stream_view);
+  });
+}
+
+NVBENCH_BENCH_TYPES(nvbench_distinct_list,
+                    NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, cudf::list_view>))
+  .set_name("distinct_list")
+  .set_type_axes_names({"Type"})
+  .add_float64_axis("null_frequency", {0.0, 0.1})
+  .add_int64_axis("ColumnSize", {100'000'000});
diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp
index e8e100aaec5..9958fa8f3a4 100644
--- a/cpp/include/cudf/detail/hashing.hpp
+++ b/cpp/include/cudf/detail/hashing.hpp
@@ -33,19 +33,20 @@ namespace detail {
 std::unique_ptr<column> hash(
   table_view const& input,
   hash_id hash_function               = hash_id::HASH_MURMUR3,
-  uint32_t seed                       = 0,
+  uint32_t seed                       = cudf::DEFAULT_HASH_SEED,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<column> murmur_hash3_32(
   table_view const& input,
+  uint32_t seed                       = cudf::DEFAULT_HASH_SEED,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 template <template <typename> class hash_function>
 std::unique_ptr<column> serial_murmur_hash3_32(
   table_view const& input,
-  uint32_t seed                       = 0,
+  uint32_t seed                       = cudf::DEFAULT_HASH_SEED,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh
index 7a83298c72a..01ab435bca7 100644
--- a/cpp/include/cudf/detail/iterator.cuh
+++ b/cpp/include/cudf/detail/iterator.cuh
@@ -120,7 +120,7 @@ struct null_replaced_value_accessor {
  * @brief validity accessor of column with null bitmask
  * A unary functor that returns validity at index `i`.
  *
- * @tparam safe If false, the accessor with throw logic_error if the column is not nullable. If
+ * @tparam safe If false, the accessor will throw a logic_error if the column is not nullable. If
  * true, the accessor checks for nullability and if col is not nullable, returns true.
  */
 template <bool safe = false>
@@ -306,12 +306,12 @@ auto make_pair_rep_iterator(column_device_view const& column)
  *
  * Dereferencing the returned iterator for element `i` will return the validity
  * of `column[i]`
- * This iterator is only allowed for nullable columns if `safe` = false
+ * If `safe` = false, the column must be nullable.
  * When safe = true, if the column is not nullable then the validity is always true.
  *
- * @throws cudf::logic_error if the column is not nullable when safe = false
+ * @throws cudf::logic_error if the column is not nullable and safe = false
  *
- * @tparam safe If false, the accessor with throw logic_error if the column is not nullable. If
+ * @tparam safe If false, the accessor will throw a logic_error if the column is not nullable. If
  * true, the accessor checks for nullability and if col is not nullable, returns true.
  * @param column The column to iterate
  * @return auto Iterator that returns validities of column elements.
diff --git a/cpp/include/cudf/detail/utilities/algorithm.cuh b/cpp/include/cudf/detail/utilities/algorithm.cuh
new file mode 100644
index 00000000000..f05a09a8df1
--- /dev/null
+++ b/cpp/include/cudf/detail/utilities/algorithm.cuh
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <utility>
+
+namespace cudf::detail {
+
+/**
+ * @brief Device-side serial left fold over the range `[first, last)`.
+ *
+ * Starting from `init`, repeatedly replaces the running value with `op(running, *it)` for each
+ * iterator `it` in the range, in order, and returns the final value.
+ *
+ * @tparam Iterator Input iterator type
+ * @tparam T Type of the accumulated value
+ * @tparam BinaryOp Callable invocable as `op(T, element)` whose result is assignable to `T`
+ *
+ * @param first Beginning of the range to fold
+ * @param last End of the range to fold
+ * @param init Initial value of the accumulation
+ * @param op Binary operation combining the running value with the next element
+ * @return `init` if the range is empty, otherwise the result of the fold
+ */
+template <typename Iterator, typename T, typename BinaryOp>
+__device__ __forceinline__ T accumulate(Iterator first, Iterator last, T init, BinaryOp op)
+{
+  for (; first != last; ++first) {
+    init = op(std::move(init), *first);
+  }
+  return init;
+}
+}  // namespace cudf::detail
diff --git a/cpp/include/cudf/detail/utilities/column.hpp b/cpp/include/cudf/detail/utilities/column.hpp
index 7d22bbd60af..05b46cc8e13 100644
--- a/cpp/include/cudf/detail/utilities/column.hpp
+++ b/cpp/include/cudf/detail/utilities/column.hpp
@@ -72,13 +72,9 @@ struct linked_column_view : public column_view_base {
  */
 inline LinkedColVector table_to_linked_columns(table_view const& table)
 {
-  LinkedColVector result;
-  result.reserve(table.num_columns());
-  std::transform(table.begin(), table.end(), std::back_inserter(result), [&](column_view const& c) {
-    return std::make_shared<linked_column_view>(c);
-  });
-
-  return result;
+  auto linked_it = thrust::make_transform_iterator(
+    table.begin(), [](auto const& c) { return std::make_shared<linked_column_view>(c); });
+  return LinkedColVector(linked_it, linked_it + table.num_columns());
 }
 
 }  // namespace cudf::detail
\ No newline at end of file
diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh
index 32b71e660ac..2ed45c71633 100644
--- a/cpp/include/cudf/table/experimental/row_operators.cuh
+++ b/cpp/include/cudf/table/experimental/row_operators.cuh
@@ -17,7 +17,9 @@
 #pragma once
 
 #include <cudf/column/column_device_view.cuh>
+#include <cudf/detail/hashing.hpp>
 #include <cudf/detail/iterator.cuh>
+#include <cudf/detail/utilities/algorithm.cuh>
 #include <cudf/detail/utilities/assert.cuh>
 #include <cudf/detail/utilities/hash_functions.cuh>
 #include <cudf/lists/list_device_view.cuh>
@@ -82,7 +84,7 @@ namespace lexicographic {
  * second letter in both words is the first non-equal letter, and `a < b`, thus
  * `aac < abb`.
  *
- * @tparam Nullate A cudf::nullate type describing how to check for nulls.
+ * @tparam Nullate A cudf::nullate type describing whether to check for nulls.
  */
 template <typename Nullate>
 class device_row_comparator {
@@ -92,7 +94,7 @@ class device_row_comparator {
    * @brief Construct a function object for performing a lexicographic
    * comparison between the rows of two tables.
    *
-   * @param has_nulls Indicates if either input table contains columns with nulls.
+   * @param check_nulls Indicates if either input table contains columns with nulls.
    * @param lhs The first table
    * @param rhs The second table (may be the same table as `lhs`)
    * @param depth Optional, device array the same length as a row that contains starting depths of
@@ -105,7 +107,7 @@ class device_row_comparator {
    * `null_order::BEFORE` for all columns.
    */
   device_row_comparator(
-    Nullate has_nulls,
+    Nullate check_nulls,
     table_device_view lhs,
     table_device_view rhs,
     std::optional<device_span<int const>> depth                  = std::nullopt,
@@ -113,7 +115,7 @@ class device_row_comparator {
     std::optional<device_span<null_order const>> null_precedence = std::nullopt) noexcept
     : _lhs{lhs},
       _rhs{rhs},
-      _nulls{has_nulls},
+      _check_nulls{check_nulls},
       _depth{depth},
       _column_order{column_order},
       _null_precedence{null_precedence}
@@ -131,19 +133,19 @@ class device_row_comparator {
      *
      * @note `lhs` and `rhs` may be the same.
      *
-     * @param has_nulls Indicates if either input column contains nulls.
+     * @param check_nulls Indicates if either input column contains nulls.
      * @param lhs The column containing the first element
      * @param rhs The column containing the second element (may be the same as lhs)
      * @param null_precedence Indicates how null values are ordered with other values
      * @param depth The depth of the column if part of a nested column @see
      * preprocessed_table::depths
      */
-    __device__ element_comparator(Nullate has_nulls,
+    __device__ element_comparator(Nullate check_nulls,
                                   column_device_view lhs,
                                   column_device_view rhs,
                                   null_order null_precedence = null_order::BEFORE,
                                   int depth                  = 0)
-      : _lhs{lhs}, _rhs{rhs}, _nulls{has_nulls}, _null_precedence{null_precedence}, _depth{depth}
+      : _lhs{lhs}, _rhs{rhs}, _nulls{check_nulls}, _null_precedence{null_precedence}, _depth{depth}
     {
     }
 
@@ -204,8 +206,8 @@ class device_row_comparator {
         }
 
         // Non-empty structs have been modified to only have 1 child when using this.
-        lcol = lcol.children()[0];
-        rcol = rcol.children()[0];
+        lcol = detail::structs_column_device_view(lcol).sliced_child(0);
+        rcol = detail::structs_column_device_view(rcol).sliced_child(0);
         ++depth;
       }
 
@@ -245,7 +247,7 @@ class device_row_comparator {
         _null_precedence.has_value() ? (*_null_precedence)[i] : null_order::BEFORE;
 
       auto const comparator =
-        element_comparator{_nulls, _lhs.column(i), _rhs.column(i), null_precedence, depth};
+        element_comparator{_check_nulls, _lhs.column(i), _rhs.column(i), null_precedence, depth};
 
       weak_ordering state;
       cuda::std::tie(state, last_null_depth) =
@@ -261,7 +263,7 @@ class device_row_comparator {
  private:
   table_device_view const _lhs;
   table_device_view const _rhs;
-  Nullate const _nulls{};
+  Nullate const _check_nulls{};
   std::optional<device_span<int const>> const _depth;
   std::optional<device_span<order const>> const _column_order;
   std::optional<device_span<null_order const>> const _null_precedence;
@@ -408,11 +410,11 @@ class self_comparator {
   /**
    * @brief Return the binary operator for comparing rows in the table.
    *
-   * Returns a binary callable, `F`, with signature `bool F(size_t, size_t)`.
+   * Returns a binary callable, `F`, with signature `bool F(size_type, size_type)`.
    *
    * `F(i,j)` returns true if and only if row `i` compares lexicographically less than row `j`.
    *
-   * @tparam Nullate Optional, A cudf::nullate type describing how to check for nulls.
+   * @tparam Nullate A cudf::nullate type describing whether to check for nulls.
    */
   template <typename Nullate>
   device_row_comparator<Nullate> device_comparator(Nullate nullate = {}) const
@@ -427,6 +429,10 @@ class self_comparator {
 
 }  // namespace lexicographic
 
+namespace hash {
+class row_hasher;
+}
+
 namespace equality {
 
 template <typename Nullate>
@@ -438,7 +444,7 @@ class device_row_comparator {
    * @brief Checks whether the row at `lhs_index` in the `lhs` table is equal to the row at
    * `rhs_index` in the `rhs` table.
    *
-   * @param lhs_index The index of row in the `lhs` table to examine
+   * @param lhs_index The index of the row in the `lhs` table to examine
    * @param rhs_index The index of the row in the `rhs` table to examine
    * @return `true` if row from the `lhs` table is equal to the row in the `rhs` table
    */
@@ -446,7 +452,7 @@ class device_row_comparator {
   {
     auto equal_elements = [=](column_device_view l, column_device_view r) {
       return cudf::type_dispatcher(
-        l.type(), element_comparator{nulls, l, r, nulls_are_equal}, lhs_index, rhs_index);
+        l.type(), element_comparator{check_nulls, l, r, nulls_are_equal}, lhs_index, rhs_index);
     };
 
     return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
@@ -457,23 +463,21 @@ class device_row_comparator {
    * @brief Construct a function object for performing equality comparison between the rows of two
    * tables.
    *
-   * @param has_nulls Indicates if either input table contains columns with nulls.
+   * @param check_nulls Indicates if either input table contains columns with nulls.
    * @param lhs The first table
    * @param rhs The second table (may be the same table as `lhs`)
    * @param nulls_are_equal Indicates if two null elements are treated as equivalent
    */
-  device_row_comparator(Nullate has_nulls,
+  device_row_comparator(Nullate check_nulls,
                         table_device_view lhs,
                         table_device_view rhs,
                         null_equality nulls_are_equal = null_equality::EQUAL) noexcept
-    : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
+    : lhs{lhs}, rhs{rhs}, check_nulls{check_nulls}, nulls_are_equal{nulls_are_equal}
   {
   }
 
   /**
    * @brief Performs an equality comparison between two elements in two columns.
-   *
-   * @tparam Nullate A cudf::nullate type describing how to check for nulls.
    */
   class element_comparator {
    public:
@@ -483,16 +487,16 @@ class device_row_comparator {
      *
      * @note `lhs` and `rhs` may be the same.
      *
-     * @param has_nulls Indicates if either input column contains nulls.
+     * @param check_nulls Indicates if either input column contains nulls.
      * @param lhs The column containing the first element
      * @param rhs The column containing the second element (may be the same as lhs)
      * @param nulls_are_equal Indicates if two null elements are treated as equivalent
      */
-    __device__ element_comparator(Nullate has_nulls,
+    __device__ element_comparator(Nullate check_nulls,
                                   column_device_view lhs,
                                   column_device_view rhs,
                                   null_equality nulls_are_equal = null_equality::EQUAL) noexcept
-      : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
+      : lhs{lhs}, rhs{rhs}, check_nulls{check_nulls}, nulls_are_equal{nulls_are_equal}
     {
     }
 
@@ -502,13 +506,13 @@ class device_row_comparator {
      * @param lhs_element_index The index of the first element
      * @param rhs_element_index The index of the second element
      * @return True if lhs and rhs are equal or if both lhs and rhs are null and nulls are
-     * configured to be considered equal (`nulls_are_equal` == `null_equality::EQUAL`)
+     * considered equal (`nulls_are_equal` == `null_equality::EQUAL`)
      */
     template <typename Element, CUDF_ENABLE_IF(cudf::is_equality_comparable<Element, Element>())>
     __device__ bool operator()(size_type const lhs_element_index,
                                size_type const rhs_element_index) const noexcept
     {
-      if (nulls) {
+      if (check_nulls) {
         bool const lhs_is_null{lhs.is_null(lhs_element_index)};
         bool const rhs_is_null{rhs.is_null(rhs_element_index)};
         if (lhs_is_null and rhs_is_null) {
@@ -538,7 +542,7 @@ class device_row_comparator {
       column_device_view lcol = lhs.slice(lhs_element_index, 1);
       column_device_view rcol = rhs.slice(rhs_element_index, 1);
       while (is_nested(lcol.type())) {
-        if (nulls) {
+        if (check_nulls) {
           auto lvalid = detail::make_validity_iterator<true>(lcol);
           auto rvalid = detail::make_validity_iterator<true>(rcol);
           if (nulls_are_equal == null_equality::UNEQUAL) {
@@ -556,6 +560,7 @@ class device_row_comparator {
         }
         if (lcol.type().id() == type_id::STRUCT) {
           if (lcol.num_child_columns() == 0) { return true; }
+          // Non-empty structs are assumed to be decomposed and contain only one child
           lcol = detail::structs_column_device_view(lcol).sliced_child(0);
           rcol = detail::structs_column_device_view(rcol).sliced_child(0);
         } else if (lcol.type().id() == type_id::LIST) {
@@ -574,8 +579,8 @@ class device_row_comparator {
         }
       }
 
-      auto comp =
-        column_comparator{element_comparator{nulls, lcol, rcol, nulls_are_equal}, lcol.size()};
+      auto comp = column_comparator{element_comparator{check_nulls, lcol, rcol, nulls_are_equal},
+                                    lcol.size()};
       return type_dispatcher<dispatch_void_if_nested>(lcol.type(), comp);
     }
 
@@ -583,7 +588,7 @@ class device_row_comparator {
     /**
      * @brief Serially compare two columns for equality.
      *
-     * When we want to get the equivalence of two columns by serially comparing all elements in a
+     * When we want to get the equivalence of two columns by serially comparing all elements in
      * one column with the corresponding elements in the other column, this saves us from type
      * dispatching for each individual element in the range
      */
@@ -616,13 +621,13 @@ class device_row_comparator {
 
     column_device_view const lhs;
     column_device_view const rhs;
-    Nullate const nulls;
+    Nullate const check_nulls;
     null_equality const nulls_are_equal;
   };
 
   table_device_view const lhs;
   table_device_view const rhs;
-  Nullate const nulls;
+  Nullate const check_nulls;
   null_equality const nulls_are_equal;
 };
 
@@ -642,6 +647,7 @@ struct preprocessed_table {
 
  private:
   friend class self_comparator;
+  friend class hash::row_hasher;
 
   using table_device_view_owner =
     std::invoke_result_t<decltype(table_device_view::create), table_view, rmm::cuda_stream_view>;
@@ -692,16 +698,17 @@ class self_comparator {
   /**
    * @brief Get the comparison operator to use on the device
    *
-   * Returns a binary callable, `F`, with signature `bool F(size_t, size_t)`.
+   * Returns a binary callable, `F`, with signature `bool F(size_type, size_type)`.
    *
    * `F(i,j)` returns true if and only if row `i` compares equal to row `j`.
    *
-   * @tparam Nullate Optional, A cudf::nullate type describing how to check for nulls.
+   * @tparam Nullate A cudf::nullate type describing whether to check for nulls.
    */
   template <typename Nullate>
-  device_row_comparator<Nullate> device_comparator(Nullate nullate = {}) const
+  device_row_comparator<Nullate> device_comparator(
+    Nullate nullate = {}, null_equality nulls_are_equal = null_equality::EQUAL) const
   {
-    return device_row_comparator(nullate, *d_t, *d_t);
+    return device_row_comparator(nullate, *d_t, *d_t, nulls_are_equal);
   }
 
  private:
@@ -710,6 +717,202 @@ class self_comparator {
 
 }  // namespace equality
 
+namespace hash {
+
+/**
+ * @brief Computes the hash value of an element in the given column; when null checking is
+ * enabled, a null element yields the fixed `null_hash` value instead of being read.
+ * @tparam hash_function Hash functor to use for hashing elements.
+ * @tparam Nullate A cudf::nullate type describing whether to check for nulls.
+ */
+template <template <typename> class hash_function, typename Nullate>
+class element_hasher {
+ public:
+  __device__ element_hasher(
+    Nullate nulls,
+    uint32_t seed             = DEFAULT_HASH_SEED,
+    hash_value_type null_hash = std::numeric_limits<hash_value_type>::max()) noexcept
+    : _check_nulls(nulls), _seed(seed), _null_hash(null_hash)
+  {
+  }
+
+  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
+  __device__ hash_value_type operator()(column_device_view const& col,
+                                        size_type row_index) const noexcept
+  {
+    if (_check_nulls && col.is_null(row_index)) { return _null_hash; }
+    return hash_function<T>{_seed}(col.element<T>(row_index));
+  }
+
+  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
+  __device__ hash_value_type operator()(column_device_view const& col,
+                                        size_type row_index) const noexcept
+  {
+    CUDF_UNREACHABLE("Unsupported type in hash.");
+  }
+
+  Nullate _check_nulls;        ///< Whether to check for nulls (declared in initialization order)
+  uint32_t _seed;              ///< Seed passed to the hash functor
+  hash_value_type _null_hash;  ///< Hash value returned for null elements
+};
+
+/**
+ * @brief Computes the hash value of a row in the given table.
+ *
+ * @tparam hash_function Hash functor to use for hashing elements.
+ * @tparam Nullate A cudf::nullate type describing whether to check for nulls.
+ */
+template <template <typename> class hash_function, typename Nullate>
+class device_row_hasher {
+  friend class row_hasher;
+
+ public:
+  device_row_hasher() = delete;
+
+  __device__ auto operator()(size_type row_index) const noexcept
+  {
+    auto it = thrust::make_transform_iterator(_table.begin(), [=](auto const& column) {
+      return cudf::type_dispatcher<dispatch_storage_type>(
+        column.type(), element_hasher_adapter<hash_function>{_check_nulls}, column, row_index);
+    });
+
+    // Hash each element and fold the per-column hashes together, starting from the seed
+    return detail::accumulate(it, it + _table.num_columns(), _seed, [](auto hash, auto h) {
+      return cudf::detail::hash_combine(hash, h);
+    });
+  }
+
+ private:
+  /**
+   * @brief Computes the hash value of an element in the given column.
+   *
+   * When the column is non-nested, this is a simple wrapper around the element_hasher.
+   * When the column is nested, this uses the element_hasher to hash the shape and values of the
+   * column.
+   */
+  template <template <typename> class hash_fn>
+  class element_hasher_adapter {
+    static constexpr hash_value_type NULL_HASH     = std::numeric_limits<hash_value_type>::max();
+    static constexpr hash_value_type NON_NULL_HASH = 0;
+
+   public:
+    __device__ element_hasher_adapter(Nullate check_nulls) noexcept
+      : _element_hasher(check_nulls), _check_nulls(check_nulls)
+    {
+    }
+
+    template <typename T, CUDF_ENABLE_IF(not cudf::is_nested<T>())>
+    __device__ hash_value_type operator()(column_device_view const& col,
+                                          size_type row_index) const noexcept
+    {
+      return _element_hasher.template operator()<T>(col, row_index);
+    }
+
+    template <typename T, CUDF_ENABLE_IF(cudf::is_nested<T>())>
+    __device__ hash_value_type operator()(column_device_view const& col,
+                                          size_type row_index) const noexcept
+    {
+      auto hash                   = hash_value_type{0};
+      column_device_view curr_col = col.slice(row_index, 1);
+      while (is_nested(curr_col.type())) {
+        if (_check_nulls) {
+          auto validity_it = detail::make_validity_iterator<true>(curr_col);
+          hash             = detail::accumulate(
+            validity_it, validity_it + curr_col.size(), hash, [](auto hash, auto is_valid) {
+              return cudf::detail::hash_combine(hash, is_valid ? NON_NULL_HASH : NULL_HASH);
+            });
+        }
+        if (curr_col.type().id() == type_id::STRUCT) {
+          if (curr_col.num_child_columns() == 0) { return hash; }
+          // Non-empty structs are assumed to be decomposed and contain only one child
+          curr_col = detail::structs_column_device_view(curr_col).sliced_child(0);
+        } else if (curr_col.type().id() == type_id::LIST) {
+          auto list_col   = detail::lists_column_device_view(curr_col);
+          auto list_sizes = make_list_size_iterator(list_col);
+          hash            = detail::accumulate(
+            list_sizes, list_sizes + list_col.size(), hash, [](auto hash, auto size) {
+              return cudf::detail::hash_combine(hash, hash_fn<size_type>{}(size));
+            });
+          curr_col = list_col.sliced_child();
+        }
+      }
+      for (size_type i = 0; i < curr_col.size(); ++i) {
+        hash = cudf::detail::hash_combine(
+          hash,
+          type_dispatcher<dispatch_void_if_nested>(curr_col.type(), _element_hasher, curr_col, i));
+      }
+      return hash;
+    }
+
+    element_hasher<hash_fn, Nullate> const _element_hasher;
+    Nullate const _check_nulls;
+  };
+
+  CUDF_HOST_DEVICE device_row_hasher(Nullate check_nulls,
+                                     table_device_view t,
+                                     uint32_t seed = DEFAULT_HASH_SEED) noexcept
+    : _table{t}, _check_nulls{check_nulls}, _seed(seed)
+  {
+  }
+
+  table_device_view const _table;  ///< Table whose rows are hashed
+  Nullate const _check_nulls;      ///< Whether to check for nulls
+  uint32_t const _seed;            ///< Initial value when combining the per-column hashes
+};
+
+// Inject row::equality::preprocessed_table into the row::hash namespace
+// As a result, row::equality::preprocessed_table and row::hash::preprocessed_table are the same
+// type and are interchangeable.
+using preprocessed_table = row::equality::preprocessed_table;
+
+class row_hasher {
+ public:
+  /**
+   * @brief Construct an owning object for hashing the rows of a table
+   *
+   * @param t The table containing rows to hash
+   * @param stream The stream to construct this object on. Not the stream that will be used for
+   * hashing using this object.
+   */
+  row_hasher(table_view const& t, rmm::cuda_stream_view stream)
+    : d_t(preprocessed_table::create(t, stream))
+  {
+  }
+
+  /**
+   * @brief Construct an owning object for hashing the rows of a table from an existing
+   * preprocessed_table
+   *
+   * This constructor allows independently constructing a `preprocessed_table` and sharing it among
+   * multiple `row_hasher` and `equality::self_comparator` objects.
+   *
+   * @param t A table preprocessed for hashing or equality.
+   */
+  row_hasher(std::shared_ptr<preprocessed_table> t) : d_t{std::move(t)} {}
+
+  /**
+   * @brief Get the hash operator to use on the device
+   *
+   * Returns a unary callable, `F`, with signature `hash_function::hash_value_type F(size_type)`,
+   * where `F(i)` returns the hash of row `i`.
+   *
+   * @tparam hash_function Hash functor to use for hashing elements.
+   * @tparam Nullate A cudf::nullate type describing whether to check for nulls.
+   */
+  template <template <typename> class hash_function = detail::default_hash, typename Nullate>
+  device_row_hasher<hash_function, Nullate> device_hasher(Nullate nullate = {},
+                                                          uint32_t seed   = DEFAULT_HASH_SEED) const
+  {
+    return device_row_hasher<hash_function, Nullate>(nullate, *d_t, seed);
+  }
+
+ private:
+  std::shared_ptr<preprocessed_table> d_t;  ///< Preprocessed table; may be shared with comparators
+};
+
+}  // namespace hash
+
 }  // namespace row
+
 }  // namespace experimental
 }  // namespace cudf
diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu
index 33984ad5ce3..dc47dc39cfe 100644
--- a/cpp/src/hash/hashing.cu
+++ b/cpp/src/hash/hashing.cu
@@ -16,8 +16,9 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/hashing.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/algorithm.cuh>
 #include <cudf/detail/utilities/hash_functions.cuh>
-#include <cudf/table/row_operators.cuh>
+#include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
 
 #include <rmm/cuda_stream_view.hpp>
@@ -70,18 +71,18 @@ std::unique_ptr<column> serial_murmur_hash3_32(table_view const& input,
     output_view.begin<int32_t>(),
     output_view.end<int32_t>(),
     [device_input = *device_input, nulls = has_nulls(leaf_table), seed] __device__(auto row_index) {
-      return thrust::reduce(thrust::seq,
-                            device_input.begin(),
-                            device_input.end(),
-                            seed,
-                            [rindex = row_index, nulls] __device__(auto hash, auto column) {
-                              return cudf::type_dispatcher(
-                                column.type(),
-                                element_hasher_with_seed<hash_function, nullate::DYNAMIC>{
-                                  nullate::DYNAMIC{nulls}, hash, hash},
-                                column,
-                                rindex);
-                            });
+      return detail::accumulate(
+        device_input.begin(),
+        device_input.end(),
+        seed,
+        [row_index, nulls] __device__(auto hash, auto column) {
+          return cudf::type_dispatcher(
+            column.type(),
+            experimental::row::hash::element_hasher<hash_function, nullate::DYNAMIC>{
+              nullate::DYNAMIC{nulls}, hash, hash},
+            column,
+            row_index);
+        });
     });
 
   return output;
@@ -94,7 +95,7 @@ std::unique_ptr<column> hash(table_view const& input,
                              rmm::mr::device_memory_resource* mr)
 {
   switch (hash_function) {
-    case (hash_id::HASH_MURMUR3): return murmur_hash3_32(input, stream, mr);
+    case (hash_id::HASH_MURMUR3): return murmur_hash3_32(input, seed, stream, mr);
     case (hash_id::HASH_SERIAL_MURMUR3):
       return serial_murmur_hash3_32<MurmurHash3_32>(input, seed, stream, mr);
     case (hash_id::HASH_SPARK_MURMUR3):
diff --git a/cpp/src/hash/murmur_hash.cu b/cpp/src/hash/murmur_hash.cu
index bc8d3577513..1b75c818f36 100644
--- a/cpp/src/hash/murmur_hash.cu
+++ b/cpp/src/hash/murmur_hash.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 #include <cudf/detail/hashing.hpp>
 #include <cudf/detail/utilities/hash_functions.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/table/row_operators.cuh>
+#include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
 
 #include <rmm/cuda_stream_view.hpp>
@@ -29,26 +29,28 @@ namespace cudf {
 namespace detail {
 
 std::unique_ptr<column> murmur_hash3_32(table_view const& input,
+                                        uint32_t seed,
                                         rmm::cuda_stream_view stream,
                                         rmm::mr::device_memory_resource* mr)
 {
-  // TODO this should be UINT32
-  auto output = make_numeric_column(
-    data_type(type_id::INT32), input.num_rows(), mask_state::UNALLOCATED, stream, mr);
+  auto output = make_numeric_column(data_type(type_to_id<hash_value_type>()),
+                                    input.num_rows(),
+                                    mask_state::UNALLOCATED,
+                                    stream,
+                                    mr);
 
   // Return early if there's nothing to hash
   if (input.num_columns() == 0 || input.num_rows() == 0) { return output; }
 
-  bool const nullable     = has_nulls(input);
-  auto const device_input = table_device_view::create(input, stream);
-  auto output_view        = output->mutable_view();
+  bool const nullable   = has_nulls(input);
+  auto const row_hasher = cudf::experimental::row::hash::row_hasher(input, stream);
+  auto output_view      = output->mutable_view();
 
   // Compute the hash value for each row
-  thrust::tabulate(
-    rmm::exec_policy(stream),
-    output_view.begin<int32_t>(),
-    output_view.end<int32_t>(),
-    row_hasher<MurmurHash3_32, nullate::DYNAMIC>(nullate::DYNAMIC{nullable}, *device_input));
+  thrust::tabulate(rmm::exec_policy(stream),
+                   output_view.begin<hash_value_type>(),
+                   output_view.end<hash_value_type>(),
+                   row_hasher.device_hasher<MurmurHash3_32>(nullable, seed));
 
   return output;
 }
diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu
index d74946406d8..35c74178620 100644
--- a/cpp/src/stream_compaction/distinct.cu
+++ b/cpp/src/stream_compaction/distinct.cu
@@ -27,7 +27,7 @@
 #include <cudf/detail/sorting.hpp>
 #include <cudf/detail/stream_compaction.hpp>
 #include <cudf/stream_compaction.hpp>
-#include <cudf/table/row_operators.cuh>
+#include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
@@ -57,9 +57,10 @@ std::unique_ptr<table> distinct(table_view const& input,
   }
 
   auto keys_view = input.select(keys);
-  auto table_ptr = cudf::table_device_view::create(keys_view, stream);
-  auto has_null  = nullate::DYNAMIC{cudf::has_nulls(keys_view)};
-  auto const num_rows{table_ptr->num_rows()};
+  auto preprocessed_keys =
+    cudf::experimental::row::hash::preprocessed_table::create(keys_view, stream);
+  auto has_null = nullate::DYNAMIC{cudf::has_nulls(keys_view)};
+  auto const num_rows{keys_view.num_rows()};
 
   hash_map_type key_map{compute_hash_table_size(num_rows),
                         COMPACTION_EMPTY_KEY_SENTINEL,
@@ -67,13 +68,16 @@ std::unique_ptr<table> distinct(table_view const& input,
                         detail::hash_table_allocator_type{default_allocator<char>{}, stream},
                         stream.value()};
 
-  compaction_hash hash_key{has_null, *table_ptr};
-  row_equality_comparator row_equal(has_null, *table_ptr, *table_ptr, nulls_equal);
+  auto row_hash = cudf::experimental::row::hash::row_hasher(preprocessed_keys);
+  experimental::compaction_hash hash_key(row_hash.device_hasher(has_null));
+
+  cudf::experimental::row::equality::self_comparator row_equal(preprocessed_keys);
+  auto key_equal = row_equal.device_comparator(has_null, nulls_equal);
 
   auto iter = cudf::detail::make_counting_transform_iterator(
     0, [] __device__(size_type i) { return cuco::make_pair(i, i); });
   // insert distinct indices into the map.
-  key_map.insert(iter, iter + num_rows, hash_key, row_equal, stream.value());
+  key_map.insert(iter, iter + num_rows, hash_key, key_equal, stream.value());
 
   auto counting_iter = thrust::make_counting_iterator<size_type>(0);
   rmm::device_uvector<bool> index_exists_in_map(num_rows, stream, mr);
diff --git a/cpp/src/stream_compaction/stream_compaction_common.cuh b/cpp/src/stream_compaction/stream_compaction_common.cuh
index f49e17112c1..0970a99edad 100644
--- a/cpp/src/stream_compaction/stream_compaction_common.cuh
+++ b/cpp/src/stream_compaction/stream_compaction_common.cuh
@@ -47,6 +47,28 @@ class compaction_hash {
   row_hash _hash;
 };
 
+namespace experimental {
+
+/**
+ * @brief Device callable to hash a given row.
+ */
+template <typename RowHash>
+class compaction_hash {
+ public:
+  compaction_hash(RowHash row_hasher) : _hash{row_hasher} {}
+
+  __device__ inline auto operator()(size_type i) const noexcept
+  {
+    auto hash = _hash(i);
+    return (hash == COMPACTION_EMPTY_KEY_SENTINEL) ? (hash - 1) : hash;
+  }
+
+ private:
+  RowHash _hash;
+};
+
+}  // namespace experimental
+
 /**
  * @brief Device functor to determine if a row is valid.
  */
diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu
index 408d4e51425..3c51ae22418 100644
--- a/cpp/src/table/row_operators.cu
+++ b/cpp/src/table/row_operators.cu
@@ -31,43 +31,38 @@ namespace experimental {
 namespace {
 
 /**
- * @brief Applies the offsets of struct column onto its children
+ * @brief Removes the offsets of struct column's children
  *
- * @param c The column whose children are to be sliced
- * @return Children of `c` with offsets applied
+ * @param c The column whose children are to be un-sliced
+ * @return Children of `c` with offsets removed
  */
-std::vector<column_view> slice_children(column_view const& c)
+std::vector<column_view> unslice_children(column_view const& c)
 {
   if (c.type().id() == type_id::STRUCT) {
-    std::vector<column_view> sliced_children;
-    sliced_children.reserve(c.num_children());
-    auto struct_col = structs_column_view(c);
-    for (size_type i = 0; i < struct_col.num_children(); ++i) {
-      auto sliced = struct_col.get_sliced_child(i);
-      // We cannot directly use the output of `structs_column_view::get_sliced_child` because we
-      // must first traverse its children recursively to push offsets all the way down to the leaf
-      // children.
-      sliced_children.emplace_back(sliced.type(),
-                                   sliced.size(),
-                                   sliced.head<uint8_t>(),
-                                   sliced.null_mask(),
-                                   sliced.null_count(),
-                                   sliced.offset(),
-                                   slice_children(sliced));
-    }
-    return sliced_children;
+    auto child_it = thrust::make_transform_iterator(c.child_begin(), [](auto const& child) {
+      return column_view(
+        child.type(),
+        child.offset() + child.size(),  // This is hacky, we don't know the actual unsliced size but
+                                        // it is at least offset + size
+        child.head(),
+        child.null_mask(),
+        child.null_count(),
+        0,
+        unslice_children(child));
+    });
+    return {child_it, child_it + c.num_children()};
   }
   return {c.child_begin(), c.child_end()};
 };
 
 /**
- * @brief Applies the offsets of struct columns in a table onto their children.
+ * @brief Removes the child column offsets of struct columns in a table.
  *
  * Given a table, this replaces any struct columns with similar struct columns that have their
- * offsets applied to their children. Structs that are children of list columns are not affected.
+ * offsets removed from their children. Structs that are children of list columns are not affected.
  *
  */
-table_view pushdown_struct_offsets(table_view table)
+table_view remove_struct_child_offsets(table_view table)
 {
   std::vector<column_view> cols;
   cols.reserve(table.num_columns());
@@ -78,7 +73,7 @@ table_view pushdown_struct_offsets(table_view table)
                        c.null_mask(),
                        c.null_count(),
                        c.offset(),
-                       slice_children(c));
+                       unslice_children(c));
   });
   return table_view(cols);
 }
@@ -159,8 +154,7 @@ auto decompose_structs(table_view table,
                        host_span<order const> column_order         = {},
                        host_span<null_order const> null_precedence = {})
 {
-  auto sliced         = pushdown_struct_offsets(table);
-  auto linked_columns = detail::table_to_linked_columns(sliced);
+  auto linked_columns = detail::table_to_linked_columns(table);
 
   std::vector<column_view> verticalized_columns;
   std::vector<order> new_column_order;
@@ -225,6 +219,15 @@ auto decompose_structs(table_view table,
               UNKNOWN_NULL_COUNT,
               parent->offset(),
               {*parent->children[lists_column_view::offsets_column_index], temp_col});
+          } else if (parent->type().id() == type_id::STRUCT) {
+            // Replace offset with parent's offset
+            temp_col = column_view(temp_col.type(),
+                                   parent->size(),
+                                   temp_col.head(),
+                                   temp_col.null_mask(),
+                                   UNKNOWN_NULL_COUNT,
+                                   parent->offset(),
+                                   {temp_col.child_begin(), temp_col.child_end()});
           }
         }
         verticalized_columns.push_back(temp_col);
@@ -334,7 +337,8 @@ std::shared_ptr<preprocessed_table> preprocessed_table::create(table_view const&
   check_eq_compatibility(t);
 
   auto null_pushed_table              = structs::detail::superimpose_parent_nulls(t, stream);
-  auto [verticalized_lhs, _, __, ___] = decompose_structs(std::get<0>(null_pushed_table));
+  auto struct_offset_removed_table    = remove_struct_child_offsets(std::get<0>(null_pushed_table));
+  auto [verticalized_lhs, _, __, ___] = decompose_structs(struct_offset_removed_table);
 
   auto d_t = table_device_view_owner(table_device_view::create(verticalized_lhs, stream));
   return std::shared_ptr<preprocessed_table>(
diff --git a/cpp/tests/hashing/hash_test.cpp b/cpp/tests/hashing/hash_test.cpp
index da933b44b8d..5ba010255ca 100644
--- a/cpp/tests/hashing/hash_test.cpp
+++ b/cpp/tests/hashing/hash_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -129,6 +129,228 @@ TEST_F(HashTest, MultiValueNulls)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(spark_output1->view(), spark_output2->view());
 }
 
+TEST_F(HashTest, BasicList)
+{
+  using LCW = cudf::test::lists_column_wrapper<uint64_t>;
+  using ICW = cudf::test::fixed_width_column_wrapper<uint32_t>;
+
+  auto const col = LCW{{}, {}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}};
+  auto const input  = cudf::table_view({col});
+  auto const expect = ICW{1607593296,
+                          1607593296,
+                          -636010097,
+                          -132459357,
+                          -636010097,
+                          -2008850957,
+                          -1023787369,
+                          761197503,
+                          761197503,
+                          1340177511,
+                          -1023787369,
+                          -1023787369};
+
+  auto const output = cudf::hash(input);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
+
+  auto const expect_seeded = ICW{1607594268u,
+                                 1607594268u,
+                                 3658958173u,
+                                 4162508905u,
+                                 3658958173u,
+                                 2286117305u,
+                                 3271180885u,
+                                 761198477u,
+                                 761198477u,
+                                 1340178469u,
+                                 3271180885u,
+                                 3271180885u};
+
+  auto const seeded_output = cudf::hash(input, cudf::hash_id::HASH_MURMUR3, 15);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect_seeded, seeded_output->view(), verbosity);
+}
+
+TEST_F(HashTest, NullableList)
+{
+  using LCW = cudf::test::lists_column_wrapper<uint64_t>;
+  using ICW = cudf::test::fixed_width_column_wrapper<uint32_t>;
+
+  auto const valids = std::vector<bool>{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0};
+  auto const col =
+    LCW{{{}, {}, {1}, {1}, {2, 2}, {2}, {2}, {}, {2, 2}, {2, 2}, {}}, valids.begin()};
+  auto expect = ICW{-2023148619,
+                    -2023148619,
+                    -31671896,
+                    -31671896,
+                    -1205248335,
+                    1865773848,
+                    1865773848,
+                    -2023148682,
+                    -1205248335,
+                    -1205248335,
+                    -2023148682};
+
+  auto const output = cudf::hash(cudf::table_view({col}));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
+
+  auto const expect_seeded = ICW{2271820643u,
+                                 2271820643u,
+                                 4263297392u,
+                                 4263297392u,
+                                 3089720935u,
+                                 1865775808u,
+                                 1865775808u,
+                                 2271820578u,
+                                 3089720935u,
+                                 3089720935u,
+                                 2271820578u};
+
+  auto const seeded_output = cudf::hash(cudf::table_view({col}), cudf::hash_id::HASH_MURMUR3, 31);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect_seeded, seeded_output->view(), verbosity);
+}
+
+TEST_F(HashTest, ListOfStruct)
+{
+  auto col1 = cudf::test::fixed_width_column_wrapper<int32_t>{
+    {-1, -1, 0, 2, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, 0, 1, 2},
+    {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0}};
+  auto col2 = cudf::test::strings_column_wrapper{
+    {"x", "x", "a", "a", "b", "b", "a", "b", "a", "b", "a", "c", "a", "c", "a", "c", "b", "b"},
+    {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1}};
+  auto struct_col = cudf::test::structs_column_wrapper{
+    {col1, col2}, {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}};
+
+  auto offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>{
+    0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18};
+
+  auto list_nullmask = std::vector<bool>{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+  auto nullmask_buf =
+    cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end());
+  auto list_column = cudf::make_lists_column(
+    17, offsets.release(), struct_col.release(), cudf::UNKNOWN_NULL_COUNT, std::move(nullmask_buf));
+
+  auto expect = cudf::test::fixed_width_column_wrapper<uint32_t>{83451479,
+                                                                 83451479,
+                                                                 83455332,
+                                                                 83455332,
+                                                                 -759684425,
+                                                                 -959632766,
+                                                                 -959632766,
+                                                                 -959632766,
+                                                                 -959636527,
+                                                                 -656998704,
+                                                                 613652814,
+                                                                 1902080426,
+                                                                 1902080426,
+                                                                 2061025592,
+                                                                 2061025592,
+                                                                 -319840811,
+                                                                 -319840811};
+
+  auto const output = cudf::hash(cudf::table_view({*list_column}));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
+
+  auto expect_seeded = cudf::test::fixed_width_column_wrapper<uint32_t>{81710442u,
+                                                                        81710442u,
+                                                                        81729816u,
+                                                                        81729816u,
+                                                                        3532787573u,
+                                                                        3642097855u,
+                                                                        3642097855u,
+                                                                        3642097855u,
+                                                                        3642110391u,
+                                                                        3624905718u,
+                                                                        608933631u,
+                                                                        1899376347u,
+                                                                        1899376347u,
+                                                                        2058877614u,
+                                                                        2058877614u,
+                                                                        4013395891u,
+                                                                        4013395891u};
+
+  auto const seeded_output =
+    cudf::hash(cudf::table_view({*list_column}), cudf::hash_id::HASH_MURMUR3, 619);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect_seeded, seeded_output->view(), verbosity);
+}
+
+TEST_F(HashTest, ListOfEmptyStruct)
+{
+  // []
+  // []
+  // Null
+  // Null
+  // [Null, Null]
+  // [Null, Null]
+  // [Null, Null]
+  // [Null]
+  // [Null]
+  // [{}]
+  // [{}]
+  // [{}, {}]
+  // [{}, {}]
+
+  auto struct_validity = std::vector<bool>{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1};
+  auto struct_validity_buffer =
+    cudf::test::detail::make_null_mask(struct_validity.begin(), struct_validity.end());
+  auto struct_col =
+    cudf::make_structs_column(14, {}, cudf::UNKNOWN_NULL_COUNT, std::move(struct_validity_buffer));
+
+  auto offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>{
+    0, 0, 0, 0, 0, 2, 4, 6, 7, 8, 9, 10, 12, 14};
+  auto list_nullmask = std::vector<bool>{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+  auto list_validity_buffer =
+    cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end());
+  auto list_column = cudf::make_lists_column(13,
+                                             offsets.release(),
+                                             std::move(struct_col),
+                                             cudf::UNKNOWN_NULL_COUNT,
+                                             std::move(list_validity_buffer));
+
+  auto expect = cudf::test::fixed_width_column_wrapper<uint32_t>{-2023148619,
+                                                                 -2023148619,
+                                                                 -2023148682,
+                                                                 -2023148682,
+                                                                 -340558283,
+                                                                 -340558283,
+                                                                 -340558283,
+                                                                 -1999301021,
+                                                                 -1999301021,
+                                                                 -1999301020,
+                                                                 -1999301020,
+                                                                 -340558244,
+                                                                 -340558244};
+
+  auto output = cudf::hash(cudf::table_view({*list_column}));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
+}
+
+TEST_F(HashTest, EmptyDeepList)
+{
+  // List<List<int>>, where all lists are empty
+  // []
+  // []
+  // Null
+  // Null
+
+  // Internal empty list
+  auto list1 = cudf::test::lists_column_wrapper<int>{};
+
+  auto offsets       = cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 0, 0, 0, 0};
+  auto list_nullmask = std::vector<bool>{1, 1, 0, 0};
+  auto list_validity_buffer =
+    cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end());
+  auto list_column = cudf::make_lists_column(4,
+                                             offsets.release(),
+                                             list1.release(),
+                                             cudf::UNKNOWN_NULL_COUNT,
+                                             std::move(list_validity_buffer));
+
+  auto expect = cudf::test::fixed_width_column_wrapper<uint32_t>{
+    -2023148619, -2023148619, -2023148682, -2023148682};
+
+  auto output = cudf::hash(cudf::table_view({*list_column}));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, output->view(), verbosity);
+}
+
 template <typename T>
 class HashTestTyped : public cudf::test::BaseFixture {
 };
diff --git a/cpp/tests/reductions/list_rank_test.cpp b/cpp/tests/reductions/list_rank_test.cpp
index b3a8e7e0c28..9be68e8458b 100644
--- a/cpp/tests/reductions/list_rank_test.cpp
+++ b/cpp/tests/reductions/list_rank_test.cpp
@@ -120,7 +120,7 @@ TEST_F(ListRankScanTest, ListOfStruct)
   auto col2 = cudf::test::strings_column_wrapper{
     {"x", "x", "a", "a", "b", "b", "a", "b", "a", "b", "a", "c", "a", "c", "a", "c", "b", "b"},
     {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1}};
-  auto struc = cudf::test::structs_column_wrapper{
+  auto struct_col = cudf::test::structs_column_wrapper{
     {col1, col2}, {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}};
 
   auto offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>{
@@ -135,7 +135,7 @@ TEST_F(ListRankScanTest, ListOfStruct)
                                        static_cast<cudf::bitmask_type*>(nullmask_buf.data()),
                                        cudf::UNKNOWN_NULL_COUNT,
                                        0,
-                                       {offsets, struc});
+                                       {offsets, struct_col});
 
   {  // Non-sliced
     auto expect = cudf::test::fixed_width_column_wrapper<cudf::size_type>{
diff --git a/cpp/tests/stream_compaction/distinct_tests.cpp b/cpp/tests/stream_compaction/distinct_tests.cpp
index 866239efc9d..2c822b93444 100644
--- a/cpp/tests/stream_compaction/distinct_tests.cpp
+++ b/cpp/tests/stream_compaction/distinct_tests.cpp
@@ -138,3 +138,245 @@ TEST_F(Distinct, WithNull)
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected_unequal, sorted_unequal->view());
 }
+
+TEST_F(Distinct, BasicList)
+{
+  using LCW = cudf::test::lists_column_wrapper<uint64_t>;
+  using ICW = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
+
+  // clang-format off
+  auto const idx = ICW{ 0,  0,   1,      2,   1,      3,      4,   5,   5,      6,      4,     4 };
+  auto const col = LCW{{}, {}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}};
+  // clang-format on
+  auto const input = cudf::table_view({idx, col});
+
+  auto const exp_idx = ICW{0, 1, 2, 3, 4, 5, 6};
+  auto const exp_val = LCW{{}, {1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
+  auto const expect  = cudf::table_view({exp_idx, exp_val});
+
+  auto result        = cudf::distinct(input, {1});
+  auto sorted_result = cudf::sort_by_key(*result, result->select({0}));
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expect, *sorted_result);
+}
+
+TEST_F(Distinct, NullableList)
+{
+  using LCW  = cudf::test::lists_column_wrapper<uint64_t>;
+  using ICW  = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
+  using mask = std::vector<bool>;
+
+  // clang-format off
+  auto const idx    = ICW {  0,  0,   1,   1,      4,   5,   5,  6,       4,     4,  6};
+  auto const valids = mask{  1,  1,   1,   1,      1,   1,   1,  0,       1,     1,  0};
+  auto const col    = LCW {{{}, {}, {1}, {1}, {2, 2}, {2}, {2}, {}, {2, 2}, {2, 2}, {}}, valids.begin()};
+
+  auto const exp_idx    = ICW {  0,   1,      4,   5,  6};
+  auto const exp_valids = mask{  1,   1,      1,   1,  0};
+  auto const exp_val    = LCW {{{}, {1}, {2, 2}, {2}, {}}, exp_valids.begin()};
+
+  // clang-format on
+  auto const input  = cudf::table_view({idx, col});
+  auto const expect = cudf::table_view({exp_idx, exp_val});
+
+  auto result        = cudf::distinct(input, {1});
+  auto sorted_result = cudf::sort_by_key(*result, result->select({0}));
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(expect, *sorted_result);
+}
+
+TEST_F(Distinct, ListOfStruct)
+{
+  // Constructing a list of struct of two elements
+  // 0.   []                  ==
+  // 1.   []                  !=
+  // 2.   Null                ==
+  // 3.   Null                !=
+  // 4.   [Null, Null]        !=
+  // 5.   [Null]              ==
+  // 6.   [Null]              ==
+  // 7.   [Null]              !=
+  // 8.   [{Null, Null}]      !=
+  // 9.   [{1,'a'}, {2,'b'}]  !=
+  // 10.  [{0,'a'}, {2,'b'}]  !=
+  // 11.  [{0,'a'}, {2,'c'}]  ==
+  // 12.  [{0,'a'}, {2,'c'}]  !=
+  // 13.  [{0,Null}]          ==
+  // 14.  [{0,Null}]          !=
+  // 15.  [{Null, 'b'}]       ==
+  // 16.  [{Null, 'b'}]
+
+  auto col1 = cudf::test::fixed_width_column_wrapper<int32_t>{
+    {-1, -1, 0, 2, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, 0, 1, 2},
+    {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0}};
+  auto col2 = cudf::test::strings_column_wrapper{
+    {"x", "x", "a", "a", "b", "b", "a", "b", "a", "b", "a", "c", "a", "c", "a", "c", "b", "b"},
+    {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1}};
+  auto struct_col = cudf::test::structs_column_wrapper{
+    {col1, col2}, {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}};
+
+  auto offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>{
+    0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18};
+
+  auto list_nullmask = std::vector<bool>{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+  auto nullmask_buf =
+    cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end());
+  auto list_column = cudf::column_view(cudf::data_type(cudf::type_id::LIST),
+                                       17,
+                                       nullptr,
+                                       static_cast<cudf::bitmask_type*>(nullmask_buf.data()),
+                                       cudf::UNKNOWN_NULL_COUNT,
+                                       0,
+                                       {offsets, struct_col});
+
+  auto idx = cudf::test::fixed_width_column_wrapper<cudf::size_type>{
+    1, 1, 2, 2, 3, 4, 4, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10};
+
+  auto input = cudf::table_view({idx, list_column});
+
+  auto expect_map =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 2, 4, 5, 8, 9, 10, 11, 13, 15};
+
+  auto expect_table = cudf::gather(input, expect_map);
+
+  auto result        = cudf::distinct(input, {1});
+  auto sorted_result = cudf::sort_by_key(*result, result->select({0}));
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *sorted_result);
+}
+
+TEST_F(Distinct, StructOfStruct)
+{
+  using FWCW = cudf::test::fixed_width_column_wrapper<int>;
+  using MASK = std::vector<bool>;
+
+  /*
+    `@` indicates null
+
+       +--------------+
+       |s1{s2{a,b}, c}|
+       +--------------+
+     0 |  { {1, 1}, 5}|
+     1 |  { {1, 2}, 4}|
+     2 |  {@{2, 1}, 6}|
+     3 |  {@{2, 2}, 4}|
+     4 | @{ {2, 2}, 3}|
+     5 | @{ {1, 1}, 3}|  // Same as 4
+     6 |  { {1, 1}, 5}|  // Same as 0
+     7 |  {@{1, 1}, 4}|  // Same as 3
+     8 |  { {2, 1}, 5}|
+       +--------------+
+  */
+
+  auto col_a   = FWCW{1, 1, 2, 2, 2, 1, 1, 1, 2};
+  auto col_b   = FWCW{1, 2, 1, 2, 2, 1, 1, 1, 1};
+  auto s2_mask = MASK{1, 1, 0, 0, 1, 1, 1, 0, 1};
+  auto col_c   = FWCW{5, 4, 6, 4, 3, 3, 5, 4, 5};
+  auto s1_mask = MASK{1, 1, 1, 1, 0, 0, 1, 1, 1};
+  auto idx     = FWCW{0, 1, 2, 3, 4, 5, 6, 7, 8};
+
+  std::vector<std::unique_ptr<cudf::column>> s2_children;
+  s2_children.push_back(col_a.release());
+  s2_children.push_back(col_b.release());
+  auto s2 = cudf::test::structs_column_wrapper(std::move(s2_children), s2_mask);
+
+  std::vector<std::unique_ptr<cudf::column>> s1_children;
+  s1_children.push_back(s2.release());
+  s1_children.push_back(col_c.release());
+  auto s1 = cudf::test::structs_column_wrapper(std::move(s1_children), s1_mask);
+
+  auto input = cudf::table_view({idx, s1});
+
+  auto expect_map = cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 1, 2, 3, 4, 8};
+  auto expect     = cudf::gather(input, expect_map);
+
+  auto result        = cudf::distinct(input, {1});
+  auto sorted_result = cudf::sort_by_key(*result, result->select({0}));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect->get_column(1), sorted_result->get_column(1));
+
+  auto sliced_input      = cudf::slice(input, {1, 7});
+  auto sliced_expect_map = cudf::test::fixed_width_column_wrapper<cudf::size_type>{1, 2, 3, 4, 6};
+  auto sliced_expect     = cudf::gather(input, sliced_expect_map);
+
+  auto sliced_result        = cudf::distinct(sliced_input, {1});
+  auto sorted_sliced_result = cudf::sort_by_key(*sliced_result, sliced_result->select({0}));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(sliced_expect->get_column(1), sorted_sliced_result->get_column(1));
+}
+
+TEST_F(Distinct, ListOfEmptyStruct)
+{
+  // 0.  []             ==
+  // 1.  []             !=
+  // 2.  Null           ==
+  // 3.  Null           !=
+  // 4.  [Null, Null]   ==
+  // 5.  [Null, Null]   ==
+  // 6.  [Null, Null]   !=
+  // 7.  [Null]         ==
+  // 8.  [Null]         !=
+  // 9.  [{}]           ==
+  // 10. [{}]           !=
+  // 11. [{}, {}]       ==
+  // 12. [{}, {}]
+
+  using mask = std::vector<bool>;
+
+  auto struct_validity = mask{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1};
+  auto struct_validity_buffer =
+    cudf::test::detail::make_null_mask(struct_validity.begin(), struct_validity.end());
+  auto struct_col =
+    cudf::make_structs_column(14, {}, cudf::UNKNOWN_NULL_COUNT, std::move(struct_validity_buffer));
+
+  auto offsets = cudf::test::fixed_width_column_wrapper<cudf::size_type>{
+    0, 0, 0, 0, 0, 2, 4, 6, 7, 8, 9, 10, 12, 14};
+  auto list_nullmask = mask{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+  auto list_validity_buffer =
+    cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end());
+  auto list_column = cudf::make_lists_column(13,
+                                             offsets.release(),
+                                             std::move(struct_col),
+                                             cudf::UNKNOWN_NULL_COUNT,
+                                             std::move(list_validity_buffer));
+  auto idx =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6};
+  auto input = cudf::table_view({idx, *list_column});
+
+  auto expect_map = cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 2, 4, 7, 9, 11};
+  auto expect     = cudf::gather(input, expect_map);
+
+  auto result        = cudf::distinct(input, {1});
+  auto sorted_result = cudf::sort_by_key(*result, result->select({0}));
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*expect, *sorted_result);
+}
+
+TEST_F(Distinct, EmptyDeepList)
+{
+  // List<List<int>>, where all lists are empty
+  // []
+  // []
+  // Null
+  // Null
+
+  // Internal empty list
+  auto list1 = cudf::test::lists_column_wrapper<int>{};
+
+  auto offsets       = cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 0, 0, 0, 0};
+  auto list_nullmask = std::vector<bool>{1, 1, 0, 0};
+  auto list_validity_buffer =
+    cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end());
+  auto list_column = cudf::make_lists_column(4,
+                                             offsets.release(),
+                                             list1.release(),
+                                             cudf::UNKNOWN_NULL_COUNT,
+                                             std::move(list_validity_buffer));
+
+  auto idx   = cudf::test::fixed_width_column_wrapper<cudf::size_type>{1, 1, 2, 2};
+  auto input = cudf::table_view({idx, *list_column});
+
+  auto expect_map = cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 2};
+  auto expect     = cudf::gather(input, expect_map);
+
+  auto result        = cudf::distinct(input, {1});
+  auto sorted_result = cudf::sort_by_key(*result, result->select({0}));
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*expect, *sorted_result);
+}
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index d95fe278469..9f2a3d45778 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -1148,7 +1148,7 @@ def test_dataframe_hash_values(nrows, method):
     out = gdf.hash_values()
     assert isinstance(out, cudf.Series)
     assert len(out) == nrows
-    assert out.dtype == np.int32
+    assert out.dtype == np.uint32
 
     # Check single column
     out_one = gdf[["a"]].hash_values(method=method)

From 9b8d26f8bf98424bf740627a1b226233861f961e Mon Sep 17 00:00:00 2001
From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com>
Date: Fri, 29 Apr 2022 13:04:18 -0500
Subject: [PATCH 05/23] Fix an issue with one_level_list schemas in parquet
 reader. (#10750)

Partially addresses: https://github.com/rapidsai/cudf/issues/10733

For a particular way of encoding list schemas (an old way that Spark seems to use sometimes), the parquet reader was accidentally propagating incorrect nesting information between columns.  Just a simple bug of not popping an extra value off a stack.

Note:  this is simply a fix so that the files read correctly, however the internal data in the file is actually of binary type and cudf converts these to string columns.  This PR does not add support for binary as a real type in cudf.

Authors:
  - https://github.com/nvdbaranec

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - MithunR (https://github.com/mythrocks)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/10750
---
 cpp/src/io/parquet/reader_impl.cu             |   4 +++
 .../data/parquet/one_level_list2.parquet      | Bin 0 -> 656 bytes
 python/cudf/cudf/tests/test_parquet.py        |  26 ++++++++++++++++++
 3 files changed, 30 insertions(+)
 create mode 100644 python/cudf/cudf/tests/data/parquet/one_level_list2.parquet

diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu
index a40993ee2dd..f165bd5ec3b 100644
--- a/cpp/src/io/parquet/reader_impl.cu
+++ b/cpp/src/io/parquet/reader_impl.cu
@@ -679,6 +679,10 @@ class aggregate_reader_metadata {
           }
 
           std::copy(nesting.cbegin(), nesting.cend(), std::back_inserter(input_col.nesting));
+
+          // pop off the extra nesting element.
+          if (schema_elem.is_one_level_list()) { nesting.pop_back(); }
+
           path_is_valid = true;  // If we're able to reach leaf then path is valid
         }
 
diff --git a/python/cudf/cudf/tests/data/parquet/one_level_list2.parquet b/python/cudf/cudf/tests/data/parquet/one_level_list2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..cd5acd045949218a7a419c66a11305eb7ddeb0d7
GIT binary patch
literal 656
zcmZuvL2JS=6i%B^iwHX<q|!q#76-1Zt)haL-L{+T<Y5S{X+X&8R_iwXJv;eZJnjeW
zrONEwN}89x_kG`c{mAt0F{FrgX}3oS94u<P9KV+($Egqu!DGUUF@_z|IzR!CE_G`Z
zqEVa}u-8cXQy2ciwGOXHHF(wO2p3fJs9g<Yxx6t5240#7j-wzt6Pgv_N^3#hh7bp_
zCpStp;C$i`T@8Z>wA!>?8yPF5xZaqSsiaIT1sQ5O>YkL@x};4+TOE@dABdVdZclNi
z^MvC(D~||Etrm&O5F?K3Jo&auTh72jwgX%WStgnE?a76?e(plE6K-4K=4|fglcPB`
zNBx6QgYJHy2LrGdcKUl$x9?cUsU`%l&GrT?s7(H`)BnQfc~5^*b(pak)L`mgX0cDg
z2<kq(B*p6{D{r0)?=36V%T?}$!6*p5Zn{}2;q{X=>5C*Ars=KdXCht5;W&zuER5r^
Uj7Cz#awr$_8WnI=rf?U(0hF3wRR910

literal 0
HcmV?d00001

diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 727200293f7..3a07ce6234c 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -2386,6 +2386,32 @@ def test_parquet_reader_one_level_list(datadir):
     assert_eq(expect, got)
 
 
+# testing a specific bug-fix/edge case.
+# specifically:  in a parquet file containing a particular way of representing
+#                a list column in a schema, the cudf reader was confusing
+#                nesting information between a list column and a subsequent
+#                string column, ultimately causing a crash.
+def test_parquet_reader_one_level_list2(datadir):
+    # we are reading in a file containing binary types, but cudf returns
+    # those as strings. so we have to massage the pandas data to get
+    # them to compare correctly.
+    def postprocess(val):
+        if isinstance(val, bytes):
+            return val.decode()
+        elif isinstance(val, np.ndarray):
+            return np.array([v.decode() for v in val])
+        else:
+            return val
+
+    fname = datadir / "one_level_list2.parquet"
+
+    expect = pd.read_parquet(fname)
+    expect = expect.applymap(postprocess)
+    got = cudf.read_parquet(fname)
+
+    assert_eq(expect, got, check_dtype=False)
+
+
 @pytest.mark.parametrize("size_bytes", [4_000_000, 1_000_000, 600_000])
 @pytest.mark.parametrize("size_rows", [1_000_000, 100_000, 10_000])
 def test_parquet_writer_row_group_size(

From 91129078e5146ea551e3cdf5d4a701b62addc1c3 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Fri, 29 Apr 2022 15:05:01 -0700
Subject: [PATCH 06/23] Support Segmented Min/Max Reduction on String Type
 (#10447)

This PR adds `min/max` segmented reduction to string type.

Part of https://github.com/rapidsai/cudf/issues/10417

Authors:
  - Michael Wang (https://github.com/isVoid)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/10447
---
 cpp/include/cudf/detail/reduction.cuh         |  27 ++--
 .../detail/utilities/element_argminmax.cuh    |  61 ++++++++
 .../sort/group_single_pass_reduction_util.cuh |  36 +----
 cpp/src/reductions/simple_segmented.cuh       | 136 +++++++++++++++++-
 .../reductions/segmented_reduction_tests.cpp  | 121 ++++++++++++++++
 5 files changed, 331 insertions(+), 50 deletions(-)
 create mode 100644 cpp/include/cudf/detail/utilities/element_argminmax.cuh

diff --git a/cpp/include/cudf/detail/reduction.cuh b/cpp/include/cudf/detail/reduction.cuh
index 76afbf7e4b8..023d83f3c24 100644
--- a/cpp/include/cudf/detail/reduction.cuh
+++ b/cpp/include/cudf/detail/reduction.cuh
@@ -227,36 +227,36 @@ std::unique_ptr<scalar> reduce(InputIterator d_in,
  * @brief Compute the specified simple reduction over each of the segments in the
  * input range of elements.
  *
- * @tparam Op               the reduction operator with device binary operator
  * @tparam InputIterator    the input column iterator
  * @tparam OffsetIterator   the offset column iterator
+ * @tparam BinaryOp         the device binary operator used to reduce
  * @tparam OutputType       the output type of reduction
  *
  * @param[in] d_in          the begin iterator to input
  * @param[in] d_offset      the begin iterator to offset
  * @param[in] num_segments  the number of segments
- * @param[in] sop           the reduction operator
+ * @param[in] binary_op     the reduction operator
+ * @param[in] identity      the identity element of the reduction operator
  * @param[in] stream        CUDA stream used for device memory operations and kernel launches.
  * @param[in] mr            Device memory resource used to allocate the returned column's device
  * memory
  * @returns   Output column in device memory
  *
  */
-template <typename Op,
-          typename InputIterator,
+template <typename InputIterator,
           typename OffsetIterator,
+          typename BinaryOp,
           typename OutputType = typename thrust::iterator_value<InputIterator>::type,
           typename std::enable_if_t<is_fixed_width<OutputType>() &&
-                                    not cudf::is_fixed_point<OutputType>()>* = nullptr>
+                                    !cudf::is_fixed_point<OutputType>()>* = nullptr>
 std::unique_ptr<column> segmented_reduce(InputIterator d_in,
                                          OffsetIterator d_offset,
                                          cudf::size_type num_segments,
-                                         op::simple_op<Op> sop,
+                                         BinaryOp binary_op,
+                                         OutputType identity,
                                          rmm::cuda_stream_view stream,
                                          rmm::mr::device_memory_resource* mr)
 {
-  auto binary_op  = sop.get_binary_op();
-  auto identity   = sop.template get_identity<OutputType>();
   auto dev_result = make_fixed_width_column(
     data_type{type_to_id<OutputType>()}, num_segments, mask_state::UNALLOCATED, stream, mr);
   auto dev_result_mview = dev_result->mutable_view();
@@ -291,16 +291,17 @@ std::unique_ptr<column> segmented_reduce(InputIterator d_in,
   return dev_result;
 }
 
-template <typename Op,
-          typename InputIterator,
+template <typename InputIterator,
           typename OffsetIterator,
+          typename BinaryOp,
           typename OutputType = typename thrust::iterator_value<InputIterator>::type,
-          typename std::enable_if_t<not is_fixed_width<OutputType>() ||
-                                    is_fixed_point<OutputType>()>* = nullptr>
+          typename std::enable_if_t<!(is_fixed_width<OutputType>() &&
+                                      !cudf::is_fixed_point<OutputType>())>* = nullptr>
 std::unique_ptr<column> segmented_reduce(InputIterator,
                                          OffsetIterator,
                                          cudf::size_type,
-                                         op::simple_op<Op>,
+                                         BinaryOp,
+                                         OutputType,
                                          rmm::cuda_stream_view,
                                          rmm::mr::device_memory_resource*)
 {
diff --git a/cpp/include/cudf/detail/utilities/element_argminmax.cuh b/cpp/include/cudf/detail/utilities/element_argminmax.cuh
new file mode 100644
index 00000000000..45b56278dba
--- /dev/null
+++ b/cpp/include/cudf/detail/utilities/element_argminmax.cuh
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/types.hpp>
+#include <cudf/utilities/error.hpp>
+#include <cudf/utilities/traits.hpp>
+
+#include <type_traits>
+
+namespace cudf {
+namespace detail {
+
+/**
+ * @brief Binary `argmin`/`argmax` operator
+ *
+ * @tparam T Type of the underlying column. Must support '<' operator.
+ */
+template <typename T>
+struct element_argminmax_fn {
+  column_device_view const d_col;
+  bool const has_nulls;
+  bool const arg_min;
+
+  __device__ inline auto operator()(size_type const& lhs_idx, size_type const& rhs_idx) const
+  {
+    // The extra bounds checking is due to issue github.com/rapidsai/cudf/issues/9156 and
+    // github.com/NVIDIA/thrust/issues/1525
+    // where invalid random values may be passed here by thrust::reduce_by_key
+    auto out_of_bound_or_null = [this] __device__(size_type const& idx) {
+      return idx < 0 || idx >= this->d_col.size() ||
+             (this->has_nulls && this->d_col.is_null_nocheck(idx));
+    };
+    if (out_of_bound_or_null(lhs_idx)) { return rhs_idx; }
+    if (out_of_bound_or_null(rhs_idx)) { return lhs_idx; }
+
+    // Return `lhs_idx` iff:
+    //   row(lhs_idx) <  row(rhs_idx) and finding ArgMin, or
+    //   row(lhs_idx) >= row(rhs_idx) and finding ArgMax.
+    auto const less = d_col.element<T>(lhs_idx) < d_col.element<T>(rhs_idx);
+    return less == arg_min ? lhs_idx : rhs_idx;
+  }
+};
+
+}  // namespace detail
+}  // namespace cudf
diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
index 8e1463f7964..93d5e6c032c 100644
--- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
+++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
@@ -23,6 +23,7 @@
 #include <cudf/column/column_view.hpp>
 #include <cudf/detail/aggregation/aggregation.cuh>
 #include <cudf/detail/iterator.cuh>
+#include <cudf/detail/utilities/element_argminmax.cuh>
 #include <cudf/detail/valid_if.cuh>
 #include <cudf/table/row_operators.cuh>
 #include <cudf/types.hpp>
@@ -40,37 +41,6 @@ namespace cudf {
 namespace groupby {
 namespace detail {
 
-/**
- * @brief Binary operator with index values into the input column.
- *
- * @tparam T Type of the underlying column. Must support '<' operator.
- */
-template <typename T>
-struct element_arg_minmax_fn {
-  column_device_view const d_col;
-  bool const has_nulls;
-  bool const arg_min;
-
-  __device__ inline auto operator()(size_type const& lhs_idx, size_type const& rhs_idx) const
-  {
-    // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and
-    // github.com/NVIDIA/thrust/issues/1525
-    // where invalid random values may be passed here by thrust::reduce_by_key
-    if (lhs_idx < 0 || lhs_idx >= d_col.size() || (has_nulls && d_col.is_null_nocheck(lhs_idx))) {
-      return rhs_idx;
-    }
-    if (rhs_idx < 0 || rhs_idx >= d_col.size() || (has_nulls && d_col.is_null_nocheck(rhs_idx))) {
-      return lhs_idx;
-    }
-
-    // Return `lhs_idx` iff:
-    //   row(lhs_idx) <  row(rhs_idx) and finding ArgMin, or
-    //   row(lhs_idx) >= row(rhs_idx) and finding ArgMax.
-    auto const less = d_col.element<T>(lhs_idx) < d_col.element<T>(rhs_idx);
-    return less == arg_min ? lhs_idx : rhs_idx;
-  }
-};
-
 /**
  * @brief Value accessor for column which supports dictionary column too.
  *
@@ -211,8 +181,8 @@ struct group_reduction_functor<K, T, std::enable_if_t<is_group_reduction_support
 
     if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) {
       auto const count_iter = thrust::make_counting_iterator<ResultType>(0);
-      auto const binop =
-        element_arg_minmax_fn<T>{*d_values_ptr, values.has_nulls(), K == aggregation::ARGMIN};
+      auto const binop      = cudf::detail::element_argminmax_fn<T>{
+        *d_values_ptr, values.has_nulls(), K == aggregation::ARGMIN};
       do_reduction(count_iter, result_begin, binop);
     } else {
       using OpType    = cudf::detail::corresponding_operator_t<K>;
diff --git a/cpp/src/reductions/simple_segmented.cuh b/cpp/src/reductions/simple_segmented.cuh
index 99837e67398..7796794502d 100644
--- a/cpp/src/reductions/simple_segmented.cuh
+++ b/cpp/src/reductions/simple_segmented.cuh
@@ -16,12 +16,15 @@
 
 #pragma once
 
+#include <cudf/detail/aggregation/aggregation.hpp>
 #include <cudf/detail/copy.hpp>
+#include <cudf/detail/gather.hpp>
 #include <cudf/detail/null_mask.cuh>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/reduction.cuh>
 #include <cudf/detail/unary.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/detail/utilities/element_argminmax.cuh>
 #include <cudf/detail/valid_if.cuh>
 #include <cudf/null_mask.hpp>
 #include <cudf/types.hpp>
@@ -31,9 +34,12 @@
 
 #include <rmm/cuda_stream_view.hpp>
 
+#include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
 
+#include <type_traits>
+
 namespace cudf {
 namespace reduction {
 namespace simple {
@@ -70,18 +76,21 @@ std::unique_ptr<column> simple_segmented_reduction(column_view const& col,
   auto simple_op         = Op{};
   size_type num_segments = offsets.size() - 1;
 
+  auto binary_op = simple_op.get_binary_op();
+  auto identity  = simple_op.template get_identity<ResultType>();
+
   // TODO: Explore rewriting null_replacing_element_transformer/element_transformer with nullate
   auto result = [&] {
     if (col.has_nulls()) {
       auto f  = simple_op.template get_null_replacing_element_transformer<ResultType>();
       auto it = thrust::make_transform_iterator(dcol->pair_begin<InputType, true>(), f);
       return cudf::reduction::detail::segmented_reduce(
-        it, offsets.begin(), num_segments, simple_op, stream, mr);
+        it, offsets.begin(), num_segments, binary_op, identity, stream, mr);
     } else {
       auto f  = simple_op.template get_element_transformer<ResultType>();
       auto it = thrust::make_transform_iterator(dcol->begin<InputType>(), f);
       return cudf::reduction::detail::segmented_reduce(
-        it, offsets.begin(), num_segments, simple_op, stream, mr);
+        it, offsets.begin(), num_segments, binary_op, identity, stream, mr);
     }
   }();
 
@@ -103,6 +112,112 @@ std::unique_ptr<column> simple_segmented_reduction(column_view const& col,
   return result;
 }
 
+/**
+ * @brief String segmented reduction for 'min', 'max'.
+ *
+ * This algorithm uses argmin/argmax as a custom comparator to build a gather
+ * map, then builds the output.
+ *
+ * @tparam InputType    the input column data-type
+ * @tparam Op           the operator of cudf::reduction::op::
+ *
+ * @param col Input column of data to reduce.
+ * @param offsets Indices to segment boundaries.
+ * @param null_handling If `null_policy::INCLUDE`, all elements in a segment
+ * must be valid for the reduced value to be valid. If `null_policy::EXCLUDE`,
+ * the reduced value is valid if any element in the segment is valid.
+ * @param stream Used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return Output column in device memory
+ */
+
+template <typename InputType,
+          typename Op,
+          CUDF_ENABLE_IF(std::is_same_v<Op, cudf::reduction::op::min> ||
+                         std::is_same_v<Op, cudf::reduction::op::max>)>
+std::unique_ptr<column> string_segmented_reduction(column_view const& col,
+                                                   device_span<size_type const> offsets,
+                                                   null_policy null_handling,
+                                                   rmm::cuda_stream_view stream,
+                                                   rmm::mr::device_memory_resource* mr)
+{
+  // Use segmented_reduce to get the argmin/argmax index of each segment, then gather by index.
+  auto device_col = cudf::column_device_view::create(col, stream);
+
+  auto it                 = thrust::make_counting_iterator(0);
+  auto const num_segments = static_cast<size_type>(offsets.size()) - 1;
+
+  bool constexpr is_argmin = std::is_same_v<Op, cudf::reduction::op::min>;
+  auto string_comparator =
+    cudf::detail::element_argminmax_fn<InputType>{*device_col, col.has_nulls(), is_argmin};
+  auto constexpr identity =
+    is_argmin ? cudf::detail::ARGMIN_SENTINEL : cudf::detail::ARGMAX_SENTINEL;
+
+  auto gather_map =
+    cudf::reduction::detail::segmented_reduce(it,
+                                              offsets.begin(),
+                                              num_segments,
+                                              string_comparator,
+                                              identity,
+                                              stream,
+                                              rmm::mr::get_current_device_resource());
+  auto result = std::move(cudf::detail::gather(table_view{{col}},
+                                               *gather_map,
+                                               cudf::out_of_bounds_policy::NULLIFY,
+                                               cudf::detail::negative_index_policy::NOT_ALLOWED,
+                                               stream,
+                                               mr)
+                            ->release()[0]);
+  auto const [segmented_null_mask, segmented_null_count] =
+    cudf::detail::segmented_null_mask_reduction(col.null_mask(),
+                                                offsets.begin(),
+                                                offsets.end() - 1,
+                                                offsets.begin() + 1,
+                                                null_handling,
+                                                stream,
+                                                mr);
+
+  // If the segmented null mask contains any null values, the segmented null mask
+  // must be combined with the result null mask.
+  if (segmented_null_count > 0) {
+    if (result->null_count() == 0) {
+      // The result has no nulls. Use the segmented null mask.
+      result->set_null_mask(segmented_null_mask, segmented_null_count, stream);
+    } else {
+      // Compute the logical AND of the segmented output null mask and the
+      // result null mask to update the result null mask and null count.
+      auto result_mview = result->mutable_view();
+      std::vector masks{static_cast<bitmask_type const*>(result_mview.null_mask()),
+                        static_cast<bitmask_type const*>(segmented_null_mask.data())};
+      std::vector<size_type> begin_bits{0, 0};
+      auto const valid_count = cudf::detail::inplace_bitmask_and(
+        device_span<bitmask_type>(static_cast<bitmask_type*>(result_mview.null_mask()),
+                                  num_bitmask_words(result->size())),
+        masks,
+        begin_bits,
+        result->size(),
+        stream,
+        mr);
+      result->set_null_count(result->size() - valid_count);
+    }
+  }
+
+  return result;
+}
+
+template <typename InputType,
+          typename Op,
+          CUDF_ENABLE_IF(!std::is_same_v<Op, cudf::reduction::op::min> &&
+                         !std::is_same_v<Op, cudf::reduction::op::max>)>
+std::unique_ptr<column> string_segmented_reduction(column_view const& col,
+                                                   device_span<size_type const> offsets,
+                                                   null_policy null_handling,
+                                                   rmm::cuda_stream_view stream,
+                                                   rmm::mr::device_memory_resource* mr)
+{  // Fallback overload, selected for every Op other than min/max: always fails via CUDF_FAIL.
+  CUDF_FAIL("Segmented reduction on string column only supports min and max reduction.");
+}
+
 /**
  * @brief Call reduce and return a column of type bool.
  *
@@ -153,7 +268,9 @@ struct same_column_type_dispatcher {
   }
 
  public:
-  template <typename ElementType, std::enable_if_t<is_supported<ElementType>()>* = nullptr>
+  template <typename ElementType,
+            CUDF_ENABLE_IF(is_supported<ElementType>() &&
+                           !std::is_same_v<ElementType, string_view>)>
   std::unique_ptr<column> operator()(column_view const& col,
                                      device_span<size_type const> offsets,
                                      null_policy null_handling,
@@ -164,7 +281,18 @@ struct same_column_type_dispatcher {
       col, offsets, null_handling, stream, mr);
   }
 
-  template <typename ElementType, std::enable_if_t<not is_supported<ElementType>()>* = nullptr>
+  template <typename ElementType,
+            CUDF_ENABLE_IF(is_supported<ElementType>() && std::is_same_v<ElementType, string_view>)>
+  std::unique_ptr<column> operator()(column_view const& col,
+                                     device_span<size_type const> offsets,
+                                     null_policy null_handling,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource* mr)
+  {
+    return string_segmented_reduction<ElementType, Op>(col, offsets, null_handling, stream, mr);
+  }
+
+  template <typename ElementType, CUDF_ENABLE_IF(!is_supported<ElementType>())>
   std::unique_ptr<column> operator()(column_view const&,
                                      device_span<size_type const>,
                                      null_policy,
diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp
index f750c432efb..8a9a8fb549e 100644
--- a/cpp/tests/reductions/segmented_reduction_tests.cpp
+++ b/cpp/tests/reductions/segmented_reduction_tests.cpp
@@ -387,6 +387,127 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect);
 }
 
+// String min/max test grid
+// Segment: Length 0, length 1, length 2
+// Element nulls: No nulls, all nulls, some nulls
+// String: Empty string,
+// Position of the min/max: start of segment, end of segment
+// Include null, exclude null
+
+#undef XXX
+#define XXX ""  // null placeholder
+
+struct SegmentedReductionStringTest : public cudf::test::BaseFixture {
+  std::pair<strings_column_wrapper, fixed_width_column_wrapper<size_type>> input()
+  {
+    return std::pair(
+      strings_column_wrapper{
+        {"world", "cudf", XXX, "", "rapids", "i am", "ai", "apples", "zebras", XXX, XXX, XXX},
+        {true, true, false, true, true, true, true, true, true, false, false, false}},
+      fixed_width_column_wrapper<size_type>{0, 1, 4, 7, 9, 9, 10, 12});
+  }
+};
+
+TEST_F(SegmentedReductionStringTest, MaxIncludeNulls)
+{
+  // data: ['world'], ['cudf', NULL, ''], ['rapids', 'i am', 'ai'], ['apples', 'zebras'],
+  //       [], [NULL], [NULL, NULL]
+  // values:  {"world", "cudf", XXX, "", "rapids", "i am", "ai", "apples", "zebras", XXX, XXX, XXX}
+  // nullmask:{1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0}
+  // offsets: {0, 1, 4, 7, 9, 9, 10, 12}
+  // output_dtype: string dtype
+  // outputs: {"world", XXX, "rapids", "zebras", XXX, XXX, XXX}
+  // output nullmask: {1, 0, 1, 1, 0, 0, 0}
+
+  auto const [input, offsets] = this->input();
+  data_type output_dtype{type_id::STRING};
+
+  strings_column_wrapper expect{{"world", XXX, "rapids", "zebras", XXX, XXX, XXX},
+                                {true, false, true, true, false, false, false}};
+
+  auto res = segmented_reduce(input,
+                              column_view(offsets),
+                              *make_max_aggregation<segmented_reduce_aggregation>(),
+                              output_dtype,
+                              null_policy::INCLUDE);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect);
+}
+
+TEST_F(SegmentedReductionStringTest, MaxExcludeNulls)
+{
+  // data: ['world'], ['cudf', NULL, ''], ['rapids', 'i am', 'ai'], ['apples', 'zebras'],
+  //       [], [NULL], [NULL, NULL]
+  // values:  {"world", "cudf", XXX, "", "rapids", "i am", "ai", "apples", "zebras", XXX, XXX, XXX}
+  // nullmask:{1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0}
+  // offsets: {0, 1, 4, 7, 9, 9, 10, 12}
+  // output_dtype: string dtype
+  // outputs: {"world", "cudf", "rapids", "zebras", XXX, XXX, XXX}
+  // output nullmask: {1, 1, 1, 1, 0, 0, 0}
+
+  auto const [input, offsets] = this->input();
+  data_type output_dtype{type_id::STRING};
+
+  strings_column_wrapper expect{{"world", "cudf", "rapids", "zebras", XXX, XXX, XXX},
+                                {true, true, true, true, false, false, false}};
+
+  auto res = segmented_reduce(input,
+                              column_view(offsets),
+                              *make_max_aggregation<segmented_reduce_aggregation>(),
+                              output_dtype,
+                              null_policy::EXCLUDE);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect);
+}
+
+TEST_F(SegmentedReductionStringTest, MinIncludeNulls)
+{
+  // data: ['world'], ['cudf', NULL, ''], ['rapids', 'i am', 'ai'], ['apples', 'zebras'],
+  //       [], [NULL], [NULL, NULL]
+  // values:  {"world", "cudf", XXX, "", "rapids", "i am", "ai", "apples", "zebras", XXX, XXX, XXX}
+  // nullmask:{1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0}
+  // offsets: {0, 1, 4, 7, 9, 9, 10, 12}
+  // output_dtype: string dtype
+  // outputs: {"world", XXX, "ai", "apples", XXX, XXX, XXX}
+  // output nullmask: {1, 0, 1, 1, 0, 0, 0}
+
+  auto const [input, offsets] = this->input();
+  data_type output_dtype{type_id::STRING};
+
+  strings_column_wrapper expect{{"world", XXX, "ai", "apples", XXX, XXX, XXX},
+                                {true, false, true, true, false, false, false}};
+
+  auto res = segmented_reduce(input,
+                              column_view(offsets),
+                              *make_min_aggregation<segmented_reduce_aggregation>(),
+                              output_dtype,
+                              null_policy::INCLUDE);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect);
+}
+
+TEST_F(SegmentedReductionStringTest, MinExcludeNulls)
+{
+  // data: ['world'], ['cudf', NULL, ''], ['rapids', 'i am', 'ai'], ['apples', 'zebras'],
+  //       [], [NULL], [NULL, NULL]
+  // values:  {"world", "cudf", XXX, "", "rapids", "i am", "ai", "apples", "zebras", XXX, XXX, XXX}
+  // nullmask:{1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0}
+  // offsets: {0, 1, 4, 7, 9, 9, 10, 12}
+  // output_dtype: string dtype
+  // outputs: {"world", "", "ai", "apples", XXX, XXX, XXX}
+  // output nullmask: {1, 1, 1, 1, 0, 0, 0}
+
+  auto const [input, offsets] = this->input();
+  data_type output_dtype{type_id::STRING};
+
+  strings_column_wrapper expect{{"world", "", "ai", "apples", XXX, XXX, XXX},
+                                {true, true, true, true, false, false, false}};
+
+  auto res = segmented_reduce(input,
+                              column_view(offsets),
+                              *make_min_aggregation<segmented_reduce_aggregation>(),
+                              output_dtype,
+                              null_policy::EXCLUDE);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*res, expect);
+}
+
 #undef XXX
 
 }  // namespace test

From bf10a9471979e1eaae4d12aa20e4bea45cfb7506 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Fri, 29 Apr 2022 16:41:22 -0700
Subject: [PATCH 07/23] Flush output streams before creating a process to drop
 caches (#10762)

Small improvement for the `try_drop_l3_cache` feature in cuIO benchmarks.
Prevents unflushed output from the original process from intermingling with the output from the `popen` process.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - https://github.com/nvdbaranec
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/10762
---
 cpp/benchmarks/io/cuio_common.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp
index 7d356263220..da64c1bbf3c 100644
--- a/cpp/benchmarks/io/cuio_common.cpp
+++ b/cpp/benchmarks/io/cuio_common.cpp
@@ -16,6 +16,7 @@
 
 #include <benchmarks/io/cuio_common.hpp>
 
+#include <cstdio>
 #include <fstream>
 #include <numeric>
 #include <string>
@@ -145,6 +146,8 @@ std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks,
 // Executes the command and returns stderr output
 std::string exec_cmd(std::string_view cmd)
 {
+  // Prevent the output from the command from mixing with the original process' output
+  std::fflush(nullptr);
   // Switch stderr and stdout to only capture stderr
   auto const redirected_cmd = std::string{"( "}.append(cmd).append(" 3>&2 2>&1 1>&3) 2>/dev/null");
   std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(redirected_cmd.c_str(), "r"), pclose);

From 027c34aefbf8a5abf5394da15a7b6f1dcc63b06c Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 29 Apr 2022 19:30:33 -0500
Subject: [PATCH 08/23] Use generator expressions in any/all functions.
 (#10736)

This PR uses generator expressions in `any(...)` and `all(...)` to avoid allocating a list in memory while maximizing the potential benefit of early exit from the `any`/`all` function.

I also fixed a few miscellaneous things (~ 10 lines):
- Use `cls` in `classmethod`s
- Simplify a lambda expression
- Use `super()` with no arguments if the arguments are the parent class and `self`
- Parenthesize multi-line strings with implicit concatenation to clarify the behavior when written in a tuple of values

Note: Some of these were caught by https://codereview.doctor/rapidsai/cudf. In some places, the bot correctly identified a problem but its suggestions were invalid or incomplete. I identified steps for improvement beyond what the bot suggested for most of these cases.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - Michael Wang (https://github.com/isVoid)

URL: https://github.com/rapidsai/cudf/pull/10736
---
 python/cudf/cudf/_fuzz_testing/utils.py        | 12 ++++--------
 python/cudf/cudf/core/column/interval.py       |  4 ++--
 python/cudf/cudf/core/column/struct.py         |  2 +-
 python/cudf/cudf/core/dataframe.py             | 14 ++++++--------
 python/cudf/cudf/core/tools/datetimes.py       |  2 +-
 python/cudf/cudf/io/parquet.py                 |  2 +-
 python/cudf/cudf/testing/_utils.py             |  2 +-
 python/cudf/cudf/tests/test_custom_accessor.py |  4 ++--
 python/cudf/cudf/tests/test_dtypes.py          |  6 ++----
 python/cudf/cudf/tests/test_multiindex.py      |  8 ++++----
 python/cudf/cudf/tests/test_text.py            | 12 ++++++++----
 11 files changed, 32 insertions(+), 36 deletions(-)

diff --git a/python/cudf/cudf/_fuzz_testing/utils.py b/python/cudf/cudf/_fuzz_testing/utils.py
index 87a8fc46374..9f3c0ab6d5f 100644
--- a/python/cudf/cudf/_fuzz_testing/utils.py
+++ b/python/cudf/cudf/_fuzz_testing/utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 import random
 from collections import OrderedDict
@@ -312,13 +312,9 @@ def sanitize(value, struct_type):
         return tuple(values_list)
 
     has_nulls_or_nullable_dtype = any(
-        [
-            True
-            if df[col].dtype in pandas_dtypes_to_np_dtypes
-            or df[col].isnull().any()
-            else False
-            for col in df.columns
-        ]
+        (col := df[colname]).dtype in pandas_dtypes_to_np_dtypes
+        or col.isnull().any()
+        for colname in df.columns
     )
     pdf = df.copy(deep=True)
     for field in arrow_table_schema:
diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py
index a873a0f98a5..bfaf65d45e2 100644
--- a/python/cudf/cudf/core/column/interval.py
+++ b/python/cudf/cudf/core/column/interval.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 import pandas as pd
 import pyarrow as pa
 
@@ -39,7 +39,7 @@ def closed(self):
         return self._closed
 
     @classmethod
-    def from_arrow(self, data):
+    def from_arrow(cls, data):
         new_col = super().from_arrow(data.storage)
         size = len(data)
         dtype = IntervalDtype.from_arrow(data.type)
diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py
index 53e6e9972b1..ed5e1c9450d 100644
--- a/python/cudf/cudf/core/column/struct.py
+++ b/python/cudf/cudf/core/column/struct.py
@@ -31,7 +31,7 @@ def base_size(self):
             return len(self.base_children[0])
 
     @classmethod
-    def from_arrow(self, data):
+    def from_arrow(cls, data):
         size = len(data)
         dtype = cudf.core.dtypes.StructDtype.from_arrow(data.type)
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 0d3b3ee0300..8c459e855c1 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -160,7 +160,7 @@ def _can_downcast_to_series(self, df, arg):
                 ):
                     return True
             dtypes = df.dtypes.values.tolist()
-            all_numeric = all([is_numeric_dtype(t) for t in dtypes])
+            all_numeric = all(is_numeric_dtype(t) for t in dtypes)
             if all_numeric:
                 return True
         if ncols == 1:
@@ -720,7 +720,7 @@ def _init_from_series_list(self, data, columns, index):
 
             final_index = as_index(index)
 
-        series_lengths = list(map(lambda x: len(x), data))
+        series_lengths = list(map(len, data))
         data = numeric_normalize_types(*data)
         if series_lengths.count(series_lengths[0]) == len(series_lengths):
             # Calculating the final dataframe columns by
@@ -2999,11 +2999,11 @@ def agg(self, aggs, axis=None):
 
         elif isinstance(aggs, dict):
             cols = aggs.keys()
-            if any([callable(val) for val in aggs.values()]):
+            if any(callable(val) for val in aggs.values()):
                 raise NotImplementedError(
                     "callable parameter is not implemented yet"
                 )
-            elif all([isinstance(val, str) for val in aggs.values()]):
+            elif all(isinstance(val, str) for val in aggs.values()):
                 result = cudf.Series(index=cols)
                 for key, value in aggs.items():
                     col = df_normalized[key]
@@ -3013,7 +3013,7 @@ def agg(self, aggs, axis=None):
                             f"'Series' object"
                         )
                     result[key] = getattr(col, value)()
-            elif all([isinstance(val, abc.Iterable) for val in aggs.values()]):
+            elif all(isinstance(val, abc.Iterable) for val in aggs.values()):
                 idxs = set()
                 for val in aggs.values():
                     if isinstance(val, str):
@@ -6032,9 +6032,7 @@ def append(
             if (cols.get_indexer(other._data.to_pandas_index()) >= 0).all():
                 other = other.reindex(columns=cols)
 
-        return super(DataFrame, self)._append(
-            other, ignore_index, verify_integrity, sort
-        )
+        return super()._append(other, ignore_index, verify_integrity, sort)
 
     @_cudf_nvtx_annotate
     @copy_docstring(reshape.pivot)
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 3ce89bc27e8..ccd23b82c88 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -629,7 +629,7 @@ def _generate_months_column(self, size, op):
     def _is_no_op(self) -> bool:
         # some logic could be implemented here for more complex cases
         # such as +1 year, -12 months
-        return all([i == 0 for i in self._kwds.values()])
+        return all(i == 0 for i in self._kwds.values())
 
     def __neg__(self):
         new_scalars = {k: -v for k, v in self._kwds.items()}
diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py
index baedc3f174b..5746bf6fec9 100644
--- a/python/cudf/cudf/io/parquet.py
+++ b/python/cudf/cudf/io/parquet.py
@@ -56,7 +56,7 @@ def _write_parquet(
         "row_group_size_rows": row_group_size_rows,
         "partitions_info": partitions_info,
     }
-    if all([ioutils.is_fsspec_open_file(buf) for buf in paths_or_bufs]):
+    if all(ioutils.is_fsspec_open_file(buf) for buf in paths_or_bufs):
         with ExitStack() as stack:
             fsspec_objs = [stack.enter_context(file) for file in paths_or_bufs]
             file_objs = [
diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 607d9121630..5232d1adb64 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -133,7 +133,7 @@ def assert_eq(left, right, **kwargs):
         # Use the overloaded __eq__ of the operands
         if left == right:
             return True
-        elif any([np.issubdtype(type(x), np.floating) for x in (left, right)]):
+        elif any(np.issubdtype(type(x), np.floating) for x in (left, right)):
             np.testing.assert_almost_equal(left, right)
         else:
             np.testing.assert_equal(left, right)
diff --git a/python/cudf/cudf/tests/test_custom_accessor.py b/python/cudf/cudf/tests/test_custom_accessor.py
index bfd2ccbccef..35cc107b257 100644
--- a/python/cudf/cudf/tests/test_custom_accessor.py
+++ b/python/cudf/cudf/tests/test_custom_accessor.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 import pandas as pd
 import pytest
@@ -17,7 +17,7 @@ def __init__(self, obj):
     @staticmethod
     def _validate(obj):
         cols = obj.columns
-        if not all([vertex in cols for vertex in ["x", "y"]]):
+        if not all(vertex in cols for vertex in ["x", "y"]):
             raise AttributeError("Must have vertices 'x', 'y'.")
 
     @property
diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
index 356685c976e..f6a0e41a0c7 100644
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ b/python/cudf/cudf/tests/test_dtypes.py
@@ -189,10 +189,8 @@ def assert_column_array_dtype_equal(column: ColumnBase, array: pa.array):
         )
     elif isinstance(column.dtype, StructDtype):
         return array.type.equals(column.dtype.to_arrow()) and all(
-            [
-                assert_column_array_dtype_equal(child, array.field(i))
-                for i, child in enumerate(column.base_children)
-            ]
+            assert_column_array_dtype_equal(child, array.field(i))
+            for i, child in enumerate(column.base_children)
         )
     elif isinstance(
         column.dtype, (Decimal128Dtype, Decimal64Dtype, Decimal32Dtype)
diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py
index 38225b3efb9..f3830ed386a 100644
--- a/python/cudf/cudf/tests/test_multiindex.py
+++ b/python/cudf/cudf/tests/test_multiindex.py
@@ -762,7 +762,7 @@ def test_multiindex_copy_deep(data, deep):
         lptrs = [child.base_data.ptr for child in lchildren]
         rptrs = [child.base_data.ptr for child in rchildren]
 
-        assert all([(x == y) is same_ref for x, y in zip(lptrs, rptrs)])
+        assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs))
 
     elif isinstance(data, cudf.MultiIndex):
         mi1 = data
@@ -772,19 +772,19 @@ def test_multiindex_copy_deep(data, deep):
         lptrs = [lv._data._data[None].base_data.ptr for lv in mi1._levels]
         rptrs = [lv._data._data[None].base_data.ptr for lv in mi2._levels]
 
-        assert all([(x == y) is same_ref for x, y in zip(lptrs, rptrs)])
+        assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs))
 
         # Assert ._codes identity
         lptrs = [c.base_data.ptr for _, c in mi1._codes._data.items()]
         rptrs = [c.base_data.ptr for _, c in mi2._codes._data.items()]
 
-        assert all([(x == y) is same_ref for x, y in zip(lptrs, rptrs)])
+        assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs))
 
         # Assert ._data identity
         lptrs = [d.base_data.ptr for _, d in mi1._data.items()]
         rptrs = [d.base_data.ptr for _, d in mi2._data.items()]
 
-        assert all([(x == y) is same_ref for x, y in zip(lptrs, rptrs)])
+        assert all((x == y) == same_ref for x, y in zip(lptrs, rptrs))
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py
index 21c22110910..a4edaeff545 100644
--- a/python/cudf/cudf/tests/test_text.py
+++ b/python/cudf/cudf/tests/test_text.py
@@ -308,8 +308,10 @@ def test_character_tokenize_series():
             "hello world",
             "sdf",
             None,
-            "goodbye, one-two:three~four+five_six@sev"
-            "en#eight^nine heŒŽ‘•™œ$µ¾ŤƠé Ǆ",
+            (
+                "goodbye, one-two:three~four+five_six@sev"
+                "en#eight^nine heŒŽ‘•™œ$µ¾ŤƠé Ǆ"
+            ),
         ]
     )
     expected = cudf.Series(
@@ -423,8 +425,10 @@ def test_character_tokenize_index():
             "hello world",
             "sdf",
             None,
-            "goodbye, one-two:three~four+five_six@sev"
-            "en#eight^nine heŒŽ‘•™œ$µ¾ŤƠé Ǆ",
+            (
+                "goodbye, one-two:three~four+five_six@sev"
+                "en#eight^nine heŒŽ‘•™œ$µ¾ŤƠé Ǆ"
+            ),
         ]
     )
     expected = cudf.core.index.as_index(

From 6128e0dd79131f866240484e3610f94b6c24bb2f Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Mon, 2 May 2022 08:42:29 -0400
Subject: [PATCH 09/23] Use warp per string for long strings in
 cudf::strings::contains() (#10739)

Improves the performance of `cudf::strings::contains()` for long strings. This executes a warp per string to match a target over sections of a single string in parallel. The benchmark showed this to be faster than the current implementation only for longer strings (greater than 64 bytes). It also proved somewhat faster and more consistent than a pure character-parallel approach.

This change may also help improve the performance of the regex `contains_re()` function in the future.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/10739
---
 cpp/src/strings/search/find.cu   | 88 +++++++++++++++++++++++++++++++-
 cpp/tests/strings/find_tests.cpp | 20 ++++++++
 2 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu
index 15d89069ba3..1390b304e43 100644
--- a/cpp/src/strings/search/find.cu
+++ b/cpp/src/strings/search/find.cu
@@ -18,6 +18,8 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/detail/utilities/device_atomics.cuh>
 #include <cudf/scalar/scalar_factories.hpp>
 #include <cudf/strings/detail/utilities.hpp>
 #include <cudf/strings/find.hpp>
@@ -28,6 +30,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/exec_policy.hpp>
 
+#include <thrust/binary_search.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/transform.h>
 
@@ -162,6 +165,81 @@ std::unique_ptr<column> rfind(strings_column_view const& strings,
 
 namespace detail {
 namespace {
+
+/**
+ * @brief Threshold to decide on using string or warp parallel functions.
+ *
+ * If the average byte length of a string in a column exceeds this value then
+ * the warp-parallel `contains_warp_fn` function is used.
+ * Otherwise, the string-parallel function in `contains_fn` is used.
+ *
+ * This is only used for the scalar version of `contains()` right now.
+ */
+constexpr size_type AVG_CHAR_BYTES_THRESHOLD = 64;
+
+/**
+ * @brief Check if `d_target` appears in a row in `d_strings`.
+ *
+ * This executes as a warp per string/row; a thread that finds a match sets `d_results[row]`.
+ */
+struct contains_warp_fn {
+  column_device_view const d_strings;
+  string_view const d_target;
+  bool* d_results;
+
+  __device__ void operator()(std::size_t idx)
+  {
+    auto const str_idx = static_cast<size_type>(idx / cudf::detail::warp_size);
+    if (d_strings.is_null(str_idx)) { return; }
+    // get the string for this warp
+    auto const d_str = d_strings.element<string_view>(str_idx);
+    // each thread checks part of the string; `<=` includes a match ending at the last byte
+    auto found = false;
+    for (auto i = static_cast<size_type>(idx % cudf::detail::warp_size);
+         !found && (i + d_target.size_bytes()) <= d_str.size_bytes();
+         i += cudf::detail::warp_size) {
+      // check the target matches this part of the d_str data
+      if (d_target.compare(d_str.data() + i, d_target.size_bytes()) == 0) { found = true; }
+    }
+    if (found) { atomicOr(d_results + str_idx, true); }
+  }
+};
+
+std::unique_ptr<column> contains_warp_parallel(strings_column_view const& input,
+                                               string_scalar const& target,
+                                               rmm::cuda_stream_view stream,
+                                               rmm::mr::device_memory_resource* mr)
+{
+  CUDF_EXPECTS(target.is_valid(stream), "Parameter target must be valid.");
+  auto d_target = string_view(target.data(), target.size());
+
+  // create output column
+  auto results = make_numeric_column(data_type{type_id::BOOL8},
+                                     input.size(),
+                                     cudf::detail::copy_bitmask(input.parent(), stream, mr),
+                                     input.null_count(),
+                                     stream,
+                                     mr);
+
+  // fill the output with `false` unless the `d_target` is empty
+  auto results_view = results->mutable_view();
+  thrust::fill(rmm::exec_policy(stream),
+               results_view.begin<bool>(),
+               results_view.end<bool>(),
+               d_target.empty());
+
+  if (!d_target.empty()) {
+    // launch warp per string
+    auto d_strings = column_device_view::create(input.parent(), stream);
+    thrust::for_each_n(rmm::exec_policy(stream),
+                       thrust::make_counting_iterator<std::size_t>(0),
+                       static_cast<std::size_t>(input.size()) * cudf::detail::warp_size,
+                       contains_warp_fn{*d_strings, d_target, results_view.data<bool>()});
+  }
+  results->set_null_count(input.null_count());
+  return results;
+}
+
 /**
  * @brief Utility to return a bool column indicating the presence of
  * a given target string in a strings column.
@@ -286,15 +364,21 @@ std::unique_ptr<column> contains_fn(strings_column_view const& strings,
 }  // namespace
 
 std::unique_ptr<column> contains(
-  strings_column_view const& strings,
+  strings_column_view const& input,
   string_scalar const& target,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
+  // use warp parallel when the average string width is greater than the threshold
+  if (!input.is_empty() && ((input.chars_size() / input.size()) > AVG_CHAR_BYTES_THRESHOLD)) {
+    return contains_warp_parallel(input, target, stream, mr);
+  }
+
+  // benchmark measurements showed this to be faster for smaller strings
   auto pfn = [] __device__(string_view d_string, string_view d_target) {
     return d_string.find(d_target) >= 0;
   };
-  return contains_fn(strings, target, pfn, stream, mr);
+  return contains_fn(input, target, pfn, stream, mr);
 }
 
 std::unique_ptr<column> contains(
diff --git a/cpp/tests/strings/find_tests.cpp b/cpp/tests/strings/find_tests.cpp
index 177e6d97f7f..208063adcb0 100644
--- a/cpp/tests/strings/find_tests.cpp
+++ b/cpp/tests/strings/find_tests.cpp
@@ -82,6 +82,26 @@ TEST_F(StringsFindTest, Contains)
   }
 }
 
+TEST_F(StringsFindTest, ContainsLongStrings)
+{
+  cudf::test::strings_column_wrapper strings(
+    {"Héllo, there world and goodbye",
+     "quick brown fox jumped over the lazy brown dog; the fat cats jump in place without moving",
+     "the following code snippet demonstrates how to use search for values in an ordered range",
+     "it returns the last position where value could be inserted without violating the ordering",
+     "algorithms execution is parallelized as determined by an execution policy. t",
+     "he this is a continuation of previous row to make sure string boundaries are honored",
+     ""});
+  auto strings_view = cudf::strings_column_view(strings);
+  auto results      = cudf::strings::contains(strings_view, cudf::string_scalar("e"));
+  cudf::test::fixed_width_column_wrapper<bool> expected({1, 1, 1, 1, 1, 1, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+
+  results = cudf::strings::contains(strings_view, cudf::string_scalar(" the "));
+  cudf::test::fixed_width_column_wrapper<bool> expected2({0, 1, 0, 1, 0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected2);
+}
+
 TEST_F(StringsFindTest, StartsWith)
 {
   cudf::test::strings_column_wrapper strings({"Héllo", "thesé", "", "lease", "tést strings", ""},

From 0ddb3d9319426da49d8cb4b9cbb95819dc9b5263 Mon Sep 17 00:00:00 2001
From: Yunsong Wang <yunsongw@nvidia.com>
Date: Mon, 2 May 2022 17:12:14 -0400
Subject: [PATCH 10/23] Add row bitmask as a `detail::hash_join` member
 (#10248)

When working on https://github.com/rapidsai/cudf/pull/8934, we observed a performance regression when nulls are unequal. One major reason is that the new hash map uses a CG-based double hashing algorithm. This algorithm is dedicated to improving hash collision handling. The existing implementation determines hash map size by the number of rows in the build table regardless of how many rows are valid. In the case of nulls being unequal, the actual map occupancy is, therefore, lower than the default 50% thus resulting in fewer hash collisions. The old scalar linear probing is more efficient in this case due to less CG-related overhead and the probe will mostly end at the first probe slot.

To improve this situation, the original idea of this PR was to construct the hash map based on the number of valid rows. There are supposed to be two benefits:

1. Increases map occupancy to benefit more from CG-based double hashing thus improving runtime efficiency
2. Reduces peak memory usage: for 1'000 elements with 75% nulls, the new capacity would be 500 (1000 * 0.25 * 2) as opposed to 2000 (1000 * 2)

During this work, however, we found that the first assumption does not hold, since it did not account for the performance degradation that comes with the reduced capacity (see https://github.com/rapidsai/cudf/pull/10248#issuecomment-1045206917). Though this effort would reduce peak memory usage, it seems Python/Spark workflows would never benefit from it since they tend to drop nulls before any join operations.

Finally, all changes related to map size reduction are discarded. This PR only adds `_composite_bitmask` as a `detail::hash_join` member which is a preparation step for https://github.com/rapidsai/cudf/issues/9151

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/10248
---
 cpp/include/cudf/detail/join.hpp   | 8 +++++---
 cpp/src/join/hash_join.cu          | 7 ++++++-
 cpp/src/join/join_common_utils.cuh | 5 +++--
 cpp/src/join/mixed_join.cu         | 8 ++++++--
 4 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp
index 12e4aaa03fd..2a94ee22a0d 100644
--- a/cpp/include/cudf/detail/join.hpp
+++ b/cpp/include/cudf/detail/join.hpp
@@ -22,6 +22,7 @@
 #include <cudf/types.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_buffer.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/mr/device/polymorphic_allocator.hpp>
 
@@ -68,9 +69,10 @@ struct hash_join {
   hash_join& operator=(hash_join&&) = delete;
 
  private:
-  bool const _is_empty;                    ///< true if `_hash_table` is empty
-  cudf::null_equality const _nulls_equal;  ///< whether to consider nulls as equal
-  cudf::table_view _build;                 ///< input table to build the hash map
+  bool const _is_empty;                         ///< true if `_hash_table` is empty
+  rmm::device_buffer const _composite_bitmask;  ///< Bitmask to denote whether a row is valid
+  cudf::null_equality const _nulls_equal;       ///< whether to consider nulls as equal
+  cudf::table_view _build;                      ///< input table to build the hash map
   cudf::structs::detail::flattened_table
     _flattened_build_table;  ///< flattened data structures for `_build`
   map_type _hash_table;      ///< hash table built on `_build`
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 3e0e76de708..07995ba2785 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -283,6 +283,7 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,
                              cudf::null_equality compare_nulls,
                              rmm::cuda_stream_view stream)
   : _is_empty{build.num_rows() == 0},
+    _composite_bitmask{cudf::detail::bitmask_and(build, stream).first},
     _nulls_equal{compare_nulls},
     _hash_table{compute_hash_table_size(build.num_rows()),
                 std::numeric_limits<hash_value_type>::max(),
@@ -302,7 +303,11 @@ hash_join<Hasher>::hash_join(cudf::table_view const& build,
 
   if (_is_empty) { return; }
 
-  cudf::detail::build_join_hash_table(_build, _hash_table, _nulls_equal, stream);
+  cudf::detail::build_join_hash_table(_build,
+                                      _hash_table,
+                                      _nulls_equal,
+                                      static_cast<bitmask_type const*>(_composite_bitmask.data()),
+                                      stream);
 }
 
 template <typename Hasher>
diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh
index fdb63419c84..b3994685623 100644
--- a/cpp/src/join/join_common_utils.cuh
+++ b/cpp/src/join/join_common_utils.cuh
@@ -143,6 +143,7 @@ get_trivial_left_join_indices(
  * @param build Table of columns used to build join hash.
  * @param hash_table Build hash table.
  * @param nulls_equal Flag to denote nulls are equal or not.
+ * @param bitmask Bitmask to denote whether a row is valid.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  *
  */
@@ -150,6 +151,7 @@ template <typename MultimapType>
 void build_join_hash_table(cudf::table_view const& build,
                            MultimapType& hash_table,
                            null_equality const nulls_equal,
+                           [[maybe_unused]] bitmask_type const* bitmask,
                            rmm::cuda_stream_view stream)
 {
   auto build_table_ptr = cudf::table_device_view::create(build, stream);
@@ -168,8 +170,7 @@ void build_join_hash_table(cudf::table_view const& build,
     hash_table.insert(iter, iter + build_table_num_rows, stream.value());
   } else {
     thrust::counting_iterator<size_type> stencil(0);
-    auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first;
-    row_is_valid pred{static_cast<bitmask_type const*>(row_bitmask.data())};
+    row_is_valid pred{bitmask};
 
     // insert valid rows
     hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value());
diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu
index 27ee77e3edd..11553858e5f 100644
--- a/cpp/src/join/mixed_join.cu
+++ b/cpp/src/join/mixed_join.cu
@@ -135,7 +135,9 @@ mixed_join(
   // TODO: To add support for nested columns we will need to flatten in many
   // places. However, this probably isn't worth adding any time soon since we
   // won't be able to support AST conditions for those types anyway.
-  build_join_hash_table(build, hash_table, compare_nulls, stream);
+  auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first;
+  build_join_hash_table(
+    build, hash_table, compare_nulls, static_cast<bitmask_type const*>(row_bitmask.data()), stream);
   auto hash_table_view = hash_table.get_device_view();
 
   auto left_conditional_view  = table_device_view::create(left_conditional, stream);
@@ -381,7 +383,9 @@ compute_mixed_join_output_size(table_view const& left_equality,
   // TODO: To add support for nested columns we will need to flatten in many
   // places. However, this probably isn't worth adding any time soon since we
   // won't be able to support AST conditions for those types anyway.
-  build_join_hash_table(build, hash_table, compare_nulls, stream);
+  auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first;
+  build_join_hash_table(
+    build, hash_table, compare_nulls, static_cast<bitmask_type const*>(row_bitmask.data()), stream);
   auto hash_table_view = hash_table.get_device_view();
 
   auto left_conditional_view  = table_device_view::create(left_conditional, stream);

From a9eb47cab6976e515af597cc6f9a90b846cb6706 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Mon, 2 May 2022 16:11:26 -0700
Subject: [PATCH 11/23] Deprecate `merge_sorted`, change dask cudf usage to
 internal method (#10713)

This PR deprecates the non-pandas-conforming method `cudf.merge_sorted` and changes the dask_cudf usage to the internal method `_merge_sorted`.

I also changed the `msg` keyword of `pytest.skip` to `reason` in multiple tests; this removes 1000+ test warnings.

cc @vyasr @rjzamora

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Richard (Rick) Zamora (https://github.com/rjzamora)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/10713
---
 python/cudf/cudf/core/reshape.py       | 19 +++++++++++++++++++
 python/cudf/cudf/tests/test_reshape.py | 17 +++++++++--------
 python/dask_cudf/dask_cudf/sorting.py  |  2 +-
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index 5977b63777f..b405c018983 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
 import itertools
+import warnings
 from collections import abc
 from typing import Dict, Optional
 
@@ -791,6 +792,24 @@ def merge_sorted(
     A new, lexicographically sorted, DataFrame/Series.
     """
 
+    warnings.warn(
+        "merge_sorted is deprecated and will be removed in a "
+        "future release.",
+        FutureWarning,
+    )
+    return _merge_sorted(
+        objs, keys, by_index, ignore_index, ascending, na_position
+    )
+
+
+def _merge_sorted(
+    objs,
+    keys=None,
+    by_index=False,
+    ignore_index=False,
+    ascending=True,
+    na_position="last",
+):
     if not pd.api.types.is_list_like(objs):
         raise TypeError("objs must be a list-like of Frame-like objects")
 
diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py
index 14fa4be7fed..5f40de74a65 100644
--- a/python/cudf/cudf/tests/test_reshape.py
+++ b/python/cudf/cudf/tests/test_reshape.py
@@ -24,7 +24,7 @@
 @pytest.mark.parametrize("nulls", ["none", "some", "all"])
 def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
     if dtype not in ["float32", "float64"] and nulls in ["some", "all"]:
-        pytest.skip(msg="nulls not supported in dtype: " + dtype)
+        pytest.skip(reason="nulls not supported in dtype: " + dtype)
 
     pdf = pd.DataFrame()
     id_vars = []
@@ -87,7 +87,7 @@ def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
 @pytest.mark.parametrize("nulls", ["none", "some"])
 def test_df_stack(nulls, num_cols, num_rows, dtype):
     if dtype not in ["float32", "float64"] and nulls in ["some"]:
-        pytest.skip(msg="nulls not supported in dtype: " + dtype)
+        pytest.skip(reason="nulls not supported in dtype: " + dtype)
 
     pdf = pd.DataFrame()
     for i in range(num_cols):
@@ -139,7 +139,7 @@ def test_df_stack_reset_index():
 def test_interleave_columns(nulls, num_cols, num_rows, dtype):
 
     if dtype not in ["float32", "float64"] and nulls in ["some"]:
-        pytest.skip(msg="nulls not supported in dtype: " + dtype)
+        pytest.skip(reason="nulls not supported in dtype: " + dtype)
 
     pdf = pd.DataFrame(dtype=dtype)
     for i in range(num_cols):
@@ -176,7 +176,7 @@ def test_interleave_columns(nulls, num_cols, num_rows, dtype):
 def test_tile(nulls, num_cols, num_rows, dtype, count):
 
     if dtype not in ["float32", "float64"] and nulls in ["some"]:
-        pytest.skip(msg="nulls not supported in dtype: " + dtype)
+        pytest.skip(reason="nulls not supported in dtype: " + dtype)
 
     pdf = pd.DataFrame(dtype=dtype)
     for i in range(num_cols):
@@ -269,7 +269,7 @@ def test_df_merge_sorted(nparts, keys, na_position, ascending):
     expect = df.sort_values(
         keys_1, na_position=na_position, ascending=ascending
     )
-    result = cudf.merge_sorted(
+    result = cudf.core.reshape._merge_sorted(
         dfs, keys=keys, na_position=na_position, ascending=ascending
     )
     if keys:
@@ -290,7 +290,8 @@ def test_df_merge_sorted_index(nparts, index, ascending):
     )
 
     expect = df.sort_index(ascending=ascending)
-    result = cudf.merge_sorted(dfs, by_index=True, ascending=ascending)
+    with pytest.warns(FutureWarning, match="deprecated and will be removed"):
+        result = cudf.merge_sorted(dfs, by_index=True, ascending=ascending)
 
     assert_eq(expect.index, result.index)
 
@@ -317,7 +318,7 @@ def test_df_merge_sorted_ignore_index(keys, na_position, ascending):
     expect = df.sort_values(
         keys_1, na_position=na_position, ascending=ascending
     )
-    result = cudf.merge_sorted(
+    result = cudf.core.reshape._merge_sorted(
         dfs,
         keys=keys,
         na_position=na_position,
@@ -347,7 +348,7 @@ def test_series_merge_sorted(nparts, key, na_position, ascending):
     )
 
     expect = df.sort_values(na_position=na_position, ascending=ascending)
-    result = cudf.merge_sorted(
+    result = cudf.core.reshape._merge_sorted(
         dfs, na_position=na_position, ascending=ascending
     )
 
diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py
index 880e2365fe6..1c89baba592 100644
--- a/python/dask_cudf/dask_cudf/sorting.py
+++ b/python/dask_cudf/dask_cudf/sorting.py
@@ -85,7 +85,7 @@ def _append_counts(val, count):
         return val
 
     # Sort by calculated quantile values, then number of observations.
-    combined_vals_counts = gd.merge_sorted(
+    combined_vals_counts = gd.core.reshape._merge_sorted(
         [*map(_append_counts, vals, counts)]
     )
     combined_counts = cupy.asnumpy(combined_vals_counts["_counts"].values)

From 0e326245fbbc1332e0a83c16f296f09fbf33a7d1 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 3 May 2022 07:39:50 -0400
Subject: [PATCH 12/23] Add multiple rows to subword tokenizer benchmark
 (#10767)

When porting the subword tokenizer code from CLX, the benchmark was left hard-coded to a single input size (1000 rows). This updates the benchmark to run over a range of row counts and adds the missing `cuda_event_timer`.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Conor Hoekstra (https://github.com/codereport)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/10767
---
 cpp/benchmarks/text/subword.cpp | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/cpp/benchmarks/text/subword.cpp b/cpp/benchmarks/text/subword.cpp
index d8357dcf92c..2c430868341 100644
--- a/cpp/benchmarks/text/subword.cpp
+++ b/cpp/benchmarks/text/subword.cpp
@@ -14,7 +14,8 @@
  * limitations under the License.
  */
 
-#include <benchmark/benchmark.h>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf_test/column_wrapper.hpp>
 
@@ -53,9 +54,9 @@ static std::string create_hash_vocab_file()
   return hash_file;
 }
 
-static void BM_cuda_tokenizer_cudf(benchmark::State& state)
+static void BM_subword_tokenizer(benchmark::State& state)
 {
-  uint32_t nrows = 1000;
+  auto const nrows = static_cast<cudf::size_type>(state.range(0));
   std::vector<const char*> h_strings(nrows, "This is a test ");
   cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end());
   std::string hash_file = create_hash_vocab_file();
@@ -67,6 +68,7 @@ static void BM_cuda_tokenizer_cudf(benchmark::State& state)
   //
   auto vocab = nvtext::load_vocabulary_file(hash_file);
   for (auto _ : state) {
+    cuda_event_timer raii(state, true);
     auto result = nvtext::subword_tokenize(cudf::strings_column_view{strings},
                                            *vocab,
                                            max_sequence_length,
@@ -76,6 +78,18 @@ static void BM_cuda_tokenizer_cudf(benchmark::State& state)
                                            MAX_ROWS_TENSOR);
   }
 }
-BENCHMARK(BM_cuda_tokenizer_cudf);
 
-BENCHMARK_MAIN();
+class Subword : public cudf::benchmark {
+};
+
+#define SUBWORD_BM_BENCHMARK_DEFINE(name)                                                        \
+  BENCHMARK_DEFINE_F(Subword, name)(::benchmark::State & state) { BM_subword_tokenizer(state); } \
+  BENCHMARK_REGISTER_F(Subword, name)                                                            \
+    ->RangeMultiplier(2)                                                                         \
+    ->Range(1 << 10, 1 << 17)                                                                    \
+    ->UseManualTime()                                                                            \
+    ->Unit(benchmark::kMillisecond);
+
+SUBWORD_BM_BENCHMARK_DEFINE(BM_subword_tokenizer);
+
+// BENCHMARK_MAIN();

From ad126065109aaa72b6eb324ba5abd555b70bb4ae Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 3 May 2022 07:44:19 -0400
Subject: [PATCH 13/23] Fix replace error when regex has only zero match
 quantifiers (#10760)

Closes #10753

Fixes `cudf::strings::replace_re` logic that read past the end of a string when given a regex made up only of quantifiers that can match zero characters (e.g. 'D*' or 'D?s?', both of which can match nothing).

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)
  - Mark Harris (https://github.com/harrism)

URL: https://github.com/rapidsai/cudf/pull/10760
---
 cpp/src/strings/replace/replace_re.cu     | 15 ++++++++-------
 cpp/tests/strings/replace_regex_tests.cpp | 12 ++++++++++++
 python/cudf/cudf/tests/test_string.py     | 10 ++++++++++
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu
index d42359deeac..af74d8bdb92 100644
--- a/cpp/src/strings/replace/replace_re.cu
+++ b/cpp/src/strings/replace/replace_re.cu
@@ -54,18 +54,19 @@ struct replace_regex_fn {
       return;
     }
 
-    auto const d_str = d_strings.element<string_view>(idx);
-    auto nbytes      = d_str.size_bytes();                  // number of bytes in input string
-    auto mxn = maxrepl < 0 ? d_str.length() + 1 : maxrepl;  // max possible replaces for this string
-    auto in_ptr        = d_str.data();                      // input pointer (i)
-    auto out_ptr       = d_chars ? d_chars + d_offsets[idx]  // output pointer (o)
-                                 : nullptr;
+    auto const d_str  = d_strings.element<string_view>(idx);
+    auto const nchars = d_str.length();
+    auto nbytes       = d_str.size_bytes();             // number of bytes in input string
+    auto mxn     = maxrepl < 0 ? nchars + 1 : maxrepl;  // max possible replaces for this string
+    auto in_ptr  = d_str.data();                        // input pointer (i)
+    auto out_ptr = d_chars ? d_chars + d_offsets[idx]   // output pointer (o)
+                           : nullptr;
     size_type last_pos = 0;
     int32_t begin      = 0;   // these are for calling prog.find
     int32_t end        = -1;  // matches final word-boundary if at the end of the string
 
     // copy input to output replacing strings as we go
-    while (mxn-- > 0) {  // maximum number of replaces
+    while (mxn-- > 0 && begin <= nchars) {  // maximum number of replaces
 
       if (prog.is_empty() || prog.find<stack_size>(idx, d_str, begin, end) <= 0) {
         break;  // no more matches
diff --git a/cpp/tests/strings/replace_regex_tests.cpp b/cpp/tests/strings/replace_regex_tests.cpp
index 2b9e8b7aae7..1ccbc6fa676 100644
--- a/cpp/tests/strings/replace_regex_tests.cpp
+++ b/cpp/tests/strings/replace_regex_tests.cpp
@@ -157,6 +157,18 @@ TEST_F(StringsReplaceRegexTest, WordBoundary)
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
+TEST_F(StringsReplaceRegexTest, ZeroLengthMatch)
+{
+  cudf::test::strings_column_wrapper input({"DD", "zéz", "DsDs", ""});
+  auto repl     = cudf::string_scalar("_");
+  auto results  = cudf::strings::replace_re(cudf::strings_column_view(input), "D*", repl);
+  auto expected = cudf::test::strings_column_wrapper({"__", "_z_é_z_", "__s__s_", "_"});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+  results  = cudf::strings::replace_re(cudf::strings_column_view(input), "D?s?", repl);
+  expected = cudf::test::strings_column_wrapper({"___", "_z_é_z_", "___", "_"});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+}
+
 TEST_F(StringsReplaceRegexTest, Multiline)
 {
   auto const multiline = cudf::strings::regex_flags::MULTILINE;
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index d600fdeee27..d212c6b2072 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -923,6 +923,16 @@ def test_string_replace(
         assert_eq(expect, got)
 
 
+@pytest.mark.parametrize("pat", ["A*", "F?H?"])
+def test_string_replace_zero_length(ps_gs, pat):
+    ps, gs = ps_gs
+
+    expect = ps.str.replace(pat, "_", regex=True)
+    got = gs.str.replace(pat, "_", regex=True)
+
+    assert_eq(expect, got)
+
+
 def test_string_lower(ps_gs):
     ps, gs = ps_gs
 

From 8d861ce3dd254d77a7bfe1655c52a156263bd747 Mon Sep 17 00:00:00 2001
From: Mike Wilson <hyperbolic2346@users.noreply.github.com>
Date: Tue, 3 May 2022 16:57:06 -0400
Subject: [PATCH 14/23] Fixing deprecation warnings in test_orc.py (#10772)

This change fixes the deprecation warnings in `test_orc.py`. Fixed warnings:

- parsing timezone aware datetimes is deprecated; this will raise an error in the future
- DeprecationWarning: elementwise comparison failed; this will raise an error in the future.
- FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
- DeprecationWarning: In future, it will be an error for 'np.bool_' scalars to be interpreted as an index
- FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.

Authors:
  - Mike Wilson (https://github.com/hyperbolic2346)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/10772
---
 python/cudf/cudf/testing/_utils.py | 23 ++++++++++++++---------
 python/cudf/cudf/tests/test_orc.py |  8 +++++++-
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 5232d1adb64..e9f836d9702 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -108,20 +108,25 @@ def assert_eq(left, right, **kwargs):
     if isinstance(right, cupy.ndarray):
         right = cupy.asnumpy(right)
 
-    if isinstance(left, pd.DataFrame):
-        tm.assert_frame_equal(left, right, **kwargs)
-    elif isinstance(left, pd.Series):
+    if isinstance(left, (pd.DataFrame, pd.Series, pd.Index)):
         # TODO: A warning is emitted from the function
-        # pandas.testing.assert_series_equal for some inputs:
+        # pandas.testing.assert_[series, frame, index]_equal for some inputs:
         # "DeprecationWarning: elementwise comparison failed; this will raise
         # an error in the future."
+        # or "FutureWarning: elementwise ..."
         # This warning comes from a call from pandas to numpy. It is ignored
         # here because it cannot be fixed within cudf.
         with warnings.catch_warnings():
-            warnings.simplefilter("ignore", DeprecationWarning)
-            tm.assert_series_equal(left, right, **kwargs)
-    elif isinstance(left, pd.Index):
-        tm.assert_index_equal(left, right, **kwargs)
+            warnings.simplefilter(
+                "ignore", (DeprecationWarning, FutureWarning)
+            )
+            if isinstance(left, pd.DataFrame):
+                tm.assert_frame_equal(left, right, **kwargs)
+            elif isinstance(left, pd.Series):
+                tm.assert_series_equal(left, right, **kwargs)
+            else:
+                tm.assert_index_equal(left, right, **kwargs)
+
     elif isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
         if np.issubdtype(left.dtype, np.floating) and np.issubdtype(
             right.dtype, np.floating
@@ -306,7 +311,7 @@ def gen_rand(dtype, size, **kwargs):
             np.random.randint(low=low, high=high, size=size), unit=time_unit
         )
     elif dtype.kind in ("O", "U"):
-        return pd.util.testing.rands_array(10, size)
+        return pd._testing.rands_array(10, size)
     raise NotImplementedError(f"dtype.kind={dtype.kind}")
 
 
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index c3969bf6c14..c28358f5fa0 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -310,7 +310,7 @@ def test_orc_read_skiprows(tmpdir):
     writer = pyorc.Writer(buff, pyorc.Struct(a=pyorc.Boolean()))
     tuples = list(
         map(
-            lambda x: (None,) if x[0] is pd.NA else x,
+            lambda x: (None,) if x[0] is pd.NA else (bool(x[0]),),
             list(df.itertuples(index=False, name=None)),
         )
     )
@@ -640,6 +640,12 @@ def test_int_overflow(tmpdir):
 
 
 def normalized_equals(value1, value2):
+    # need naive time object for numpy to convert to datetime64
+    if isinstance(value1, datetime.datetime):
+        value1 = value1.replace(tzinfo=None)
+    if isinstance(value2, datetime.datetime):
+        value2 = value2.replace(tzinfo=None)
+
     if isinstance(value1, (datetime.datetime, np.datetime64)):
         value1 = np.datetime64(value1, "ms")
     if isinstance(value2, (datetime.datetime, np.datetime64)):

From d3a39b32a284050048e9e586694e805cd63201d1 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 4 May 2022 13:40:10 -0500
Subject: [PATCH 15/23] Add struct utility functions. (#10776)

This PR adds struct utility functions: the `is_struct` type traits and `cudf::structs::detail::contains_null_structs`. This change is needed for the eventual support of structs in binary operations. See also: PR #9452.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Jake Hemstad (https://github.com/jrhemstad)

URL: https://github.com/rapidsai/cudf/pull/10776
---
 cpp/CMakeLists.txt                            |  2 +-
 cpp/include/cudf/detail/structs/utilities.hpp | 16 +++++++++-
 cpp/include/cudf/table/row_operators.cuh      |  2 +-
 cpp/include/cudf/utilities/traits.hpp         | 31 +++++++++++++++++++
 cpp/src/structs/utilities.cpp                 |  6 ++++
 5 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index cbe2811afe4..7870366b714 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -189,7 +189,6 @@ add_library(
   src/ast/expression_parser.cpp
   src/ast/expressions.cpp
   src/binaryop/binaryop.cpp
-  src/binaryop/compiled/binary_ops.cu
   src/binaryop/compiled/Add.cu
   src/binaryop/compiled/ATan2.cu
   src/binaryop/compiled/BitwiseAnd.cu
@@ -220,6 +219,7 @@ add_library(
   src/binaryop/compiled/ShiftRightUnsigned.cu
   src/binaryop/compiled/Sub.cu
   src/binaryop/compiled/TrueDiv.cu
+  src/binaryop/compiled/binary_ops.cu
   src/binaryop/compiled/util.cpp
   src/labeling/label_bins.cu
   src/bitmask/null_mask.cu
diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp
index 751b7c00e8a..45d4c3b5ae4 100644
--- a/cpp/include/cudf/detail/structs/utilities.hpp
+++ b/cpp/include/cudf/detail/structs/utilities.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -245,6 +245,20 @@ std::tuple<cudf::table_view, std::vector<rmm::device_buffer>> superimpose_parent
   table_view const& table,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Checks if a column or any of its children is a struct column with structs that are null.
+ *
+ * This function searches for structs that are null -- differentiating between structs that are null
+ * and structs containing null values. Null structs add a column to the result of the flatten column
+ * utility and necessitates column_nullability::FORCE when flattening the column for comparison
+ * operations.
+ *
+ * @param col Column to check for null structs
+ * @return A boolean indicating if the column is or contains a struct column that contains a null
+ * struct.
+ */
+bool contains_null_structs(column_view const& col);
 }  // namespace detail
 }  // namespace structs
 }  // namespace cudf
diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh
index 4eca03a800c..4d503cd53b8 100644
--- a/cpp/include/cudf/table/row_operators.cuh
+++ b/cpp/include/cudf/table/row_operators.cuh
@@ -74,7 +74,7 @@ __device__ weak_ordering compare_elements(Element lhs, Element rhs)
  * @brief A specialization for floating-point `Element` type relational comparison
  * to derive the order of the elements with respect to `lhs`.
  *
- * This Specialization handles `nan` in the following order:
+ * This specialization handles `nan` in the following order:
  * `[-Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN, null] (for null_order::AFTER)`
  * `[null, -Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN] (for null_order::BEFORE)`
  *
diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp
index ed24517f55b..d8fa7bff0b8 100644
--- a/cpp/include/cudf/utilities/traits.hpp
+++ b/cpp/include/cudf/utilities/traits.hpp
@@ -699,6 +699,37 @@ constexpr inline bool is_nested(data_type type)
   return cudf::type_dispatcher(type, is_nested_impl{});
 }
 
+/**
+ * @brief Indicates whether `T` is a struct type.
+ *
+ * @param T The type to verify
+ * @return A boolean indicating if T is a struct type
+ */
+template <typename T>
+constexpr inline bool is_struct()
+{
+  return std::is_same_v<T, cudf::struct_view>;
+}
+
+struct is_struct_impl {
+  template <typename T>
+  constexpr bool operator()()
+  {
+    return is_struct<T>();
+  }
+};
+
+/**
+ * @brief Indicates whether `type` is a struct type.
+ *
+ * @param type The `data_type` to verify
+ * @return A boolean indicating if `type` is a struct type
+ */
+constexpr inline bool is_struct(data_type type)
+{
+  return cudf::type_dispatcher(type, is_struct_impl{});
+}
+
 template <typename FromType>
 struct is_bit_castable_to_impl {
   template <typename ToType, std::enable_if_t<is_compound<ToType>()>* = nullptr>
diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp
index a2c173cae5f..5baab0f09a2 100644
--- a/cpp/src/structs/utilities.cpp
+++ b/cpp/src/structs/utilities.cpp
@@ -441,6 +441,12 @@ std::tuple<cudf::table_view, std::vector<rmm::device_buffer>> superimpose_parent
   return {table_view{superimposed_columns}, std::move(superimposed_nullmasks)};
 }
 
+bool contains_null_structs(column_view const& col)
+{
+  return (is_struct(col) && col.has_nulls()) ||
+         std::any_of(col.child_begin(), col.child_end(), contains_null_structs);
+}
+
 }  // namespace detail
 }  // namespace structs
 }  // namespace cudf

From 0d11591f23b566e99f30cd06593e78097262a6fe Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 4 May 2022 15:16:51 -0500
Subject: [PATCH 16/23] Use column_views instead of column_device_views in
 binary operations. (#10780)

This PR changes the internal APIs used for binary operations to use `column_view` objects instead of `column_device_view` objects. This change is needed for the eventual support of structs in binary operations. See also: PR #9452.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ryan Lee (https://github.com/rwlee)
  - Nghia Truong (https://github.com/ttnghia)
  - Jake Hemstad (https://github.com/jrhemstad)

URL: https://github.com/rapidsai/cudf/pull/10780
---
 cpp/src/binaryop/compiled/ATan2.cu            |  8 +--
 cpp/src/binaryop/compiled/Add.cu              |  8 +--
 cpp/src/binaryop/compiled/BitwiseAnd.cu       |  8 +--
 cpp/src/binaryop/compiled/BitwiseOr.cu        |  8 +--
 cpp/src/binaryop/compiled/BitwiseXor.cu       |  8 +--
 cpp/src/binaryop/compiled/Div.cu              |  8 +--
 cpp/src/binaryop/compiled/FloorDiv.cu         |  8 +--
 cpp/src/binaryop/compiled/Greater.cu          |  8 +--
 cpp/src/binaryop/compiled/GreaterEqual.cu     |  8 +--
 cpp/src/binaryop/compiled/Less.cu             |  8 +--
 cpp/src/binaryop/compiled/LessEqual.cu        |  8 +--
 cpp/src/binaryop/compiled/LogBase.cu          |  8 +--
 cpp/src/binaryop/compiled/LogicalAnd.cu       |  8 +--
 cpp/src/binaryop/compiled/LogicalOr.cu        |  8 +--
 cpp/src/binaryop/compiled/Mod.cu              |  8 +--
 cpp/src/binaryop/compiled/Mul.cu              |  8 +--
 cpp/src/binaryop/compiled/NullEquals.cu       |  8 +--
 cpp/src/binaryop/compiled/NullLogicalAnd.cu   |  6 +-
 cpp/src/binaryop/compiled/NullLogicalOr.cu    |  6 +-
 cpp/src/binaryop/compiled/NullMax.cu          |  8 +--
 cpp/src/binaryop/compiled/NullMin.cu          |  8 +--
 cpp/src/binaryop/compiled/PMod.cu             |  8 +--
 cpp/src/binaryop/compiled/Pow.cu              |  8 +--
 cpp/src/binaryop/compiled/PyMod.cu            |  8 +--
 cpp/src/binaryop/compiled/ShiftLeft.cu        |  8 +--
 cpp/src/binaryop/compiled/ShiftRight.cu       |  8 +--
 .../binaryop/compiled/ShiftRightUnsigned.cu   |  8 +--
 cpp/src/binaryop/compiled/Sub.cu              |  8 +--
 cpp/src/binaryop/compiled/TrueDiv.cu          |  8 +--
 cpp/src/binaryop/compiled/binary_ops.cu       | 57 +++++++------------
 cpp/src/binaryop/compiled/binary_ops.cuh      | 25 +++++---
 cpp/src/binaryop/compiled/binary_ops.hpp      | 40 ++++++-------
 cpp/src/binaryop/compiled/equality_ops.cu     | 34 +++++------
 33 files changed, 190 insertions(+), 194 deletions(-)

diff --git a/cpp/src/binaryop/compiled/ATan2.cu b/cpp/src/binaryop/compiled/ATan2.cu
index 8e5cbf57f55..f43a469a2c9 100644
--- a/cpp/src/binaryop/compiled/ATan2.cu
+++ b/cpp/src/binaryop/compiled/ATan2.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::ATan2>(mutable_column_device_view&,
-                                          column_device_view const&,
-                                          column_device_view const&,
+template void apply_binary_op<ops::ATan2>(mutable_column_view&,
+                                          column_view const&,
+                                          column_view const&,
                                           bool is_lhs_scalar,
                                           bool is_rhs_scalar,
                                           rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/Add.cu b/cpp/src/binaryop/compiled/Add.cu
index 4cd2ced66f4..1dbfa5b4718 100644
--- a/cpp/src/binaryop/compiled/Add.cu
+++ b/cpp/src/binaryop/compiled/Add.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::Add>(mutable_column_device_view&,
-                                        column_device_view const&,
-                                        column_device_view const&,
+template void apply_binary_op<ops::Add>(mutable_column_view&,
+                                        column_view const&,
+                                        column_view const&,
                                         bool is_lhs_scalar,
                                         bool is_rhs_scalar,
                                         rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/BitwiseAnd.cu b/cpp/src/binaryop/compiled/BitwiseAnd.cu
index 6abac2bd197..cfabb1402ce 100644
--- a/cpp/src/binaryop/compiled/BitwiseAnd.cu
+++ b/cpp/src/binaryop/compiled/BitwiseAnd.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::BitwiseAnd>(mutable_column_device_view&,
-                                               column_device_view const&,
-                                               column_device_view const&,
+template void apply_binary_op<ops::BitwiseAnd>(mutable_column_view&,
+                                               column_view const&,
+                                               column_view const&,
                                                bool is_lhs_scalar,
                                                bool is_rhs_scalar,
                                                rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/BitwiseOr.cu b/cpp/src/binaryop/compiled/BitwiseOr.cu
index 6d523cbf1d1..01ef118665b 100644
--- a/cpp/src/binaryop/compiled/BitwiseOr.cu
+++ b/cpp/src/binaryop/compiled/BitwiseOr.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::BitwiseOr>(mutable_column_device_view&,
-                                              column_device_view const&,
-                                              column_device_view const&,
+template void apply_binary_op<ops::BitwiseOr>(mutable_column_view&,
+                                              column_view const&,
+                                              column_view const&,
                                               bool is_lhs_scalar,
                                               bool is_rhs_scalar,
                                               rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/BitwiseXor.cu b/cpp/src/binaryop/compiled/BitwiseXor.cu
index 45175681574..44f74bab876 100644
--- a/cpp/src/binaryop/compiled/BitwiseXor.cu
+++ b/cpp/src/binaryop/compiled/BitwiseXor.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::BitwiseXor>(mutable_column_device_view&,
-                                               column_device_view const&,
-                                               column_device_view const&,
+template void apply_binary_op<ops::BitwiseXor>(mutable_column_view&,
+                                               column_view const&,
+                                               column_view const&,
                                                bool is_lhs_scalar,
                                                bool is_rhs_scalar,
                                                rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/Div.cu b/cpp/src/binaryop/compiled/Div.cu
index 7cc895ecd06..f377778c427 100644
--- a/cpp/src/binaryop/compiled/Div.cu
+++ b/cpp/src/binaryop/compiled/Div.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::Div>(mutable_column_device_view&,
-                                        column_device_view const&,
-                                        column_device_view const&,
+template void apply_binary_op<ops::Div>(mutable_column_view&,
+                                        column_view const&,
+                                        column_view const&,
                                         bool is_lhs_scalar,
                                         bool is_rhs_scalar,
                                         rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/FloorDiv.cu b/cpp/src/binaryop/compiled/FloorDiv.cu
index 99ea2706b86..f9cd323caec 100644
--- a/cpp/src/binaryop/compiled/FloorDiv.cu
+++ b/cpp/src/binaryop/compiled/FloorDiv.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::FloorDiv>(mutable_column_device_view&,
-                                             column_device_view const&,
-                                             column_device_view const&,
+template void apply_binary_op<ops::FloorDiv>(mutable_column_view&,
+                                             column_view const&,
+                                             column_view const&,
                                              bool is_lhs_scalar,
                                              bool is_rhs_scalar,
                                              rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/Greater.cu b/cpp/src/binaryop/compiled/Greater.cu
index 679e029b5fc..db06cc409da 100644
--- a/cpp/src/binaryop/compiled/Greater.cu
+++ b/cpp/src/binaryop/compiled/Greater.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::Greater>(mutable_column_device_view&,
-                                            column_device_view const&,
-                                            column_device_view const&,
+template void apply_binary_op<ops::Greater>(mutable_column_view&,
+                                            column_view const&,
+                                            column_view const&,
                                             bool is_lhs_scalar,
                                             bool is_rhs_scalar,
                                             rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/GreaterEqual.cu b/cpp/src/binaryop/compiled/GreaterEqual.cu
index 23b0c6aaa0d..c239e1e1345 100644
--- a/cpp/src/binaryop/compiled/GreaterEqual.cu
+++ b/cpp/src/binaryop/compiled/GreaterEqual.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::GreaterEqual>(mutable_column_device_view&,
-                                                 column_device_view const&,
-                                                 column_device_view const&,
+template void apply_binary_op<ops::GreaterEqual>(mutable_column_view&,
+                                                 column_view const&,
+                                                 column_view const&,
                                                  bool is_lhs_scalar,
                                                  bool is_rhs_scalar,
                                                  rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/Less.cu b/cpp/src/binaryop/compiled/Less.cu
index 7ab5dfe3478..e8663715c87 100644
--- a/cpp/src/binaryop/compiled/Less.cu
+++ b/cpp/src/binaryop/compiled/Less.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::Less>(mutable_column_device_view&,
-                                         column_device_view const&,
-                                         column_device_view const&,
+template void apply_binary_op<ops::Less>(mutable_column_view&,
+                                         column_view const&,
+                                         column_view const&,
                                          bool is_lhs_scalar,
                                          bool is_rhs_scalar,
                                          rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/LessEqual.cu b/cpp/src/binaryop/compiled/LessEqual.cu
index 983c50c9575..d2f88fab81b 100644
--- a/cpp/src/binaryop/compiled/LessEqual.cu
+++ b/cpp/src/binaryop/compiled/LessEqual.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::LessEqual>(mutable_column_device_view&,
-                                              column_device_view const&,
-                                              column_device_view const&,
+template void apply_binary_op<ops::LessEqual>(mutable_column_view&,
+                                              column_view const&,
+                                              column_view const&,
                                               bool is_lhs_scalar,
                                               bool is_rhs_scalar,
                                               rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/LogBase.cu b/cpp/src/binaryop/compiled/LogBase.cu
index bdc709b86bf..8a2162c4ca4 100644
--- a/cpp/src/binaryop/compiled/LogBase.cu
+++ b/cpp/src/binaryop/compiled/LogBase.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::LogBase>(mutable_column_device_view&,
-                                            column_device_view const&,
-                                            column_device_view const&,
+template void apply_binary_op<ops::LogBase>(mutable_column_view&,
+                                            column_view const&,
+                                            column_view const&,
                                             bool is_lhs_scalar,
                                             bool is_rhs_scalar,
                                             rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/LogicalAnd.cu b/cpp/src/binaryop/compiled/LogicalAnd.cu
index 08112fadfff..64e5c1a31c0 100644
--- a/cpp/src/binaryop/compiled/LogicalAnd.cu
+++ b/cpp/src/binaryop/compiled/LogicalAnd.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::LogicalAnd>(mutable_column_device_view&,
-                                               column_device_view const&,
-                                               column_device_view const&,
+template void apply_binary_op<ops::LogicalAnd>(mutable_column_view&,
+                                               column_view const&,
+                                               column_view const&,
                                                bool is_lhs_scalar,
                                                bool is_rhs_scalar,
                                                rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/LogicalOr.cu b/cpp/src/binaryop/compiled/LogicalOr.cu
index bc400afd4cd..a4b64cc6afc 100644
--- a/cpp/src/binaryop/compiled/LogicalOr.cu
+++ b/cpp/src/binaryop/compiled/LogicalOr.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::LogicalOr>(mutable_column_device_view&,
-                                              column_device_view const&,
-                                              column_device_view const&,
+template void apply_binary_op<ops::LogicalOr>(mutable_column_view&,
+                                              column_view const&,
+                                              column_view const&,
                                               bool is_lhs_scalar,
                                               bool is_rhs_scalar,
                                               rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/Mod.cu b/cpp/src/binaryop/compiled/Mod.cu
index 0b82c09c8a6..fcdd01b7be8 100644
--- a/cpp/src/binaryop/compiled/Mod.cu
+++ b/cpp/src/binaryop/compiled/Mod.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::Mod>(mutable_column_device_view&,
-                                        column_device_view const&,
-                                        column_device_view const&,
+template void apply_binary_op<ops::Mod>(mutable_column_view&,
+                                        column_view const&,
+                                        column_view const&,
                                         bool is_lhs_scalar,
                                         bool is_rhs_scalar,
                                         rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/Mul.cu b/cpp/src/binaryop/compiled/Mul.cu
index 15394245259..de6506d43f1 100644
--- a/cpp/src/binaryop/compiled/Mul.cu
+++ b/cpp/src/binaryop/compiled/Mul.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::Mul>(mutable_column_device_view&,
-                                        column_device_view const&,
-                                        column_device_view const&,
+template void apply_binary_op<ops::Mul>(mutable_column_view&,
+                                        column_view const&,
+                                        column_view const&,
                                         bool is_lhs_scalar,
                                         bool is_rhs_scalar,
                                         rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/NullEquals.cu b/cpp/src/binaryop/compiled/NullEquals.cu
index 3fc76e804f7..f4780c13bef 100644
--- a/cpp/src/binaryop/compiled/NullEquals.cu
+++ b/cpp/src/binaryop/compiled/NullEquals.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::NullEquals>(mutable_column_device_view&,
-                                               column_device_view const&,
-                                               column_device_view const&,
+template void apply_binary_op<ops::NullEquals>(mutable_column_view&,
+                                               column_view const&,
+                                               column_view const&,
                                                bool is_lhs_scalar,
                                                bool is_rhs_scalar,
                                                rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/NullLogicalAnd.cu b/cpp/src/binaryop/compiled/NullLogicalAnd.cu
index 48ae125bc93..55e71a52dae 100644
--- a/cpp/src/binaryop/compiled/NullLogicalAnd.cu
+++ b/cpp/src/binaryop/compiled/NullLogicalAnd.cu
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::NullLogicalAnd>(mutable_column_device_view&,
-                                                   column_device_view const&,
-                                                   column_device_view const&,
+template void apply_binary_op<ops::NullLogicalAnd>(mutable_column_view&,
+                                                   column_view const&,
+                                                   column_view const&,
                                                    bool is_lhs_scalar,
                                                    bool is_rhs_scalar,
                                                    rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/NullLogicalOr.cu b/cpp/src/binaryop/compiled/NullLogicalOr.cu
index e0ea95ac3ee..ee3b27c0934 100644
--- a/cpp/src/binaryop/compiled/NullLogicalOr.cu
+++ b/cpp/src/binaryop/compiled/NullLogicalOr.cu
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::NullLogicalOr>(mutable_column_device_view&,
-                                                  column_device_view const&,
-                                                  column_device_view const&,
+template void apply_binary_op<ops::NullLogicalOr>(mutable_column_view&,
+                                                  column_view const&,
+                                                  column_view const&,
                                                   bool is_lhs_scalar,
                                                   bool is_rhs_scalar,
                                                   rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/NullMax.cu b/cpp/src/binaryop/compiled/NullMax.cu
index 78a44041cba..6fae253d41f 100644
--- a/cpp/src/binaryop/compiled/NullMax.cu
+++ b/cpp/src/binaryop/compiled/NullMax.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::NullMax>(mutable_column_device_view&,
-                                            column_device_view const&,
-                                            column_device_view const&,
+template void apply_binary_op<ops::NullMax>(mutable_column_view&,
+                                            column_view const&,
+                                            column_view const&,
                                             bool is_lhs_scalar,
                                             bool is_rhs_scalar,
                                             rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/NullMin.cu b/cpp/src/binaryop/compiled/NullMin.cu
index 629ab600fd7..cb7fdb4f76a 100644
--- a/cpp/src/binaryop/compiled/NullMin.cu
+++ b/cpp/src/binaryop/compiled/NullMin.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::NullMin>(mutable_column_device_view&,
-                                            column_device_view const&,
-                                            column_device_view const&,
+template void apply_binary_op<ops::NullMin>(mutable_column_view&,
+                                            column_view const&,
+                                            column_view const&,
                                             bool is_lhs_scalar,
                                             bool is_rhs_scalar,
                                             rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/PMod.cu b/cpp/src/binaryop/compiled/PMod.cu
index 36902c0ed10..63b1f1f8269 100644
--- a/cpp/src/binaryop/compiled/PMod.cu
+++ b/cpp/src/binaryop/compiled/PMod.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::PMod>(mutable_column_device_view&,
-                                         column_device_view const&,
-                                         column_device_view const&,
+template void apply_binary_op<ops::PMod>(mutable_column_view&,
+                                         column_view const&,
+                                         column_view const&,
                                          bool is_lhs_scalar,
                                          bool is_rhs_scalar,
                                          rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/Pow.cu b/cpp/src/binaryop/compiled/Pow.cu
index c6f897ee18d..435e1ac044a 100644
--- a/cpp/src/binaryop/compiled/Pow.cu
+++ b/cpp/src/binaryop/compiled/Pow.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::Pow>(mutable_column_device_view&,
-                                        column_device_view const&,
-                                        column_device_view const&,
+template void apply_binary_op<ops::Pow>(mutable_column_view&,
+                                        column_view const&,
+                                        column_view const&,
                                         bool is_lhs_scalar,
                                         bool is_rhs_scalar,
                                         rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/PyMod.cu b/cpp/src/binaryop/compiled/PyMod.cu
index b05dcd8e7bc..1e213598681 100644
--- a/cpp/src/binaryop/compiled/PyMod.cu
+++ b/cpp/src/binaryop/compiled/PyMod.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::PyMod>(mutable_column_device_view&,
-                                          column_device_view const&,
-                                          column_device_view const&,
+template void apply_binary_op<ops::PyMod>(mutable_column_view&,
+                                          column_view const&,
+                                          column_view const&,
                                           bool is_lhs_scalar,
                                           bool is_rhs_scalar,
                                           rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/ShiftLeft.cu b/cpp/src/binaryop/compiled/ShiftLeft.cu
index 6cc950b2d50..797821a9057 100644
--- a/cpp/src/binaryop/compiled/ShiftLeft.cu
+++ b/cpp/src/binaryop/compiled/ShiftLeft.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::ShiftLeft>(mutable_column_device_view&,
-                                              column_device_view const&,
-                                              column_device_view const&,
+template void apply_binary_op<ops::ShiftLeft>(mutable_column_view&,
+                                              column_view const&,
+                                              column_view const&,
                                               bool is_lhs_scalar,
                                               bool is_rhs_scalar,
                                               rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/ShiftRight.cu b/cpp/src/binaryop/compiled/ShiftRight.cu
index 1ddd7100a73..8a2566ff775 100644
--- a/cpp/src/binaryop/compiled/ShiftRight.cu
+++ b/cpp/src/binaryop/compiled/ShiftRight.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::ShiftRight>(mutable_column_device_view&,
-                                               column_device_view const&,
-                                               column_device_view const&,
+template void apply_binary_op<ops::ShiftRight>(mutable_column_view&,
+                                               column_view const&,
+                                               column_view const&,
                                                bool is_lhs_scalar,
                                                bool is_rhs_scalar,
                                                rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/ShiftRightUnsigned.cu b/cpp/src/binaryop/compiled/ShiftRightUnsigned.cu
index a87b4b9f9ac..827029bc75c 100644
--- a/cpp/src/binaryop/compiled/ShiftRightUnsigned.cu
+++ b/cpp/src/binaryop/compiled/ShiftRightUnsigned.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::ShiftRightUnsigned>(mutable_column_device_view&,
-                                                       column_device_view const&,
-                                                       column_device_view const&,
+template void apply_binary_op<ops::ShiftRightUnsigned>(mutable_column_view&,
+                                                       column_view const&,
+                                                       column_view const&,
                                                        bool is_lhs_scalar,
                                                        bool is_rhs_scalar,
                                                        rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/Sub.cu b/cpp/src/binaryop/compiled/Sub.cu
index e0cf47c1310..3022294f86f 100644
--- a/cpp/src/binaryop/compiled/Sub.cu
+++ b/cpp/src/binaryop/compiled/Sub.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::Sub>(mutable_column_device_view&,
-                                        column_device_view const&,
-                                        column_device_view const&,
+template void apply_binary_op<ops::Sub>(mutable_column_view&,
+                                        column_view const&,
+                                        column_view const&,
                                         bool is_lhs_scalar,
                                         bool is_rhs_scalar,
                                         rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/TrueDiv.cu b/cpp/src/binaryop/compiled/TrueDiv.cu
index d8f1d956340..4d0fc2d456b 100644
--- a/cpp/src/binaryop/compiled/TrueDiv.cu
+++ b/cpp/src/binaryop/compiled/TrueDiv.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,9 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-template void apply_binary_op<ops::TrueDiv>(mutable_column_device_view&,
-                                            column_device_view const&,
-                                            column_device_view const&,
+template void apply_binary_op<ops::TrueDiv>(mutable_column_view&,
+                                            column_view const&,
+                                            column_view const&,
                                             bool is_lhs_scalar,
                                             bool is_rhs_scalar,
                                             rmm::cuda_stream_view);
diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu
index c01359b80d0..d260aa6d6a0 100644
--- a/cpp/src/binaryop/compiled/binary_ops.cu
+++ b/cpp/src/binaryop/compiled/binary_ops.cu
@@ -37,23 +37,20 @@ namespace compiled {
 
 namespace {
 /**
- * @brief Converts scalar to column_device_view with single element.
+ * @brief Converts scalar to column_view with single element.
  *
- * @return pair with column_device_view and column containing any auxilary data to create
- * column_view from scalar
+ * @return pair with column_view and column containing any auxiliary data to create column_view from
+ * scalar
  */
-struct scalar_as_column_device_view {
-  using return_type = typename std::pair<decltype(column_device_view::create(column_view{})),
-                                         std::unique_ptr<column>>;
+struct scalar_as_column_view {
+  using return_type = typename std::pair<column_view, std::unique_ptr<column>>;
   template <typename T, std::enable_if_t<(is_fixed_width<T>())>* = nullptr>
-  return_type operator()(scalar const& s,
-                         rmm::cuda_stream_view stream,
-                         rmm::mr::device_memory_resource*)
+  return_type operator()(scalar const& s, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
   {
     auto& h_scalar_type_view = static_cast<cudf::scalar_type_t<T>&>(const_cast<scalar&>(s));
     auto col_v =
       column_view(s.type(), 1, h_scalar_type_view.data(), (bitmask_type const*)s.validity_data());
-    return std::pair{column_device_view::create(col_v, stream), std::unique_ptr<column>(nullptr)};
+    return std::pair{col_v, std::unique_ptr<column>(nullptr)};
   }
   template <typename T, std::enable_if_t<(!is_fixed_width<T>())>* = nullptr>
   return_type operator()(scalar const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)
@@ -63,10 +60,8 @@ struct scalar_as_column_device_view {
 };
 // specialization for cudf::string_view
 template <>
-scalar_as_column_device_view::return_type
-scalar_as_column_device_view::operator()<cudf::string_view>(scalar const& s,
-                                                            rmm::cuda_stream_view stream,
-                                                            rmm::mr::device_memory_resource* mr)
+scalar_as_column_view::return_type scalar_as_column_view::operator()<cudf::string_view>(
+  scalar const& s, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
 {
   using T                  = cudf::string_view;
   auto& h_scalar_type_view = static_cast<cudf::scalar_type_t<T>&>(const_cast<scalar&>(s));
@@ -87,24 +82,24 @@ scalar_as_column_device_view::operator()<cudf::string_view>(scalar const& s,
                            cudf::UNKNOWN_NULL_COUNT,
                            0,
                            {offsets_column->view(), chars_column_v});
-  return std::pair{column_device_view::create(col_v, stream), std::move(offsets_column)};
+  return std::pair{col_v, std::move(offsets_column)};
 }
 
 /**
- * @brief Converts scalar to column_device_view with single element.
+ * @brief Converts scalar to column_view with single element.
  *
  * @param scal    scalar to convert
  * @param stream  CUDA stream used for device memory operations and kernel launches.
  * @param mr      Device memory resource used to allocate the returned column's device memory
- * @return        pair with column_device_view and column containing any auxilary data to create
+ * @return        pair with column_view and column containing any auxiliary data to create
  * column_view from scalar
  */
-auto scalar_to_column_device_view(
+auto scalar_to_column_view(
   scalar const& scal,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
-  return type_dispatcher(scal.type(), scalar_as_column_device_view{}, scal, stream, mr);
+  return type_dispatcher(scal.type(), scalar_as_column_view{}, scal, stream, mr);
 }
 
 // This functor does the actual comparison between string column value and a scalar string
@@ -300,9 +295,9 @@ std::unique_ptr<column> string_null_min_max(column_view const& lhs,
     *lhs_device_view, *rhs_device_view, op, output_type, lhs.size(), stream, mr);
 }
 
-void operator_dispatcher(mutable_column_device_view& out,
-                         column_device_view const& lhs,
-                         column_device_view const& rhs,
+void operator_dispatcher(mutable_column_view& out,
+                         column_view const& lhs,
+                         column_view const& rhs,
                          bool is_lhs_scalar,
                          bool is_rhs_scalar,
                          binary_operator op,
@@ -358,10 +353,7 @@ void binary_operation(mutable_column_view& out,
                       binary_operator op,
                       rmm::cuda_stream_view stream)
 {
-  auto lhsd = column_device_view::create(lhs, stream);
-  auto rhsd = column_device_view::create(rhs, stream);
-  auto outd = mutable_column_device_view::create(out, stream);
-  operator_dispatcher(*outd, *lhsd, *rhsd, false, false, op, stream);
+  operator_dispatcher(out, lhs, rhs, false, false, op, stream);
 }
 // scalar_vector
 void binary_operation(mutable_column_view& out,
@@ -370,10 +362,8 @@ void binary_operation(mutable_column_view& out,
                       binary_operator op,
                       rmm::cuda_stream_view stream)
 {
-  auto [lhsd, aux] = scalar_to_column_device_view(lhs, stream);
-  auto rhsd        = column_device_view::create(rhs, stream);
-  auto outd        = mutable_column_device_view::create(out, stream);
-  operator_dispatcher(*outd, *lhsd, *rhsd, true, false, op, stream);
+  auto [lhsv, aux] = scalar_to_column_view(lhs, stream);
+  operator_dispatcher(out, lhsv, rhs, true, false, op, stream);
 }
 // vector_scalar
 void binary_operation(mutable_column_view& out,
@@ -382,12 +372,9 @@ void binary_operation(mutable_column_view& out,
                       binary_operator op,
                       rmm::cuda_stream_view stream)
 {
-  auto lhsd        = column_device_view::create(lhs, stream);
-  auto [rhsd, aux] = scalar_to_column_device_view(rhs, stream);
-  auto outd        = mutable_column_device_view::create(out, stream);
-  operator_dispatcher(*outd, *lhsd, *rhsd, false, true, op, stream);
+  auto [rhsv, aux] = scalar_to_column_view(rhs, stream);
+  operator_dispatcher(out, lhs, rhsv, false, true, op, stream);
 }
-
 }  // namespace compiled
 }  // namespace binops
 }  // namespace cudf
diff --git a/cpp/src/binaryop/compiled/binary_ops.cuh b/cpp/src/binaryop/compiled/binary_ops.cuh
index ec41fbb8883..d88d2be2499 100644
--- a/cpp/src/binaryop/compiled/binary_ops.cuh
+++ b/cpp/src/binaryop/compiled/binary_ops.cuh
@@ -20,6 +20,7 @@
 #include "operation.cuh"
 
 #include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_view.hpp>
 #include <cudf/detail/utilities/integer_utils.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
@@ -271,30 +272,36 @@ void for_each(rmm::cuda_stream_view stream, cudf::size_type size, Functor f)
   const int grid_size = util::div_rounding_up_safe(size, 2 * block_size);
   for_each_kernel<<<grid_size, block_size, 0, stream.value()>>>(size, std::forward<Functor&&>(f));
 }
-
+namespace detail {
+template <class T, class... Ts>
+inline constexpr bool is_any_v = std::disjunction<std::is_same<T, Ts>...>::value;
+}
 template <class BinaryOperator>
-void apply_binary_op(mutable_column_device_view& outd,
-                     column_device_view const& lhsd,
-                     column_device_view const& rhsd,
+void apply_binary_op(mutable_column_view& out,
+                     column_view const& lhs,
+                     column_view const& rhs,
                      bool is_lhs_scalar,
                      bool is_rhs_scalar,
                      rmm::cuda_stream_view stream)
 {
-  auto common_dtype = get_common_type(outd.type(), lhsd.type(), rhsd.type());
+  auto common_dtype = get_common_type(out.type(), lhs.type(), rhs.type());
 
+  auto lhsd = column_device_view::create(lhs, stream);
+  auto rhsd = column_device_view::create(rhs, stream);
+  auto outd = mutable_column_device_view::create(out, stream);
   // Create binop functor instance
   if (common_dtype) {
     // Execute it on every element
     for_each(stream,
-             outd.size(),
+             out.size(),
              binary_op_device_dispatcher<BinaryOperator>{
-               *common_dtype, outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar});
+               *common_dtype, *outd, *lhsd, *rhsd, is_lhs_scalar, is_rhs_scalar});
   } else {
     // Execute it on every element
     for_each(stream,
-             outd.size(),
+             out.size(),
              binary_op_double_device_dispatcher<BinaryOperator>{
-               outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar});
+               *outd, *lhsd, *rhsd, is_lhs_scalar, is_rhs_scalar});
   }
 }
 
diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp
index 26a0f26b59c..d1a40e15326 100644
--- a/cpp/src/binaryop/compiled/binary_ops.hpp
+++ b/cpp/src/binaryop/compiled/binary_ops.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -175,45 +175,45 @@ bool is_supported_operation(data_type out, data_type lhs, data_type rhs, binary_
 // Defined in individual .cu files.
 /**
  * @brief Deploys single type or double type dispatcher that runs binary operation on each element
- * of @p lhsd and @p rhsd columns.
+ * of @p lhs and @p rhs columns.
  *
  * This template is instantiated for each binary operator.
  *
  * @tparam BinaryOperator Binary operator functor
- * @param outd mutable device view of output column
- * @param lhsd device view of left operand column
- * @param rhsd device view of right operand column
- * @param is_lhs_scalar true if @p lhsd is a single element column representing a scalar
- * @param is_rhs_scalar true if @p rhsd is a single element column representing a scalar
+ * @param out mutable view of output column
+ * @param lhs view of left operand column
+ * @param rhs view of right operand column
+ * @param is_lhs_scalar true if @p lhs is a single element column representing a scalar
+ * @param is_rhs_scalar true if @p rhs is a single element column representing a scalar
  * @param stream CUDA stream used for device memory operations
  */
 template <class BinaryOperator>
-void apply_binary_op(mutable_column_device_view&,
-                     column_device_view const&,
-                     column_device_view const&,
+void apply_binary_op(mutable_column_view& out,
+                     column_view const& lhs,
+                     column_view const& rhs,
                      bool is_lhs_scalar,
                      bool is_rhs_scalar,
                      rmm::cuda_stream_view stream);
 /**
  * @brief Deploys single type or double type dispatcher that runs equality operation on each element
- * of @p lhsd and @p rhsd columns.
+ * of @p lhs and @p rhs columns.
  *
  * Comparison operators are EQUAL, NOT_EQUAL, NULL_EQUALS.
- * @p outd type is boolean.
+ * @p out type is boolean.
  *
  * This template is instantiated for each binary operator.
  *
- * @param outd mutable device view of output column
- * @param lhsd device view of left operand column
- * @param rhsd device view of right operand column
- * @param is_lhs_scalar true if @p lhsd is a single element column representing a scalar
- * @param is_rhs_scalar true if @p rhsd is a single element column representing a scalar
+ * @param out mutable view of output column
+ * @param lhs view of left operand column
+ * @param rhs view of right operand column
+ * @param is_lhs_scalar true if @p lhs is a single element column representing a scalar
+ * @param is_rhs_scalar true if @p rhs is a single element column representing a scalar
  * @param op comparison binary operator
  * @param stream CUDA stream used for device memory operations
  */
-void dispatch_equality_op(mutable_column_device_view& outd,
-                          column_device_view const& lhsd,
-                          column_device_view const& rhsd,
+void dispatch_equality_op(mutable_column_view& out,
+                          column_view const& lhs,
+                          column_view const& rhs,
                           bool is_lhs_scalar,
                           bool is_rhs_scalar,
                           binary_operator op,
diff --git a/cpp/src/binaryop/compiled/equality_ops.cu b/cpp/src/binaryop/compiled/equality_ops.cu
index 03c3e373476..61f02252a26 100644
--- a/cpp/src/binaryop/compiled/equality_ops.cu
+++ b/cpp/src/binaryop/compiled/equality_ops.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,41 +17,43 @@
 #include "binary_ops.cuh"
 
 namespace cudf::binops::compiled {
-void dispatch_equality_op(mutable_column_device_view& outd,
-                          column_device_view const& lhsd,
-                          column_device_view const& rhsd,
+void dispatch_equality_op(mutable_column_view& out,
+                          column_view const& lhs,
+                          column_view const& rhs,
                           bool is_lhs_scalar,
                           bool is_rhs_scalar,
                           binary_operator op,
                           rmm::cuda_stream_view stream)
 {
-  auto common_dtype = get_common_type(outd.type(), lhsd.type(), rhsd.type());
-
-  // Execute it on every element
-
+  CUDF_EXPECTS(op == binary_operator::EQUAL || op == binary_operator::NOT_EQUAL,
+               "Unsupported operator for these types");
+  auto common_dtype = get_common_type(out.type(), lhs.type(), rhs.type());
+  auto outd         = mutable_column_device_view::create(out, stream);
+  auto lhsd         = column_device_view::create(lhs, stream);
+  auto rhsd         = column_device_view::create(rhs, stream);
   if (common_dtype) {
     if (op == binary_operator::EQUAL) {
       for_each(stream,
-               outd.size(),
+               out.size(),
                binary_op_device_dispatcher<ops::Equal>{
-                 *common_dtype, outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar});
+                 *common_dtype, *outd, *lhsd, *rhsd, is_lhs_scalar, is_rhs_scalar});
     } else if (op == binary_operator::NOT_EQUAL) {
       for_each(stream,
-               outd.size(),
+               out.size(),
                binary_op_device_dispatcher<ops::NotEqual>{
-                 *common_dtype, outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar});
+                 *common_dtype, *outd, *lhsd, *rhsd, is_lhs_scalar, is_rhs_scalar});
     }
   } else {
     if (op == binary_operator::EQUAL) {
       for_each(stream,
-               outd.size(),
+               out.size(),
                binary_op_double_device_dispatcher<ops::Equal>{
-                 outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar});
+                 *outd, *lhsd, *rhsd, is_lhs_scalar, is_rhs_scalar});
     } else if (op == binary_operator::NOT_EQUAL) {
       for_each(stream,
-               outd.size(),
+               out.size(),
                binary_op_double_device_dispatcher<ops::NotEqual>{
-                 outd, lhsd, rhsd, is_lhs_scalar, is_rhs_scalar});
+                 *outd, *lhsd, *rhsd, is_lhs_scalar, is_rhs_scalar});
     }
   }
 }

From dd68db3b644c4448f9c87a43dcb303e9fb055ad4 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Wed, 4 May 2022 17:39:26 -0400
Subject: [PATCH 17/23] Reorganize cuDF Python docs (#10691)

This PR is composed of two high-level changes:

* Replaces the use of reStructuredText with [MyST Markdown](https://myst-parser.readthedocs.io/en/latest/). I used [rst2myst](https://github.com/executablebooks/rst2myst) for this and it worked pretty well. The rationale for this change is simple: we use `myst-nb` to render notebooks into documentation, and for consistency, it's nice to use `myst-parser` to parse the rest of our docs too. As a matter of opinion, I think Markdown is simpler and more familiar to most developers.

* Reorganizes the docs (see below):

Prior to this PR, the cuDF documentation was divided into 3 sections:

* A user guide
* A "Basics" section
* API reference

The distinction between the first two sections was never clear. I've gone ahead and merged those into a single section named "User Guide". This is also more consistent with Pandas.

This PR also makes a couple of other changes:

- Renamed the "Basics" page under the previous "Basics" section to "Data Types", as that reflects its contents more accurately. I also modified the content here a bit.
- Renamed the "10 minutes to CuPy and CuDF" notebook to "Interoperability between CuPy and CuDF" as that more accurately describes what that page is about.

----

Compare the TOC from this PR (below) with our [currently published docs](https://docs.rapids.ai/api/cudf/stable/).

<img width="710" alt="Screen Shot 2022-04-20 at 1 13 04 PM" src="https://user-images.githubusercontent.com/3190405/164286913-2e3bfd2a-caa7-4324-9cad-bd131058999f.png">

Authors:
  - Ashwin Srinath (https://github.com/shwina)
  - Mike McCarty (https://github.com/mmccarty)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Mike McCarty (https://github.com/mmccarty)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/10691
---
 docs/cudf/source/_static/params.css           |   8 +-
 docs/cudf/source/basics/PandasCompat.rst      |   4 -
 docs/cudf/source/basics/basics.rst            |  62 --
 docs/cudf/source/basics/dask-cudf.rst         | 107 ----
 docs/cudf/source/basics/groupby.rst           | 274 --------
 docs/cudf/source/basics/index.rst             |  15 -
 docs/cudf/source/basics/internals.rst         | 216 -------
 .../cudf/source/basics/io-gds-integration.rst |  42 --
 .../source/basics/io-nvcomp-integration.rst   |  27 -
 docs/cudf/source/basics/io.rst                |  13 -
 docs/cudf/source/index.rst                    |   1 -
 docs/cudf/source/user_guide/10min.ipynb       | 371 +++++++----
 docs/cudf/source/user_guide/PandasCompat.md   |   5 +
 ...min-cudf-cupy.ipynb => cupy-interop.ipynb} | 246 ++++---
 docs/cudf/source/user_guide/dask-cudf.md      | 104 +++
 docs/cudf/source/user_guide/data-types.md     | 153 +++++
 docs/cudf/source/user_guide/groupby.md        | 273 ++++++++
 .../source/user_guide/guide-to-udfs.ipynb     | 149 ++++-
 docs/cudf/source/user_guide/index.md          |  16 +
 docs/cudf/source/user_guide/index.rst         |  12 -
 docs/cudf/source/user_guide/internals.md      | 212 +++++++
 .../io.md}                                    | 113 +++-
 ...-missing-data.ipynb => missing-data.ipynb} | 598 ++++++++++--------
 23 files changed, 1738 insertions(+), 1283 deletions(-)
 delete mode 100644 docs/cudf/source/basics/PandasCompat.rst
 delete mode 100644 docs/cudf/source/basics/basics.rst
 delete mode 100644 docs/cudf/source/basics/dask-cudf.rst
 delete mode 100644 docs/cudf/source/basics/groupby.rst
 delete mode 100644 docs/cudf/source/basics/index.rst
 delete mode 100644 docs/cudf/source/basics/internals.rst
 delete mode 100644 docs/cudf/source/basics/io-gds-integration.rst
 delete mode 100644 docs/cudf/source/basics/io-nvcomp-integration.rst
 delete mode 100644 docs/cudf/source/basics/io.rst
 create mode 100644 docs/cudf/source/user_guide/PandasCompat.md
 rename docs/cudf/source/user_guide/{10min-cudf-cupy.ipynb => cupy-interop.ipynb} (87%)
 create mode 100644 docs/cudf/source/user_guide/dask-cudf.md
 create mode 100644 docs/cudf/source/user_guide/data-types.md
 create mode 100644 docs/cudf/source/user_guide/groupby.md
 create mode 100644 docs/cudf/source/user_guide/index.md
 delete mode 100644 docs/cudf/source/user_guide/index.rst
 create mode 100644 docs/cudf/source/user_guide/internals.md
 rename docs/cudf/source/{basics/io-supported-types.rst => user_guide/io.md} (69%)
 rename docs/cudf/source/user_guide/{Working-with-missing-data.ipynb => missing-data.ipynb} (87%)

diff --git a/docs/cudf/source/_static/params.css b/docs/cudf/source/_static/params.css
index 9e6be7ca75f..17c9d5accbd 100644
--- a/docs/cudf/source/_static/params.css
+++ b/docs/cudf/source/_static/params.css
@@ -50,11 +50,17 @@ table.io-supported-types-table thead{
 
 }
 
+/* Used to make special-table scrollable when it overflows */
+.special-table-wrapper {
+    width: 100%;
+    overflow: auto !important;
+}
+
 .special-table td, .special-table th {
     border: 1px solid #dee2e6;
 }
 
-/* Needed to resolve https://github.com/executablebooks/jupyter-book/issues/1611 */ 
+/* Needed to resolve https://github.com/executablebooks/jupyter-book/issues/1611 */
 .output.text_html {
     overflow: auto;
 }
diff --git a/docs/cudf/source/basics/PandasCompat.rst b/docs/cudf/source/basics/PandasCompat.rst
deleted file mode 100644
index fe9161e49c3..00000000000
--- a/docs/cudf/source/basics/PandasCompat.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-Pandas Compatibility Notes
-==========================
-
-.. pandas-compat-list::
diff --git a/docs/cudf/source/basics/basics.rst b/docs/cudf/source/basics/basics.rst
deleted file mode 100644
index 9b8983fba49..00000000000
--- a/docs/cudf/source/basics/basics.rst
+++ /dev/null
@@ -1,62 +0,0 @@
-Basics
-======
-
-
-Supported Dtypes
-----------------
-
-cuDF uses dtypes for Series or individual columns of a DataFrame. cuDF uses NumPy dtypes, NumPy provides support for ``float``, ``int``, ``bool``,
-``'timedelta64[s]'``, ``'timedelta64[ms]'``, ``'timedelta64[us]'``, ``'timedelta64[ns]'``, ``'datetime64[s]'``, ``'datetime64[ms]'``,
-``'datetime64[us]'``, ``'datetime64[ns]'`` (note that NumPy does not support timezone-aware datetimes).
-
-
-The following table lists all of cudf types. For methods requiring dtype arguments, strings can be specified as indicated. See the respective documentation sections for more on each type.
-
-.. rst-class:: special-table
-.. table::
-
-    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
-    | Kind of Data    | Data Type        | Scalar                                                       | String Aliases                               |
-    +=================+==================+==============================================================+==============================================+
-    | Integer         |                  | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_,        | ``'int8'``, ``'int16'``, ``'int32'``,        |
-    |                 |                  | np.uint16_, np.uint32_, np.uint64_                           | ``'int64'``, ``'uint8'``, ``'uint16'``,      |
-    |                 |                  |                                                              | ``'uint32'``, ``'uint64'``                   |
-    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
-    | Float           |                  | np.float32_, np.float64_                                     | ``'float32'``, ``'float64'``                 |
-    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
-    | Strings         |                  | `str <https://docs.python.org/3/library/stdtypes.html#str>`_ | ``'string'``, ``'object'``                   |
-    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
-    | Datetime        |                  | np.datetime64_                                               | ``'datetime64[s]'``, ``'datetime64[ms]'``,   |
-    |                 |                  |                                                              | ``'datetime64[us]'``, ``'datetime64[ns]'``   |
-    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
-    | Timedelta       |                  | np.timedelta64_                                              | ``'timedelta64[s]'``, ``'timedelta64[ms]'``, |
-    | (duration type) |                  |                                                              | ``'timedelta64[us]'``, ``'timedelta64[ns]'`` |
-    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
-    | Categorical     | CategoricalDtype | (none)                                                       | ``'category'``                               |
-    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
-    | Boolean         |                  | np.bool_                                                     | ``'bool'``                                   |
-    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
-    | Decimal         | Decimal32Dtype,  | (none)                                                       | (none)                                       |
-    |                 | Decimal64Dtype,  |                                                              |                                              |
-    |                 | Decimal128Dtype  |                                                              |                                              |
-    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
-    | Lists           | ListDtype        | list                                                         | ``'list'``                                   |
-    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
-    | Structs         | StructDtype      | dict                                                         | ``'struct'``                                 |
-    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
-
-**Note: All dtypes above are Nullable**
-
-.. _np.int8:
-.. _np.int16:
-.. _np.int32:
-.. _np.int64:
-.. _np.uint8:
-.. _np.uint16:
-.. _np.uint32:
-.. _np.uint64:
-.. _np.float32:
-.. _np.float64:
-.. _np.bool: https://numpy.org/doc/stable/user/basics.types.html
-.. _np.datetime64: https://numpy.org/doc/stable/reference/arrays.datetime.html#basic-datetimes
-.. _np.timedelta64: https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-and-timedelta-arithmetic
diff --git a/docs/cudf/source/basics/dask-cudf.rst b/docs/cudf/source/basics/dask-cudf.rst
deleted file mode 100644
index a9c65dfbfae..00000000000
--- a/docs/cudf/source/basics/dask-cudf.rst
+++ /dev/null
@@ -1,107 +0,0 @@
-Multi-GPU with Dask-cuDF
-========================
-
-cuDF is a single-GPU library. For Multi-GPU cuDF solutions we use
-`Dask <https://dask.org/>`__ and the `dask-cudf
-package <https://github.com/rapidsai/cudf/tree/main/python/dask_cudf>`__,
-which is able to scale cuDF across multiple GPUs on a single machine, or
-multiple GPUs across many machines in a cluster.
-
-`Dask DataFrame <http://docs.dask.org/en/latest/dataframe.html>`__ was
-originally designed to scale Pandas, orchestrating many Pandas
-DataFrames spread across many CPUs into a cohesive parallel DataFrame.
-Because cuDF currently implements only a subset of Pandas’s API, not all
-Dask DataFrame operations work with cuDF.
-
-The following is tested and expected to work:
-
-What works
-----------
-
--  Data ingestion
-
-   -  ``dask_cudf.read_csv``
-   -  Use standard Dask ingestion with Pandas, then convert to cuDF (For
-      Parquet and other formats this is often decently fast)
-
--  Linear operations
-
-   -  Element-wise operations: ``df.x + df.y``, ``df ** 2``
-   -  Assignment: ``df['z'] = df.x + df.y``
-   -  Row-wise selections: ``df[df.x > 0]``
-   -  Loc: ``df.loc['2001-01-01': '2005-02-02']``
-   -  Date time/string accessors: ``df.timestamp.dt.dayofweek``
-   -  ... and most similar operations in this category that are already
-      implemented in cuDF
-
--  Reductions
-
-   -  Like ``sum``, ``mean``, ``max``, ``count``, and so on on
-      ``Series`` objects
-   -  Support for reductions on full dataframes
-   -  \ ``std``\
-   -  Custom reductions with
-      `dask.dataframe.reduction <http://docs.dask.org/en/latest/generated/dask.dataframe.Series.reduction.html>`__
-
--  Groupby aggregations
-
-   -  On single columns: ``df.groupby('x').y.max()``
-   -  With custom aggregations:
-   -  groupby standard deviation
-   -  grouping on multiple columns
-   -  groupby agg for multiple outputs
-
--  Joins:
-
-   -  On full unsorted columns: ``left.merge(right, on='id')``
-      (expensive)
-   -  On sorted indexes:
-      ``left.merge(right, left_index=True, right_index=True)`` (fast)
-   -  On large and small dataframes: ``left.merge(cudf_df, on='id')``
-      (fast)
-
--  Rolling operations
--  Converting to and from other forms
-
-   -  Dask + Pandas to Dask + cuDF
-      ``df.map_partitions(cudf.from_pandas)``
-   -  Dask + cuDF to Dask + Pandas
-      ``df.map_partitions(lambda df: df.to_pandas())``
-   -  cuDF to Dask + cuDF:
-      ``dask.dataframe.from_pandas(df, npartitions=20)``
-   -  Dask + cuDF to cuDF: ``df.compute()``
-
-Additionally all generic Dask operations, like ``compute``, ``persist``,
-``visualize`` and so on work regardless.
-
-Developing the API
-------------------
-
-Above we mention the following:
-
-    and most similar operations in this category that are already
-    implemented in cuDF
-
-This is because it is difficult to create a comprehensive list of
-operations in the cuDF and Pandas libraries. The API is large enough to
-be difficult to track effectively. For any operation that operates
-row-wise like ``fillna`` or ``query`` things will likely, but not
-certainly work. If operations don't work it is often due to a slight
-inconsistency between Pandas and cuDF that is generally easy to fix. We
-encourage users to look at the `cuDF issue
-tracker <https://github.com/rapidsai/cudf/issues>`__ to see if their
-issue has already been reported and, if not, `raise a new
-issue <https://github.com/rapidsai/cudf/issues/new>`__.
-
-Navigating the API
-------------------
-
-This project reuses the `Dask
-DataFrame <https://docs.dask.org/en/latest/dataframe.html>`__ project,
-which was originally designed for Pandas, with the newer library cuDF.
-Because we use the same Dask classes for both projects there are often
-methods that are implemented for Pandas, but not yet for cuDF. As a
-result users looking at the full Dask DataFrame API can be misleading,
-and often lead to frustration when operations that are advertised in the
-Dask API do not work as expected with cuDF. We apologize for this in
-advance.
diff --git a/docs/cudf/source/basics/groupby.rst b/docs/cudf/source/basics/groupby.rst
deleted file mode 100644
index f74853769f6..00000000000
--- a/docs/cudf/source/basics/groupby.rst
+++ /dev/null
@@ -1,274 +0,0 @@
-.. _basics.groupby:
-
-GroupBy
-=======
-
-cuDF supports a small (but important) subset of Pandas' `groupby
-API <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html>`__.
-
-Summary of supported operations
--------------------------------
-
-1. Grouping by one or more columns
-2. Basic aggregations such as "sum", "mean", etc.
-3. Quantile aggregation
-4. A "collect" or ``list`` aggregation for collecting values in a group
-   into lists
-5. Automatic exclusion of columns with unsupported dtypes ("nuisance"
-   columns) when aggregating
-6. Iterating over the groups of a GroupBy object
-7. ``GroupBy.groups`` API that returns a mapping of group keys to row
-   labels
-8. ``GroupBy.apply`` API for performing arbitrary operations on each
-   group. Note that this has very limited functionality compared to the
-   equivalent Pandas function. See the section on
-   `apply <#groupby-apply>`__ for more details.
-9. ``GroupBy.pipe`` similar to
-   `Pandas <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls>`__.
-
-Grouping
---------
-
-A GroupBy object is created by grouping the values of a ``Series`` or
-``DataFrame`` by one or more columns:
-
-.. code:: python
-
-    import cudf
-
-    >>> df = cudf.DataFrame({'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]})
-    >>> df
-    >>> gb1 = df.groupby('a')  # grouping by a single column
-    >>> gb2 = df.groupby(['a', 'b'])  # grouping by multiple columns
-    >>> gb3 = df.groupby(cudf.Series(['a', 'a', 'b', 'b', 'b']))  # grouping by an external column
-
-.. warning::
-
-       cuDF uses `sort=False` by default to achieve better performance, which provides no gaurentee to the group order in outputs. This deviates from Pandas default behavior.
-
-       For example:
-
-       .. code-block:: python
-       
-          >>> df = cudf.DataFrame({'a' : [2, 2, 1], 'b' : [42, 21, 11]})
-          >>> df.groupby('a').sum()
-             b
-          a    
-          2  63
-          1  11
-          >>> df.to_pandas().groupby('a').sum()
-             b
-          a    
-          1  11
-          2  63
-       
-       Setting `sort=True` will produce Pandas-like output, but with some performance penalty:
-
-       .. code-block:: python
-       
-          >>> df.groupby('a', sort=True).sum()
-             b
-          a    
-          1  11
-          2  63
-
-Grouping by index levels
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-You can also group by one or more levels of a MultiIndex:
-
-.. code:: python
-
-    >>> df = cudf.DataFrame(
-    ...     {'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]}
-    ... ).set_index(['a', 'b'])
-    ...
-    >>> df.groupby(level='a')
-
-The ``Grouper`` object
-~~~~~~~~~~~~~~~~~~~~~~
-
-A ``Grouper`` can be used to disambiguate between columns and levels
-when they have the same name:
-
-.. code:: python
-
-    >>> df
-       b  c
-    b
-    1  1  1
-    1  1  2
-    1  2  3
-    2  2  4
-    2  3  5
-    >>> df.groupby('b', level='b')  # ValueError: Cannot specify both by and level
-    >>> df.groupby([cudf.Grouper(key='b'), cudf.Grouper(level='b')])  # OK
-
-Aggregation
------------
-
-Aggregations on groups is supported via the ``agg`` method:
-
-.. code:: python
-
-    >>> df
-       a  b  c
-    0  1  1  1
-    1  1  1  2
-    2  1  2  3
-    3  2  2  4
-    4  2  3  5
-    >>> df.groupby('a').agg('sum')
-       b  c
-    a
-    1  4  6
-    2  5  9
-    >>> df.groupby('a').agg({'b': ['sum', 'min'], 'c': 'mean'})
-        b        c
-      sum min mean
-    a
-    1   4   1  2.0
-    2   5   2  4.5
-    >>> df.groupby("a").corr(method="pearson")
-              b          c
-    a                      
-    1 b  1.000000  0.866025
-      c  0.866025  1.000000
-    2 b  1.000000  1.000000
-      c  1.000000  1.000000
-
-The following table summarizes the available aggregations and the types
-that support them:
-
-.. rst-class:: special-table
-.. table::
-
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | Aggregations / dtypes              | Numeric   | Datetime   | String   | Categorical   | List   | Struct   | Interval   | Decimal   |
-   +====================================+===========+============+==========+===============+========+==========+============+===========+
-   | count                              | ✅        | ✅         | ✅       | ✅            |        |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | size                               | ✅        | ✅         | ✅       | ✅            |        |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | sum                                | ✅        | ✅         |          |               |        |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | idxmin                             | ✅        | ✅         |          |               |        |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | idxmax                             | ✅        | ✅         |          |               |        |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | min                                | ✅        | ✅         | ✅       |               |        |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | max                                | ✅        | ✅         | ✅       |               |        |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | mean                               | ✅        | ✅         |          |               |        |          |            |           |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | var                                | ✅        | ✅         |          |               |        |          |            |           |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | std                                | ✅        | ✅         |          |               |        |          |            |           |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | quantile                           | ✅        | ✅         |          |               |        |          |            |           |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | median                             | ✅        | ✅         |          |               |        |          |            |           |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | nunique                            | ✅        | ✅         | ✅       | ✅            |        |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | nth                                | ✅        | ✅         | ✅       |               |        |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | collect                            | ✅        | ✅         | ✅       |               | ✅     |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | unique                             | ✅        | ✅         | ✅       | ✅            |        |          |            |           |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | corr                               | ✅        |            |          |               |        |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-   | cov                                | ✅        |            |          |               |        |          |            | ✅        |
-   +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-
-GroupBy apply
--------------
-
-To apply a function to each group, use the ``GroupBy.apply()`` method:
-
-.. code:: python
-
-    >>> df
-       a  b  c
-    0  1  1  1
-    1  1  1  2
-    2  1  2  3
-    3  2  2  4
-    4  2  3  5
-    >>> df.groupby('a').apply(lambda x: x.max() - x.min())
-       a  b  c
-    a
-    0  0  1  2
-    1  0  1  1
-
-Limitations
-~~~~~~~~~~~
-
--  ``apply`` works by applying the provided function to each group
-   sequentially, and concatenating the results together. **This can be
-   very slow**, especially for a large number of small groups. For a
-   small number of large groups, it can give acceptable performance.
-
--  The results may not always match Pandas exactly. For example, cuDF
-   may return a ``DataFrame`` containing a single column where Pandas
-   returns a ``Series``. Some post-processing may be required to match
-   Pandas behavior.
-
--  cuDF does not support some of the exceptional cases that Pandas
-   supports with ``apply``, such as calling |describe|_ inside the
-   callable.
-
- .. |describe| replace:: ``describe``
- .. _describe: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#flexible-apply
-
-
-Transform
----------
-
-The ``.transform()`` method aggregates per group, and broadcasts the
-result to the group size, resulting in a Series/DataFrame that is of
-the same size as the input Series/DataFrame.
-
-.. code:: python
-
-     >>> import cudf
-     >>> df = cudf.DataFrame({'a': [2, 1, 1, 2, 2], 'b': [1, 2, 3, 4, 5]})
-     >>> df.groupby('a').transform('max')
-        b
-     0  5
-     1  3
-     2  3
-     3  5
-     4  5
-
-
-Rolling window calculations
----------------------------
-
-Use the ``GroupBy.rolling()`` method to perform rolling window
-calculations on each group:
-
-.. code:: python
-
-    >>> df
-       a  b  c
-    0  1  1  1
-    1  1  1  2
-    2  1  2  3
-    3  2  2  4
-    4  2  3  5
-
-Rolling window sum on each group with a window size of 2:
-
-.. code:: python
-
-    >>> df.groupby('a').rolling(2).sum()
-            a     b     c
-    a
-    1 0  <NA>  <NA>  <NA>
-      1     2     2     3
-      2     2     3     5
-    2 3  <NA>  <NA>  <NA>
-      4     4     5     9
diff --git a/docs/cudf/source/basics/index.rst b/docs/cudf/source/basics/index.rst
deleted file mode 100644
index a29866d7e32..00000000000
--- a/docs/cudf/source/basics/index.rst
+++ /dev/null
@@ -1,15 +0,0 @@
-======
-Basics
-======
-
-
-.. toctree::
-   :maxdepth: 2
-
-   basics
-   io.rst
-   groupby.rst
-   PandasCompat.rst
-   dask-cudf.rst
-   internals.rst
-   
\ No newline at end of file
diff --git a/docs/cudf/source/basics/internals.rst b/docs/cudf/source/basics/internals.rst
deleted file mode 100644
index 96ef40d51e6..00000000000
--- a/docs/cudf/source/basics/internals.rst
+++ /dev/null
@@ -1,216 +0,0 @@
-cuDF internals
-==============
-
-The cuDF API closely matches that of the
-`Pandas <https://pandas.pydata.org/>`__ library. Thus, we have the types
-``cudf.Series``, ``cudf.DataFrame`` and ``cudf.Index`` which look and
-feel very much like their Pandas counterparts.
-
-Under the hood, however, cuDF uses data structures very different from
-Pandas. In this document, we describe these internal data structures.
-
-Column
-------
-
-Columns are cuDF's core data structure and they are modeled after the
-`Apache Arrow Columnar
-Format <https://arrow.apache.org/docs/format/Columnar.html>`__.
-
-A column represents a sequence of values, any number of which may be
-"null". Columns are specialized based on the type of data they contain.
-Thus we have ``NumericalColumn``, ``StringColumn``, ``DatetimeColumn``,
-etc.
-
-A column is composed of the following:
-
--  A **data type**, specifying the type of each element.
--  A **data buffer** that may store the data for the column elements.
-   Some column types do not have a data buffer, instead storing data in
-   the children columns.
--  A **mask buffer** whose bits represent the validity (null or not
-   null) of each element. Columns whose elements are all "valid" may not
-   have a mask buffer. Mask buffers are padded to 64 bytes.
--  A tuple of **children** columns, which enable the representation of
-   complex types, such as columns with non-fixed-width elements like
-   strings or lists.
--  A **size** indicating the number of elements in the column.
--  An integer **offset**: a column may represent a "slice" of another
-   column, in which case this offset represents the first element of the
-   slice. The size of the column then gives the extent of the slice. A
-   column that is not a slice has an offset of 0.
-
-For example, the ``NumericalColumn`` backing a Series with 1000 elements
-of type 'int32' and containing nulls is composed of:
-
-1. A data buffer of size 4000 bytes (sizeof(int32) \* 1000)
-2. A mask buffer of size 128 bytes (1000/8 padded to a multiple of 64
-   bytes)
-3. No children columns
-
-As another example, the ``StringColumn`` backing the Series
-``['do', 'you', 'have', 'any', 'cheese?']`` is composed of:
-
-1. No data buffer
-2. No mask buffer as there are no nulls in the Series
-3. Two children columns:
-
-    -  A column of UTF-8 characters
-       ``['d', 'o', 'y', 'o', 'u', 'h', ..., '?']``
-    -  A column of "offsets" to the characters column (in this case,
-       ``[0, 2, 5, 9, 12, 19]``)
-
-Buffer
-------
-
-The data and mask buffers of a column represent data in GPU memory
-(a.k.a. *device memory*), and are objects of type
-``cudf.core.buffer.Buffer``.
-
-Buffers can be constructed from array-like objects that live either on
-the host (e.g., numpy arrays) or the device (e.g., cupy arrays). Arrays
-must be of ``uint8`` dtype or viewed as such.
-
-When constructing a Buffer from a host object such as a numpy array, new
-device memory is allocated:
-
-.. code:: python
-
-    >>> from cudf.core.buffer import Buffer
-    >>> buf = Buffer(np.array([1, 2, 3], dtype='int64').view("uint8"))
-    >>> print(buf.ptr)  # address of new device memory allocation
-    140050901762560
-    >>> print(buf.size)
-    24
-    >>> print(buf._owner)
-    <rmm._lib.device_buffer.DeviceBuffer object at 0x7f6055baab50>
-
-cuDF uses the `RMM <https://github.com/rapidsai/rmm>`__ library for
-allocating device memory. You can read more about device memory
-allocation with RMM
-`here <https://github.com/rapidsai/rmm#devicebuffers>`__.
-
-When constructing a Buffer from a device object such as a CuPy array, no
-new device memory is allocated. Instead, the Buffer points to the
-existing allocation, keeping a reference to the device array:
-
-.. code:: python
-
-    >>> import cupy as cp
-    >>> c_ary = cp.asarray([1, 2, 3], dtype='int64')
-    >>> buf = Buffer(c_ary.view("uint8"))
-    >>> print(c_ary.data.mem.ptr)
-    140050901762560
-    >>> print(buf.ptr)
-    140050901762560
-    >>> print(buf.size)
-    24
-    >>> print(buf._owner is c_ary)
-    True
-
-An uninitialized block of device memory can be allocated with
-``Buffer.empty``:
-
-.. code:: python
-
-    >>> buf = Buffer.empty(10)
-    >>> print(buf.size)
-    10
-    >>> print(buf._owner)
-    <rmm._lib.device_buffer.DeviceBuffer object at 0x7f6055baa890>
-
-ColumnAccessor
---------------
-
-cuDF ``Series``, ``DataFrame`` and ``Index`` are all subclasses of an
-internal ``Frame`` class. The underlying data structure of ``Frame`` is
-an ordered, dictionary-like object known as ``ColumnAccessor``, which
-can be accessed via the ``._data`` attribute:
-
-.. code:: python
-
-    >>> a = cudf.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
-    >>> a._data
-    ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d12e050>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d12e320>)]), multiindex=False, level_names=(None,))
-
-ColumnAccessor is an ordered mapping of column labels to columns. In
-addition to behaving like an OrderedDict, it supports things like
-selecting multiple columns (both by index and label), as well as
-hierarchical indexing.
-
-.. code:: python
-
-    >>> from cudf.core.column_accessor import ColumnAccessor
-
-The values of a ColumnAccessor are coerced to Columns during
-construction:
-
-.. code:: python
-
-    >>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
-    >>> ca['x']
-    <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>
-    >>> ca['y']
-    <cudf.core.column.string.StringColumn object at 0x7f5a7d578b90>
-    >>> ca.pop('x')
-    <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>
-    >>> ca
-    ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578b90>)]), multiindex=False, level_names=(None,))
-
-Columns can be inserted at a specified location:
-
-.. code:: python
-
-    >>> ca.insert('z', [3, 4, 5], loc=1)
-    >>> ca
-    ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d578dd0>), ('z', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d578680>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d12e3b0>)]), multiindex=False, level_names=(None,))
-
-Selecting columns by index:
-
-.. code:: python
-
-    >>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c'], 'z': [4, 5, 6]})
-    >>> ca.select_by_index(1)
-    ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))
-    >>> ca.select_by_index([0, 1])
-    ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))
-    >>> ca.select_by_index(slice(1, 3))
-    ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>), ('z', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5788c0>)]), multiindex=False, level_names=(None,))
-
-Selecting columns by label:
-
-.. code:: python
-
-    >>> ca.select_by_label(['y', 'z'])
-    ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>), ('z', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5788c0>)]), multiindex=False, level_names=(None,))
-    >>> ca.select_by_label(slice('x', 'y'))
-    ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))
-
-A ColumnAccessor with tuple keys (and constructed with
-``multiindex=True``) can be hierarchically indexed:
-
-.. code:: python
-
-    >>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], ('a', 'c'): [2, 3, 4], 'b': [4, 5, 6]}, multiindex=True)
-    >>> ca.select_by_label('a')
-    ColumnAccessor(OrderedColumnDict([('b', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>), ('c', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d578dd0>)]), multiindex=False, level_names=(None,))
-    >>> ca.select_by_label(('a', 'b'))
-    ColumnAccessor(OrderedColumnDict([(('a', 'b'), <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>)]), multiindex=False, level_names=(None,))
-
-"Wildcard" indexing is also allowed:
-
-.. code:: python
-
-    >>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], ('a', 'c'): [2, 3, 4], ('d', 'b'): [4, 5, 6]}, multiindex=True)
-    >>> ca.select_by_label((slice(None), 'b'))
-    ColumnAccessor(OrderedColumnDict([(('a', 'b'), <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d578830>), (('d', 'b'), <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d578680>)]), multiindex=True, level_names=(None, None))
-
-Finally, ColumnAccessors can convert to Pandas ``Index`` or
-``MultiIndex`` objects:
-
-.. code:: python
-
-    >>> ca.to_pandas_index()
-    MultiIndex([('a', 'b'),
-                ('a', 'c'),
-                ('d', 'b')],
-               )
diff --git a/docs/cudf/source/basics/io-gds-integration.rst b/docs/cudf/source/basics/io-gds-integration.rst
deleted file mode 100644
index ce774453386..00000000000
--- a/docs/cudf/source/basics/io-gds-integration.rst
+++ /dev/null
@@ -1,42 +0,0 @@
-GPUDirect Storage Integration
-=============================
-
-Many IO APIs can use the GPUDirect Storage (GDS) library to optimize IO operations.
-GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU.
-GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer.
-The SDK is available for download `here <https://developer.nvidia.com/gpudirect-storage>`_.
-GDS is also included in CUDA Toolkit 11.4 and higher.
-
-Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``.
-This variable also controls the GDS compatibility mode.
-
-There are four valid values for the environment variable:
-
-- "GDS": Enable GDS use; GDS compatibility mode is *off*.
-- "ALWAYS": Enable GDS use; GDS compatibility mode is *on*.
-- "KVIKIO": Enable GDS through `KvikIO <https://github.com/rapidsai/kvikio>`_.
-- "OFF": Completely disable GDS use.
-
-If no value is set, behavior will be the same as the "GDS" option.
-
-This environment variable also affects how cuDF treats GDS errors.
-When ``LIBCUDF_CUFILE_POLICY`` is set to "GDS" and a GDS API call fails for any reason, cuDF falls back to the internal implementation with bounce buffers.
-When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on),
-cuDF throws an exception to propagate the error to the user.
-When ``LIBCUDF_CUFILE_POLICY`` is set to "KVIKIO" and a KvikIO API call fails for any reason (unlikely, given that KvikIO implements its own compatibility mode) cuDF throws an exception to propagate the error to the user.
-For more information about error handling, compatibility mode, and tuning parameters in KvikIO see: https://github.com/rapidsai/kvikio
-
-Operations that support the use of GPUDirect Storage:
-
-- :py:func:`cudf.read_avro`
-- :py:func:`cudf.read_parquet`
-- :py:func:`cudf.read_orc`
-- :py:meth:`cudf.DataFrame.to_csv`
-- :py:meth:`cudf.DataFrame.to_parquet`
-- :py:meth:`cudf.DataFrame.to_orc`
-
-Several parameters that can be used to tune the performance of GDS-enabled I/O are exposed through environment variables:
-
-- ``LIBCUDF_CUFILE_THREAD_COUNT``: Integral value, maximum number of parallel reads/writes per file (default 16);
-- ``LIBCUDF_CUFILE_SLICE_SIZE``: Integral value, maximum size of each GDS read/write, in bytes (default 4MB).
-  Larger I/O operations are split into multiple calls.
diff --git a/docs/cudf/source/basics/io-nvcomp-integration.rst b/docs/cudf/source/basics/io-nvcomp-integration.rst
deleted file mode 100644
index fc24e0c15f4..00000000000
--- a/docs/cudf/source/basics/io-nvcomp-integration.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-nvCOMP Integration
-=============================
-
-Some types of compression/decompression can be performed using either the `nvCOMP library <https://github.com/NVIDIA/nvcomp>`_ or the internal implementation.
-
-Which implementation is used by default depends on the data format and the compression type.
-Behavior can be influenced through environment variable ``LIBCUDF_NVCOMP_POLICY``.
-
-There are three valid values for the environment variable:
-
-- "STABLE": Only enable nvCOMP in places where it has been deemed stable for production use.
-- "ALWAYS": Enable all available uses of nvCOMP, including new, experimental combinations.
-- "OFF": Disable nvCOMP use whenever possible and use the internal implementations instead.
-
-If no value is set, behavior will be the same as the "STABLE" option.
-
-
-.. table:: Current policy for nvCOMP use for different types
-    :widths: 20 15 15 15 15 15 15 15 15 15
-
-    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+
-    |                       |       CSV       |      Parquet    |       JSON       |       ORC       |  AVRO  |
-    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+
-    | Compression Type      | Writer | Reader | Writer | Reader | Writer¹ | Reader | Writer | Reader | Reader |
-    +=======================+========+========+========+========+=========+========+========+========+========+
-    | snappy                | ❌     | ❌     | Stable | Stable | ❌      | ❌     | Stable | Stable | ❌     |
-    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+
diff --git a/docs/cudf/source/basics/io.rst b/docs/cudf/source/basics/io.rst
deleted file mode 100644
index ee3d997d664..00000000000
--- a/docs/cudf/source/basics/io.rst
+++ /dev/null
@@ -1,13 +0,0 @@
-~~~~~~~~~~~~~~
-Input / Output
-~~~~~~~~~~~~~~
-
-This page contains Input / Output related APIs in cuDF.
-
-.. toctree::
-   :maxdepth: 2
-   :caption: Contents:
-
-   io-supported-types.rst
-   io-gds-integration.rst
-   io-nvcomp-integration.rst
\ No newline at end of file
diff --git a/docs/cudf/source/index.rst b/docs/cudf/source/index.rst
index 90b287bd1b6..2c1df4a0c12 100644
--- a/docs/cudf/source/index.rst
+++ b/docs/cudf/source/index.rst
@@ -14,7 +14,6 @@ the details of CUDA programming.
    :caption: Contents:
 
    user_guide/index
-   basics/index
    api_docs/index
 
 
diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb
index 9bb95406e8a..080fce3c55c 100644
--- a/docs/cudf/source/user_guide/10min.ipynb
+++ b/docs/cudf/source/user_guide/10min.ipynb
@@ -2,6 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
+   "id": "e9357872",
    "metadata": {},
    "source": [
     "10 Minutes to cuDF and Dask-cuDF\n",
@@ -26,6 +27,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
+   "id": "92eed4cb",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -45,6 +47,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "ed6c6047",
    "metadata": {},
    "source": [
     "Object Creation\n",
@@ -53,6 +56,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "aeedd961",
    "metadata": {},
    "source": [
     "Creating a `cudf.Series` and `dask_cudf.Series`."
@@ -61,6 +65,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
+   "id": "cf8b08e5",
    "metadata": {},
    "outputs": [
     {
@@ -87,6 +92,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
+   "id": "083a5898",
    "metadata": {},
    "outputs": [
     {
@@ -112,6 +118,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "6346e1b1",
    "metadata": {},
    "source": [
     "Creating a `cudf.DataFrame` and a `dask_cudf.DataFrame` by specifying values for each column."
@@ -120,6 +127,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
+   "id": "83d1e7f5",
    "metadata": {},
    "outputs": [
     {
@@ -313,6 +321,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
+   "id": "71b61d62",
    "metadata": {},
    "outputs": [
     {
@@ -502,6 +511,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c7cb5abc",
    "metadata": {},
    "source": [
     "Creating a `cudf.DataFrame` from a pandas `Dataframe` and a `dask_cudf.Dataframe` from a `cudf.Dataframe`.\n",
@@ -512,6 +522,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
+   "id": "07a62244",
    "metadata": {},
    "outputs": [
     {
@@ -586,6 +597,7 @@
   {
    "cell_type": "code",
    "execution_count": 7,
+   "id": "f5cb0c65",
    "metadata": {},
    "outputs": [
     {
@@ -658,6 +670,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "025eac40",
    "metadata": {},
    "source": [
     "Viewing Data\n",
@@ -666,6 +679,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "47a567e8",
    "metadata": {},
    "source": [
     "Viewing the top rows of a GPU dataframe."
@@ -674,6 +688,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
+   "id": "ab8cbdb8",
    "metadata": {},
    "outputs": [
     {
@@ -737,6 +752,7 @@
   {
    "cell_type": "code",
    "execution_count": 9,
+   "id": "2e923d8a",
    "metadata": {},
    "outputs": [
     {
@@ -799,6 +815,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "61257b4b",
    "metadata": {},
    "source": [
     "Sorting by values."
@@ -807,6 +824,7 @@
   {
    "cell_type": "code",
    "execution_count": 10,
+   "id": "512770f9",
    "metadata": {},
    "outputs": [
     {
@@ -996,6 +1014,7 @@
   {
    "cell_type": "code",
    "execution_count": 11,
+   "id": "1a13993f",
    "metadata": {},
    "outputs": [
     {
@@ -1184,6 +1203,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "19bce4c4",
    "metadata": {},
    "source": [
     "Selection\n",
@@ -1194,6 +1214,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "ba55980e",
    "metadata": {},
    "source": [
     "Selecting a single column, which initially yields a `cudf.Series` or `dask_cudf.Series`. Calling `compute` results in a `cudf.Series` (equivalent to `df.a`)."
@@ -1202,6 +1223,7 @@
   {
    "cell_type": "code",
    "execution_count": 12,
+   "id": "885989a6",
    "metadata": {},
    "outputs": [
     {
@@ -1242,6 +1264,7 @@
   {
    "cell_type": "code",
    "execution_count": 13,
+   "id": "14a74255",
    "metadata": {},
    "outputs": [
     {
@@ -1281,6 +1304,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "498d79f2",
    "metadata": {},
    "source": [
     "## Selection by Label"
@@ -1288,6 +1312,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "4b8b8e13",
    "metadata": {},
    "source": [
     "Selecting rows from index 2 to index 5 from columns 'a' and 'b'."
@@ -1296,6 +1321,7 @@
   {
    "cell_type": "code",
    "execution_count": 14,
+   "id": "d40bc19c",
    "metadata": {},
    "outputs": [
     {
@@ -1368,6 +1394,7 @@
   {
    "cell_type": "code",
    "execution_count": 15,
+   "id": "7688535b",
    "metadata": {},
    "outputs": [
     {
@@ -1439,6 +1466,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "8a64ce7a",
    "metadata": {},
    "source": [
     "## Selection by Position"
@@ -1446,6 +1474,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "dfba2bb2",
    "metadata": {},
    "source": [
     "Selecting via integers and integer slices, like numpy/pandas. Note that this functionality is not available for Dask-cuDF DataFrames."
@@ -1454,6 +1483,7 @@
   {
    "cell_type": "code",
    "execution_count": 16,
+   "id": "fb8d6d43",
    "metadata": {},
    "outputs": [
     {
@@ -1477,6 +1507,7 @@
   {
    "cell_type": "code",
    "execution_count": 17,
+   "id": "263231da",
    "metadata": {},
    "outputs": [
     {
@@ -1542,6 +1573,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "2223b089",
    "metadata": {},
    "source": [
     "You can also select elements of a `DataFrame` or `Series` with direct index access."
@@ -1550,6 +1582,7 @@
   {
    "cell_type": "code",
    "execution_count": 18,
+   "id": "13f6158b",
    "metadata": {},
    "outputs": [
     {
@@ -1613,6 +1646,7 @@
   {
    "cell_type": "code",
    "execution_count": 19,
+   "id": "3cf4aa26",
    "metadata": {},
    "outputs": [
     {
@@ -1634,6 +1668,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "ff633b2d",
    "metadata": {},
    "source": [
     "## Boolean Indexing"
@@ -1641,6 +1676,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "bbdef48f",
    "metadata": {},
    "source": [
     "Selecting rows in a `DataFrame` or `Series` by direct Boolean indexing."
@@ -1649,6 +1685,7 @@
   {
    "cell_type": "code",
    "execution_count": 20,
+   "id": "becb916f",
    "metadata": {},
    "outputs": [
     {
@@ -1726,6 +1763,7 @@
   {
    "cell_type": "code",
    "execution_count": 21,
+   "id": "b9475c43",
    "metadata": {},
    "outputs": [
     {
@@ -1802,6 +1840,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "ecf982f5",
    "metadata": {},
    "source": [
     "Selecting values from a `DataFrame` where a Boolean condition is met, via the `query` API."
@@ -1810,6 +1849,7 @@
   {
    "cell_type": "code",
    "execution_count": 22,
+   "id": "fc2fc9f9",
    "metadata": {},
    "outputs": [
     {
@@ -1866,6 +1906,7 @@
   {
    "cell_type": "code",
    "execution_count": 23,
+   "id": "1a05a07f",
    "metadata": {},
    "outputs": [
     {
@@ -1921,6 +1962,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7f8955a0",
    "metadata": {},
    "source": [
     "You can also pass local variables to Dask-cuDF queries, via the `local_dict` keyword. With standard cuDF, you may either use the `local_dict` keyword or directly pass the variable via the `@` keyword. Supported logical operators include `>`, `<`, `>=`, `<=`, `==`, and `!=`."
@@ -1929,6 +1971,7 @@
   {
    "cell_type": "code",
    "execution_count": 24,
+   "id": "49485a4b",
    "metadata": {},
    "outputs": [
     {
@@ -1986,6 +2029,7 @@
   {
    "cell_type": "code",
    "execution_count": 25,
+   "id": "0f3a9116",
    "metadata": {},
    "outputs": [
     {
@@ -2042,6 +2086,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c355af07",
    "metadata": {},
    "source": [
     "Using the `isin` method for filtering."
@@ -2050,6 +2095,7 @@
   {
    "cell_type": "code",
    "execution_count": 26,
+   "id": "f44a5a57",
    "metadata": {},
    "outputs": [
     {
@@ -2112,6 +2158,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "79a50beb",
    "metadata": {},
    "source": [
     "## MultiIndex"
@@ -2119,6 +2166,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "14e70234",
    "metadata": {},
    "source": [
     "cuDF supports hierarchical indexing of DataFrames using MultiIndex. Grouping hierarchically (see `Grouping` below) automatically produces a DataFrame with a MultiIndex."
@@ -2127,6 +2175,7 @@
   {
    "cell_type": "code",
    "execution_count": 27,
+   "id": "882973ed",
    "metadata": {},
    "outputs": [
     {
@@ -2153,6 +2202,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c10971cc",
    "metadata": {},
    "source": [
     "This index can back either axis of a DataFrame."
@@ -2161,6 +2211,7 @@
   {
    "cell_type": "code",
    "execution_count": 28,
+   "id": "5417aeb9",
    "metadata": {},
    "outputs": [
     {
@@ -2238,6 +2289,7 @@
   {
    "cell_type": "code",
    "execution_count": 29,
+   "id": "4d6fb4ff",
    "metadata": {},
    "outputs": [
     {
@@ -2311,6 +2363,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "63dc11d8",
    "metadata": {},
    "source": [
     "Accessing values of a DataFrame with a MultiIndex. Note that slicing is not yet supported."
@@ -2319,6 +2372,7 @@
   {
    "cell_type": "code",
    "execution_count": 30,
+   "id": "3644920c",
    "metadata": {},
    "outputs": [
     {
@@ -2340,6 +2394,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "697a9a36",
    "metadata": {},
    "source": [
     "Missing Data\n",
@@ -2348,6 +2403,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "86655274",
    "metadata": {},
    "source": [
     "Missing data can be replaced by using the `fillna` method."
@@ -2356,6 +2412,7 @@
   {
    "cell_type": "code",
    "execution_count": 31,
+   "id": "28b06c52",
    "metadata": {},
    "outputs": [
     {
@@ -2381,6 +2438,7 @@
   {
    "cell_type": "code",
    "execution_count": 32,
+   "id": "7fb6a126",
    "metadata": {},
    "outputs": [
     {
@@ -2405,6 +2463,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7a0b732f",
    "metadata": {},
    "source": [
     "Operations\n",
@@ -2413,6 +2472,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "1e8b0464",
    "metadata": {},
    "source": [
     "## Stats"
@@ -2420,6 +2480,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7523512b",
    "metadata": {},
    "source": [
     "Calculating descriptive statistics for a `Series`."
@@ -2428,6 +2489,7 @@
   {
    "cell_type": "code",
    "execution_count": 33,
+   "id": "f7cb604e",
    "metadata": {},
    "outputs": [
     {
@@ -2448,6 +2510,7 @@
   {
    "cell_type": "code",
    "execution_count": 34,
+   "id": "b8957a5f",
    "metadata": {},
    "outputs": [
     {
@@ -2467,6 +2530,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "71fa928a",
    "metadata": {},
    "source": [
     "## Applymap"
@@ -2474,6 +2538,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "d98d6f7b",
    "metadata": {},
    "source": [
     "Applying functions to a `Series`. Note that applying user defined functions directly with Dask-cuDF is not yet implemented. For now, you can use [map_partitions](http://docs.dask.org/en/stable/generated/dask.dataframe.DataFrame.map_partitions.html) to apply a function to each partition of the distributed dataframe."
@@ -2482,6 +2547,7 @@
   {
    "cell_type": "code",
    "execution_count": 35,
+   "id": "5e627811",
    "metadata": {},
    "outputs": [
     {
@@ -2533,6 +2599,7 @@
   {
    "cell_type": "code",
    "execution_count": 36,
+   "id": "96cf628e",
    "metadata": {},
    "outputs": [
     {
@@ -2572,6 +2639,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "cd69c00a",
    "metadata": {},
    "source": [
     "## Histogramming"
@@ -2579,6 +2647,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "39982866",
    "metadata": {},
    "source": [
     "Counting the number of occurrences of each unique value of variable."
@@ -2587,6 +2656,7 @@
   {
    "cell_type": "code",
    "execution_count": 37,
+   "id": "62808675",
    "metadata": {},
    "outputs": [
     {
@@ -2627,6 +2697,7 @@
   {
    "cell_type": "code",
    "execution_count": 38,
+   "id": "5b2a42ce",
    "metadata": {},
    "outputs": [
     {
@@ -2666,6 +2737,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "2d7e62e4",
    "metadata": {},
    "source": [
     "## String Methods"
@@ -2673,6 +2745,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "4e704eca",
    "metadata": {},
    "source": [
     "Like pandas, cuDF provides string processing methods in the `str` attribute of `Series`. Full documentation of string methods is a work in progress. Please see the cuDF API documentation for more information."
@@ -2681,6 +2754,7 @@
   {
    "cell_type": "code",
    "execution_count": 39,
+   "id": "c73e70bb",
    "metadata": {},
    "outputs": [
     {
@@ -2711,6 +2785,7 @@
   {
    "cell_type": "code",
    "execution_count": 40,
+   "id": "697c1c94",
    "metadata": {},
    "outputs": [
     {
@@ -2740,6 +2815,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "dfc1371e",
    "metadata": {},
    "source": [
     "## Concat"
@@ -2747,6 +2823,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "f6fb9b53",
    "metadata": {},
    "source": [
     "Concatenating `Series` and `DataFrames` row-wise."
@@ -2755,6 +2832,7 @@
   {
    "cell_type": "code",
    "execution_count": 41,
+   "id": "60538bbd",
    "metadata": {},
    "outputs": [
     {
@@ -2786,6 +2864,7 @@
   {
    "cell_type": "code",
    "execution_count": 42,
+   "id": "17953847",
    "metadata": {},
    "outputs": [
     {
@@ -2816,6 +2895,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "27f0d621",
    "metadata": {},
    "source": [
     "## Join"
@@ -2823,6 +2903,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "fd35f1a7",
    "metadata": {},
    "source": [
     "Performing SQL style merges. Note that the dataframe order is not maintained, but may be restored post-merge by sorting by the index."
@@ -2831,6 +2912,7 @@
   {
    "cell_type": "code",
    "execution_count": 43,
+   "id": "52ada00a",
    "metadata": {},
    "outputs": [
     {
@@ -2924,6 +3006,7 @@
   {
    "cell_type": "code",
    "execution_count": 44,
+   "id": "409fcf92",
    "metadata": {},
    "outputs": [
     {
@@ -3011,6 +3094,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "d9dcb86b",
    "metadata": {},
    "source": [
     "## Append"
@@ -3018,6 +3102,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "1f896819",
    "metadata": {},
    "source": [
     "Appending values from another `Series` or array-like object."
@@ -3026,6 +3111,7 @@
   {
    "cell_type": "code",
    "execution_count": 45,
+   "id": "9976c1ce",
    "metadata": {},
    "outputs": [
     {
@@ -3064,6 +3150,7 @@
   {
    "cell_type": "code",
    "execution_count": 46,
+   "id": "fe5c54ab",
    "metadata": {},
    "outputs": [
     {
@@ -3093,6 +3180,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "9fa10ef3",
    "metadata": {},
    "source": [
     "## Grouping"
@@ -3100,6 +3188,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "8a6e41f5",
    "metadata": {},
    "source": [
     "Like pandas, cuDF and Dask-cuDF support the Split-Apply-Combine groupby paradigm."
@@ -3108,6 +3197,7 @@
   {
    "cell_type": "code",
    "execution_count": 47,
+   "id": "2a8cafa7",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3119,6 +3209,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0179d60c",
    "metadata": {},
    "source": [
     "Grouping and then applying the `sum` function to the grouped data."
@@ -3127,6 +3218,7 @@
   {
    "cell_type": "code",
    "execution_count": 48,
+   "id": "7c56d186",
    "metadata": {},
    "outputs": [
     {
@@ -3201,6 +3293,7 @@
   {
    "cell_type": "code",
    "execution_count": 49,
+   "id": "f8823b30",
    "metadata": {},
    "outputs": [
     {
@@ -3274,6 +3367,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "a84cb883",
    "metadata": {},
    "source": [
     "Grouping hierarchically then applying the `sum` function to grouped data."
@@ -3282,6 +3376,7 @@
   {
    "cell_type": "code",
    "execution_count": 50,
+   "id": "2184e3ad",
    "metadata": {},
    "outputs": [
     {
@@ -3372,6 +3467,7 @@
   {
    "cell_type": "code",
    "execution_count": 51,
+   "id": "4ec311c1",
    "metadata": {},
    "outputs": [
     {
@@ -3461,6 +3557,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "dedfeb1b",
    "metadata": {},
    "source": [
     "Grouping and applying statistical functions to specific columns, using `agg`."
@@ -3469,6 +3566,7 @@
   {
    "cell_type": "code",
    "execution_count": 52,
+   "id": "2563d8b2",
    "metadata": {},
    "outputs": [
     {
@@ -3539,6 +3637,7 @@
   {
    "cell_type": "code",
    "execution_count": 53,
+   "id": "22c77e75",
    "metadata": {},
    "outputs": [
     {
@@ -3608,6 +3707,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "6d074822",
    "metadata": {},
    "source": [
     "## Transpose"
@@ -3615,6 +3715,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "16c0f0a8",
    "metadata": {},
    "source": [
     "Transposing a dataframe, using either the `transpose` method or `T` property. Currently, all columns must have the same type. Transposing is not currently implemented in Dask-cuDF."
@@ -3623,6 +3724,7 @@
   {
    "cell_type": "code",
    "execution_count": 54,
+   "id": "e265861e",
    "metadata": {},
    "outputs": [
     {
@@ -3690,6 +3792,7 @@
   {
    "cell_type": "code",
    "execution_count": 55,
+   "id": "1fe9b972",
    "metadata": {},
    "outputs": [
     {
@@ -3752,14 +3855,16 @@
   },
   {
    "cell_type": "markdown",
+   "id": "9ce02827",
    "metadata": {},
    "source": [
     "Time Series\n",
-    "------------\n"
+    "------------"
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "fec907ff",
    "metadata": {},
    "source": [
     "`DataFrames` supports `datetime` typed columns, which allow users to interact with and filter data based on specific timestamps."
@@ -3768,6 +3873,7 @@
   {
    "cell_type": "code",
    "execution_count": 56,
+   "id": "7a425d3f",
    "metadata": {},
    "outputs": [
     {
@@ -3847,6 +3953,7 @@
   {
    "cell_type": "code",
    "execution_count": 57,
+   "id": "87f0e56e",
    "metadata": {},
    "outputs": [
     {
@@ -3919,6 +4026,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0d0e541c",
    "metadata": {},
    "source": [
     "Categoricals\n",
@@ -3927,6 +4035,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "a36f9543",
    "metadata": {},
    "source": [
     "`DataFrames` support categorical columns."
@@ -3935,6 +4044,7 @@
   {
    "cell_type": "code",
    "execution_count": 58,
+   "id": "05bd8be8",
    "metadata": {},
    "outputs": [
     {
@@ -4021,6 +4131,7 @@
   {
    "cell_type": "code",
    "execution_count": 59,
+   "id": "676b4963",
    "metadata": {},
    "outputs": [
     {
@@ -4105,6 +4216,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "e24f2e7b",
    "metadata": {},
    "source": [
     "Accessing the categories of a column. Note that this is currently not supported in Dask-cuDF."
@@ -4113,6 +4225,7 @@
   {
    "cell_type": "code",
    "execution_count": 60,
+   "id": "06310c36",
    "metadata": {},
    "outputs": [
     {
@@ -4132,6 +4245,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "4eb6f858",
    "metadata": {},
    "source": [
     "Accessing the underlying code values of each categorical observation."
@@ -4140,6 +4254,7 @@
   {
    "cell_type": "code",
    "execution_count": 61,
+   "id": "0f6db260",
    "metadata": {},
    "outputs": [
     {
@@ -4166,6 +4281,7 @@
   {
    "cell_type": "code",
    "execution_count": 62,
+   "id": "b87c4375",
    "metadata": {},
    "outputs": [
     {
@@ -4191,6 +4307,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "3f816916",
    "metadata": {},
    "source": [
     "Converting Data Representation\n",
@@ -4199,6 +4316,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "64a17f6d",
    "metadata": {},
    "source": [
     "## Pandas"
@@ -4206,6 +4324,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "3acdcacc",
    "metadata": {},
    "source": [
     "Converting a cuDF and Dask-cuDF `DataFrame` to a pandas `DataFrame`."
@@ -4214,6 +4333,7 @@
   {
    "cell_type": "code",
    "execution_count": 63,
+   "id": "d1fed919",
    "metadata": {},
    "outputs": [
     {
@@ -4310,6 +4430,7 @@
   {
    "cell_type": "code",
    "execution_count": 64,
+   "id": "567c7363",
    "metadata": {},
    "outputs": [
     {
@@ -4405,6 +4526,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c2121453",
    "metadata": {},
    "source": [
     "## Numpy"
@@ -4412,6 +4534,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "a9faa2c5",
    "metadata": {},
    "source": [
     "Converting a cuDF or Dask-cuDF `DataFrame` to a numpy `ndarray`."
@@ -4420,6 +4543,7 @@
   {
    "cell_type": "code",
    "execution_count": 65,
+   "id": "5490d226",
    "metadata": {},
    "outputs": [
     {
@@ -4459,6 +4583,7 @@
   {
    "cell_type": "code",
    "execution_count": 66,
+   "id": "b77ac8ae",
    "metadata": {},
    "outputs": [
     {
@@ -4497,6 +4622,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "1d24d30f",
    "metadata": {},
    "source": [
     "Converting a cuDF or Dask-cuDF `Series` to a numpy `ndarray`."
@@ -4505,6 +4631,7 @@
   {
    "cell_type": "code",
    "execution_count": 67,
+   "id": "f71a0ba3",
    "metadata": {},
    "outputs": [
     {
@@ -4526,6 +4653,7 @@
   {
    "cell_type": "code",
    "execution_count": 68,
+   "id": "a45a74b5",
    "metadata": {},
    "outputs": [
     {
@@ -4546,6 +4674,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0d78a4d2",
    "metadata": {},
    "source": [
     "## Arrow"
@@ -4553,6 +4682,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7e35b829",
    "metadata": {},
    "source": [
     "Converting a cuDF or Dask-cuDF `DataFrame` to a PyArrow `Table`."
@@ -4561,6 +4691,7 @@
   {
    "cell_type": "code",
    "execution_count": 69,
+   "id": "bb9e9a2a",
    "metadata": {},
    "outputs": [
     {
@@ -4592,6 +4723,7 @@
   {
    "cell_type": "code",
    "execution_count": 70,
+   "id": "4d020de7",
    "metadata": {},
    "outputs": [
     {
@@ -4622,14 +4754,16 @@
   },
   {
    "cell_type": "markdown",
+   "id": "ace7b4f9",
    "metadata": {},
    "source": [
     "Getting Data In/Out\n",
-    "------------------------\n"
+    "------------------------"
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "161abb12",
    "metadata": {},
    "source": [
     "## CSV"
@@ -4637,6 +4771,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7e5dc381",
    "metadata": {},
    "source": [
     "Writing to a CSV file."
@@ -4645,6 +4780,7 @@
   {
    "cell_type": "code",
    "execution_count": 71,
+   "id": "3a59715f",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -4657,6 +4793,7 @@
   {
    "cell_type": "code",
    "execution_count": 72,
+   "id": "4ebe98ed",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -4665,6 +4802,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0479fc4f",
    "metadata": {},
    "source": [
     "Reading from a csv file."
@@ -4673,6 +4811,7 @@
   {
    "cell_type": "code",
    "execution_count": 73,
+   "id": "1a70e831",
    "metadata": {},
    "outputs": [
     {
@@ -4905,6 +5044,7 @@
   {
    "cell_type": "code",
    "execution_count": 74,
+   "id": "4c3d9ca3",
    "metadata": {},
    "outputs": [
     {
@@ -5136,6 +5276,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "3d739c6e",
    "metadata": {},
    "source": [
     "Reading all CSV files in a directory into a single `dask_cudf.DataFrame`, using the star wildcard."
@@ -5144,6 +5285,7 @@
   {
    "cell_type": "code",
    "execution_count": 75,
+   "id": "cb7187d2",
    "metadata": {},
    "outputs": [
     {
@@ -5555,6 +5697,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c0939a1e",
    "metadata": {},
    "source": [
     "## Parquet"
@@ -5562,6 +5705,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "14e6a634",
    "metadata": {},
    "source": [
     "Writing to parquet files, using the CPU via PyArrow."
@@ -5570,6 +5714,7 @@
   {
    "cell_type": "code",
    "execution_count": 76,
+   "id": "1812346f",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -5578,6 +5723,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "093cd0fe",
    "metadata": {},
    "source": [
     "Reading parquet files with a GPU-accelerated parquet reader."
@@ -5586,6 +5732,7 @@
   {
    "cell_type": "code",
    "execution_count": 77,
+   "id": "2354b20b",
    "metadata": {},
    "outputs": [
     {
@@ -5817,6 +5964,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "132c3ff2",
    "metadata": {},
    "source": [
     "Writing to parquet files from a `dask_cudf.DataFrame` using PyArrow under the hood."
@@ -5825,6 +5973,7 @@
   {
    "cell_type": "code",
    "execution_count": 78,
+   "id": "c5d7686c",
    "metadata": {},
    "outputs": [
     {
@@ -5844,6 +5993,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0d73d1dd",
    "metadata": {},
    "source": [
     "## ORC"
@@ -5851,6 +6001,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "61b5f466",
    "metadata": {},
    "source": [
     "Reading ORC files."
@@ -5858,16 +6009,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 80,
+   "execution_count": 79,
+   "id": "93364ff3",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "'/home/mmccarty/sandbox/rapids/cudf/python/cudf/cudf/tests/data/orc/TestOrcFile.test1.orc'"
+       "'/home/ashwin/workspace/rapids/cudf/python/cudf/cudf/tests/data/orc/TestOrcFile.test1.orc'"
       ]
      },
-     "execution_count": 80,
+     "execution_count": 79,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -5883,7 +6035,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
+   "execution_count": 80,
+   "id": "2b6785c7",
    "metadata": {},
    "outputs": [
     {
@@ -5974,7 +6127,7 @@
        "1  [{'key': 'chani', 'value': {'int1': 5, 'string...  "
       ]
      },
-     "execution_count": 81,
+     "execution_count": 80,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -5986,6 +6139,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "238ce6a4",
    "metadata": {},
    "source": [
     "Dask Performance Tips\n",
@@ -6000,6 +6154,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "3de9aeca",
    "metadata": {},
    "source": [
     "First, we set up a GPU cluster. With our `client` set up, Dask-cuDF computation will be distributed across the GPUs in the cluster."
@@ -6007,17 +6162,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 82,
+   "execution_count": 81,
+   "id": "e4852d48",
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2022-04-21 10:11:07,360 - distributed.diskutils - INFO - Found stale lock file and directory '/home/mmccarty/sandbox/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-ghcx5g0e', purging\n",
-      "2022-04-21 10:11:07,360 - distributed.diskutils - INFO - Found stale lock file and directory '/home/mmccarty/sandbox/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-wh16f0h3', purging\n",
-      "2022-04-21 10:11:07,360 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n",
-      "2022-04-21 10:11:07,388 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n"
+      "2022-04-21 13:26:06,860 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n",
+      "2022-04-21 13:26:06,904 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize\n"
      ]
     },
     {
@@ -6027,7 +6181,7 @@
        "    <div style=\"width: 24px; height: 24px; background-color: #e1e1e1; border: 3px solid #9D9D9D; border-radius: 5px; position: absolute;\"> </div>\n",
        "    <div style=\"margin-left: 48px;\">\n",
        "        <h3 style=\"margin-bottom: 0px;\">Client</h3>\n",
-       "        <p style=\"color: #9D9D9D; margin-bottom: 0px;\">Client-e3492c89-c17c-11ec-813e-fc3497a62adc</p>\n",
+       "        <p style=\"color: #9D9D9D; margin-bottom: 0px;\">Client-20d00fd5-c198-11ec-906c-c8d9d2247354</p>\n",
        "        <table style=\"width: 100%; text-align: left;\">\n",
        "\n",
        "        <tr>\n",
@@ -6056,7 +6210,7 @@
        "    </div>\n",
        "    <div style=\"margin-left: 48px;\">\n",
        "        <h3 style=\"margin-bottom: 0px; margin-top: 0px;\">LocalCUDACluster</h3>\n",
-       "        <p style=\"color: #9D9D9D; margin-bottom: 0px;\">db2501e1</p>\n",
+       "        <p style=\"color: #9D9D9D; margin-bottom: 0px;\">47648c26</p>\n",
        "        <table style=\"width: 100%; text-align: left;\">\n",
        "            <tr>\n",
        "                <td style=\"text-align: left;\">\n",
@@ -6093,11 +6247,11 @@
        "        <div style=\"width: 24px; height: 24px; background-color: #FFF7E5; border: 3px solid #FF6132; border-radius: 5px; position: absolute;\"> </div>\n",
        "        <div style=\"margin-left: 48px;\">\n",
        "            <h3 style=\"margin-bottom: 0px;\">Scheduler</h3>\n",
-       "            <p style=\"color: #9D9D9D; margin-bottom: 0px;\">Scheduler-6f476508-e52f-49e9-8f1f-6a8641e177bd</p>\n",
+       "            <p style=\"color: #9D9D9D; margin-bottom: 0px;\">Scheduler-f28bff16-cb70-452c-b8af-b9299a8d7b20</p>\n",
        "            <table style=\"width: 100%; text-align: left;\">\n",
        "                <tr>\n",
        "                    <td style=\"text-align: left;\">\n",
-       "                        <strong>Comm:</strong> tcp://127.0.0.1:39755\n",
+       "                        <strong>Comm:</strong> tcp://127.0.0.1:33995\n",
        "                    </td>\n",
        "                    <td style=\"text-align: left;\">\n",
        "                        <strong>Workers:</strong> 2\n",
@@ -6139,7 +6293,7 @@
        "                <table style=\"width: 100%; text-align: left;\">\n",
        "                    <tr>\n",
        "                        <td style=\"text-align: left;\">\n",
-       "                            <strong>Comm: </strong> tcp://127.0.0.1:33491\n",
+       "                            <strong>Comm: </strong> tcp://127.0.0.1:40479\n",
        "                        </td>\n",
        "                        <td style=\"text-align: left;\">\n",
        "                            <strong>Total threads: </strong> 1\n",
@@ -6147,7 +6301,7 @@
        "                    </tr>\n",
        "                    <tr>\n",
        "                        <td style=\"text-align: left;\">\n",
-       "                            <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:34333/status\" target=\"_blank\">http://127.0.0.1:34333/status</a>\n",
+       "                            <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:38985/status\" target=\"_blank\">http://127.0.0.1:38985/status</a>\n",
        "                        </td>\n",
        "                        <td style=\"text-align: left;\">\n",
        "                            <strong>Memory: </strong> 62.82 GiB\n",
@@ -6155,13 +6309,13 @@
        "                    </tr>\n",
        "                    <tr>\n",
        "                        <td style=\"text-align: left;\">\n",
-       "                            <strong>Nanny: </strong> tcp://127.0.0.1:43093\n",
+       "                            <strong>Nanny: </strong> tcp://127.0.0.1:33447\n",
        "                        </td>\n",
        "                        <td style=\"text-align: left;\"></td>\n",
        "                    </tr>\n",
        "                    <tr>\n",
        "                        <td colspan=\"2\" style=\"text-align: left;\">\n",
-       "                            <strong>Local directory: </strong> /home/mmccarty/sandbox/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-jsuvfju4\n",
+       "                            <strong>Local directory: </strong> /home/ashwin/workspace/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-be7zg92w\n",
        "                        </td>\n",
        "                    </tr>\n",
        "\n",
@@ -6193,7 +6347,7 @@
        "                <table style=\"width: 100%; text-align: left;\">\n",
        "                    <tr>\n",
        "                        <td style=\"text-align: left;\">\n",
-       "                            <strong>Comm: </strong> tcp://127.0.0.1:44033\n",
+       "                            <strong>Comm: </strong> tcp://127.0.0.1:40519\n",
        "                        </td>\n",
        "                        <td style=\"text-align: left;\">\n",
        "                            <strong>Total threads: </strong> 1\n",
@@ -6201,7 +6355,7 @@
        "                    </tr>\n",
        "                    <tr>\n",
        "                        <td style=\"text-align: left;\">\n",
-       "                            <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:45225/status\" target=\"_blank\">http://127.0.0.1:45225/status</a>\n",
+       "                            <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:40951/status\" target=\"_blank\">http://127.0.0.1:40951/status</a>\n",
        "                        </td>\n",
        "                        <td style=\"text-align: left;\">\n",
        "                            <strong>Memory: </strong> 62.82 GiB\n",
@@ -6209,13 +6363,13 @@
        "                    </tr>\n",
        "                    <tr>\n",
        "                        <td style=\"text-align: left;\">\n",
-       "                            <strong>Nanny: </strong> tcp://127.0.0.1:46529\n",
+       "                            <strong>Nanny: </strong> tcp://127.0.0.1:39133\n",
        "                        </td>\n",
        "                        <td style=\"text-align: left;\"></td>\n",
        "                    </tr>\n",
        "                    <tr>\n",
        "                        <td colspan=\"2\" style=\"text-align: left;\">\n",
-       "                            <strong>Local directory: </strong> /home/mmccarty/sandbox/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-zlsacw8_\n",
+       "                            <strong>Local directory: </strong> /home/ashwin/workspace/rapids/cudf/docs/cudf/source/user_guide/dask-worker-space/worker-3v0c20ux\n",
        "                        </td>\n",
        "                    </tr>\n",
        "\n",
@@ -6251,10 +6405,10 @@
        "</div>"
       ],
       "text/plain": [
-       "<Client: 'tcp://127.0.0.1:39755' processes=2 threads=2, memory=125.65 GiB>"
+       "<Client: 'tcp://127.0.0.1:33995' processes=2 threads=2, memory=45.79 GiB>"
       ]
      },
-     "execution_count": 82,
+     "execution_count": 81,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -6272,6 +6426,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "181e4d10",
    "metadata": {},
    "source": [
     "### Persisting Data\n",
@@ -6280,7 +6435,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 83,
+   "execution_count": 82,
+   "id": "d47a1142",
    "metadata": {},
    "outputs": [
     {
@@ -6356,7 +6512,7 @@
        "<dask_cudf.DataFrame | 20 tasks | 5 npartitions>"
       ]
      },
-     "execution_count": 83,
+     "execution_count": 82,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -6372,45 +6528,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 84,
+   "execution_count": 83,
+   "id": "c3cb612a",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Thu Apr 21 10:11:07 2022       \n",
-      "+-----------------------------------------------------------------------------+\n",
-      "| NVIDIA-SMI 495.29.05    Driver Version: 495.29.05    CUDA Version: 11.5     |\n",
-      "|-------------------------------+----------------------+----------------------+\n",
-      "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
-      "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
-      "|                               |                      |               MIG M. |\n",
-      "|===============================+======================+======================|\n",
-      "|   0  NVIDIA RTX A6000    On   | 00000000:01:00.0  On |                  Off |\n",
-      "| 30%   48C    P2    83W / 300W |   2970MiB / 48651MiB |      7%      Default |\n",
-      "|                               |                      |                  N/A |\n",
-      "+-------------------------------+----------------------+----------------------+\n",
-      "|   1  NVIDIA RTX A6000    On   | 00000000:02:00.0 Off |                  Off |\n",
-      "| 30%   36C    P2    25W / 300W |    265MiB / 48685MiB |      5%      Default |\n",
-      "|                               |                      |                  N/A |\n",
-      "+-------------------------------+----------------------+----------------------+\n",
-      "                                                                               \n",
-      "+-----------------------------------------------------------------------------+\n",
-      "| Processes:                                                                  |\n",
-      "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
-      "|        ID   ID                                                   Usage      |\n",
-      "|=============================================================================|\n",
-      "|    0   N/A  N/A      2292      G   /usr/lib/xorg/Xorg                871MiB |\n",
-      "|    0   N/A  N/A      2441      G   /usr/bin/gnome-shell              316MiB |\n",
-      "|    0   N/A  N/A   1240494      G   ...AAAAAAAAA= --shared-files       68MiB |\n",
-      "|    0   N/A  N/A   1240525      G   ...RendererForSitePerProcess       41MiB |\n",
-      "|    0   N/A  N/A   1243689      C   .../envs/cudf_dev/bin/python      593MiB |\n",
-      "|    0   N/A  N/A   1245502      C   .../envs/cudf_dev/bin/python      753MiB |\n",
-      "|    0   N/A  N/A   1245751      C   .../envs/cudf_dev/bin/python      257MiB |\n",
-      "|    1   N/A  N/A      2292      G   /usr/lib/xorg/Xorg                  4MiB |\n",
-      "|    1   N/A  N/A   1245748      C   .../envs/cudf_dev/bin/python      257MiB |\n",
-      "+-----------------------------------------------------------------------------+\n"
+      "Thu Apr 21 13:26:07 2022       \r\n",
+      "+-----------------------------------------------------------------------------+\r\n",
+      "| NVIDIA-SMI 495.29.05    Driver Version: 495.29.05    CUDA Version: 11.5     |\r\n",
+      "|-------------------------------+----------------------+----------------------+\r\n",
+      "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\r\n",
+      "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\r\n",
+      "|                               |                      |               MIG M. |\r\n",
+      "|===============================+======================+======================|\r\n",
+      "|   0  Quadro GV100        Off  | 00000000:15:00.0 Off |                  Off |\r\n",
+      "| 39%   52C    P2    51W / 250W |   1115MiB / 32508MiB |      0%      Default |\r\n",
+      "|                               |                      |                  N/A |\r\n",
+      "+-------------------------------+----------------------+----------------------+\r\n",
+      "|   1  Quadro GV100        Off  | 00000000:2D:00.0 Off |                  Off |\r\n",
+      "| 43%   57C    P2    52W / 250W |    306MiB / 32498MiB |      0%      Default |\r\n",
+      "|                               |                      |                  N/A |\r\n",
+      "+-------------------------------+----------------------+----------------------+\r\n",
+      "                                                                               \r\n",
+      "+-----------------------------------------------------------------------------+\r\n",
+      "| Processes:                                                                  |\r\n",
+      "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\r\n",
+      "|        ID   ID                                                   Usage      |\r\n",
+      "|=============================================================================|\r\n",
+      "+-----------------------------------------------------------------------------+\r\n"
      ]
     }
    ],
@@ -6420,6 +6568,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "b98810c4",
    "metadata": {},
    "source": [
     "Because Dask is lazy, the computation has not yet occurred. We can see that there are twenty tasks in the task graph and we've used about 800 MB of memory. We can force computation by using `persist`. By forcing execution, the result is now explicitly in memory and our task graph only contains one task per partition (the baseline)."
@@ -6427,7 +6576,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 85,
+   "execution_count": 84,
+   "id": "a929577c",
    "metadata": {},
    "outputs": [
     {
@@ -6503,7 +6653,7 @@
        "<dask_cudf.DataFrame | 5 tasks | 5 npartitions>"
       ]
      },
-     "execution_count": 85,
+     "execution_count": 84,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -6515,45 +6665,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 86,
+   "execution_count": 85,
+   "id": "8aa7c079",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Thu Apr 21 10:11:08 2022       \n",
-      "+-----------------------------------------------------------------------------+\n",
-      "| NVIDIA-SMI 495.29.05    Driver Version: 495.29.05    CUDA Version: 11.5     |\n",
-      "|-------------------------------+----------------------+----------------------+\n",
-      "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
-      "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
-      "|                               |                      |               MIG M. |\n",
-      "|===============================+======================+======================|\n",
-      "|   0  NVIDIA RTX A6000    On   | 00000000:01:00.0  On |                  Off |\n",
-      "| 30%   48C    P2    84W / 300W |   2970MiB / 48651MiB |      3%      Default |\n",
-      "|                               |                      |                  N/A |\n",
-      "+-------------------------------+----------------------+----------------------+\n",
-      "|   1  NVIDIA RTX A6000    On   | 00000000:02:00.0 Off |                  Off |\n",
-      "| 30%   36C    P2    37W / 300W |    265MiB / 48685MiB |      0%      Default |\n",
-      "|                               |                      |                  N/A |\n",
-      "+-------------------------------+----------------------+----------------------+\n",
-      "                                                                               \n",
-      "+-----------------------------------------------------------------------------+\n",
-      "| Processes:                                                                  |\n",
-      "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
-      "|        ID   ID                                                   Usage      |\n",
-      "|=============================================================================|\n",
-      "|    0   N/A  N/A      2292      G   /usr/lib/xorg/Xorg                871MiB |\n",
-      "|    0   N/A  N/A      2441      G   /usr/bin/gnome-shell              316MiB |\n",
-      "|    0   N/A  N/A   1240494      G   ...AAAAAAAAA= --shared-files       68MiB |\n",
-      "|    0   N/A  N/A   1240525      G   ...RendererForSitePerProcess       41MiB |\n",
-      "|    0   N/A  N/A   1243689      C   .../envs/cudf_dev/bin/python      593MiB |\n",
-      "|    0   N/A  N/A   1245502      C   .../envs/cudf_dev/bin/python      753MiB |\n",
-      "|    0   N/A  N/A   1245751      C   .../envs/cudf_dev/bin/python      257MiB |\n",
-      "|    1   N/A  N/A      2292      G   /usr/lib/xorg/Xorg                  4MiB |\n",
-      "|    1   N/A  N/A   1245748      C   .../envs/cudf_dev/bin/python      257MiB |\n",
-      "+-----------------------------------------------------------------------------+\n"
+      "Thu Apr 21 13:26:08 2022       \r\n",
+      "+-----------------------------------------------------------------------------+\r\n",
+      "| NVIDIA-SMI 495.29.05    Driver Version: 495.29.05    CUDA Version: 11.5     |\r\n",
+      "|-------------------------------+----------------------+----------------------+\r\n",
+      "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\r\n",
+      "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\r\n",
+      "|                               |                      |               MIG M. |\r\n",
+      "|===============================+======================+======================|\r\n",
+      "|   0  Quadro GV100        Off  | 00000000:15:00.0 Off |                  Off |\r\n",
+      "| 39%   52C    P2    52W / 250W |   1115MiB / 32508MiB |      3%      Default |\r\n",
+      "|                               |                      |                  N/A |\r\n",
+      "+-------------------------------+----------------------+----------------------+\r\n",
+      "|   1  Quadro GV100        Off  | 00000000:2D:00.0 Off |                  Off |\r\n",
+      "| 43%   57C    P2    51W / 250W |    306MiB / 32498MiB |      0%      Default |\r\n",
+      "|                               |                      |                  N/A |\r\n",
+      "+-------------------------------+----------------------+----------------------+\r\n",
+      "                                                                               \r\n",
+      "+-----------------------------------------------------------------------------+\r\n",
+      "| Processes:                                                                  |\r\n",
+      "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\r\n",
+      "|        ID   ID                                                   Usage      |\r\n",
+      "|=============================================================================|\r\n",
+      "+-----------------------------------------------------------------------------+\r\n"
      ]
     }
    ],
@@ -6563,6 +6705,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "ff9e14b6",
    "metadata": {},
    "source": [
     "Because we forced computation, we now have a larger object in distributed GPU memory."
@@ -6570,6 +6713,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "bb3b3dee",
    "metadata": {},
    "source": [
     "### Wait\n",
@@ -6580,7 +6724,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 87,
+   "execution_count": 86,
+   "id": "ef71bf00",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -6598,6 +6743,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "e1099ec0",
    "metadata": {},
    "source": [
     "This function will do a basic transformation of every column in the dataframe, but the time spent in the function will vary due to the `time.sleep` statement randomly adding 1-60 seconds of time. We'll run this on every partition of our dataframe using `map_partitions`, which adds the task to our task-graph, and store the result. We can then call `persist` to force execution."
@@ -6605,7 +6751,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 88,
+   "execution_count": 87,
+   "id": "700dd799",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -6615,6 +6762,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "5eb83a7e",
    "metadata": {},
    "source": [
     "However, some partitions will be done **much** sooner than others. If we had downstream processes that should wait for all partitions to be completed, we can enforce that behavior using `wait`."
@@ -6622,16 +6770,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 89,
+   "execution_count": 88,
+   "id": "73bccf94",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "DoneAndNotDoneFutures(done={<Future: finished, type: cudf.core.dataframe.DataFrame, key: ('func-cec36d97aab9d38423f8023d1b43b6d3', 0)>, <Future: finished, type: cudf.core.dataframe.DataFrame, key: ('func-cec36d97aab9d38423f8023d1b43b6d3', 2)>, <Future: finished, type: cudf.core.dataframe.DataFrame, key: ('func-cec36d97aab9d38423f8023d1b43b6d3', 3)>, <Future: finished, type: cudf.core.dataframe.DataFrame, key: ('func-cec36d97aab9d38423f8023d1b43b6d3', 1)>, <Future: finished, type: cudf.core.dataframe.DataFrame, key: ('func-cec36d97aab9d38423f8023d1b43b6d3', 4)>}, not_done=set())"
+       "DoneAndNotDoneFutures(done={<Future: finished, type: cudf.core.dataframe.DataFrame, key: ('func-c8623f55c898739bdfb89533682776dc', 0)>, <Future: finished, type: cudf.core.dataframe.DataFrame, key: ('func-c8623f55c898739bdfb89533682776dc', 3)>, <Future: finished, type: cudf.core.dataframe.DataFrame, key: ('func-c8623f55c898739bdfb89533682776dc', 1)>, <Future: finished, type: cudf.core.dataframe.DataFrame, key: ('func-c8623f55c898739bdfb89533682776dc', 2)>, <Future: finished, type: cudf.core.dataframe.DataFrame, key: ('func-c8623f55c898739bdfb89533682776dc', 4)>}, not_done=set())"
       ]
      },
-     "execution_count": 89,
+     "execution_count": 88,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -6642,21 +6791,22 @@
   },
   {
    "cell_type": "markdown",
+   "id": "447301f5",
    "metadata": {},
    "source": [
-    "## With `wait`, we can safely proceed on in our workflow."
+    "With `wait`, we can safely proceed on in our workflow."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "7e06fcf4",
    "metadata": {},
    "outputs": [],
    "source": []
   }
  ],
  "metadata": {
-  "anaconda-cloud": {},
   "kernelspec": {
    "display_name": "Python 3 (ipykernel)",
    "language": "python",
@@ -6673,21 +6823,8 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.8.13"
-  },
-  "toc": {
-   "base_numbering": 1,
-   "nav_menu": {},
-   "number_sections": true,
-   "sideBar": true,
-   "skip_h1_title": false,
-   "title_cell": "Table of Contents",
-   "title_sidebar": "Contents",
-   "toc_cell": false,
-   "toc_position": {},
-   "toc_section_display": true,
-   "toc_window_display": false
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/docs/cudf/source/user_guide/PandasCompat.md b/docs/cudf/source/user_guide/PandasCompat.md
new file mode 100644
index 00000000000..a33a354e2f8
--- /dev/null
+++ b/docs/cudf/source/user_guide/PandasCompat.md
@@ -0,0 +1,5 @@
+# Pandas Compatibility Notes
+
+```{eval-rst}
+.. pandas-compat-list::
+```
diff --git a/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb b/docs/cudf/source/user_guide/cupy-interop.ipynb
similarity index 87%
rename from docs/cudf/source/user_guide/10min-cudf-cupy.ipynb
rename to docs/cudf/source/user_guide/cupy-interop.ipynb
index 35ca21f380e..9fbac3b2578 100644
--- a/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb
+++ b/docs/cudf/source/user_guide/cupy-interop.ipynb
@@ -2,9 +2,10 @@
  "cells": [
   {
    "cell_type": "markdown",
+   "id": "8e5e6878",
    "metadata": {},
    "source": [
-    "# 10 Minutes to cuDF and CuPy\n",
+    "# Interoperability between cuDF and CuPy\n",
     "\n",
     "This notebook provides introductory examples of how you can use cuDF and CuPy together to take advantage of CuPy array functionality (such as advanced linear algebra operations)."
    ]
@@ -12,6 +13,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
+   "id": "8b2d45c3",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -29,6 +31,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "e7e64b1a",
    "metadata": {},
    "source": [
     "### Converting a cuDF DataFrame to a CuPy Array\n",
@@ -45,15 +48,16 @@
   {
    "cell_type": "code",
    "execution_count": 2,
+   "id": "45c482ab",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "183 µs ± 1.15 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
-      "553 µs ± 6.25 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
-      "546 µs ± 2.25 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
+      "118 µs ± 77.2 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n",
+      "360 µs ± 6.04 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n",
+      "355 µs ± 722 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
      ]
     }
    ],
@@ -72,6 +76,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
+   "id": "a565effc",
    "metadata": {},
    "outputs": [
     {
@@ -98,6 +103,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0759ab29",
    "metadata": {},
    "source": [
     "### Converting a cuDF Series to a CuPy Array"
@@ -105,27 +111,29 @@
   },
   {
    "cell_type": "markdown",
+   "id": "4f35ffbd",
    "metadata": {},
    "source": [
     "There are also multiple ways to convert a cuDF Series to a CuPy array:\n",
     "\n",
     "1. We can pass the Series to `cupy.asarray` as cuDF Series exposes [`__cuda_array_interface__`](https://docs-cupy.chainer.org/en/stable/reference/interoperability.html).\n",
     "2. We can leverage the dlpack interface `to_dlpack()`. \n",
-    "3. We can also use `Series.values` \n"
+    "3. We can also use `Series.values`"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 4,
+   "id": "8f97f304",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "76.8 µs ± 636 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
-      "198 µs ± 2.72 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
-      "181 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
+      "54.4 µs ± 66 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n",
+      "125 µs ± 1.21 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n",
+      "119 µs ± 805 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
      ]
     }
    ],
@@ -140,6 +148,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
+   "id": "f96d5676",
    "metadata": {},
    "outputs": [
     {
@@ -160,6 +169,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c36e5b88",
    "metadata": {},
    "source": [
     "From here, we can proceed with normal CuPy workflows, such as reshaping the array, getting the diagonal, or calculating the norm."
@@ -168,6 +178,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
+   "id": "2a7ae43f",
    "metadata": {},
    "outputs": [
     {
@@ -195,6 +206,7 @@
   {
    "cell_type": "code",
    "execution_count": 7,
+   "id": "b442a30c",
    "metadata": {},
    "outputs": [
     {
@@ -219,6 +231,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
+   "id": "be7f4d32",
    "metadata": {},
    "outputs": [
     {
@@ -238,6 +251,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "b353bded",
    "metadata": {},
    "source": [
     "### Converting a CuPy Array to a cuDF DataFrame\n",
@@ -256,13 +270,14 @@
   {
    "cell_type": "code",
    "execution_count": 9,
+   "id": "8887b253",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "23.9 ms ± 119 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+      "14.3 ms ± 33.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
      ]
     }
    ],
@@ -273,6 +288,7 @@
   {
    "cell_type": "code",
    "execution_count": 10,
+   "id": "08ec4ffa",
    "metadata": {},
    "outputs": [
     {
@@ -475,6 +491,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "6804d291",
    "metadata": {},
    "source": [
     "We can check whether our array is Fortran contiguous by using cupy.isfortran or looking at the [flags](https://docs-cupy.chainer.org/en/stable/reference/generated/cupy.ndarray.html#cupy.ndarray.flags) of the array."
@@ -483,6 +500,7 @@
   {
    "cell_type": "code",
    "execution_count": 11,
+   "id": "65b8bd0d",
    "metadata": {},
    "outputs": [
     {
@@ -502,6 +520,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "151982ad",
    "metadata": {},
    "source": [
     "In this case, we'll need to convert it before going to a cuDF DataFrame. In the next two cells, we create the DataFrame by leveraging dlpack and the CUDA array interface, respectively."
@@ -510,13 +529,14 @@
   {
    "cell_type": "code",
    "execution_count": 12,
+   "id": "27b2f563",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "9.15 ms ± 131 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
+      "6.57 ms ± 9.08 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
      ]
     }
    ],
@@ -530,13 +550,14 @@
   {
    "cell_type": "code",
    "execution_count": 13,
+   "id": "0a0cc290",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "5.74 ms ± 29.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+      "4.48 ms ± 7.89 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
      ]
     }
    ],
@@ -550,6 +571,7 @@
   {
    "cell_type": "code",
    "execution_count": 14,
+   "id": "0d2c5beb",
    "metadata": {},
    "outputs": [
     {
@@ -753,6 +775,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "395e2bba",
    "metadata": {},
    "source": [
     "### Converting a CuPy Array to a cuDF Series\n",
@@ -763,6 +786,7 @@
   {
    "cell_type": "code",
    "execution_count": 15,
+   "id": "d8518208",
    "metadata": {},
    "outputs": [
     {
@@ -787,6 +811,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7e159619",
    "metadata": {},
    "source": [
     "### Interweaving CuDF and CuPy for Smooth PyData Workflows\n",
@@ -799,6 +824,7 @@
   {
    "cell_type": "code",
    "execution_count": 16,
+   "id": "2bb8ed81",
    "metadata": {},
    "outputs": [
     {
@@ -1000,6 +1026,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "2f3d4e78",
    "metadata": {},
    "source": [
     "We can just transform it into a CuPy array and use the `axis` argument of `sum`."
@@ -1008,6 +1035,7 @@
   {
    "cell_type": "code",
    "execution_count": 17,
+   "id": "2dde030d",
    "metadata": {},
    "outputs": [
     {
@@ -1035,6 +1063,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "4450dcc3",
    "metadata": {},
    "source": [
     "With just that single line, we're able to seamlessly move between data structures in this ecosystem, giving us enormous flexibility without sacrificing speed."
@@ -1042,6 +1071,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "61bfb868",
    "metadata": {},
    "source": [
     "### Converting a cuDF DataFrame to a CuPy Sparse Matrix\n",
@@ -1054,6 +1084,7 @@
   {
    "cell_type": "code",
    "execution_count": 18,
+   "id": "e531fd15",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1072,6 +1103,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "3f5e6ade",
    "metadata": {},
    "source": [
     "We can define a sparsely populated DataFrame to illustrate this conversion to either sparse matrix format."
@@ -1080,6 +1112,7 @@
   {
    "cell_type": "code",
    "execution_count": 19,
+   "id": "58c7e074",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1095,6 +1128,7 @@
   {
    "cell_type": "code",
    "execution_count": 20,
+   "id": "9265228d",
    "metadata": {},
    "outputs": [
     {
@@ -1143,115 +1177,115 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>9.37476</td>\n",
-       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.000000</td>\n",
-       "      <td>6.237859</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
        "      <td>0.00000</td>\n",
+       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>11.308953</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
        "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.065878</td>\n",
+       "      <td>-5.241297</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>17.58476</td>\n",
+       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>12.35705</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>3.232751</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>8.341915</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
        "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
        "      <td>0.00000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>3.110362</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
        "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
        "      <td>0.00000</td>\n",
+       "      <td>10.869279</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>7.743024</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.00000</td>\n",
-       "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>5.987098</td>\n",
-       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2.526274</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.000000</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
        "      <td>0.00000</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.0</td>\n",
        "      <td>0.000000</td>\n",
@@ -1261,19 +1295,19 @@
        "</div>"
       ],
       "text/plain": [
-       "         a0   a1   a2        a3   a4       a5        a6   a7   a8        a9  \\\n",
-       "0  0.000000  0.0  0.0  0.000000  0.0  9.37476  0.000000  0.0  0.0  0.000000   \n",
-       "1  0.000000  0.0  0.0  0.000000  0.0  0.00000  0.000000  0.0  0.0  0.000000   \n",
-       "2  3.232751  0.0  0.0  0.000000  0.0  0.00000  8.341915  0.0  0.0  0.000000   \n",
-       "3  0.000000  0.0  0.0  0.000000  0.0  0.00000  0.000000  0.0  0.0  0.000000   \n",
-       "4  0.000000  0.0  0.0  7.743024  0.0  0.00000  0.000000  0.0  0.0  5.987098   \n",
+       "    a0   a1   a2   a3   a4   a5        a6   a7   a8        a9  a10  a11  a12  \\\n",
+       "0  0.0  0.0  0.0  0.0  0.0  0.0  0.000000  0.0  0.0  0.000000  0.0  0.0  0.0   \n",
+       "1  0.0  0.0  0.0  0.0  0.0  0.0  0.000000  0.0  0.0 -5.241297  0.0  0.0  0.0   \n",
+       "2  0.0  0.0  0.0  0.0  0.0  0.0  0.000000  0.0  0.0  0.000000  0.0  0.0  0.0   \n",
+       "3  0.0  0.0  0.0  0.0  0.0  0.0  0.000000  0.0  0.0  0.000000  0.0  0.0  0.0   \n",
+       "4  0.0  0.0  0.0  0.0  0.0  0.0  2.526274  0.0  0.0  0.000000  0.0  0.0  0.0   \n",
        "\n",
-       "        a10  a11  a12       a13  a14  a15       a16  a17  a18       a19  \n",
-       "0  6.237859  0.0  0.0  0.000000  0.0  0.0   0.00000  0.0  0.0  0.000000  \n",
-       "1  0.000000  0.0  0.0  0.065878  0.0  0.0  12.35705  0.0  0.0  0.000000  \n",
-       "2  0.000000  0.0  0.0  0.000000  0.0  0.0   0.00000  0.0  0.0  3.110362  \n",
-       "3  0.000000  0.0  0.0  0.000000  0.0  0.0   0.00000  0.0  0.0  0.000000  \n",
-       "4  0.000000  0.0  0.0  0.000000  0.0  0.0   0.00000  0.0  0.0  0.000000  "
+       "        a13        a14  a15  a16  a17  a18        a19  \n",
+       "0   0.00000   0.000000  0.0  0.0  0.0  0.0  11.308953  \n",
+       "1  17.58476   0.000000  0.0  0.0  0.0  0.0   0.000000  \n",
+       "2   0.00000   0.000000  0.0  0.0  0.0  0.0   0.000000  \n",
+       "3   0.00000  10.869279  0.0  0.0  0.0  0.0   0.000000  \n",
+       "4   0.00000   0.000000  0.0  0.0  0.0  0.0   0.000000  "
       ]
      },
      "execution_count": 20,
@@ -1288,63 +1322,64 @@
   {
    "cell_type": "code",
    "execution_count": 21,
+   "id": "5ba1a551",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "  (2, 0)\t3.2327506467190874\n",
-      "  (259, 0)\t10.723428115951062\n",
-      "  (643, 0)\t0.47763624588488707\n",
-      "  (899, 0)\t8.857065309921685\n",
-      "  (516, 0)\t8.792407143276648\n",
-      "  (262, 0)\t2.1900894573805396\n",
-      "  (390, 0)\t5.007630701229646\n",
-      "  (646, 0)\t6.630703075588639\n",
-      "  (392, 0)\t5.573713453854357\n",
-      "  (776, 0)\t10.501281989515688\n",
-      "  (904, 0)\t8.261890175181366\n",
-      "  (1033, 0)\t-0.41106824704220446\n",
-      "  (522, 0)\t12.619952511457068\n",
-      "  (139, 0)\t12.753348070606792\n",
-      "  (141, 0)\t4.936902335394504\n",
-      "  (270, 0)\t-1.7695949916946174\n",
-      "  (782, 0)\t4.378746787324408\n",
-      "  (15, 0)\t8.554141682891935\n",
-      "  (527, 0)\t5.1994882136423\n",
-      "  (912, 0)\t2.6101212854793125\n",
-      "  (401, 0)\t5.614628764689268\n",
-      "  (403, 0)\t9.999468341523317\n",
-      "  (787, 0)\t7.6170790481600985\n",
-      "  (404, 0)\t5.105328903336744\n",
-      "  (916, 0)\t1.395526391114967\n",
+      "  (770, 0)\t-1.373354548007899\n",
+      "  (771, 0)\t11.641890592020793\n",
+      "  (644, 0)\t-1.4820515981598015\n",
+      "  (773, 0)\t4.374245789758399\n",
+      "  (646, 0)\t4.58071340724814\n",
+      "  (776, 0)\t5.115792716318899\n",
+      "  (649, 0)\t8.676941295251092\n",
+      "  (522, 0)\t-0.11573951593420229\n",
+      "  (396, 0)\t8.124303607236273\n",
+      "  (652, 0)\t9.359339954077681\n",
+      "  (141, 0)\t8.50710863345112\n",
+      "  (272, 0)\t7.440244879175392\n",
+      "  (1042, 0)\t4.286859524587998\n",
+      "  (275, 0)\t-0.6091666840632348\n",
+      "  (787, 0)\t10.124449357828695\n",
+      "  (915, 0)\t11.391560911074649\n",
+      "  (1043, 0)\t11.478396096078907\n",
+      "  (408, 0)\t11.204049991287349\n",
+      "  (536, 0)\t13.239689100708974\n",
+      "  (26, 0)\t4.951917355877771\n",
+      "  (794, 0)\t2.736556006961319\n",
+      "  (539, 0)\t12.553519350929216\n",
+      "  (412, 0)\t2.8682583361020786\n",
+      "  (540, 0)\t-1.2121388231076713\n",
+      "  (796, 0)\t6.986443354019786\n",
       "  :\t:\n",
-      "  (9328, 19)\t5.938629381103238\n",
-      "  (9457, 19)\t4.463547879031807\n",
-      "  (9458, 19)\t-0.8034946631917106\n",
-      "  (8051, 19)\t-1.904327616912268\n",
-      "  (8819, 19)\t8.314944347687199\n",
-      "  (7543, 19)\t1.4303204025224376\n",
-      "  (8824, 19)\t5.1559713157589\n",
-      "  (7673, 19)\t7.478681299798863\n",
-      "  (7802, 19)\t0.502526238006068\n",
-      "  (8186, 19)\t-3.824944685072472\n",
-      "  (8570, 19)\t8.442324394481236\n",
-      "  (8571, 19)\t6.204199957873215\n",
-      "  (7420, 19)\t0.297737356585836\n",
-      "  (9212, 19)\t3.934797966994188\n",
-      "  (7421, 19)\t14.26161925450462\n",
-      "  (8574, 19)\t5.826108027573207\n",
-      "  (9214, 19)\t7.209975861932724\n",
-      "  (9825, 19)\t11.155342644729613\n",
-      "  (9702, 19)\t3.55144040779287\n",
-      "  (9578, 19)\t12.638681362546228\n",
-      "  (9712, 19)\t2.3542852760656348\n",
-      "  (9969, 19)\t-2.645175092587592\n",
-      "  (9973, 19)\t-2.2666402312025213\n",
-      "  (9851, 19)\t-4.293381721466055\n",
-      "  (9596, 19)\t6.6580506888430415\n"
+      "  (9087, 19)\t-2.9543770156500395\n",
+      "  (9440, 19)\t3.903613949374532\n",
+      "  (9186, 19)\t0.3141028170017329\n",
+      "  (9571, 19)\t1.7347840594688502\n",
+      "  (9188, 19)\t14.68745562157488\n",
+      "  (9316, 19)\t13.808308442016436\n",
+      "  (9957, 19)\t9.705810918221086\n",
+      "  (9318, 19)\t9.984168186940485\n",
+      "  (9446, 19)\t5.173000114288142\n",
+      "  (9830, 19)\t3.2442816093793607\n",
+      "  (9835, 19)\t5.713078257113576\n",
+      "  (9580, 19)\t5.373437384911853\n",
+      "  (9326, 19)\t10.736403419943093\n",
+      "  (9711, 19)\t-4.003216472911014\n",
+      "  (9200, 19)\t5.560182026578174\n",
+      "  (9844, 19)\t6.17251145210342\n",
+      "  (9333, 19)\t7.085353006324948\n",
+      "  (9208, 19)\t6.789030498520347\n",
+      "  (9464, 19)\t4.314887636528589\n",
+      "  (9720, 19)\t12.446300974563027\n",
+      "  (9594, 19)\t4.317523130615451\n",
+      "  (9722, 19)\t-2.3257161477576336\n",
+      "  (9723, 19)\t1.9288133227037407\n",
+      "  (9469, 19)\t0.268312217498608\n",
+      "  (9599, 19)\t4.100996763787237\n"
      ]
     }
    ],
@@ -1355,6 +1390,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "e8e58cd5",
    "metadata": {},
    "source": [
     "From here, we could continue our workflow with a CuPy sparse matrix.\n",
@@ -1379,9 +1415,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.8.13"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/docs/cudf/source/user_guide/dask-cudf.md b/docs/cudf/source/user_guide/dask-cudf.md
new file mode 100644
index 00000000000..0c0b37f641c
--- /dev/null
+++ b/docs/cudf/source/user_guide/dask-cudf.md
@@ -0,0 +1,104 @@
+# Multi-GPU with Dask-cuDF
+
+cuDF is a single-GPU library. For Multi-GPU cuDF solutions we use
+[Dask](https://dask.org/) and the [dask-cudf
+package](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf),
+which is able to scale cuDF across multiple GPUs on a single machine,
+or multiple GPUs across many machines in a cluster.
+
+[Dask DataFrame](http://docs.dask.org/en/latest/dataframe.html) was
+originally designed to scale Pandas, orchestrating many Pandas
+DataFrames spread across many CPUs into a cohesive parallel DataFrame.
+Because cuDF currently implements only a subset of the Pandas API, not
+all Dask DataFrame operations work with cuDF.
+
+The following is tested and expected to work:
+
+## What works
+
+- Data ingestion
+
+  - `dask_cudf.read_csv`
+  - Use standard Dask ingestion with Pandas, then convert to cuDF (For
+    Parquet and other formats this is often decently fast)
+
+- Linear operations
+
+  - Element-wise operations: `df.x + df.y`, `df ** 2`
+  - Assignment: `df['z'] = df.x + df.y`
+  - Row-wise selections: `df[df.x > 0]`
+  - Loc: `df.loc['2001-01-01': '2005-02-02']`
+  - Date time/string accessors: `df.timestamp.dt.dayofweek`
+  - ... and most similar operations in this category that are already
+    implemented in cuDF
+
+- Reductions
+
+  - Common reductions such as `sum`, `mean`, `max`, and `count` on
+    `Series` objects
+  - Support for reductions on full dataframes
+  - `std`
+  - Custom reductions with
+    [dask.dataframe.reduction](https://docs.dask.org/en/latest/generated/dask.dataframe.Series.reduction.html)
+
+- Groupby aggregations
+
+  - On single columns: `df.groupby('x').y.max()`
+  - With custom aggregations:
+  - groupby standard deviation
+  - grouping on multiple columns
+  - groupby agg for multiple outputs
+
+- Joins:
+
+  - On full unsorted columns: `left.merge(right, on='id')`
+    (expensive)
+  - On sorted indexes:
+    `left.merge(right, left_index=True, right_index=True)` (fast)
+  - On large and small dataframes: `left.merge(cudf_df, on='id')`
+    (fast)
+
+- Rolling operations
+
+- Converting to and from other forms
+
+  - Dask + Pandas to Dask + cuDF
+    `df.map_partitions(cudf.from_pandas)`
+  - Dask + cuDF to Dask + Pandas
+    `df.map_partitions(lambda df: df.to_pandas())`
+  - cuDF to Dask + cuDF:
+    `dask.dataframe.from_pandas(df, npartitions=20)`
+  - Dask + cuDF to cuDF: `df.compute()`
+
+Additionally, all generic Dask operations, like `compute`, `persist`,
+`visualize`, and so on, work regardless.
+
+## Developing the API
+
+Above we mention the following:
+
+> and most similar operations in this category that are already
+> implemented in cuDF
+
+This is because it is difficult to create a comprehensive list of
+operations in the cuDF and Pandas libraries. The API is large enough to
+be difficult to track effectively. For any operation that operates
+row-wise, like `fillna` or `query`, things will likely, but not
+certainly, work. If operations don't work it is often due to a slight
+inconsistency between Pandas and cuDF that is generally easy to fix. We
+encourage users to look at the [cuDF issue
+tracker](https://github.com/rapidsai/cudf/issues) to see if their
+issue has already been reported and, if not, [raise a new
+issue](https://github.com/rapidsai/cudf/issues/new).
+
+## Navigating the API
+
+This project reuses the [Dask
+DataFrame](https://docs.dask.org/en/latest/dataframe.html) project,
+which was originally designed for Pandas, with the newer library cuDF.
+Because we use the same Dask classes for both projects there are often
+methods that are implemented for Pandas, but not yet for cuDF. As a
+result, the full Dask DataFrame API can be misleading to users, and
+often leads to frustration when operations that are advertised in the
+Dask API do not work as expected with cuDF. We apologize for this in
+advance.
diff --git a/docs/cudf/source/user_guide/data-types.md b/docs/cudf/source/user_guide/data-types.md
new file mode 100644
index 00000000000..8963f87d52e
--- /dev/null
+++ b/docs/cudf/source/user_guide/data-types.md
@@ -0,0 +1,153 @@
+# Supported Data Types
+
+cuDF supports many data types supported by NumPy and Pandas, including
+numeric, datetime, timedelta, categorical and string data types. We
+also provide special data types for working with decimals, list-like,
+and dictionary-like data.
+
+All data types in cuDF are [nullable](missing-data).
+
+<div class="special-table">
+
+| Kind of data         | Data type(s)                                                                    |
+|----------------------|---------------------------------------------------------------------------------|
+| Signed integer       | `'int8'`, `'int16'`, `'int32'`, `'int64'`                                       |
+| Unsigned integer     | `'uint32'`, `'uint64'`                                                          |
+| Floating-point       | `'float32'`, `'float64'`                                                        |
+| Datetime             | `'datetime64[s]'`, `'datetime64[ms]'`, `'datetime64[us]'`, `'datetime64[ns]'`   |
+| Timedelta (duration) | `'timedelta64[s]'`, `'timedelta64[ms]'`, `'timedelta64[us]'`, `'timedelta64[ns]'` |
+| Category             | `cudf.CategoricalDtype`                                                         |
+| String               | `'object'` or `'string'`                                                        |
+| Decimal              | `cudf.Decimal32Dtype`, `cudf.Decimal64Dtype`, `cudf.Decimal128Dtype`            |
+| List                 | `cudf.ListDtype`                                                                |
+| Struct               | `cudf.StructDtype`                                                              |
+
+</div>
+
+## NumPy data types
+
+We use NumPy data types for integer, floating, datetime, timedelta,
+and string data types.  Thus, just like in NumPy,
+`np.dtype("float32")`, `np.float32`, and `"float32"` are all acceptable
+ways to specify the `float32` data type:
+
+```python
+>>> import cudf
+>>> s = cudf.Series([1, 2, 3], dtype="float32")
+>>> s
+0    1.0
+1    2.0
+2    3.0
+dtype: float32
+```
+
+## A note on `object`
+
+The data type associated with string data in cuDF is `"object"`.
+
+```python
+>>> import cudf 
+>>> s = cudf.Series(["abc", "def", "ghi"])
+>>> s.dtype
+dtype("object")
+```
+
+This is for compatibility with Pandas, but it can be misleading. In
+both NumPy and Pandas, `"object"` is the data type associated with data
+composed of arbitrary Python objects (not just strings).  However,
+cuDF does not support storing arbitrary Python objects.
+
+## Decimal data types
+
+We provide special data types for working with decimal data, namely
+`Decimal32Dtype`, `Decimal64Dtype`, and `Decimal128Dtype`.  Use these
+data types when you need to store values with greater precision than
+allowed by floating-point representation.
+
+Decimal data types in cuDF are based on fixed-point representation.  A
+decimal data type is composed of a _precision_ and a _scale_.  The
+precision represents the total number of digits in each value of this
+dtype. For example, the precision associated with the decimal value
+`1.023` is `4`. The scale is the total number of digits to the right
+of the decimal point. The scale associated with the value `1.023` is
+`3`.
+
+Each decimal data type is associated with a maximum precision:
+
+```python
+>>> cudf.Decimal32Dtype.MAX_PRECISION
+9.0
+>>> cudf.Decimal64Dtype.MAX_PRECISION
+18.0
+>>> cudf.Decimal128Dtype.MAX_PRECISION
+38
+```
+
+One way to create a decimal Series is from values of type [decimal.Decimal][python-decimal].
+
+```python
+>>> from decimal import Decimal
+>>> s = cudf.Series([Decimal("1.01"), Decimal("4.23"), Decimal("0.5")])
+>>> s
+0    1.01
+1    4.23
+2    0.50
+dtype: decimal128
+>>> s.dtype
+Decimal128Dtype(precision=3, scale=2)
+```
+
+Notice the data type of the result: `1.01`, `4.23`, `0.50` can all be
+represented with a precision of at least 3 and a scale of at least 2.
+
+However, the value `1.234` needs a precision of at least 4, and a
+scale of at least 3, and cannot be fully represented using this data
+type:
+
+```python
+>>> s[1] = Decimal("1.234")  # raises an error
+```
+
+## Nested data types (`List` and `Struct`)
+
+`ListDtype` and `StructDtype` are special data types in cuDF for
+working with list-like and dictionary-like data. These are referred to
+as "nested" data types, because they enable you to store a list of
+lists, or a struct of lists, or a struct of lists of lists, etc.
+
+You can create list and struct Series from existing Pandas Series of
+lists and dictionaries respectively:
+
+```python
+>>> psr = pd.Series([{'a': 1, 'b': 2}, {'a': 3, 'b': 4}])
+>>> psr
+0 {'a': 1, 'b': 2}
+1 {'a': 3, 'b': 4}
+dtype: object
+>>> gsr = cudf.from_pandas(psr)
+>>> gsr
+0 {'a': 1, 'b': 2}
+1 {'a': 3, 'b': 4}
+dtype: struct
+>>> gsr.dtype
+StructDtype({'a': dtype('int64'), 'b': dtype('int64')})
+```
+
+Or by reading them from disk, using a [file format that supports
+nested data](io).
+
+```python
+>>> pdf = pd.DataFrame({"a": [[1, 2], [3, 4, 5], [6, 7, 8]]})
+>>> pdf.to_parquet("lists.pq")
+>>> gdf = cudf.read_parquet("lists.pq")
+>>> gdf
+           a
+0     [1, 2]
+1  [3, 4, 5]
+2  [6, 7, 8]
+>>> gdf["a"].dtype
+ListDtype(int64)
+```
+
+[numpy-dtype]: https://numpy.org/doc/stable/reference/arrays.dtypes.html#arrays-dtypes
+[python-decimal]: https://docs.python.org/3/library/decimal.html#decimal.Decimal
diff --git a/docs/cudf/source/user_guide/groupby.md b/docs/cudf/source/user_guide/groupby.md
new file mode 100644
index 00000000000..66b548727e1
--- /dev/null
+++ b/docs/cudf/source/user_guide/groupby.md
@@ -0,0 +1,273 @@
+---
+substitutions:
+  describe: '`describe`'
+---
+
+(basics-groupby)=
+
+# GroupBy
+
+cuDF supports a small (but important) subset of Pandas' [groupby
+API](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html).
+
+## Summary of supported operations
+
+1. Grouping by one or more columns
+2. Basic aggregations such as "sum", "mean", etc.
+3. Quantile aggregation
+4. A "collect" or `list` aggregation for collecting values in a group
+   into lists
+5. Automatic exclusion of columns with unsupported dtypes ("nuisance"
+   columns) when aggregating
+6. Iterating over the groups of a GroupBy object
+7. `GroupBy.groups` API that returns a mapping of group keys to row
+   labels
+8. `GroupBy.apply` API for performing arbitrary operations on each
+   group. Note that this has very limited functionality compared to the
+   equivalent Pandas function. See the section on
+   [apply](#groupby-apply) for more details.
+9. `GroupBy.pipe` similar to
+   [Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls).
+
+## Grouping
+
+A GroupBy object is created by grouping the values of a `Series` or
+`DataFrame` by one or more columns:
+
+```python
+>>> import cudf
+>>> df = cudf.DataFrame({'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]})
+>>> df
+   a  b  c
+0  1  1  1
+1  1  1  2
+2  1  2  3
+3  2  2  4
+4  2  3  5
+>>> gb1 = df.groupby('a')  # grouping by a single column
+>>> gb2 = df.groupby(['a', 'b'])  # grouping by multiple columns
+>>> gb3 = df.groupby(cudf.Series(['a', 'a', 'b', 'b', 'b']))  # grouping by an external column
+```
+
+````{warning}
+Unlike Pandas, cuDF uses `sort=False` by default to achieve better
+performance, which does not guarantee any particular group order in
+the result.
+
+For example:
+
+```python
+>>> df = cudf.DataFrame({'a' : [2, 2, 1], 'b' : [42, 21, 11]})
+>>> df.groupby('a').sum()
+   b
+a
+2  63
+1  11
+>>> df.to_pandas().groupby('a').sum()
+   b
+a
+1  11
+2  63
+```
+
+Setting `sort=True` will produce Pandas-like output, but with some performance penalty:
+
+```python
+>>> df.groupby('a', sort=True).sum()
+   b
+a
+1  11
+2  63
+```
+````
+
+### Grouping by index levels
+
+You can also group by one or more levels of a MultiIndex:
+
+```python
+>>> df = cudf.DataFrame(
+...     {'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]}
+... ).set_index(['a', 'b'])
+...
+>>> df.groupby(level='a')
+```
+
+### The `Grouper` object
+
+A `Grouper` can be used to disambiguate between columns and levels
+when they have the same name:
+
+```python
+>>> df
+   b  c
+b
+1  1  1
+1  1  2
+1  2  3
+2  2  4
+2  3  5
+>>> df.groupby('b', level='b')  # ValueError: Cannot specify both by and level
+>>> df.groupby([cudf.Grouper(key='b'), cudf.Grouper(level='b')])  # OK
+```
+
+## Aggregation
+
+Aggregations on groups are supported via the `agg` method:
+
+```python
+>>> df
+   a  b  c
+0  1  1  1
+1  1  1  2
+2  1  2  3
+3  2  2  4
+4  2  3  5
+>>> df.groupby('a').agg('sum')
+   b  c
+a
+1  4  6
+2  5  9
+>>> df.groupby('a').agg({'b': ['sum', 'min'], 'c': 'mean'})
+    b        c
+  sum min mean
+a
+1   4   1  2.0
+2   5   2  4.5
+>>> df.groupby("a").corr(method="pearson")
+          b          c
+a
+1 b  1.000000  0.866025
+  c  0.866025  1.000000
+2 b  1.000000  1.000000
+  c  1.000000  1.000000
+```
+
+The following table summarizes the available aggregations and the types
+that support them:
+
+```{eval-rst}
+.. table::
+    :class: special-table
+
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | Aggregations / dtypes              | Numeric   | Datetime   | String   | Categorical   | List   | Struct   | Interval   | Decimal   |
+    +====================================+===========+============+==========+===============+========+==========+============+===========+
+    | count                              | ✅        | ✅         | ✅       | ✅            |        |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | size                               | ✅        | ✅         | ✅       | ✅            |        |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | sum                                | ✅        | ✅         |          |               |        |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | idxmin                             | ✅        | ✅         |          |               |        |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | idxmax                             | ✅        | ✅         |          |               |        |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | min                                | ✅        | ✅         | ✅       |               |        |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | max                                | ✅        | ✅         | ✅       |               |        |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | mean                               | ✅        | ✅         |          |               |        |          |            |           |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | var                                | ✅        | ✅         |          |               |        |          |            |           |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | std                                | ✅        | ✅         |          |               |        |          |            |           |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | quantile                           | ✅        | ✅         |          |               |        |          |            |           |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | median                             | ✅        | ✅         |          |               |        |          |            |           |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | nunique                            | ✅        | ✅         | ✅       | ✅            |        |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | nth                                | ✅        | ✅         | ✅       |               |        |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | collect                            | ✅        | ✅         | ✅       |               | ✅     |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | unique                             | ✅        | ✅         | ✅       | ✅            |        |          |            |           |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | corr                               | ✅        |            |          |               |        |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+    | cov                                | ✅        |            |          |               |        |          |            | ✅        |
+    +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+```
+
+## GroupBy apply
+
+To apply a function to each group, use the `GroupBy.apply()` method:
+
+```python
+>>> df
+   a  b  c
+0  1  1  1
+1  1  1  2
+2  1  2  3
+3  2  2  4
+4  2  3  5
+>>> df.groupby('a').apply(lambda x: x.max() - x.min())
+   a  b  c
+a
+0  0  1  2
+1  0  1  1
+```
+
+### Limitations
+
+- `apply` works by applying the provided function to each group
+  sequentially, and concatenating the results together. **This can be
+  very slow**, especially for a large number of small groups. For a
+  small number of large groups, it can give acceptable performance.
+- The results may not always match Pandas exactly. For example, cuDF
+  may return a `DataFrame` containing a single column where Pandas
+  returns a `Series`. Some post-processing may be required to match
+  Pandas behavior.
+- cuDF does not support some of the exceptional cases that Pandas
+  supports with `apply`, such as calling [describe] inside the
+  callable.
+
+## Transform
+
+The `.transform()` method aggregates per group, and broadcasts the
+result to the group size, resulting in a Series/DataFrame that is of
+the same size as the input Series/DataFrame.
+
+```python
+>>> import cudf
+>>> df = cudf.DataFrame({'a': [2, 1, 1, 2, 2], 'b': [1, 2, 3, 4, 5]})
+>>> df.groupby('a').transform('max')
+   b
+0  5
+1  3
+2  3
+3  5
+4  5
+```
+
+## Rolling window calculations
+
+Use the `GroupBy.rolling()` method to perform rolling window
+calculations on each group:
+
+```python
+>>> df
+   a  b  c
+0  1  1  1
+1  1  1  2
+2  1  2  3
+3  2  2  4
+4  2  3  5
+```
+
+Rolling window sum on each group with a window size of 2:
+
+```python
+>>> df.groupby('a').rolling(2).sum()
+        a     b     c
+a
+1 0  <NA>  <NA>  <NA>
+  1     2     2     3
+  2     2     3     5
+2 3  <NA>  <NA>  <NA>
+  4     4     5     9
+```
+
+[describe]: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#flexible-apply
diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb
index 8026c378156..ef7500a2be9 100644
--- a/docs/cudf/source/user_guide/guide-to-udfs.ipynb
+++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb
@@ -2,15 +2,16 @@
  "cells": [
   {
    "cell_type": "markdown",
+   "id": "77149e57",
    "metadata": {},
    "source": [
-    "Overview of User Defined Functions with cuDF\n",
-    "===================================="
+    "# Overview of User Defined Functions with cuDF"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 1,
+   "id": "0c6b65ce",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -21,6 +22,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "8826af13",
    "metadata": {},
    "source": [
     "Like many tabular data processing APIs, cuDF provides a range of composable, DataFrame style operators. While out of the box functions are flexible and useful, it is sometimes necessary to write custom code, or user-defined functions (UDFs), that can be applied to rows, columns, and other groupings of the cells making up the DataFrame.\n",
@@ -39,10 +41,10 @@
   },
   {
    "cell_type": "markdown",
+   "id": "32a8f4fb",
    "metadata": {},
    "source": [
-    "Series UDFs\n",
-    "--------------\n",
+    "## Series UDFs\n",
     "\n",
     "You can execute UDFs on Series in two ways:\n",
     "\n",
@@ -54,14 +56,15 @@
   },
   {
    "cell_type": "markdown",
+   "id": "49399a84",
    "metadata": {},
    "source": [
-    "`cudf.Series.apply`\n",
-    "---------------------"
+    "### `cudf.Series.apply`"
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "0a209ea2",
    "metadata": {},
    "source": [
     "cuDF provides a similar API to `pandas.Series.apply` for applying scalar UDFs to series objects. Here is a very basic example."
@@ -70,6 +73,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
+   "id": "e28d5b82",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -79,6 +83,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "48a9fa5e",
    "metadata": {},
    "source": [
     "UDFs destined for `cudf.Series.apply` might look something like this:"
@@ -87,6 +92,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
+   "id": "96aeb19f",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -97,6 +103,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "e61d0169",
    "metadata": {},
    "source": [
     "`cudf.Series.apply` is called like `pd.Series.apply` and returns a new `Series` object:"
@@ -105,6 +112,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
+   "id": "8ca08834",
    "metadata": {},
    "outputs": [
     {
@@ -127,14 +135,15 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c98dab03",
    "metadata": {},
    "source": [
-    "Functions with Additional Scalar Arguments\n",
-    "---------------------------------------------------"
+    "### Functions with Additional Scalar Arguments"
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "2aa3df6f",
    "metadata": {},
    "source": [
     "In addition, `cudf.Series.apply` supports `args=` just like pandas, allowing you to write UDFs that accept an arbitrary number of scalar arguments. Here is an example of such a function and it's API call in both pandas and cuDF:"
@@ -143,6 +152,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
+   "id": "8d156d01",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -153,6 +163,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
+   "id": "1dee82d7",
    "metadata": {},
    "outputs": [
     {
@@ -176,6 +187,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "22739e28",
    "metadata": {},
    "source": [
     "As a final note, `**kwargs` is not yet supported."
@@ -183,14 +195,15 @@
   },
   {
    "cell_type": "markdown",
+   "id": "afbf33dc",
    "metadata": {},
    "source": [
-    "Nullable Data\n",
-    "----------------"
+    "### Nullable Data"
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "5dc06e8c",
    "metadata": {},
    "source": [
     "The null value `NA` an propagates through unary and binary operations. Thus, `NA + 1`, `abs(NA)`, and `NA == NA` all return `NA`. To make this concrete, let's look at the same example from above, this time using nullable data:"
@@ -199,6 +212,7 @@
   {
    "cell_type": "code",
    "execution_count": 7,
+   "id": "bda261dd",
    "metadata": {},
    "outputs": [
     {
@@ -224,6 +238,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
+   "id": "0123ae07",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -235,6 +250,7 @@
   {
    "cell_type": "code",
    "execution_count": 9,
+   "id": "e95868dd",
    "metadata": {},
    "outputs": [
     {
@@ -258,6 +274,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "97372e15",
    "metadata": {},
    "source": [
     "Often however you want explicit null handling behavior inside the function. cuDF exposes this capability the same way as pandas, by interacting directly with the `NA` singleton object. Here's an example of a function with explicit null handling:"
@@ -266,6 +283,7 @@
   {
    "cell_type": "code",
    "execution_count": 10,
+   "id": "6c65241b",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -280,6 +298,7 @@
   {
    "cell_type": "code",
    "execution_count": 11,
+   "id": "ab0f4dbf",
    "metadata": {},
    "outputs": [
     {
@@ -303,6 +322,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "bdddc4e8",
    "metadata": {},
    "source": [
     "In addition, `cudf.NA` can be returned from a function directly or conditionally. This capability should allow you to implement custom null handling in a wide variety of cases."
@@ -310,14 +330,15 @@
   },
   {
    "cell_type": "markdown",
+   "id": "54cafbc0",
    "metadata": {},
    "source": [
-    "Lower level control with custom `numba` kernels\n",
-    "---------------------------------------------------------"
+    "### Lower level control with custom `numba` kernels"
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "00914f2a",
    "metadata": {},
    "source": [
     "In addition to the Series.apply() method for performing custom operations, you can also pass Series objects directly into [CUDA kernels written with Numba](https://numba.pydata.org/numba-doc/latest/cuda/kernels.html).\n",
@@ -329,6 +350,7 @@
   {
    "cell_type": "code",
    "execution_count": 12,
+   "id": "732434f6",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -338,6 +360,7 @@
   {
    "cell_type": "code",
    "execution_count": 13,
+   "id": "4f5997e5",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -352,6 +375,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "d9667a55",
    "metadata": {},
    "source": [
     "This kernel will take an input array, multiply it by a configurable value (supplied at runtime), and store the result in an output array. Notice that we wrapped our logic in an `if` statement. Because we can launch more threads than the size of our array, we need to make sure that we don't use threads with an index that would be out of bounds. Leaving this out can result in undefined behavior.\n",
@@ -362,6 +386,7 @@
   {
    "cell_type": "code",
    "execution_count": 14,
+   "id": "ea6008a6",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -372,6 +397,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "3fb69909",
    "metadata": {},
    "source": [
     "After calling our kernel, our DataFrame is now populated with the result."
@@ -380,6 +406,7 @@
   {
    "cell_type": "code",
    "execution_count": 15,
+   "id": "183a82ed",
    "metadata": {},
    "outputs": [
     {
@@ -469,6 +496,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "ab9c305e",
    "metadata": {},
    "source": [
     "This API allows a you to theoretically write arbitrary kernel logic, potentially accessing and using elements of the series at arbitrary indices and use them on cuDF data structures. Advanced developers with some CUDA experience can often use this capability to implement iterative transformations, or spot treat problem areas of a data pipeline with a custom kernel that does the same job faster."
@@ -476,28 +504,29 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0acc6ef2",
    "metadata": {},
    "source": [
-    "DataFrame UDFs\n",
-    "--------------------\n",
+    "## DataFrame UDFs\n",
     "\n",
     "Like `cudf.Series`, there are multiple ways of using UDFs on dataframes, which essentially amount to UDFs that expect multiple columns as input:\n",
     "\n",
     "- `cudf.DataFrame.apply`, which functions like `pd.DataFrame.apply` and expects a row udf\n",
     "- `cudf.DataFrame.apply_rows`, which is a thin wrapper around numba and expects a numba kernel\n",
-    "- `cudf.DataFrame.apply_chunks`, which is similar to `cudf.DataFrame.apply_rows` but offers lower level control.\n"
+    "- `cudf.DataFrame.apply_chunks`, which is similar to `cudf.DataFrame.apply_rows` but offers lower level control."
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "2102c3ed",
    "metadata": {},
    "source": [
-    "`cudf.DataFrame.apply`\n",
-    "---------------------------"
+    "### `cudf.DataFrame.apply`"
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "238bec41",
    "metadata": {},
    "source": [
     "`cudf.DataFrame.apply` is the main entrypoint for UDFs that expect multiple columns as input and produce a single output column. Functions intended to be consumed by this API are written in terms of a \"row\" argument. The \"row\" is considered to be like a dictionary and contains all of the column values at a certain `iloc` in a `DataFrame`. The function can access these values by key within the function, the keys being the column names corresponding to the desired value. Below is an example function that would be used to add column `A` and column `B` together inside a UDF."
@@ -506,6 +535,7 @@
   {
    "cell_type": "code",
    "execution_count": 16,
+   "id": "73653918",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -515,6 +545,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "b5eb32dd",
    "metadata": {},
    "source": [
     "Let's create some very basic toy data containing at least one null."
@@ -523,6 +554,7 @@
   {
    "cell_type": "code",
    "execution_count": 17,
+   "id": "077feb75",
    "metadata": {},
    "outputs": [
     {
@@ -592,14 +624,16 @@
   },
   {
    "cell_type": "markdown",
+   "id": "609a3da5",
    "metadata": {},
    "source": [
-    "Finally call the function as you would in pandas - by using a lambda function to map the UDF onto \"rows\" of the DataFrame: "
+    "Finally call the function as you would in pandas - by using a lambda function to map the UDF onto \"rows\" of the DataFrame:"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 18,
+   "id": "091e39e1",
    "metadata": {},
    "outputs": [
     {
@@ -622,6 +656,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "44e54c31",
    "metadata": {},
    "source": [
     "The same function should produce the same result as pandas:"
@@ -630,6 +665,7 @@
   {
    "cell_type": "code",
    "execution_count": 19,
+   "id": "bd345fab",
    "metadata": {},
    "outputs": [
     {
@@ -652,6 +688,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "004fbbba",
    "metadata": {},
    "source": [
     "Notice that Pandas returns `object` dtype - see notes on this in the caveats section."
@@ -659,6 +696,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0b11c172",
    "metadata": {},
    "source": [
     "Like `cudf.Series.apply`, these functions support generalized null handling. Here's a function that conditionally returns a different value if a certain input is null:"
@@ -667,6 +705,7 @@
   {
    "cell_type": "code",
    "execution_count": 20,
+   "id": "b70f4b3b",
    "metadata": {},
    "outputs": [
     {
@@ -737,6 +776,7 @@
   {
    "cell_type": "code",
    "execution_count": 21,
+   "id": "0313c8df",
    "metadata": {},
    "outputs": [
     {
@@ -759,6 +799,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "313c77f3",
    "metadata": {},
    "source": [
     "`cudf.NA` can also be directly returned from a function resulting in data that has the the correct nulls in the end, just as if it were run in Pandas. For the following data, the last row fulfills the condition that `1 + 3 > 3` and returns `NA` for that row:"
@@ -767,6 +808,7 @@
   {
    "cell_type": "code",
    "execution_count": 22,
+   "id": "96a7952a",
    "metadata": {},
    "outputs": [
     {
@@ -845,6 +887,7 @@
   {
    "cell_type": "code",
    "execution_count": 23,
+   "id": "e0815f60",
    "metadata": {},
    "outputs": [
     {
@@ -867,6 +910,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "b9c674f4",
    "metadata": {},
    "source": [
     "Mixed types are allowed, but will return the common type, rather than object as in Pandas. Here's a null aware op between an int and a float column:"
@@ -875,6 +919,7 @@
   {
    "cell_type": "code",
    "execution_count": 24,
+   "id": "495efd14",
    "metadata": {},
    "outputs": [
     {
@@ -948,6 +993,7 @@
   {
    "cell_type": "code",
    "execution_count": 25,
+   "id": "678b0b5a",
    "metadata": {},
    "outputs": [
     {
@@ -970,6 +1016,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "ce0897c0",
    "metadata": {},
    "source": [
     "Functions may also return scalar values, however the result will be promoted to a safe type regardless of the data. This means even if you have a function like:\n",
@@ -991,6 +1038,7 @@
   {
    "cell_type": "code",
    "execution_count": 26,
+   "id": "acf48d56",
    "metadata": {},
    "outputs": [
     {
@@ -1063,6 +1111,7 @@
   {
    "cell_type": "code",
    "execution_count": 27,
+   "id": "78a98172",
    "metadata": {},
    "outputs": [
     {
@@ -1085,6 +1134,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "2ceaece4",
    "metadata": {},
    "source": [
     "Any number of columns and many arithmetic operators are supported, allowing for complex UDFs:"
@@ -1093,6 +1143,7 @@
   {
    "cell_type": "code",
    "execution_count": 28,
+   "id": "142c30a9",
    "metadata": {},
    "outputs": [
     {
@@ -1181,6 +1232,7 @@
   {
    "cell_type": "code",
    "execution_count": 29,
+   "id": "fee9198a",
    "metadata": {},
    "outputs": [
     {
@@ -1203,17 +1255,17 @@
   },
   {
    "cell_type": "markdown",
+   "id": "9c587bd2",
    "metadata": {},
    "source": [
-    "Numba kernels for DataFrames\n",
-    "------------------------------------"
+    "### Numba kernels for DataFrames"
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "adc6a459",
    "metadata": {},
    "source": [
-    "\n",
     "We could apply a UDF on a DataFrame like we did above with `forall`. We'd need to write a kernel that expects multiple inputs, and pass multiple Series as arguments when we execute our kernel. Because this is fairly common and can be difficult to manage, cuDF provides two APIs to streamline this: `apply_rows` and `apply_chunks`. Below, we walk through an example of using `apply_rows`. `apply_chunks` works in a similar way, but also offers more control over low-level kernel behavior.\n",
     "\n",
     "Now that we have two numeric columns in our DataFrame, let's write a kernel that uses both of them."
@@ -1222,6 +1274,7 @@
   {
    "cell_type": "code",
    "execution_count": 30,
+   "id": "90cbcd85",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1235,6 +1288,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "bce045f2",
    "metadata": {},
    "source": [
     "Notice that we need to `enumerate` through our `zipped` function arguments (which either match or are mapped to our input column names). We can pass this kernel to `apply_rows`. We'll need to specify a few arguments:\n",
@@ -1251,6 +1305,7 @@
   {
    "cell_type": "code",
    "execution_count": 31,
+   "id": "e782daff",
    "metadata": {},
    "outputs": [
     {
@@ -1337,6 +1392,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "6b838b89",
    "metadata": {},
    "source": [
     "As expected, we see our conditional addition worked. At this point, we've successfully executed UDFs on the core data structures of cuDF."
@@ -1344,9 +1400,10 @@
   },
   {
    "cell_type": "markdown",
+   "id": "fca97003",
    "metadata": {},
    "source": [
-    "## Null Handling in `apply_rows` and `apply_chunks`\n",
+    "### Null Handling in `apply_rows` and `apply_chunks`\n",
     "\n",
     "By default, DataFrame methods for applying UDFs like `apply_rows` will handle nulls pessimistically (all rows with a null value will be removed from the output if they are used in the kernel). Exploring how not handling not pessimistically can lead to undefined behavior is outside the scope of this guide. Suffice it to say, pessimistic null handling is the safe and consistent approach. You can see an example below."
    ]
@@ -1354,6 +1411,7 @@
   {
    "cell_type": "code",
    "execution_count": 32,
+   "id": "befd8333",
    "metadata": {},
    "outputs": [
     {
@@ -1445,6 +1503,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c710ce86",
    "metadata": {},
    "source": [
     "In the dataframe above, there are three null values. Each column has a null in a different row. When we use our UDF with `apply_rows`, our output should have two nulls due to pessimistic null handling (because we're not using column `c`, the null value there does not matter to us)."
@@ -1453,6 +1512,7 @@
   {
    "cell_type": "code",
    "execution_count": 33,
+   "id": "d1f3dcaf",
    "metadata": {},
    "outputs": [
     {
@@ -1546,6 +1606,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "53b9a2f8",
    "metadata": {},
    "source": [
     "As expected, we end up with two nulls in our output. The null values from the columns we used propogated to our output, but the null from the column we ignored did not."
@@ -1553,10 +1614,10 @@
   },
   {
    "cell_type": "markdown",
+   "id": "4bbefa67",
    "metadata": {},
    "source": [
-    "Rolling Window UDFs\n",
-    "-------------------------\n",
+    "## Rolling Window UDFs\n",
     "\n",
     "For time-series data, we may need to operate on a small \\\"window\\\" of our column at a time, processing each portion independently. We could slide (\\\"roll\\\") this window over the entire column to answer questions like \\\"What is the 3-day moving average of a stock price over the past year?\"\n",
     "\n",
@@ -1566,6 +1627,7 @@
   {
    "cell_type": "code",
    "execution_count": 34,
+   "id": "6bc6aea3",
    "metadata": {},
    "outputs": [
     {
@@ -1593,6 +1655,7 @@
   {
    "cell_type": "code",
    "execution_count": 35,
+   "id": "a4c31df1",
    "metadata": {},
    "outputs": [
     {
@@ -1613,6 +1676,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "ff40d863",
    "metadata": {},
    "source": [
     "Next, we'll define a function to use on our rolling windows. We created this one to highlight how you can include things like loops, mathematical functions, and conditionals. Rolling window UDFs do not yet support null values."
@@ -1621,6 +1685,7 @@
   {
    "cell_type": "code",
    "execution_count": 36,
+   "id": "eb5a081b",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1637,6 +1702,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "df8ba31d",
    "metadata": {},
    "source": [
     "We can execute the function by passing it to `apply`. With `window=3`, `min_periods=3`, and `center=False`, our first two values are `null`."
@@ -1645,6 +1711,7 @@
   {
    "cell_type": "code",
    "execution_count": 37,
+   "id": "ddec3263",
    "metadata": {},
    "outputs": [
     {
@@ -1670,6 +1737,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "187478db",
    "metadata": {},
    "source": [
     "We can apply this function to every column in a DataFrame, too."
@@ -1678,6 +1746,7 @@
   {
    "cell_type": "code",
    "execution_count": 38,
+   "id": "8b61094a",
    "metadata": {},
    "outputs": [
     {
@@ -1759,6 +1828,7 @@
   {
    "cell_type": "code",
    "execution_count": 39,
+   "id": "bb8c3019",
    "metadata": {},
    "outputs": [
     {
@@ -1867,10 +1937,10 @@
   },
   {
    "cell_type": "markdown",
+   "id": "d4785060",
    "metadata": {},
    "source": [
-    "GroupBy DataFrame UDFs\n",
-    "-------------------------------\n",
+    "## GroupBy DataFrame UDFs\n",
     "\n",
     "We can also apply UDFs to grouped DataFrames using `apply_grouped`. This example is also drawn and adapted from the RAPIDS [API documentation]().\n",
     "\n",
@@ -1880,6 +1950,7 @@
   {
    "cell_type": "code",
    "execution_count": 40,
+   "id": "3dc272ab",
    "metadata": {},
    "outputs": [
     {
@@ -1971,6 +2042,7 @@
   {
    "cell_type": "code",
    "execution_count": 41,
+   "id": "c0578e0a",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1979,6 +2051,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "4808726f",
    "metadata": {},
    "source": [
     "Next we'll define a function to apply to each group independently. In this case, we'll take the rolling average of column `e`, and call that new column `rolling_avg_e`."
@@ -1987,6 +2060,7 @@
   {
    "cell_type": "code",
    "execution_count": 42,
+   "id": "19f0f7fe",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2006,6 +2080,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7566f359",
    "metadata": {},
    "source": [
     "We can execute this with a very similar API to `apply_rows`. This time, though, it's going to execute independently for each group."
@@ -2014,6 +2089,7 @@
   {
    "cell_type": "code",
    "execution_count": 43,
+   "id": "c43426c3",
    "metadata": {},
    "outputs": [
     {
@@ -2157,6 +2233,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c8511306",
    "metadata": {},
    "source": [
     "Notice how, with a window size of three in the kernel, the first two values in each group for our output column are null."
@@ -2164,10 +2241,10 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0060678c",
    "metadata": {},
    "source": [
-    "Numba Kernels on CuPy Arrays\n",
-    "-------------------------------------\n",
+    "## Numba Kernels on CuPy Arrays\n",
     "\n",
     "We can also execute Numba kernels on CuPy NDArrays, again thanks to the `__cuda_array_interface__`. We can even run the same UDF on the Series and the CuPy array. First, we define a Series and then create a CuPy array from that Series."
    ]
@@ -2175,6 +2252,7 @@
   {
    "cell_type": "code",
    "execution_count": 44,
+   "id": "aa6a8509",
    "metadata": {},
    "outputs": [
     {
@@ -2198,6 +2276,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0fed556f",
    "metadata": {},
    "source": [
     "Next, we define a UDF and execute it on our Series. We need to allocate a Series of the same size for our output, which we'll call `out`."
@@ -2206,6 +2285,7 @@
   {
    "cell_type": "code",
    "execution_count": 45,
+   "id": "0bb8bf93",
    "metadata": {},
    "outputs": [
     {
@@ -2238,6 +2318,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "a857b169",
    "metadata": {},
    "source": [
     "Finally, we execute the same function on our array. We allocate an empty array `out` to store our results."
@@ -2246,6 +2327,7 @@
   {
    "cell_type": "code",
    "execution_count": 46,
+   "id": "ce60b639",
    "metadata": {},
    "outputs": [
     {
@@ -2267,14 +2349,15 @@
   },
   {
    "cell_type": "markdown",
+   "id": "b899d51c",
    "metadata": {},
    "source": [
-    "Caveats\n",
-    "---------"
+    "## Caveats"
    ]
   },
   {
    "cell_type": "markdown",
+   "id": "fe7eb68b",
    "metadata": {},
    "source": [
     "- Only numeric nondecimal scalar types are currently supported as of yet, but strings and structured types are in planning. Attempting to use this API with those types will throw a `TypeError`.\n",
@@ -2283,10 +2366,10 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c690563b",
    "metadata": {},
    "source": [
-    "Summary\n",
-    "-----------\n",
+    "## Summary\n",
     "\n",
     "This guide has covered a lot of content. At this point, you should hopefully feel comfortable writing UDFs (with or without null values) that operate on\n",
     "\n",
@@ -2323,5 +2406,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }
diff --git a/docs/cudf/source/user_guide/index.md b/docs/cudf/source/user_guide/index.md
new file mode 100644
index 00000000000..2750c75790a
--- /dev/null
+++ b/docs/cudf/source/user_guide/index.md
@@ -0,0 +1,16 @@
+# User Guide
+
+```{toctree}
+:maxdepth: 2
+
+10min
+data-types
+io
+missing-data
+groupby
+guide-to-udfs
+cupy-interop
+dask-cudf
+internals
+PandasCompat
+```
diff --git a/docs/cudf/source/user_guide/index.rst b/docs/cudf/source/user_guide/index.rst
deleted file mode 100644
index 1061008eb3c..00000000000
--- a/docs/cudf/source/user_guide/index.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-==========
-User Guide
-==========
-
-
-.. toctree::
-   :maxdepth: 2
-
-   10min.ipynb
-   10min-cudf-cupy.ipynb
-   guide-to-udfs.ipynb
-   Working-with-missing-data.ipynb
diff --git a/docs/cudf/source/user_guide/internals.md b/docs/cudf/source/user_guide/internals.md
new file mode 100644
index 00000000000..6ceef3d3492
--- /dev/null
+++ b/docs/cudf/source/user_guide/internals.md
@@ -0,0 +1,212 @@
+# cuDF internals
+
+The cuDF API closely matches that of the
+[Pandas](https://pandas.pydata.org/) library. Thus, we have the types
+`cudf.Series`, `cudf.DataFrame` and `cudf.Index` which look and
+feel very much like their Pandas counterparts.
+
+Under the hood, however, cuDF uses data structures very different from
+Pandas. In this document, we describe these internal data structures.
+
+## Column
+
+Columns are cuDF's core data structure and they are modeled after the
+[Apache Arrow Columnar
+Format](https://arrow.apache.org/docs/format/Columnar.html).
+
+A column represents a sequence of values, any number of which may be
+"null". Columns are specialized based on the type of data they contain.
+Thus we have `NumericalColumn`, `StringColumn`, `DatetimeColumn`,
+etc.
+
+A column is composed of the following:
+
+- A **data type**, specifying the type of each element.
+- A **data buffer** that may store the data for the column elements.
+  Some column types do not have a data buffer, instead storing data in
+  the children columns.
+- A **mask buffer** whose bits represent the validity (null or not
+  null) of each element. Columns whose elements are all "valid" may not
+  have a mask buffer. Mask buffers are padded to 64 bytes.
+- A tuple of **children** columns, which enable the representation of
+  complex types, such as columns with non-fixed width elements like
+  strings or lists.
+- A **size** indicating the number of elements in the column.
+- An integer **offset**: a column may represent a "slice" of another
+  column, in which case this offset represents the first element of the
+  slice. The size of the column then gives the extent of the slice. A
+  column that is not a slice has an offset of 0.
+
+For example, the `NumericalColumn` backing a Series with 1000 elements
+of type 'int32' and containing nulls is composed of:
+
+1. A data buffer of size 4000 bytes (sizeof(int32) * 1000)
+2. A mask buffer of size 128 bytes (1000/8 padded to a multiple of 64
+   bytes)
+3. No children columns
+
+As another example, the `StringColumn` backing the Series
+`['do', 'you', 'have', 'any', 'cheese?']` is composed of:
+
+1. No data buffer
+2. No mask buffer as there are no nulls in the Series
+3. Two children columns:
+
+   > - A column of UTF-8 characters
+   >   `['d', 'o', 'y', 'o', 'u', 'h' ..., '?']`
+   > - A column of "offsets" to the characters column (in this case,
+   >   `[0, 2, 5, 9, 12, 19]`)
+
+## Buffer
+
+The data and mask buffers of a column represent data in GPU memory
+(a.k.a. *device memory*), and are objects of type
+`cudf.core.buffer.Buffer`.
+
+Buffers can be constructed from array-like objects that live either on
+the host (e.g., numpy arrays) or the device (e.g., cupy arrays). Arrays
+must be of `uint8` dtype or viewed as such.
+
+When constructing a Buffer from a host object such as a numpy array, new
+device memory is allocated:
+
+```python
+>>> from cudf.core.buffer import Buffer
+>>> buf = Buffer(np.array([1, 2, 3], dtype='int64').view("uint8"))
+>>> print(buf.ptr)  # address of new device memory allocation
+140050901762560
+>>> print(buf.size)
+24
+>>> print(buf._owner)
+<rmm._lib.device_buffer.DeviceBuffer object at 0x7f6055baab50>
+```
+
+cuDF uses the [RMM](https://github.com/rapidsai/rmm) library for
+allocating device memory. You can read more about device memory
+allocation with RMM
+[here](https://github.com/rapidsai/rmm#devicebuffers).
+
+When constructing a Buffer from a device object such as a CuPy array, no
+new device memory is allocated. Instead, the Buffer points to the
+existing allocation, keeping a reference to the device array:
+
+```python
+>>> import cupy as cp
+>>> c_ary = cp.asarray([1, 2, 3], dtype='int64')
+>>> buf = Buffer(c_ary.view("uint8"))
+>>> print(c_ary.data.mem.ptr)
+140050901762560
+>>> print(buf.ptr)
+140050901762560
+>>> print(buf.size)
+24
+>>> print(buf._owner is c_ary)
+True
+```
+
+An uninitialized block of device memory can be allocated with
+`Buffer.empty`:
+
+```python
+>>> buf = Buffer.empty(10)
+>>> print(buf.size)
+10
+>>> print(buf._owner)
+<rmm._lib.device_buffer.DeviceBuffer object at 0x7f6055baa890>
+```
+
+## ColumnAccessor
+
+cuDF `Series`, `DataFrame` and `Index` are all subclasses of an
+internal `Frame` class. The underlying data structure of `Frame` is
+an ordered, dictionary-like object known as `ColumnAccessor`, which
+can be accessed via the `._data` attribute:
+
+```python
+>>> a = cudf.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
+>>> a._data
+ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d12e050>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d12e320>)]), multiindex=False, level_names=(None,))
+```
+
+ColumnAccessor is an ordered mapping of column labels to columns. In
+addition to behaving like an OrderedDict, it supports things like
+selecting multiple columns (both by index and label), as well as
+hierarchical indexing.
+
+```python
+>>> from cudf.core.column_accessor import ColumnAccessor
+```
+
+The values of a ColumnAccessor are coerced to Columns during
+construction:
+
+```python
+>>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
+>>> ca['x']
+<cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>
+>>> ca['y']
+<cudf.core.column.string.StringColumn object at 0x7f5a7d578b90>
+>>> ca.pop('x')
+<cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>
+>>> ca
+ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578b90>)]), multiindex=False, level_names=(None,))
+```
+
+Columns can be inserted at a specified location:
+
+```python
+>>> ca.insert('z', [3, 4, 5], loc=1)
+>>> ca
+ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d578dd0>), ('z', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d578680>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d12e3b0>)]), multiindex=False, level_names=(None,))
+```
+
+Selecting columns by index:
+
+```python
+>>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c'], 'z': [4, 5, 6]})
+>>> ca.select_by_index(1)
+ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))
+>>> ca.select_by_index([0, 1])
+ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))
+>>> ca.select_by_index(slice(1, 3))
+ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>), ('z', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5788c0>)]), multiindex=False, level_names=(None,))
+```
+
+Selecting columns by label:
+
+```python
+>>> ca.select_by_label(['y', 'z'])
+ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>), ('z', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5788c0>)]), multiindex=False, level_names=(None,))
+>>> ca.select_by_label(slice('x', 'y'))
+ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))
+```
+
+A ColumnAccessor with tuple keys (and constructed with
+`multiindex=True`) can be hierarchically indexed:
+
+```python
+>>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], ('a', 'c'): [2, 3, 4], 'b': [4, 5, 6]}, multiindex=True)
+>>> ca.select_by_label('a')
+ColumnAccessor(OrderedColumnDict([('b', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>), ('c', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d578dd0>)]), multiindex=False, level_names=(None,))
+>>> ca.select_by_label(('a', 'b'))
+ColumnAccessor(OrderedColumnDict([(('a', 'b'), <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>)]), multiindex=False, level_names=(None,))
+```
+
+"Wildcard" indexing is also allowed:
+
+```python
+>>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], ('a', 'c'): [2, 3, 4], ('d', 'b'): [4, 5, 6]}, multiindex=True)
+>>> ca.select_by_label((slice(None), 'b'))
+ColumnAccessor(OrderedColumnDict([(('a', 'b'), <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d578830>), (('d', 'b'), <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d578680>)]), multiindex=True, level_names=(None, None))
+```
+
+Finally, ColumnAccessors can convert to Pandas `Index` or
+`MultiIndex` objects:
+
+```python
+>>> ca.to_pandas_index()
+MultiIndex([('a', 'b'),
+            ('a', 'c'),
+            ('d', 'b')],
+           )
+```
diff --git a/docs/cudf/source/basics/io-supported-types.rst b/docs/cudf/source/user_guide/io.md
similarity index 69%
rename from docs/cudf/source/basics/io-supported-types.rst
rename to docs/cudf/source/user_guide/io.md
index 4a7da60fa85..672375eedaf 100644
--- a/docs/cudf/source/basics/io-supported-types.rst
+++ b/docs/cudf/source/user_guide/io.md
@@ -1,10 +1,17 @@
-I/O Supported dtypes
-====================
+# Input / Output
 
-The following table lists are compatible cudf types for each supported IO format.
+This page contains Input / Output related APIs in cuDF.
 
-.. rst-class:: io-supported-types-table special-table
+## I/O Supported dtypes
+
+The following table lists the compatible cudf types for each supported
+IO format.
+
+<div class="special-table-wrapper" style="overflow:auto">
+
+```{eval-rst}
 .. table::
+    :class: io-supported-types-table special-table
     :widths: 15 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
 
     +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+-------------------+--------+--------+---------+---------+
@@ -64,7 +71,103 @@ The following table lists are compatible cudf types for each supported IO format
     +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
     | decimal128            | ✅     | ✅     | ✅     | ✅     | ❌      | ❌     | ✅     | ✅     | ❌     | ❌      | ❌      | ❌     | ❌     | ❌      | ❌      |
     +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+```
+
+</div>
+
 
 **Notes:**
 
-* [¹] - Not GPU-accelerated.
+- \[¹\] - Not GPU-accelerated.
+
+## GPUDirect Storage Integration
+
+Many IO APIs can use the GPUDirect Storage (GDS) library to optimize IO
+operations.  GDS enables a direct data path for direct memory access
+(DMA) transfers between GPU memory and storage, which avoids a bounce
+buffer through the CPU.  GDS also has a compatibility mode that allows
+the library to fall back to copying through a CPU bounce buffer.  The
+SDK is available for download
+[here](https://developer.nvidia.com/gpudirect-storage).  GDS is also
+included in CUDA Toolkit 11.4 and higher.
+
+Use of GPUDirect Storage in cuDF is enabled by default, but can be
+disabled through the environment variable `LIBCUDF_CUFILE_POLICY`.
+This variable also controls the GDS compatibility mode.
+
+There are four valid values for the environment variable:
+
+- "GDS": Enable GDS use; GDS compatibility mode is *off*.
+- "ALWAYS": Enable GDS use; GDS compatibility mode is *on*.
+- "KVIKIO": Enable GDS through [KvikIO](https://github.com/rapidsai/kvikio).
+- "OFF": Completely disable GDS use.
+
+If no value is set, behavior will be the same as the "GDS" option.
+
+This environment variable also affects how cuDF treats GDS errors.
+
+- When `LIBCUDF_CUFILE_POLICY` is set to "GDS" and a GDS API call
+  fails for any reason, cuDF falls back to the internal implementation
+  with bounce buffers.
+- When `LIBCUDF_CUFILE_POLICY` is set to "ALWAYS" and a GDS API call
+  fails for any reason (unlikely, given that the compatibility mode is
+  on), cuDF throws an exception to propagate the error to the user.
+- When `LIBCUDF_CUFILE_POLICY` is set to "KVIKIO" and a KvikIO API
+  call fails for any reason (unlikely, given that KvikIO implements
+  its own compatibility mode) cuDF throws an exception to propagate
+  the error to the user.
+
+For more information about error handling, compatibility mode, and
+tuning parameters in KvikIO see: <https://github.com/rapidsai/kvikio>
+
+Operations that support the use of GPUDirect Storage:
+
+- {py:func}`cudf.read_avro`
+- {py:func}`cudf.read_parquet`
+- {py:func}`cudf.read_orc`
+- {py:meth}`cudf.DataFrame.to_csv`
+- {py:meth}`cudf.DataFrame.to_parquet`
+- {py:meth}`cudf.DataFrame.to_orc`
+
+Several parameters that can be used to tune the performance of
+GDS-enabled I/O are exposed through environment variables:
+
+- `LIBCUDF_CUFILE_THREAD_COUNT`: Integral value, maximum number of
+  parallel reads/writes per file (default 16);
+- `LIBCUDF_CUFILE_SLICE_SIZE`: Integral value, maximum size of each
+  GDS read/write, in bytes (default 4MB).  Larger I/O operations are
+  split into multiple calls.
+
+## nvCOMP Integration
+
+Some types of compression/decompression can be performed using either
+the [nvCOMP library](https://github.com/NVIDIA/nvcomp) or the internal
+implementation.
+
+Which implementation is used by default depends on the data format and
+the compression type.  Behavior can be influenced through environment
+variable `LIBCUDF_NVCOMP_POLICY`.
+
+There are three valid values for the environment variable:
+
+- "STABLE": Only enable nvCOMP in places where it has been deemed
+  stable for production use.
+- "ALWAYS": Enable all available uses of nvCOMP, including new,
+  experimental combinations.
+- "OFF": Disable nvCOMP use whenever possible and use the internal
+  implementations instead.
+
+If no value is set, behavior will be the same as the "STABLE" option.
+
+```{eval-rst}
+.. table:: Current policy for nvCOMP use for different types
+    :widths: 20 15 15 15 15 15 15 15 15 15
+
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+
+    |                       |       CSV       |      Parquet    |       JSON       |       ORC       |  AVRO  |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+
+    | Compression Type      | Writer | Reader | Writer | Reader | Writer¹ | Reader | Writer | Reader | Reader |
+    +=======================+========+========+========+========+=========+========+========+========+========+
+    | snappy                | ❌     | ❌     | Stable | Stable | ❌      | ❌     | Stable | Stable | ❌     |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+
+```
diff --git a/docs/cudf/source/user_guide/Working-with-missing-data.ipynb b/docs/cudf/source/user_guide/missing-data.ipynb
similarity index 87%
rename from docs/cudf/source/user_guide/Working-with-missing-data.ipynb
rename to docs/cudf/source/user_guide/missing-data.ipynb
index 54fe774060e..ad12c675373 100644
--- a/docs/cudf/source/user_guide/Working-with-missing-data.ipynb
+++ b/docs/cudf/source/user_guide/missing-data.ipynb
@@ -2,6 +2,7 @@
  "cells": [
   {
    "cell_type": "markdown",
+   "id": "f8ffbea7",
    "metadata": {},
    "source": [
     "# Working with missing data"
@@ -9,6 +10,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7e3ab093",
    "metadata": {},
    "source": [
     "In this section, we will discuss missing (also referred to as `NA`) values in cudf. cudf supports having missing values in all dtypes. These missing values are represented by `<NA>`. These values are also referenced as \"null values\"."
@@ -16,25 +18,7 @@
   },
   {
    "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "1. [How to Detect missing values](#How-to-Detect-missing-values)\n",
-    "2. [Float dtypes and missing data](#Float-dtypes-and-missing-data)\n",
-    "3. [Datetimes](#Datetimes)\n",
-    "4. [Calculations with missing data](#Calculations-with-missing-data)\n",
-    "5. [Sum/product of Null/nans](#Sum/product-of-Null/nans)\n",
-    "6. [NA values in GroupBy](#NA-values-in-GroupBy)\n",
-    "7. [Inserting missing data](#Inserting-missing-data)\n",
-    "8. [Filling missing values: fillna](#Filling-missing-values:-fillna)\n",
-    "9. [Filling with cudf Object](#Filling-with-cudf-Object)\n",
-    "10. [Dropping axis labels with missing data: dropna](#Dropping-axis-labels-with-missing-data:-dropna)\n",
-    "11. [Replacing generic values](#Replacing-generic-values)\n",
-    "12. [String/regular expression replacement](#String/regular-expression-replacement)\n",
-    "13. [Numeric replacement](#Numeric-replacement)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
+   "id": "8d657a82",
    "metadata": {},
    "source": [
     "## How to Detect missing values"
@@ -42,6 +26,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "9ea9f672",
    "metadata": {},
    "source": [
     "To detect missing values, you can use `isna()` and `notna()` functions."
@@ -50,6 +35,7 @@
   {
    "cell_type": "code",
    "execution_count": 1,
+   "id": "58050adb",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -60,6 +46,7 @@
   {
    "cell_type": "code",
    "execution_count": 2,
+   "id": "416d73da",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -69,6 +56,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
+   "id": "5dfc6bc3",
    "metadata": {},
    "outputs": [
     {
@@ -141,6 +129,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
+   "id": "4d7f7a6d",
    "metadata": {},
    "outputs": [
     {
@@ -213,6 +202,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
+   "id": "40edca67",
    "metadata": {},
    "outputs": [
     {
@@ -236,6 +226,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "acdf29d7",
    "metadata": {},
    "source": [
     "One has to be mindful that in Python (and NumPy), the nan's don’t compare equal, but None's do. Note that cudf/NumPy uses the fact that `np.nan != np.nan`, and treats `None` like `np.nan`."
@@ -244,6 +235,7 @@
   {
    "cell_type": "code",
    "execution_count": 6,
+   "id": "c269c1f5",
    "metadata": {},
    "outputs": [
     {
@@ -264,6 +256,7 @@
   {
    "cell_type": "code",
    "execution_count": 7,
+   "id": "99fb083a",
    "metadata": {},
    "outputs": [
     {
@@ -283,22 +276,23 @@
   },
   {
    "cell_type": "markdown",
+   "id": "4fdb8bc7",
    "metadata": {},
    "source": [
-    "So as compared to above, a scalar equality comparison versus a None/np.nan doesn’t provide useful information.\n",
-    "\n"
+    "So as compared to above, a scalar equality comparison versus a None/np.nan doesn’t provide useful information."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 8,
+   "id": "630ef6bb",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
        "0    False\n",
-       "1    False\n",
+       "1     <NA>\n",
        "2    False\n",
        "3    False\n",
        "Name: b, dtype: bool"
@@ -316,6 +310,7 @@
   {
    "cell_type": "code",
    "execution_count": 9,
+   "id": "8162e383",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -325,6 +320,7 @@
   {
    "cell_type": "code",
    "execution_count": 10,
+   "id": "199775b3",
    "metadata": {},
    "outputs": [
     {
@@ -348,14 +344,15 @@
   {
    "cell_type": "code",
    "execution_count": 11,
+   "id": "cd09d80c",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "0    False\n",
-       "1    False\n",
-       "2    False\n",
+       "0    <NA>\n",
+       "1    <NA>\n",
+       "2    <NA>\n",
        "dtype: bool"
       ]
      },
@@ -371,6 +368,7 @@
   {
    "cell_type": "code",
    "execution_count": 12,
+   "id": "6b23bb0c",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -380,6 +378,7 @@
   {
    "cell_type": "code",
    "execution_count": 13,
+   "id": "cafb79ee",
    "metadata": {},
    "outputs": [
     {
@@ -403,6 +402,7 @@
   {
    "cell_type": "code",
    "execution_count": 14,
+   "id": "13363897",
    "metadata": {},
    "outputs": [
     {
@@ -425,6 +425,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "208a3776",
    "metadata": {},
    "source": [
     "## Float dtypes and missing data"
@@ -432,16 +433,18 @@
   },
   {
    "cell_type": "markdown",
+   "id": "2c174b88",
    "metadata": {},
    "source": [
     "Because ``NaN`` is a float, a column of integers with even one missing values is cast to floating-point dtype. However this doesn't happen by default.\n",
     "\n",
-    "By default if a ``NaN`` value is passed to `Series` constructor, it is treated as `<NA>` value. "
+    "By default if a ``NaN`` value is passed to `Series` constructor, it is treated as `<NA>` value."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 15,
+   "id": "c59c3c54",
    "metadata": {},
    "outputs": [
     {
@@ -464,6 +467,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "a9eb2d9c",
    "metadata": {},
    "source": [
     "Hence to consider a ``NaN`` as ``NaN`` you will have to pass `nan_as_null=False` parameter into `Series` constructor."
@@ -472,6 +476,7 @@
   {
    "cell_type": "code",
    "execution_count": 16,
+   "id": "ecc5ae92",
    "metadata": {},
    "outputs": [
     {
@@ -494,6 +499,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "d1db7b08",
    "metadata": {},
    "source": [
     "## Datetimes"
@@ -501,15 +507,16 @@
   },
   {
    "cell_type": "markdown",
+   "id": "548d3734",
    "metadata": {},
    "source": [
-    "For `datetime64` types, cudf doesn't support having `NaT` values. Instead these values which are specific to numpy and pandas are considered as null values(`<NA>`) in cudf. The actual underlying value of `NaT` is `min(int64)` and cudf retains the underlying value when converting a cudf object to pandas object.\n",
-    "\n"
+    "For `datetime64` types, cudf doesn't support having `NaT` values. Instead, these values, which are specific to numpy and pandas, are considered null values (`<NA>`) in cudf. The actual underlying value of `NaT` is `min(int64)`, and cudf retains the underlying value when converting a cudf object to a pandas object."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 17,
+   "id": "de70f244",
    "metadata": {},
    "outputs": [
     {
@@ -535,6 +542,7 @@
   {
    "cell_type": "code",
    "execution_count": 18,
+   "id": "8411a914",
    "metadata": {},
    "outputs": [
     {
@@ -557,6 +565,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "df664145",
    "metadata": {},
    "source": [
     "any operations on rows having `<NA>` values in `datetime` column will result in `<NA>` value at the same location in resulting column:"
@@ -565,6 +574,7 @@
   {
    "cell_type": "code",
    "execution_count": 19,
+   "id": "829c32d0",
    "metadata": {},
    "outputs": [
     {
@@ -587,6 +597,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "aa8031ef",
    "metadata": {},
    "source": [
     "## Calculations with missing data"
@@ -594,6 +605,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c587fae2",
    "metadata": {},
    "source": [
     "Null values propagate naturally through arithmetic operations between pandas objects."
@@ -602,6 +614,7 @@
   {
    "cell_type": "code",
    "execution_count": 20,
+   "id": "f8f2aec7",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -611,6 +624,7 @@
   {
    "cell_type": "code",
    "execution_count": 21,
+   "id": "0c8a3011",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -620,6 +634,7 @@
   {
    "cell_type": "code",
    "execution_count": 22,
+   "id": "052f6c2b",
    "metadata": {},
    "outputs": [
     {
@@ -698,6 +713,7 @@
   {
    "cell_type": "code",
    "execution_count": 23,
+   "id": "0fb0a083",
    "metadata": {},
    "outputs": [
     {
@@ -776,6 +792,7 @@
   {
    "cell_type": "code",
    "execution_count": 24,
+   "id": "6f8152c0",
    "metadata": {},
    "outputs": [
     {
@@ -853,6 +870,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "11170d49",
    "metadata": {},
    "source": [
     "While summing the data along a series, `NA` values will be treated as `0`."
@@ -861,6 +879,7 @@
   {
    "cell_type": "code",
    "execution_count": 25,
+   "id": "45081790",
    "metadata": {},
    "outputs": [
     {
@@ -886,6 +905,7 @@
   {
    "cell_type": "code",
    "execution_count": 26,
+   "id": "39922658",
    "metadata": {},
    "outputs": [
     {
@@ -905,6 +925,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "6e99afe0",
    "metadata": {},
    "source": [
     "Since `NA` values are treated as `0`, the mean would result to 2 in this case `(1 + 0 + 2 + 3 + 0)/5 = 2`"
@@ -913,6 +934,7 @@
   {
    "cell_type": "code",
    "execution_count": 27,
+   "id": "b2f16ddb",
    "metadata": {},
    "outputs": [
     {
@@ -932,6 +954,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "07f2ec5a",
    "metadata": {},
    "source": [
     "To preserve `NA` values in the above calculations, `sum` & `mean` support `skipna` parameter.\n",
@@ -942,6 +965,7 @@
   {
    "cell_type": "code",
    "execution_count": 28,
+   "id": "d4a463a0",
    "metadata": {},
    "outputs": [
     {
@@ -962,6 +986,7 @@
   {
    "cell_type": "code",
    "execution_count": 29,
+   "id": "a944c42e",
    "metadata": {},
    "outputs": [
     {
@@ -981,6 +1006,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "fb8c8f18",
    "metadata": {},
    "source": [
     "Cumulative methods like `cumsum` and `cumprod` ignore `NA` values by default."
@@ -989,6 +1015,7 @@
   {
    "cell_type": "code",
    "execution_count": 30,
+   "id": "4f2a7306",
    "metadata": {},
    "outputs": [
     {
@@ -1013,6 +1040,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c8f6054b",
    "metadata": {},
    "source": [
     "To preserve `NA` values in cumulative methods, provide `skipna=False`."
@@ -1021,6 +1049,7 @@
   {
    "cell_type": "code",
    "execution_count": 31,
+   "id": "d4c46776",
    "metadata": {},
    "outputs": [
     {
@@ -1045,6 +1074,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "67077d65",
    "metadata": {},
    "source": [
     "## Sum/product of Null/nans"
@@ -1052,6 +1082,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "ffbb9ca1",
    "metadata": {},
    "source": [
     "The sum of an empty or all-NA Series of a DataFrame is 0."
@@ -1060,6 +1091,7 @@
   {
    "cell_type": "code",
    "execution_count": 32,
+   "id": "f430c9ce",
    "metadata": {},
    "outputs": [
     {
@@ -1080,6 +1112,7 @@
   {
    "cell_type": "code",
    "execution_count": 33,
+   "id": "7fde514b",
    "metadata": {},
    "outputs": [
     {
@@ -1100,6 +1133,7 @@
   {
    "cell_type": "code",
    "execution_count": 34,
+   "id": "56cedd17",
    "metadata": {},
    "outputs": [
     {
@@ -1119,6 +1153,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "cb188adb",
    "metadata": {},
    "source": [
     "The product of an empty or all-NA Series of a DataFrame is 1."
@@ -1127,6 +1162,7 @@
   {
    "cell_type": "code",
    "execution_count": 35,
+   "id": "d20bbbef",
    "metadata": {},
    "outputs": [
     {
@@ -1147,6 +1183,7 @@
   {
    "cell_type": "code",
    "execution_count": 36,
+   "id": "75abbcfa",
    "metadata": {},
    "outputs": [
     {
@@ -1167,6 +1204,7 @@
   {
    "cell_type": "code",
    "execution_count": 37,
+   "id": "becce0cc",
    "metadata": {},
    "outputs": [
     {
@@ -1186,6 +1224,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0e899e03",
    "metadata": {},
    "source": [
     "## NA values in GroupBy"
@@ -1193,6 +1232,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7fb20874",
    "metadata": {},
    "source": [
     "`NA` groups in GroupBy are automatically excluded. For example:"
@@ -1201,6 +1241,7 @@
   {
    "cell_type": "code",
    "execution_count": 38,
+   "id": "1379037c",
    "metadata": {},
    "outputs": [
     {
@@ -1279,6 +1320,7 @@
   {
    "cell_type": "code",
    "execution_count": 39,
+   "id": "d6b91e6f",
    "metadata": {},
    "outputs": [
     {
@@ -1345,6 +1387,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "cb83fb11",
    "metadata": {},
    "source": [
     "It is also possible to include `NA` in groups by passing `dropna=False`"
@@ -1353,9 +1396,8 @@
   {
    "cell_type": "code",
    "execution_count": 40,
-   "metadata": {
-    "scrolled": true
-   },
+   "id": "768c3e50",
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -1426,6 +1468,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "133816b4",
    "metadata": {},
    "source": [
     "## Inserting missing data"
@@ -1433,6 +1476,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "306082ad",
    "metadata": {},
    "source": [
     "All dtypes support insertion of missing value by assignment. Any specific location in series can made null by assigning it to `None`."
@@ -1441,6 +1485,7 @@
   {
    "cell_type": "code",
    "execution_count": 41,
+   "id": "7ddde1fe",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1450,6 +1495,7 @@
   {
    "cell_type": "code",
    "execution_count": 42,
+   "id": "16e54597",
    "metadata": {},
    "outputs": [
     {
@@ -1474,6 +1520,7 @@
   {
    "cell_type": "code",
    "execution_count": 43,
+   "id": "f628f94d",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1483,9 +1530,8 @@
   {
    "cell_type": "code",
    "execution_count": 44,
-   "metadata": {
-    "scrolled": true
-   },
+   "id": "b30590b7",
+   "metadata": {},
    "outputs": [
     {
      "data": {
@@ -1508,6 +1554,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "a1b123d0",
    "metadata": {},
    "source": [
     "## Filling missing values: fillna"
@@ -1515,6 +1562,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "114aa23a",
    "metadata": {},
    "source": [
     "`fillna()` can fill in `NA` & `NaN` values with non-NA data."
@@ -1523,6 +1571,7 @@
   {
    "cell_type": "code",
    "execution_count": 45,
+   "id": "59e22668",
    "metadata": {},
    "outputs": [
     {
@@ -1601,6 +1650,7 @@
   {
    "cell_type": "code",
    "execution_count": 46,
+   "id": "05c221ee",
    "metadata": {},
    "outputs": [
     {
@@ -1625,6 +1675,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "401f91b2",
    "metadata": {},
    "source": [
     "## Filling with cudf Object"
@@ -1632,6 +1683,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "e79346d6",
    "metadata": {},
    "source": [
     "You can also fillna using a dict or Series that is alignable. The labels of the dict or index of the Series must match the columns of the frame you wish to fill. The use case of this is to fill a DataFrame with the mean of that column."
@@ -1640,6 +1692,7 @@
   {
    "cell_type": "code",
    "execution_count": 47,
+   "id": "f52c5d8f",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1650,6 +1703,7 @@
   {
    "cell_type": "code",
    "execution_count": 48,
+   "id": "6affebe9",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1659,6 +1713,7 @@
   {
    "cell_type": "code",
    "execution_count": 49,
+   "id": "1ce1b96f",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1668,6 +1723,7 @@
   {
    "cell_type": "code",
    "execution_count": 50,
+   "id": "90829195",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1677,6 +1733,7 @@
   {
    "cell_type": "code",
    "execution_count": 51,
+   "id": "c0feac14",
    "metadata": {},
    "outputs": [
     {
@@ -1708,63 +1765,63 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>0.771245</td>\n",
-       "      <td>0.051024</td>\n",
-       "      <td>1.199239</td>\n",
+       "      <td>-0.408268</td>\n",
+       "      <td>-0.676643</td>\n",
+       "      <td>-1.274743</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>-1.168041</td>\n",
-       "      <td>0.702664</td>\n",
-       "      <td>-0.270806</td>\n",
+       "      <td>-0.029322</td>\n",
+       "      <td>-0.873593</td>\n",
+       "      <td>-1.214105</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>-1.467009</td>\n",
-       "      <td>-0.143080</td>\n",
-       "      <td>-0.806151</td>\n",
+       "      <td>-0.866371</td>\n",
+       "      <td>1.081735</td>\n",
+       "      <td>-0.226840</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>NaN</td>\n",
-       "      <td>-0.610798</td>\n",
-       "      <td>-0.272895</td>\n",
+       "      <td>0.812278</td>\n",
+       "      <td>1.074973</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
-       "      <td>1.396784</td>\n",
+       "      <td>-0.366725</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>-0.439343</td>\n",
+       "      <td>-1.016239</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>1.093102</td>\n",
-       "      <td>-0.764758</td>\n",
+       "      <td>0.675123</td>\n",
+       "      <td>1.067536</td>\n",
        "      <td>NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>0.003098</td>\n",
-       "      <td>-0.722648</td>\n",
+       "      <td>0.221568</td>\n",
+       "      <td>2.025961</td>\n",
        "      <td>NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>-0.095899</td>\n",
-       "      <td>-1.285156</td>\n",
-       "      <td>-0.300566</td>\n",
+       "      <td>-0.317241</td>\n",
+       "      <td>1.011275</td>\n",
+       "      <td>0.674891</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>0.109465</td>\n",
-       "      <td>2.497843</td>\n",
-       "      <td>-1.199856</td>\n",
+       "      <td>-0.877041</td>\n",
+       "      <td>-1.919394</td>\n",
+       "      <td>-1.029201</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1772,16 +1829,16 @@
       ],
       "text/plain": [
        "          A         B         C\n",
-       "0  0.771245  0.051024  1.199239\n",
-       "1 -1.168041  0.702664 -0.270806\n",
-       "2 -1.467009 -0.143080 -0.806151\n",
-       "3       NaN -0.610798 -0.272895\n",
-       "4       NaN       NaN  1.396784\n",
-       "5 -0.439343       NaN       NaN\n",
-       "6  1.093102 -0.764758       NaN\n",
-       "7  0.003098 -0.722648       NaN\n",
-       "8 -0.095899 -1.285156 -0.300566\n",
-       "9  0.109465  2.497843 -1.199856"
+       "0 -0.408268 -0.676643 -1.274743\n",
+       "1 -0.029322 -0.873593 -1.214105\n",
+       "2 -0.866371  1.081735 -0.226840\n",
+       "3       NaN  0.812278  1.074973\n",
+       "4       NaN       NaN -0.366725\n",
+       "5 -1.016239       NaN       NaN\n",
+       "6  0.675123  1.067536       NaN\n",
+       "7  0.221568  2.025961       NaN\n",
+       "8 -0.317241  1.011275  0.674891\n",
+       "9 -0.877041 -1.919394 -1.029201"
       ]
      },
      "execution_count": 51,
@@ -1796,6 +1853,7 @@
   {
    "cell_type": "code",
    "execution_count": 52,
+   "id": "a07c1260",
    "metadata": {},
    "outputs": [
     {
@@ -1827,63 +1885,63 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>0.771245</td>\n",
-       "      <td>0.051024</td>\n",
-       "      <td>1.199239</td>\n",
+       "      <td>-0.408268</td>\n",
+       "      <td>-0.676643</td>\n",
+       "      <td>-1.274743</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>-1.168041</td>\n",
-       "      <td>0.702664</td>\n",
-       "      <td>-0.270806</td>\n",
+       "      <td>-0.029322</td>\n",
+       "      <td>-0.873593</td>\n",
+       "      <td>-1.214105</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>-1.467009</td>\n",
-       "      <td>-0.143080</td>\n",
-       "      <td>-0.806151</td>\n",
+       "      <td>-0.866371</td>\n",
+       "      <td>1.081735</td>\n",
+       "      <td>-0.226840</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>-0.149173</td>\n",
-       "      <td>-0.610798</td>\n",
-       "      <td>-0.272895</td>\n",
+       "      <td>-0.327224</td>\n",
+       "      <td>0.812278</td>\n",
+       "      <td>1.074973</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>-0.149173</td>\n",
-       "      <td>-0.034364</td>\n",
-       "      <td>1.396784</td>\n",
+       "      <td>-0.327224</td>\n",
+       "      <td>0.316145</td>\n",
+       "      <td>-0.366725</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>-0.439343</td>\n",
-       "      <td>-0.034364</td>\n",
-       "      <td>-0.036322</td>\n",
+       "      <td>-1.016239</td>\n",
+       "      <td>0.316145</td>\n",
+       "      <td>-0.337393</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>1.093102</td>\n",
-       "      <td>-0.764758</td>\n",
-       "      <td>-0.036322</td>\n",
+       "      <td>0.675123</td>\n",
+       "      <td>1.067536</td>\n",
+       "      <td>-0.337393</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>0.003098</td>\n",
-       "      <td>-0.722648</td>\n",
-       "      <td>-0.036322</td>\n",
+       "      <td>0.221568</td>\n",
+       "      <td>2.025961</td>\n",
+       "      <td>-0.337393</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>-0.095899</td>\n",
-       "      <td>-1.285156</td>\n",
-       "      <td>-0.300566</td>\n",
+       "      <td>-0.317241</td>\n",
+       "      <td>1.011275</td>\n",
+       "      <td>0.674891</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>0.109465</td>\n",
-       "      <td>2.497843</td>\n",
-       "      <td>-1.199856</td>\n",
+       "      <td>-0.877041</td>\n",
+       "      <td>-1.919394</td>\n",
+       "      <td>-1.029201</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1891,16 +1949,16 @@
       ],
       "text/plain": [
        "          A         B         C\n",
-       "0  0.771245  0.051024  1.199239\n",
-       "1 -1.168041  0.702664 -0.270806\n",
-       "2 -1.467009 -0.143080 -0.806151\n",
-       "3 -0.149173 -0.610798 -0.272895\n",
-       "4 -0.149173 -0.034364  1.396784\n",
-       "5 -0.439343 -0.034364 -0.036322\n",
-       "6  1.093102 -0.764758 -0.036322\n",
-       "7  0.003098 -0.722648 -0.036322\n",
-       "8 -0.095899 -1.285156 -0.300566\n",
-       "9  0.109465  2.497843 -1.199856"
+       "0 -0.408268 -0.676643 -1.274743\n",
+       "1 -0.029322 -0.873593 -1.214105\n",
+       "2 -0.866371  1.081735 -0.226840\n",
+       "3 -0.327224  0.812278  1.074973\n",
+       "4 -0.327224  0.316145 -0.366725\n",
+       "5 -1.016239  0.316145 -0.337393\n",
+       "6  0.675123  1.067536 -0.337393\n",
+       "7  0.221568  2.025961 -0.337393\n",
+       "8 -0.317241  1.011275  0.674891\n",
+       "9 -0.877041 -1.919394 -1.029201"
       ]
      },
      "execution_count": 52,
@@ -1915,6 +1973,7 @@
   {
    "cell_type": "code",
    "execution_count": 53,
+   "id": "9e70d61a",
    "metadata": {},
    "outputs": [
     {
@@ -1946,63 +2005,63 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>0.771245</td>\n",
-       "      <td>0.051024</td>\n",
-       "      <td>1.199239</td>\n",
+       "      <td>-0.408268</td>\n",
+       "      <td>-0.676643</td>\n",
+       "      <td>-1.274743</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>-1.168041</td>\n",
-       "      <td>0.702664</td>\n",
-       "      <td>-0.270806</td>\n",
+       "      <td>-0.029322</td>\n",
+       "      <td>-0.873593</td>\n",
+       "      <td>-1.214105</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>-1.467009</td>\n",
-       "      <td>-0.143080</td>\n",
-       "      <td>-0.806151</td>\n",
+       "      <td>-0.866371</td>\n",
+       "      <td>1.081735</td>\n",
+       "      <td>-0.226840</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>NaN</td>\n",
-       "      <td>-0.610798</td>\n",
-       "      <td>-0.272895</td>\n",
+       "      <td>0.812278</td>\n",
+       "      <td>1.074973</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>NaN</td>\n",
-       "      <td>-0.034364</td>\n",
-       "      <td>1.396784</td>\n",
+       "      <td>0.316145</td>\n",
+       "      <td>-0.366725</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>-0.439343</td>\n",
-       "      <td>-0.034364</td>\n",
-       "      <td>-0.036322</td>\n",
+       "      <td>-1.016239</td>\n",
+       "      <td>0.316145</td>\n",
+       "      <td>-0.337393</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>1.093102</td>\n",
-       "      <td>-0.764758</td>\n",
-       "      <td>-0.036322</td>\n",
+       "      <td>0.675123</td>\n",
+       "      <td>1.067536</td>\n",
+       "      <td>-0.337393</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>0.003098</td>\n",
-       "      <td>-0.722648</td>\n",
-       "      <td>-0.036322</td>\n",
+       "      <td>0.221568</td>\n",
+       "      <td>2.025961</td>\n",
+       "      <td>-0.337393</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>-0.095899</td>\n",
-       "      <td>-1.285156</td>\n",
-       "      <td>-0.300566</td>\n",
+       "      <td>-0.317241</td>\n",
+       "      <td>1.011275</td>\n",
+       "      <td>0.674891</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>0.109465</td>\n",
-       "      <td>2.497843</td>\n",
-       "      <td>-1.199856</td>\n",
+       "      <td>-0.877041</td>\n",
+       "      <td>-1.919394</td>\n",
+       "      <td>-1.029201</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -2010,16 +2069,16 @@
       ],
       "text/plain": [
        "          A         B         C\n",
-       "0  0.771245  0.051024  1.199239\n",
-       "1 -1.168041  0.702664 -0.270806\n",
-       "2 -1.467009 -0.143080 -0.806151\n",
-       "3       NaN -0.610798 -0.272895\n",
-       "4       NaN -0.034364  1.396784\n",
-       "5 -0.439343 -0.034364 -0.036322\n",
-       "6  1.093102 -0.764758 -0.036322\n",
-       "7  0.003098 -0.722648 -0.036322\n",
-       "8 -0.095899 -1.285156 -0.300566\n",
-       "9  0.109465  2.497843 -1.199856"
+       "0 -0.408268 -0.676643 -1.274743\n",
+       "1 -0.029322 -0.873593 -1.214105\n",
+       "2 -0.866371  1.081735 -0.226840\n",
+       "3       NaN  0.812278  1.074973\n",
+       "4       NaN  0.316145 -0.366725\n",
+       "5 -1.016239  0.316145 -0.337393\n",
+       "6  0.675123  1.067536 -0.337393\n",
+       "7  0.221568  2.025961 -0.337393\n",
+       "8 -0.317241  1.011275  0.674891\n",
+       "9 -0.877041 -1.919394 -1.029201"
       ]
      },
      "execution_count": 53,
@@ -2033,6 +2092,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0ace728d",
    "metadata": {},
    "source": [
     "## Dropping axis labels with missing data: dropna"
@@ -2040,15 +2100,16 @@
   },
   {
    "cell_type": "markdown",
+   "id": "2ccd7115",
    "metadata": {},
    "source": [
-    "Missing data can be excluded using `dropna()`:\n",
-    "\n"
+    "Missing data can be excluded using `dropna()`:"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 54,
+   "id": "98c57be7",
    "metadata": {},
    "outputs": [
     {
@@ -2127,6 +2188,7 @@
   {
    "cell_type": "code",
    "execution_count": 55,
+   "id": "bc3f273a",
    "metadata": {},
    "outputs": [
     {
@@ -2187,6 +2249,7 @@
   {
    "cell_type": "code",
    "execution_count": 56,
+   "id": "a48d4de0",
    "metadata": {},
    "outputs": [
     {
@@ -2249,14 +2312,16 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0b1954f9",
    "metadata": {},
    "source": [
-    "An equivalent `dropna()` is available for Series. "
+    "An equivalent `dropna()` is available for Series."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 57,
+   "id": "2dd8f660",
    "metadata": {},
    "outputs": [
     {
@@ -2279,6 +2344,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "121eb6d7",
    "metadata": {},
    "source": [
     "## Replacing generic values"
@@ -2286,6 +2352,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "3cc4c5f1",
    "metadata": {},
    "source": [
     "Often times we want to replace arbitrary values with other values.\n",
@@ -2296,6 +2363,7 @@
   {
    "cell_type": "code",
    "execution_count": 58,
+   "id": "e6c14e8a",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2305,6 +2373,7 @@
   {
    "cell_type": "code",
    "execution_count": 59,
+   "id": "a852f0cb",
    "metadata": {},
    "outputs": [
     {
@@ -2330,6 +2399,7 @@
   {
    "cell_type": "code",
    "execution_count": 60,
+   "id": "f6ac12eb",
    "metadata": {},
    "outputs": [
     {
@@ -2354,6 +2424,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "a6e1b6d7",
    "metadata": {},
    "source": [
     "We can also replace any value with a `<NA>` value."
@@ -2362,6 +2433,7 @@
   {
    "cell_type": "code",
    "execution_count": 61,
+   "id": "f0156bff",
    "metadata": {},
    "outputs": [
     {
@@ -2386,6 +2458,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "6673eefb",
    "metadata": {},
    "source": [
     "You can replace a list of values by a list of other values:"
@@ -2394,6 +2467,7 @@
   {
    "cell_type": "code",
    "execution_count": 62,
+   "id": "f3110f5b",
    "metadata": {},
    "outputs": [
     {
@@ -2418,6 +2492,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "61521e8b",
    "metadata": {},
    "source": [
     "You can also specify a mapping dict:"
@@ -2426,6 +2501,7 @@
   {
    "cell_type": "code",
    "execution_count": 63,
+   "id": "45862d05",
    "metadata": {},
    "outputs": [
     {
@@ -2450,6 +2526,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "04a34549",
    "metadata": {},
    "source": [
     "For a DataFrame, you can specify individual values by column:"
@@ -2458,6 +2535,7 @@
   {
    "cell_type": "code",
    "execution_count": 64,
+   "id": "348caa64",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2467,6 +2545,7 @@
   {
    "cell_type": "code",
    "execution_count": 65,
+   "id": "cca41ec4",
    "metadata": {},
    "outputs": [
     {
@@ -2545,6 +2624,7 @@
   {
    "cell_type": "code",
    "execution_count": 66,
+   "id": "64334693",
    "metadata": {},
    "outputs": [
     {
@@ -2622,6 +2702,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "2f0ceec7",
    "metadata": {},
    "source": [
     "## String/regular expression replacement"
@@ -2629,6 +2710,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "c6f44740",
    "metadata": {},
    "source": [
     "cudf supports replacing string values using `replace` API:"
@@ -2637,6 +2719,7 @@
   {
    "cell_type": "code",
    "execution_count": 67,
+   "id": "031d3533",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2646,6 +2729,7 @@
   {
    "cell_type": "code",
    "execution_count": 68,
+   "id": "12b41efb",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2655,6 +2739,7 @@
   {
    "cell_type": "code",
    "execution_count": 69,
+   "id": "d450df49",
    "metadata": {},
    "outputs": [
     {
@@ -2732,6 +2817,7 @@
   {
    "cell_type": "code",
    "execution_count": 70,
+   "id": "f823bc46",
    "metadata": {},
    "outputs": [
     {
@@ -2809,6 +2895,7 @@
   {
    "cell_type": "code",
    "execution_count": 71,
+   "id": "bc52f6e9",
    "metadata": {},
    "outputs": [
     {
@@ -2885,14 +2972,16 @@
   },
   {
    "cell_type": "markdown",
+   "id": "7c1087be",
    "metadata": {},
    "source": [
-    "Replace a few different values (list -> list):\n"
+    "Replace a few different values (list -> list):"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 72,
+   "id": "7e23eba9",
    "metadata": {},
    "outputs": [
     {
@@ -2969,6 +3058,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "42845a9c",
    "metadata": {},
    "source": [
     "Only search in column 'b' (dict -> dict):"
@@ -2977,6 +3067,7 @@
   {
    "cell_type": "code",
    "execution_count": 73,
+   "id": "d2e79805",
    "metadata": {},
    "outputs": [
     {
@@ -3053,6 +3144,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "774b42a6",
    "metadata": {},
    "source": [
     "## Numeric replacement"
@@ -3060,6 +3152,7 @@
   },
   {
    "cell_type": "markdown",
+   "id": "1c1926ac",
    "metadata": {},
    "source": [
     "`replace()` can also be used similar to `fillna()`."
@@ -3068,6 +3161,7 @@
   {
    "cell_type": "code",
    "execution_count": 74,
+   "id": "355a2f0d",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3077,6 +3171,7 @@
   {
    "cell_type": "code",
    "execution_count": 75,
+   "id": "d9eed372",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3086,6 +3181,7 @@
   {
    "cell_type": "code",
    "execution_count": 76,
+   "id": "ae944244",
    "metadata": {},
    "outputs": [
     {
@@ -3116,70 +3212,70 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>-0.089358787</td>\n",
+       "      <td>-0.728419386</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>-2.141612003</td>\n",
+       "      <td>-0.574415182</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>0.123160746</td>\n",
-       "      <td>1.09464783</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>0.774643462</td>\n",
+       "      <td>2.07287721</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>0.93799853</td>\n",
+       "      <td>-1.054129436</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>0.68137677</td>\n",
-       "      <td>-0.357346253</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>-0.435293012</td>\n",
+       "      <td>1.163009584</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>1.346623287</td>\n",
+       "      <td>0.31961371</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>1.173285961</td>\n",
-       "      <td>-0.968616065</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>0.147922362</td>\n",
-       "      <td>-0.154880098</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "             0             1\n",
-       "0         <NA>          <NA>\n",
-       "1         <NA>          <NA>\n",
-       "2  0.123160746    1.09464783\n",
-       "3         <NA>          <NA>\n",
-       "4         <NA>          <NA>\n",
-       "5   0.68137677  -0.357346253\n",
-       "6         <NA>          <NA>\n",
-       "7         <NA>          <NA>\n",
-       "8  1.173285961  -0.968616065\n",
-       "9  0.147922362  -0.154880098"
+       "              0             1\n",
+       "0  -0.089358787  -0.728419386\n",
+       "1  -2.141612003  -0.574415182\n",
+       "2          <NA>          <NA>\n",
+       "3   0.774643462    2.07287721\n",
+       "4    0.93799853  -1.054129436\n",
+       "5          <NA>          <NA>\n",
+       "6  -0.435293012   1.163009584\n",
+       "7   1.346623287    0.31961371\n",
+       "8          <NA>          <NA>\n",
+       "9          <NA>          <NA>"
       ]
      },
      "execution_count": 76,
@@ -3193,15 +3289,16 @@
   },
   {
    "cell_type": "markdown",
+   "id": "0f32607c",
    "metadata": {},
    "source": [
-    "Replacing more than one value is possible by passing a list.\n",
-    "\n"
+    "Replacing more than one value is possible by passing a list."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 77,
+   "id": "59b81c60",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3211,6 +3308,7 @@
   {
    "cell_type": "code",
    "execution_count": 78,
+   "id": "01a71d4c",
    "metadata": {},
    "outputs": [
     {
@@ -3241,70 +3339,70 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>5.000000</td>\n",
-       "      <td>5.000000</td>\n",
+       "      <td>10.000000</td>\n",
+       "      <td>-0.728419</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>5.000000</td>\n",
-       "      <td>5.000000</td>\n",
+       "      <td>-2.141612</td>\n",
+       "      <td>-0.574415</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>0.123161</td>\n",
-       "      <td>1.094648</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>5.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>5.000000</td>\n",
-       "      <td>5.000000</td>\n",
+       "      <td>0.774643</td>\n",
+       "      <td>2.072877</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>5.000000</td>\n",
-       "      <td>5.000000</td>\n",
+       "      <td>0.937999</td>\n",
+       "      <td>-1.054129</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>0.681377</td>\n",
-       "      <td>-0.357346</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>5.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>5.000000</td>\n",
-       "      <td>5.000000</td>\n",
+       "      <td>-0.435293</td>\n",
+       "      <td>1.163010</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>5.000000</td>\n",
-       "      <td>5.000000</td>\n",
+       "      <td>1.346623</td>\n",
+       "      <td>0.319614</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>1.173286</td>\n",
-       "      <td>-0.968616</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>5.000000</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>0.147922</td>\n",
-       "      <td>-0.154880</td>\n",
+       "      <td>5.000000</td>\n",
+       "      <td>5.000000</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "          0         1\n",
-       "0  5.000000  5.000000\n",
-       "1  5.000000  5.000000\n",
-       "2  0.123161  1.094648\n",
-       "3  5.000000  5.000000\n",
-       "4  5.000000  5.000000\n",
-       "5  0.681377 -0.357346\n",
-       "6  5.000000  5.000000\n",
-       "7  5.000000  5.000000\n",
-       "8  1.173286 -0.968616\n",
-       "9  0.147922 -0.154880"
+       "           0         1\n",
+       "0  10.000000 -0.728419\n",
+       "1  -2.141612 -0.574415\n",
+       "2   5.000000  5.000000\n",
+       "3   0.774643  2.072877\n",
+       "4   0.937999 -1.054129\n",
+       "5   5.000000  5.000000\n",
+       "6  -0.435293  1.163010\n",
+       "7   1.346623  0.319614\n",
+       "8   5.000000  5.000000\n",
+       "9   5.000000  5.000000"
       ]
      },
      "execution_count": 78,
@@ -3318,15 +3416,16 @@
   },
   {
    "cell_type": "markdown",
+   "id": "1080e97b",
    "metadata": {},
    "source": [
-    "You can also operate on the DataFrame in place:\n",
-    "\n"
+    "You can also operate on the DataFrame in place:"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 79,
+   "id": "5f0859d7",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -3336,6 +3435,7 @@
   {
    "cell_type": "code",
    "execution_count": 80,
+   "id": "5cf28369",
    "metadata": {},
    "outputs": [
     {
@@ -3366,70 +3466,70 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>-0.089358787</td>\n",
+       "      <td>-0.728419386</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>-2.141612003</td>\n",
+       "      <td>-0.574415182</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>0.123160746</td>\n",
-       "      <td>1.09464783</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>0.774643462</td>\n",
+       "      <td>2.07287721</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>0.93799853</td>\n",
+       "      <td>-1.054129436</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>0.68137677</td>\n",
-       "      <td>-0.357346253</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>-0.435293012</td>\n",
+       "      <td>1.163009584</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>&lt;NA&gt;</td>\n",
-       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>1.346623287</td>\n",
+       "      <td>0.31961371</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>1.173285961</td>\n",
-       "      <td>-0.968616065</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>0.147922362</td>\n",
-       "      <td>-0.154880098</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
+       "      <td>&lt;NA&gt;</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "             0             1\n",
-       "0         <NA>          <NA>\n",
-       "1         <NA>          <NA>\n",
-       "2  0.123160746    1.09464783\n",
-       "3         <NA>          <NA>\n",
-       "4         <NA>          <NA>\n",
-       "5   0.68137677  -0.357346253\n",
-       "6         <NA>          <NA>\n",
-       "7         <NA>          <NA>\n",
-       "8  1.173285961  -0.968616065\n",
-       "9  0.147922362  -0.154880098"
+       "              0             1\n",
+       "0  -0.089358787  -0.728419386\n",
+       "1  -2.141612003  -0.574415182\n",
+       "2          <NA>          <NA>\n",
+       "3   0.774643462    2.07287721\n",
+       "4    0.93799853  -1.054129436\n",
+       "5          <NA>          <NA>\n",
+       "6  -0.435293012   1.163009584\n",
+       "7   1.346623287    0.31961371\n",
+       "8          <NA>          <NA>\n",
+       "9          <NA>          <NA>"
       ]
      },
      "execution_count": 80,
@@ -3444,7 +3544,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -3458,9 +3558,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.9"
+   "version": "3.8.13"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 4
+ "nbformat_minor": 5
 }

From 1a457efc019ee06bf11d350485cee12087db9d6e Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com>
Date: Wed, 4 May 2022 14:42:42 -0700
Subject: [PATCH 18/23] In-place updates with loc or iloc don't work correctly
 when the LHS has more than one column (#9918)

Fixes: https://github.com/rapidsai/cudf/issues/7377

This PR enables `setitem` with a scalar value, a DataFrame, or an array/list iterable in both `_DataFrameLocIndexer` and `_DataFrameIlocIndexer`. Only the following cases are currently supported in cudf:
- Scalar value: follows the original code path and assigns column values via the specified key (row label).
- DataFrame: checks for column alignment between the LHS and RHS, then uses a scatter map of the indices to assign column values accordingly. NA is substituted for columns not found in the RHS.
- All other cases (array, list, range value, etc.): the value is first converted to a cupy array, followed by special handling:
   * If it is a 2d array: if the inner dimension is 1, it is broadcastable to all columns of the dataframe.
   * Otherwise the value must be a 1d array (scalar values are handled in the first case above), and there are 2 subcases:
     * If the key on the column axis is a scalar, the user is indexing a single column; therefore the 1d value should be assigned along the columns.
     * Otherwise, the key on the column axis is a 1d array. In this case, the key on the row axis can be a scalar or 1d, and for both kinds of row key the ith element in value corresponds to the ith row in the indexed object. If the key is 1d, a broadcast will happen.

Authors:
  - Sheilah Kirui (https://github.com/skirui-source)
  - Michael Wang (https://github.com/isVoid)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Michael Wang (https://github.com/isVoid)

URL: https://github.com/rapidsai/cudf/pull/9918
---
 python/cudf/cudf/core/dataframe.py       | 123 ++++++++++++---
 python/cudf/cudf/core/indexed_frame.py   |   1 -
 python/cudf/cudf/tests/test_dataframe.py |  37 -----
 python/cudf/cudf/tests/test_indexing.py  | 186 +++++++++++++++++++++++
 4 files changed, 286 insertions(+), 61 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 8c459e855c1..036ef890696 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -111,6 +111,14 @@
 }
 
 
+def _shape_mismatch_error(x, y):
+    raise ValueError(
+        f"shape mismatch: value array of shape {x} "
+        f"could not be broadcast to indexing result of "
+        f"shape {y}"
+    )
+
+
 class _DataFrameIndexer(_FrameIndexer):
     def __getitem__(self, arg):
         if (
@@ -342,28 +350,58 @@ def _setitem_tuple_arg(self, key, value):
                 )
             self._frame._data.insert(key[1], new_col)
         else:
-            if isinstance(value, (cupy.ndarray, np.ndarray)):
-                value_df = DataFrame(value)
-                if value_df.shape[1] != columns_df.shape[1]:
-                    if value_df.shape[1] == 1:
-                        value_cols = (
-                            value_df._data.columns * columns_df.shape[1]
-                        )
-                    else:
-                        raise ValueError(
-                            f"shape mismatch: value array of shape "
-                            f"{value_df.shape} could not be "
-                            f"broadcast to indexing result of shape "
-                            f"{columns_df.shape}"
-                        )
-                else:
-                    value_cols = value_df._data.columns
-                for i, col in enumerate(columns_df._column_names):
-                    self._frame[col].loc[key[0]] = value_cols[i]
-            else:
+            if is_scalar(value):
                 for col in columns_df._column_names:
                     self._frame[col].loc[key[0]] = value
 
+            elif isinstance(value, cudf.DataFrame):
+                if value.shape != self._frame.loc[key[0]].shape:
+                    _shape_mismatch_error(
+                        value.shape,
+                        self._frame.loc[key[0]].shape,
+                    )
+                value_column_names = set(value._column_names)
+                scatter_map = _indices_from_labels(self._frame, key[0])
+                for col in columns_df._column_names:
+                    columns_df[col][scatter_map] = (
+                        value._data[col]
+                        if col in value_column_names
+                        else cudf.NA
+                    )
+
+            else:
+                value = cupy.asarray(value)
+                if cupy.ndim(value) == 2:
+                    # If the inner dimension is 1, it's broadcastable to
+                    # all columns of the dataframe.
+                    indexed_shape = columns_df.loc[key[0]].shape
+                    if value.shape[1] == 1:
+                        if value.shape[0] != indexed_shape[0]:
+                            _shape_mismatch_error(value.shape, indexed_shape)
+                        for i, col in enumerate(columns_df._column_names):
+                            self._frame[col].loc[key[0]] = value[:, 0]
+                    else:
+                        if value.shape != indexed_shape:
+                            _shape_mismatch_error(value.shape, indexed_shape)
+                        for i, col in enumerate(columns_df._column_names):
+                            self._frame[col].loc[key[0]] = value[:, i]
+                else:
+                    # handle cases where value is 1d object:
+                    # If the key on column axis is a scalar, we indexed
+                    # a single column; The 1d value should assign along
+                    # the columns.
+                    if is_scalar(key[1]):
+                        for col in columns_df._column_names:
+                            self._frame[col].loc[key[0]] = value
+                    # Otherwise, there are two situations. The key on row axis
+                    # can be a scalar or 1d. In either of the situation, the
+                    # ith element in value corresponds to the ith row in
+                    # the indexed object.
+                    # If the key is 1d, a broadcast will happen.
+                    else:
+                        for i, col in enumerate(columns_df._column_names):
+                            self._frame[col].loc[key[0]] = value[i]
+
 
 class _DataFrameIlocIndexer(_DataFrameIndexer):
     """
@@ -424,10 +462,49 @@ def _getitem_tuple_arg(self, arg):
 
     @_cudf_nvtx_annotate
     def _setitem_tuple_arg(self, key, value):
-        # TODO: Determine if this usage is prevalent enough to expose this
-        # selection logic at a higher level than ColumnAccessor.
-        for col in self._frame._data.get_labels_by_index(key[1]):
-            self._frame[col].iloc[key[0]] = value
+        columns_df = self._frame._from_data(
+            self._frame._data.select_by_index(key[1]), self._frame._index
+        )
+
+        if is_scalar(value):
+            for col in columns_df._column_names:
+                self._frame[col].iloc[key[0]] = value
+
+        elif isinstance(value, cudf.DataFrame):
+            if value.shape != self._frame.iloc[key[0]].shape:
+                _shape_mismatch_error(
+                    value.shape,
+                    self._frame.iloc[key[0]].shape,
+                )
+            value_column_names = set(value._column_names)
+            for col in columns_df._column_names:
+                columns_df[col][key[0]] = (
+                    value._data[col] if col in value_column_names else cudf.NA
+                )
+
+        else:
+            # TODO: consolidate code path with identical counterpart
+            # in `_DataFrameLocIndexer._setitem_tuple_arg`
+            value = cupy.asarray(value)
+            if cupy.ndim(value) == 2:
+                indexed_shape = columns_df.iloc[key[0]].shape
+                if value.shape[1] == 1:
+                    if value.shape[0] != indexed_shape[0]:
+                        _shape_mismatch_error(value.shape, indexed_shape)
+                    for i, col in enumerate(columns_df._column_names):
+                        self._frame[col].iloc[key[0]] = value[:, 0]
+                else:
+                    if value.shape != indexed_shape:
+                        _shape_mismatch_error(value.shape, indexed_shape)
+                    for i, col in enumerate(columns_df._column_names):
+                        self._frame._data[col][key[0]] = value[:, i]
+            else:
+                if is_scalar(key[1]):
+                    for col in columns_df._column_names:
+                        self._frame[col].iloc[key[0]] = value
+                else:
+                    for i, col in enumerate(columns_df._column_names):
+                        self._frame[col].iloc[key[0]] = value[i]
 
     def _getitem_scalar(self, arg):
         col = self._frame.columns[arg[1]]
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 1361fc56fa0..f4dcf9f59ca 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -144,7 +144,6 @@ def _drop_columns(f: Frame, columns: abc.Iterable, errors: str):
 
 
 def _indices_from_labels(obj, labels):
-
     if not isinstance(labels, cudf.MultiIndex):
         labels = cudf.core.column.as_column(labels)
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 9f2a3d45778..7f482c0e776 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8697,43 +8697,6 @@ def test_frame_series_where():
     assert_eq(expected, actual)
 
 
-@pytest.mark.parametrize(
-    "array,is_error",
-    [
-        (cupy.arange(20, 40).reshape(-1, 2), False),
-        (cupy.arange(20, 50).reshape(-1, 3), True),
-        (np.arange(20, 40).reshape(-1, 2), False),
-        (np.arange(20, 30).reshape(-1, 1), False),
-        (cupy.arange(20, 30).reshape(-1, 1), False),
-    ],
-)
-def test_dataframe_indexing_setitem_np_cp_array(array, is_error):
-    gdf = cudf.DataFrame({"a": range(10), "b": range(10)})
-    pdf = gdf.to_pandas()
-    if not is_error:
-        gdf.loc[:, ["a", "b"]] = array
-        pdf.loc[:, ["a", "b"]] = cupy.asnumpy(array)
-
-        assert_eq(gdf, pdf)
-    else:
-        assert_exceptions_equal(
-            lfunc=pdf.loc.__setitem__,
-            rfunc=gdf.loc.__setitem__,
-            lfunc_args_and_kwargs=(
-                [(slice(None, None, None), ["a", "b"]), cupy.asnumpy(array)],
-                {},
-            ),
-            rfunc_args_and_kwargs=(
-                [(slice(None, None, None), ["a", "b"]), array],
-                {},
-            ),
-            compare_error_message=False,
-            expected_error_message="shape mismatch: value array of shape "
-            "(10, 3) could not be broadcast to indexing "
-            "result of shape (10, 2)",
-        )
-
-
 @pytest.mark.parametrize(
     "data",
     [{"a": [1, 2, 3], "b": [1, 1, 0]}],
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 225aa0cd6bc..790fbd0d3f8 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -1486,3 +1486,189 @@ def test_iloc_decimal():
         ["4.00", "3.00", "2.00", "1.00"],
     ).astype(cudf.Decimal64Dtype(scale=2, precision=3))
     assert_eq(expect.reset_index(drop=True), got.reset_index(drop=True))
+
+
+@pytest.mark.parametrize(
+    ("key, value"),
+    [
+        (
+            ([0], ["x", "y"]),
+            [10, 20],
+        ),
+        (
+            ([0, 2], ["x", "y"]),
+            [[10, 30], [20, 40]],
+        ),
+        (
+            (0, ["x", "y"]),
+            [10, 20],
+        ),
+        (
+            ([0, 2], "x"),
+            [10, 20],
+        ),
+    ],
+)
+def test_dataframe_loc_inplace_update(key, value):
+    gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
+    pdf = gdf.to_pandas()
+
+    actual = gdf.loc[key] = value
+    expected = pdf.loc[key] = value
+
+    assert_eq(expected, actual)
+
+
+def test_dataframe_loc_inplace_update_string_index():
+    gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=list("abc"))
+    pdf = gdf.to_pandas()
+
+    actual = gdf.loc[["a"], ["x", "y"]] = [10, 20]
+    expected = pdf.loc[["a"], ["x", "y"]] = [10, 20]
+
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    ("key, value"),
+    [
+        ([0], [10, 20]),
+        ([0, 2], [[10, 30], [20, 40]]),
+        (([0, 2], [0, 1]), [[10, 30], [20, 40]]),
+        (([0, 2], 0), [10, 30]),
+        ((0, [0, 1]), [20, 40]),
+    ],
+)
+def test_dataframe_iloc_inplace_update(key, value):
+    gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
+    pdf = gdf.to_pandas()
+
+    actual = gdf.iloc[key] = value
+    expected = pdf.iloc[key] = value
+
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    "loc_key",
+    [([0, 2], ["x", "y"])],
+)
+@pytest.mark.parametrize(
+    "iloc_key",
+    [[0, 2]],
+)
+@pytest.mark.parametrize(
+    ("data, index"),
+    [
+        (
+            {"x": [10, 20], "y": [30, 40]},
+            [0, 2],
+        )
+    ],
+)
+def test_dataframe_loc_iloc_inplace_update_with_RHS_dataframe(
+    loc_key, iloc_key, data, index
+):
+    gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
+    pdf = gdf.to_pandas()
+
+    actual = gdf.loc[loc_key] = cudf.DataFrame(data, index=cudf.Index(index))
+    expected = pdf.loc[loc_key] = pd.DataFrame(data, index=pd.Index(index))
+    assert_eq(expected, actual)
+
+    actual = gdf.iloc[iloc_key] = cudf.DataFrame(data, index=cudf.Index(index))
+    expected = pdf.iloc[iloc_key] = pd.DataFrame(data, index=pd.Index(index))
+    assert_eq(expected, actual)
+
+
+def test_dataframe_loc_inplace_update_with_invalid_RHS_df_columns():
+    gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
+    pdf = gdf.to_pandas()
+
+    actual = gdf.loc[[0, 2], ["x", "y"]] = cudf.DataFrame(
+        {"b": [10, 20], "y": [30, 40]}, index=cudf.Index([0, 2])
+    )
+    expected = pdf.loc[[0, 2], ["x", "y"]] = pd.DataFrame(
+        {"b": [10, 20], "y": [30, 40]}, index=pd.Index([0, 2])
+    )
+
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    ("key, value"),
+    [
+        (([0, 2], ["x", "y"]), [[10, 30, 50], [20, 40, 60]]),
+        (([0], ["x", "y"]), [[10], [20]]),
+    ],
+)
+def test_dataframe_loc_inplace_update_shape_mismatch(key, value):
+    gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
+    with pytest.raises(ValueError, match="shape mismatch:"):
+        gdf.loc[key] = value
+
+
+@pytest.mark.parametrize(
+    ("key, value"),
+    [
+        ([0, 2], [[10, 30, 50], [20, 40, 60]]),
+        ([0], [[10], [20]]),
+    ],
+)
+def test_dataframe_iloc_inplace_update_shape_mismatch(key, value):
+    gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
+    with pytest.raises(ValueError, match="shape mismatch:"):
+        gdf.iloc[key] = value
+
+
+def test_dataframe_loc_inplace_update_shape_mismatch_RHS_df():
+    gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
+    with pytest.raises(ValueError, match="shape mismatch:"):
+        gdf.loc[([0, 2], ["x", "y"])] = cudf.DataFrame(
+            {"x": [10, 20]}, index=cudf.Index([0, 2])
+        )
+
+
+def test_dataframe_iloc_inplace_update_shape_mismatch_RHS_df():
+    gdf = cudf.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
+    with pytest.raises(ValueError, match="shape mismatch:"):
+        gdf.iloc[[0, 2]] = cudf.DataFrame(
+            {"x": [10, 20]}, index=cudf.Index([0, 2])
+        )
+
+
+@pytest.mark.parametrize(
+    "array,is_error",
+    [
+        (cupy.arange(20, 40).reshape(-1, 2), False),
+        (cupy.arange(20, 50).reshape(-1, 3), True),
+        (np.arange(20, 40).reshape(-1, 2), False),
+        (np.arange(20, 30).reshape(-1, 1), False),
+        (cupy.arange(20, 30).reshape(-1, 1), False),
+    ],
+)
+def test_dataframe_indexing_setitem_np_cp_array(array, is_error):
+    gdf = cudf.DataFrame({"a": range(10), "b": range(10)})
+    pdf = gdf.to_pandas()
+    if not is_error:
+        gdf.loc[:, ["a", "b"]] = array
+        pdf.loc[:, ["a", "b"]] = cupy.asnumpy(array)
+
+        assert_eq(gdf, pdf)
+    else:
+        assert_exceptions_equal(
+            lfunc=pdf.loc.__setitem__,
+            rfunc=gdf.loc.__setitem__,
+            lfunc_args_and_kwargs=(
+                [(slice(None, None, None), ["a", "b"]), cupy.asnumpy(array)],
+                {},
+            ),
+            rfunc_args_and_kwargs=(
+                [(slice(None, None, None), ["a", "b"]), array],
+                {},
+            ),
+            compare_error_message=False,
+            expected_error_message="shape mismatch: value array of shape "
+            "(10, 3) could not be broadcast to indexing "
+            "result of shape (10, 2)",
+        )

From 14b51693c43fcc376576ceb347a7ca748fa43d32 Mon Sep 17 00:00:00 2001
From: Gera Shegalov <gshegalov@nvidia.com>
Date: Wed, 4 May 2022 21:32:07 -0700
Subject: [PATCH 19/23] Enable ccache for cudfjni build in Docker (#10790)

This PR enables ccache support for `./build.sh clean cudfjar`.

ccache 4.6 is built during image creation because the ccache version available via `yum install` does not cache nvcc-compiled binaries.

It's enabled by default for build.sh, and a repeated no-change build
```bash
PARALLEL_LEVEL=6 SKIP_TESTS=true time ./build.sh clean cudfjar
```
succeeds in 1.5 min on my machine. To disable it, set `CCACHE_DISABLE=1` in the environment.

It's not enabled for ./java/ci/build-in-docker.sh

Signed-off-by: Gera Shegalov <gera@apache.org>

Authors:
  - Gera Shegalov (https://github.com/gerashegalov)

Approvers:
  - Jason Lowe (https://github.com/jlowe)

URL: https://github.com/rapidsai/cudf/pull/10790
---
 build.sh                   | 19 +++++++++++++++++--
 java/ci/Dockerfile.centos7 | 20 +++++++++++++++++++-
 java/pom.xml               |  2 ++
 3 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/build.sh b/build.sh
index 48182ca1a6f..ab3bd0e7a89 100755
--- a/build.sh
+++ b/build.sh
@@ -112,16 +112,22 @@ function buildLibCudfJniInDocker {
     local localMavenRepo=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"}
     local workspaceRepoDir="$workspaceDir/cudf"
     local workspaceMavenRepoDir="$workspaceDir/.m2/repository"
+    local workspaceCcacheDir="$workspaceDir/.ccache"
     mkdir -p "$CUDF_JAR_JAVA_BUILD_DIR/libcudf-cmake-build"
+    mkdir -p "$HOME/.ccache" "$HOME/.m2"
     nvidia-docker build \
         -f java/ci/Dockerfile.centos7 \
         --build-arg CUDA_VERSION=${cudaVersion} \
         -t $imageName .
     nvidia-docker run -it -u $(id -u):$(id -g) --rm \
+        -e PARALLEL_LEVEL \
+        -e CCACHE_DISABLE \
+        -e CCACHE_DIR="$workspaceCcacheDir" \
         -v "/etc/group:/etc/group:ro" \
         -v "/etc/passwd:/etc/passwd:ro" \
         -v "/etc/shadow:/etc/shadow:ro" \
         -v "/etc/sudoers.d:/etc/sudoers.d:ro" \
+        -v "$HOME/.ccache:$workspaceCcacheDir:rw" \
         -v "$REPODIR:$workspaceRepoDir:rw" \
         -v "$localMavenRepo:$workspaceMavenRepoDir:rw" \
         --workdir "$workspaceRepoDir/java/target/libcudf-cmake-build" \
@@ -129,11 +135,16 @@ function buildLibCudfJniInDocker {
         scl enable devtoolset-9 \
             "cmake $workspaceRepoDir/cpp \
                 -G${CMAKE_GENERATOR} \
+                -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+                -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+                -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
+                -DCMAKE_CXX_LINKER_LAUNCHER=ccache \
                 -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
                 -DCUDA_STATIC_RUNTIME=ON \
                 -DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \
-                -DCMAKE_INSTALL_PREFIX==/usr/local/rapids \
-                -DUSE_NVTX=ON -DCUDF_USE_ARROW_STATIC=ON \
+                -DCMAKE_INSTALL_PREFIX=/usr/local/rapids \
+                -DUSE_NVTX=ON \
+                -DCUDF_USE_ARROW_STATIC=ON \
                 -DCUDF_ENABLE_ARROW_S3=OFF \
                 -DBUILD_TESTS=OFF \
                 -DPER_THREAD_DEFAULT_STREAM=ON \
@@ -145,6 +156,10 @@ function buildLibCudfJniInDocker {
                 -Dmaven.repo.local=$workspaceMavenRepoDir \
                 -DskipTests=${SKIP_TESTS:-false} \
                 -Dparallel.level=${PARALLEL_LEVEL} \
+                -Dcmake.ccache.opts='-DCMAKE_C_COMPILER_LAUNCHER=ccache \
+                                     -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+                                     -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
+                                     -DCMAKE_CXX_LINKER_LAUNCHER=ccache' \
                 -DCUDF_CPP_BUILD_DIR=$workspaceRepoDir/java/target/libcudf-cmake-build \
                 -DCUDA_STATIC_RUNTIME=ON \
                 -DPER_THREAD_DEFAULT_STREAM=ON \
diff --git a/java/ci/Dockerfile.centos7 b/java/ci/Dockerfile.centos7
index dc8c0e4a95b..7993804554d 100644
--- a/java/ci/Dockerfile.centos7
+++ b/java/ci/Dockerfile.centos7
@@ -26,8 +26,9 @@ ARG CUDA_VERSION=11.5.0
 FROM gpuci/cuda:$CUDA_VERSION-devel-centos7
 
 ### Install basic requirements
+ARG DEVTOOLSET_VERSION=9
 RUN yum install -y centos-release-scl
-RUN yum install -y devtoolset-9 epel-release
+RUN yum install -y devtoolset-${DEVTOOLSET_VERSION} epel-release
 RUN yum install -y git zlib-devel maven tar wget patch ninja-build
 
 ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins
@@ -37,4 +38,21 @@ ARG CMAKE_VERSION=3.22.3
 RUN cd /usr/local/ && wget --quiet https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \
    tar zxf cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz && \
    rm cmake-${CMAKE_VERSION}-linux-x86_64.tar.gz
+
 ENV PATH /usr/local/cmake-${CMAKE_VERSION}-linux-x86_64/bin:$PATH
+
+ARG CCACHE_VERSION=4.6
+RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/ccache-${CCACHE_VERSION}.tar.gz && \
+   tar zxf ccache-${CCACHE_VERSION}.tar.gz && \
+   rm ccache-${CCACHE_VERSION}.tar.gz && \
+   cd ccache-${CCACHE_VERSION} && \
+   mkdir build && \
+   cd build && \
+   scl enable devtoolset-${DEVTOOLSET_VERSION} \
+      "cmake .. \
+         -DCMAKE_BUILD_TYPE=Release \
+         -DZSTD_FROM_INTERNET=ON \
+         -DREDIS_STORAGE_BACKEND=OFF && \
+      cmake --build . --parallel ${PARALLEL_LEVEL} --target install" && \
+   cd ../.. && \
+   rm -rf ccache-${CCACHE_VERSION}
diff --git a/java/pom.xml b/java/pom.xml
index 50b6ca59440..31a79ec9801 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -173,6 +173,7 @@
         <arrow.version>0.15.1</arrow.version>
         <parallel.level>4</parallel.level>
         <CUDF_CPP_BUILD_DIR/>
+        <cmake.ccache.opts/>
     </properties>
 
     <profiles>
@@ -382,6 +383,7 @@
                                       failonerror="true"
                                       executable="cmake">
                                     <arg value="${basedir}/src/main/native"/>
+                                    <arg line="${cmake.ccache.opts}"/>
                                     <arg value="-DCUDA_STATIC_RUNTIME=${CUDA_STATIC_RUNTIME}" />
                                     <arg value="-DPER_THREAD_DEFAULT_STREAM=${PER_THREAD_DEFAULT_STREAM}" />
                                     <arg value="-DUSE_GDS=${USE_GDS}" />

From d9949055c5ad34e05b62d7dc90225cdbc4ab9184 Mon Sep 17 00:00:00 2001
From: Yunsong Wang <yunsongw@nvidia.com>
Date: Thu, 5 May 2022 09:17:51 -0400
Subject: [PATCH 20/23] Update `cuco` git tag (#10788)

Closes https://github.com/rapidsai/cudf/issues/10572

This PR fetches the changes in https://github.com/NVIDIA/cuCollections/pull/151 to avoid launching kernels with 0 threads.

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Mark Harris (https://github.com/harrism)

URL: https://github.com/rapidsai/cudf/pull/10788
---
 cpp/cmake/thirdparty/get_cucollections.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake
index 5232821d113..332b0d9dc96 100644
--- a/cpp/cmake/thirdparty/get_cucollections.cmake
+++ b/cpp/cmake/thirdparty/get_cucollections.cmake
@@ -22,7 +22,7 @@ function(find_and_configure_cucollections)
     GLOBAL_TARGETS cuco::cuco
     BUILD_EXPORT_SET cudf-exports
     CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections
-    GIT_TAG fb58a38701f1c24ecfe07d8f1f208bbe80930da5
+    GIT_TAG 8b15f06f38d034e815bc72045ca3403787f75e07
     EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS}
     OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF"
   )

From e52a1eb023260fdbc8b5e2697668aca39d04ca8f Mon Sep 17 00:00:00 2001
From: MithunR <mythrocks@gmail.com>
Date: Thu, 5 May 2022 09:45:23 -0700
Subject: [PATCH 21/23] Segmented `apply_boolean_mask` for `LIST` columns
 (#10773)

Fixes #10650.

This commit introduces an `apply_boolean_mask()` method that interprets a boolean `LIST` column as a filter, to select elements from an arbitrary `LIST` input column.
E.g.
```c++
auto const input    = lcw<int32_t>{ {0,1,2}, {3,4}, {5,6,7}, {8,9} };
auto const selector = lcw<bool>   { {0,1,1}, {1,0}, {1,1,1}, {0,0} };
auto const results  = apply_boolean_mask( lists_column_view{input}, lists_column_view{selector} );
// results          == { {1,2}, {3}, {5,6,7}, {} };
```

The `input` and the `boolean_mask` must have the same number of rows, and each `boolean_mask` row must have the same number of elements as the corresponding `input` row.
Each output row copies the elements from the input where the boolean mask is non-null and true.

Authors:
  - MithunR (https://github.com/mythrocks)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/10773
---
 cpp/CMakeLists.txt                            |   1 +
 .../cudf/lists/detail/stream_compaction.hpp   |  37 +++
 cpp/include/cudf/lists/stream_compaction.hpp  |  58 +++++
 cpp/src/lists/apply_boolean_mask.cu           | 105 ++++++++
 cpp/tests/CMakeLists.txt                      |   1 +
 cpp/tests/lists/apply_boolean_mask_test.cpp   | 233 ++++++++++++++++++
 6 files changed, 435 insertions(+)
 create mode 100644 cpp/include/cudf/lists/detail/stream_compaction.hpp
 create mode 100644 cpp/include/cudf/lists/stream_compaction.hpp
 create mode 100644 cpp/src/lists/apply_boolean_mask.cu
 create mode 100644 cpp/tests/lists/apply_boolean_mask_test.cpp

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 7870366b714..42a434ba53d 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -362,6 +362,7 @@ add_library(
   src/join/mixed_join_size_kernel_nulls.cu
   src/join/mixed_join_size_kernels_semi.cu
   src/join/semi_join.cu
+  src/lists/apply_boolean_mask.cu
   src/lists/contains.cu
   src/lists/combine/concatenate_list_elements.cu
   src/lists/combine/concatenate_rows.cu
diff --git a/cpp/include/cudf/lists/detail/stream_compaction.hpp b/cpp/include/cudf/lists/detail/stream_compaction.hpp
new file mode 100644
index 00000000000..0e9f2ec16c4
--- /dev/null
+++ b/cpp/include/cudf/lists/detail/stream_compaction.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/column/column.hpp>
+#include <cudf/lists/lists_column_view.hpp>
+
+#include <rmm/mr/device/device_memory_resource.hpp>
+
+namespace cudf::lists::detail {
+
+/**
+ * @copydoc cudf::lists::apply_boolean_mask(lists_column_view const&, lists_column_view const&,
+ * rmm::mr::device_memory_resource*)
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ */
+std::unique_ptr<column> apply_boolean_mask(
+  lists_column_view const& input,
+  lists_column_view const& boolean_mask,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+}  // namespace cudf::lists::detail
diff --git a/cpp/include/cudf/lists/stream_compaction.hpp b/cpp/include/cudf/lists/stream_compaction.hpp
new file mode 100644
index 00000000000..c7a9731eb65
--- /dev/null
+++ b/cpp/include/cudf/lists/stream_compaction.hpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/column/column.hpp>
+#include <cudf/lists/lists_column_view.hpp>
+
+#include <rmm/mr/device/device_memory_resource.hpp>
+
+namespace cudf::lists {
+
+/**
+ * @brief Filters elements in each row of `input` LIST column using `boolean_mask`
+ * LIST of booleans as a mask.
+ *
+ * Given an input `LIST` column and a list-of-bools column, the function produces
+ * a new `LIST` column of the same type as `input`, where each element is copied
+ * from the input row *only* if the corresponding `boolean_mask` is non-null and `true`.
+ *
+ * E.g.
+ * @code{.pseudo}
+ * input        = { {0,1,2}, {3,4}, {5,6,7}, {8,9} };
+ * boolean_mask = { {0,1,1}, {1,0}, {1,1,1}, {0,0} };
+ * results      = { {1,2},   {3},   {5,6,7}, {} };
+ * @endcode
+ *
+ * `input` and `boolean_mask` must have the same number of rows and matching row lengths.
+ * The output column has the same number of rows as the input column.
+ * An element is copied to an output row *only* if the corresponding boolean_mask element is `true`.
+ * An output row is invalid only if the input row is invalid.
+ *
+ * @throws cudf::logic_error if `boolean_mask` is not a "lists of bools" column
+ * @throws cudf::logic_error if `input` and `boolean_mask` have different number of rows
+ *
+ * @param input The input list column view to be filtered
+ * @param boolean_mask A nullable list of bools column used to filter `input` elements
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return List column of the same type as `input`, containing filtered list rows
+ */
+std::unique_ptr<column> apply_boolean_mask(
+  lists_column_view const& input,
+  lists_column_view const& boolean_mask,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+}  // namespace cudf::lists
diff --git a/cpp/src/lists/apply_boolean_mask.cu b/cpp/src/lists/apply_boolean_mask.cu
new file mode 100644
index 00000000000..670e99dfbc8
--- /dev/null
+++ b/cpp/src/lists/apply_boolean_mask.cu
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/copy.hpp>
+#include <cudf/detail/fill.hpp>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/detail/null_mask.hpp>
+#include <cudf/detail/reduction_functions.hpp>
+#include <cudf/detail/replace.hpp>
+#include <cudf/detail/stream_compaction.hpp>
+#include <cudf/lists/detail/stream_compaction.hpp>
+#include <cudf/lists/stream_compaction.hpp>
+#include <cudf/utilities/bit.hpp>
+
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/reduce.h>
+
+namespace cudf::lists {
+namespace detail {
+
+std::unique_ptr<column> apply_boolean_mask(lists_column_view const& input,
+                                           lists_column_view const& boolean_mask,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr)
+{
+  CUDF_EXPECTS(boolean_mask.child().type().id() == type_id::BOOL8, "Mask must be of type BOOL8.");
+  CUDF_EXPECTS(input.size() == boolean_mask.size(),
+               "Boolean masks column must have same number of rows as input.");
+  auto const num_rows = input.size();
+
+  if (num_rows == 0) { return cudf::empty_like(input.parent()); }
+
+  auto constexpr offset_data_type = data_type{type_id::INT32};
+
+  auto const boolean_mask_sliced_child = boolean_mask.get_sliced_child(stream);
+
+  auto const make_filtered_child = [&] {
+    auto filtered =
+      cudf::detail::apply_boolean_mask(
+        cudf::table_view{{input.get_sliced_child(stream)}}, boolean_mask_sliced_child, stream, mr)
+        ->release();
+    return std::move(filtered.front());
+  };
+
+  auto const make_output_offsets = [&] {
+    auto boolean_mask_sliced_offsets =
+      cudf::detail::slice(
+        boolean_mask.offsets(), {boolean_mask.offset(), boolean_mask.size() + 1}, stream)
+        .front();
+    auto const sizes       = cudf::reduction::segmented_sum(boolean_mask_sliced_child,
+                                                      boolean_mask_sliced_offsets,
+                                                      offset_data_type,
+                                                      null_policy::EXCLUDE,
+                                                      stream);
+    auto const d_sizes     = column_device_view::create(*sizes, stream);
+    auto const sizes_begin = cudf::detail::make_null_replacement_iterator(*d_sizes, offset_type{0});
+    auto const sizes_end   = sizes_begin + sizes->size();
+    auto output_offsets    = cudf::make_numeric_column(
+      offset_data_type, num_rows + 1, mask_state::UNALLOCATED, stream, mr);
+    auto output_offsets_view = output_offsets->mutable_view();
+
+    // Could have attempted an exclusive_scan(), but it would not compute the last entry.
+    // Instead, inclusive_scan(), followed by writing `0` to the head of the offsets column.
+    thrust::inclusive_scan(rmm::exec_policy(stream),
+                           sizes_begin,
+                           sizes_end,
+                           output_offsets_view.begin<offset_type>() + 1);
+    CUDF_CUDA_TRY(cudaMemsetAsync(
+      output_offsets_view.begin<offset_type>(), 0, sizeof(offset_type), stream.value()));
+    return output_offsets;
+  };
+
+  return cudf::make_lists_column(input.size(),
+                                 make_output_offsets(),
+                                 make_filtered_child(),
+                                 input.null_count(),
+                                 cudf::detail::copy_bitmask(input.parent(), stream, mr),
+                                 stream,
+                                 mr);
+}
+}  // namespace detail
+
+std::unique_ptr<column> apply_boolean_mask(lists_column_view const& input,
+                                           lists_column_view const& boolean_mask,
+                                           rmm::mr::device_memory_resource* mr)
+{
+  return detail::apply_boolean_mask(input, boolean_mask, rmm::cuda_stream_default, mr);
+}
+
+}  // namespace cudf::lists
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 95c54d7596e..eadcd985de3 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -470,6 +470,7 @@ ConfigureTest(AST_TEST ast/transform_tests.cpp)
 # * lists tests ----------------------------------------------------------------------------------
 ConfigureTest(
   LISTS_TEST
+  lists/apply_boolean_mask_test.cpp
   lists/combine/concatenate_list_elements_tests.cpp
   lists/combine/concatenate_rows_tests.cpp
   lists/contains_tests.cpp
diff --git a/cpp/tests/lists/apply_boolean_mask_test.cpp b/cpp/tests/lists/apply_boolean_mask_test.cpp
new file mode 100644
index 00000000000..a5b036210ba
--- /dev/null
+++ b/cpp/tests/lists/apply_boolean_mask_test.cpp
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/null_mask.hpp>
+#include <cudf/lists/extract.hpp>
+#include <cudf/lists/stream_compaction.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+#include <cudf_test/type_lists.hpp>
+
+namespace cudf::test {
+
+using namespace iterators;
+using cudf::lists_column_view;
+using cudf::lists::apply_boolean_mask;
+
+template <typename T>
+using lists    = lists_column_wrapper<T, int32_t>;
+using filter_t = lists_column_wrapper<bool, int32_t>;
+
+template <typename T>
+using fwcw    = fixed_width_column_wrapper<T, int32_t>;
+using offsets = fwcw<int32_t>;
+using strings = strings_column_wrapper;
+
+auto constexpr X = int32_t{0};  // Placeholder for NULL.
+
+struct ApplyBooleanMaskTest : public BaseFixture {
+};
+
+template <typename T>
+struct ApplyBooleanMaskTypedTest : ApplyBooleanMaskTest {
+};
+
+TYPED_TEST_SUITE(ApplyBooleanMaskTypedTest, cudf::test::NumericTypes);
+
+TYPED_TEST(ApplyBooleanMaskTypedTest, StraightLine)
+{
+  using T    = TypeParam;
+  auto input = lists<T>{{0, 1, 2, 3}, {4, 5}, {6, 7, 8, 9}, {0, 1}, {2, 3, 4, 5}, {6, 7}}.release();
+  auto filter = filter_t{{1, 0, 1, 0}, {1, 0}, {1, 0, 1, 0}, {1, 0}, {1, 0, 1, 0}, {1, 0}};
+
+  {
+    // Unsliced.
+    auto filtered = apply_boolean_mask(lists_column_view{*input}, lists_column_view{filter});
+    auto expected = lists<T>{{0, 2}, {4}, {6, 8}, {0}, {2, 4}, {6}};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*filtered, expected);
+  }
+  {
+    // Sliced input: Remove the first row.
+    auto sliced = cudf::slice(*input, {1, input->size()}).front();
+    //           == lists_t {{4, 5}, {6, 7, 8, 9}, {0, 1}, {2, 3, 4, 5}, {6, 7}};
+    auto filter   = filter_t{{0, 1}, {0, 1, 0, 1}, {1, 1}, {0, 1, 0, 1}, {0, 0}};
+    auto filtered = apply_boolean_mask(lists_column_view{sliced}, lists_column_view{filter});
+    auto expected = lists<T>{{5}, {7, 9}, {0, 1}, {3, 5}, {}};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*filtered, expected);
+  }
+}
+
+TYPED_TEST(ApplyBooleanMaskTypedTest, NullElementsInTheListRows)
+{
+  using T = TypeParam;
+  auto input =
+    lists<T>{
+      {0, 1, 2, 3},
+      lists<T>{{X, 5}, null_at(0)},
+      {6, 7, 8, 9},
+      {0, 1},
+      lists<T>{{X, 3, 4, X}, nulls_at({0, 3})},
+      lists<T>{{X, X}, nulls_at({0, 1})},
+    }
+      .release();
+  auto filter = filter_t{{1, 0, 1, 0}, {1, 0}, {1, 0, 1, 0}, {1, 0}, {1, 0, 1, 0}, {1, 0}};
+
+  {
+    // Unsliced.
+    auto filtered = apply_boolean_mask(lists_column_view{*input}, lists_column_view{filter});
+    auto expected = lists<T>{{0, 2},
+                             lists<T>{{X}, null_at(0)},
+                             {6, 8},
+                             {0},
+                             lists<T>{{X, 4}, null_at(0)},
+                             lists<T>{{X}, null_at(0)}};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*filtered, expected);
+  }
+  {
+    // Sliced input: Remove the first row.
+    auto sliced = cudf::slice(*input, {1, input->size()}).front();
+    //           == lists_t {{X, 5}, {6, 7, 8, 9}, {0, 1}, {X, 3, 4, X}, {X, X}};
+    auto filter   = filter_t{{0, 1}, {0, 1, 0, 1}, {1, 1}, {0, 1, 0, 1}, {0, 0}};
+    auto filtered = apply_boolean_mask(lists_column_view{sliced}, lists_column_view{filter});
+    auto expected = lists<T>{{5}, {7, 9}, {0, 1}, lists<T>{{3, X}, null_at(1)}, {}};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*filtered, expected);
+  }
+}
+
+TYPED_TEST(ApplyBooleanMaskTypedTest, NullListRowsInTheInputColumn)
+{
+  using T = TypeParam;
+  auto input =
+    lists<T>{{{0, 1, 2, 3}, {}, {6, 7, 8, 9}, {}, {2, 3, 4, 5}, {6, 7}}, nulls_at({1, 3})}
+      .release();
+  auto filter = filter_t{{1, 0, 1, 0}, {}, {1, 0, 1, 0}, {}, {1, 0, 1, 0}, {1, 0}};
+
+  {
+    // Unsliced.
+    auto filtered = apply_boolean_mask(lists_column_view{*input}, lists_column_view{filter});
+    auto expected = lists<T>{{{0, 2}, {}, {6, 8}, {}, {2, 4}, {6}}, nulls_at({1, 3})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*filtered, expected);
+  }
+  {
+    // Sliced input: Remove the first row.
+    auto sliced = cudf::slice(*input, {1, input->size()}).front();
+    //           == lists_t{{{}, {6, 7, 8, 9}, {}, {2, 3, 4, 5}, {6, 7}}, nulls_at({0,2})};
+    auto filter   = filter_t{{}, {0, 1, 0, 1}, {}, {0, 1, 0, 1}, {0, 0}};
+    auto filtered = apply_boolean_mask(lists_column_view{sliced}, lists_column_view{filter});
+    auto expected = lists<T>{{{}, {7, 9}, {}, {3, 5}, {}}, nulls_at({0, 2})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*filtered, expected);
+  }
+  {
+    // Sliced input: Remove the first two rows.
+    auto sliced = cudf::slice(*input, {2, input->size()}).front();
+    //           == lists_t{{{6, 7, 8, 9}, {}, {2, 3, 4, 5}, {6, 7}}, null_at(1)};
+    auto filter   = filter_t{{0, 1, 0, 1}, {}, {0, 1, 0, 1}, {0, 0}};
+    auto filtered = apply_boolean_mask(lists_column_view{sliced}, lists_column_view{filter});
+    auto expected = lists<T>{{{7, 9}, {}, {3, 5}, {}}, null_at(1)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*filtered, expected);
+  }
+}
+
+TYPED_TEST(ApplyBooleanMaskTypedTest, StructInput)
+{
+  using T    = TypeParam;
+  using fwcw = fwcw<T>;
+
+  auto constexpr num_input_rows = 7;
+  auto const input              = [] {
+    auto child_num             = fwcw{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+    auto child_str             = strings{"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
+    auto const null_mask_begin = null_at(5);
+    auto const null_mask_end   = null_mask_begin + num_input_rows;
+    return cudf::make_lists_column(num_input_rows,
+                                   offsets{0, 2, 3, 6, 6, 8, 8, 10}.release(),
+                                   structs_column_wrapper{{child_num, child_str}}.release(),
+                                   1,
+                                   detail::make_null_mask(null_mask_begin, null_mask_end));
+  }();
+  {
+    // Unsliced.
+    // The input should now look as follows: (String child dropped for brevity.)
+    // Input:                     {[0, 1], [2], [3, 4, 5], [], [6, 7], [], [8, 9]}
+    auto const filter   = filter_t{{1, 1}, {0}, {0, 1, 0}, {}, {1, 0}, {}, {0, 1}};
+    auto const result   = apply_boolean_mask(lists_column_view{*input}, lists_column_view{filter});
+    auto const expected = [] {
+      auto child_num             = fwcw{0, 1, 4, 6, 9};
+      auto child_str             = strings{"0", "1", "4", "6", "9"};
+      auto const null_mask_begin = null_at(5);
+      auto const null_mask_end   = null_mask_begin + num_input_rows;
+      return cudf::make_lists_column(num_input_rows,
+                                     offsets{0, 2, 2, 3, 3, 4, 4, 5}.release(),
+                                     structs_column_wrapper{{child_num, child_str}}.release(),
+                                     1,
+                                     detail::make_null_mask(null_mask_begin, null_mask_end));
+    }();
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected);
+  }
+  {
+    // Sliced. Remove the first row.
+    auto const sliced_input = cudf::slice(*input, {1, input->size()}).front();
+    // The input should now look as follows: (String child dropped for brevity.)
+    // Input:                   {[2], [3, 4, 5], [], [6, 7], [], [8, 9]}
+    auto const filter = filter_t{{0}, {0, 1, 0}, {}, {1, 0}, {}, {0, 1}};
+    auto const result =
+      apply_boolean_mask(lists_column_view{sliced_input}, lists_column_view{filter});
+    auto const expected = [] {
+      auto child_num             = fwcw{4, 6, 9};
+      auto child_str             = strings{"4", "6", "9"};
+      auto const null_mask_begin = null_at(4);
+      auto const null_mask_end   = null_mask_begin + num_input_rows;
+      return cudf::make_lists_column(num_input_rows - 1,
+                                     offsets{0, 0, 1, 1, 2, 2, 3}.release(),
+                                     structs_column_wrapper{{child_num, child_str}}.release(),
+                                     1,
+                                     detail::make_null_mask(null_mask_begin, null_mask_end));
+    }();
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected);
+  }
+}
+
+TEST_F(ApplyBooleanMaskTest, Trivial)
+{
+  auto const input  = lists<int32_t>{};
+  auto const filter = filter_t{};
+  auto const result = apply_boolean_mask(lists_column_view{input}, lists_column_view{filter});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, lists<int32_t>{});
+}
+
+TEST_F(ApplyBooleanMaskTest, Failure)
+{
+  {
+    // Invalid mask type.
+    auto const input  = lists<int32_t>{{1, 2, 3}, {4, 5, 6}};
+    auto const filter = lists<int32_t>{{0, 0, 0}};
+    CUDF_EXPECT_THROW_MESSAGE(
+      apply_boolean_mask(lists_column_view{input}, lists_column_view{filter}),
+      "Mask must be of type BOOL8.");
+  }
+  {
+    // Mismatched number of rows.
+    auto const input  = lists<int32_t>{{1, 2, 3}, {4, 5, 6}};
+    auto const filter = filter_t{{0, 0, 0}};
+    CUDF_EXPECT_THROW_MESSAGE(
+      apply_boolean_mask(lists_column_view{input}, lists_column_view{filter}),
+      "Boolean masks column must have same number of rows as input.");
+  }
+}
+}  // namespace cudf::test

From ee26fbe42dfe2ebd37073afbf3559efff3997eff Mon Sep 17 00:00:00 2001
From: Xavier Simmons <cheinger@users.noreply.github.com>
Date: Thu, 5 May 2022 13:32:56 -0700
Subject: [PATCH 22/23] Optimize `left_semi_join` by materializing the gather
 mask (#10511)

Closes https://github.com/rapidsai/cudf/issues/10464

Updates the `left_semi_join` to materialize the gather mask instead of generating it via a transform iterator.

Calling `map.contains` inside the `gather` call reduced occupancy because it increased register usage. As a result, explicitly materializing the gather mask is faster.

Authors:
  - Xavier Simmons (https://github.com/cheinger)
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Jake Hemstad (https://github.com/jrhemstad)
  - Yunsong Wang (https://github.com/PointKernel)

URL: https://github.com/rapidsai/cudf/pull/10511
---
 cpp/src/join/semi_join.cu | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index 687e553fefd..b7b33000707 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -137,19 +137,28 @@ std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_semi_anti_join(
   auto gather_map =
     std::make_unique<rmm::device_uvector<cudf::size_type>>(left_num_rows, stream, mr);
 
-  // gather_map_end will be the end of valid data in gather_map
-  auto gather_map_end = thrust::copy_if(
+  rmm::device_uvector<bool> flagged(left_num_rows, stream, mr);
+  auto flagged_d = flagged.data();
+
+  auto counting_iter = thrust::counting_iterator<size_type>(0);
+  thrust::for_each(
     rmm::exec_policy(stream),
-    thrust::make_counting_iterator(0),
-    thrust::make_counting_iterator(left_num_rows),
-    gather_map->begin(),
-    [hash_table_view, join_type_boolean, hash_probe, equality_probe] __device__(
-      size_type const idx) {
-      // Look up this row. The hash function used here needs to map a (left) row index to the hash
-      // of the row, so it's a row hash. The equality check needs to verify
-      return hash_table_view.contains(idx, hash_probe, equality_probe) == join_type_boolean;
+    counting_iter,
+    counting_iter + left_num_rows,
+    [flagged_d, hash_table_view, join_type_boolean, hash_probe, equality_probe] __device__(
+      const size_type idx) {
+      flagged_d[idx] =
+        hash_table_view.contains(idx, hash_probe, equality_probe) == join_type_boolean;
     });
 
+  // gather_map_end will be the end of valid data in gather_map
+  auto gather_map_end =
+    thrust::copy_if(rmm::exec_policy(stream),
+                    counting_iter,
+                    counting_iter + left_num_rows,
+                    gather_map->begin(),
+                    [flagged_d] __device__(size_type const idx) { return flagged_d[idx]; });
+
   auto join_size = thrust::distance(gather_map->begin(), gather_map_end);
   gather_map->resize(join_size, stream);
   return gather_map;

From 4ce7b6516764173034592cbefc0d5429c2e12a9f Mon Sep 17 00:00:00 2001
From: Mike Wilson <hyperbolic2346@users.noreply.github.com>
Date: Thu, 5 May 2022 16:45:23 -0400
Subject: [PATCH 23/23] simplifying skiprows test in test_orc.py (#10783)

@bdice helped me look into an issue with deprecation warnings in #10772 and, in the process, he pointed out that the skiprows test was unnecessarily complex. We looked into it and it appeared to be a copy/paste of a more complex test. He asked that I make this PR to simplify this test, but all the credit for noticing and fixing it is his.

Authors:
  - Mike Wilson (https://github.com/hyperbolic2346)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/10783
---
 python/cudf/cudf/tests/test_orc.py | 37 +++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index c28358f5fa0..e94888fc770 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -301,27 +301,36 @@ def test_orc_read_rows(datadir, skiprows, num_rows):
     assert_eq(pdf, gdf)
 
 
-def test_orc_read_skiprows(tmpdir):
+def test_orc_read_skiprows():
     buff = BytesIO()
-    df = pd.DataFrame(
-        {"a": [1, 0, 1, 0, None, 1, 1, 1, 0, None, 0, 0, 1, 1, 1, 1]},
-        dtype=pd.BooleanDtype(),
-    )
+    data = [
+        True,
+        False,
+        True,
+        False,
+        None,
+        True,
+        True,
+        True,
+        False,
+        None,
+        False,
+        False,
+        True,
+        True,
+        True,
+        True,
+    ]
     writer = pyorc.Writer(buff, pyorc.Struct(a=pyorc.Boolean()))
-    tuples = list(
-        map(
-            lambda x: (None,) if x[0] is pd.NA else (bool(x[0]),),
-            list(df.itertuples(index=False, name=None)),
-        )
-    )
-    writer.writerows(tuples)
+    writer.writerows([(d,) for d in data])
     writer.close()
 
+    # Use skiprows=10: the boolean-specific bug this test guards against
+    # did not reproduce with other sizes of data.
     skiprows = 10
 
-    expected = cudf.read_orc(buff)[skiprows::].reset_index(drop=True)
+    expected = cudf.read_orc(buff)[skiprows:].reset_index(drop=True)
     got = cudf.read_orc(buff, skiprows=skiprows)
-
     assert_eq(expected, got)