From 8c825f53feba3582a4b22d759c054f1b71d1e095 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 12 Apr 2022 13:39:40 -0700 Subject: [PATCH 01/33] Remove `concurrent_unordered_multimap`. (#10642) The `concurrent_unordered_multimap` is no longer used in libcudf. It has been replaced by `cuco::static_multimap`. The majority of the refactoring was done in PRs #8934 and #9704. A similar effort is in progress for `concurrent_unordered_map` and `cuco::static_map` in #9666 (and may depend on porting some optimizations from libcudf to cuco -- need to look into this before doing a direct replacement). This partially resolves issue #10401. cc: @PointKernel @vyasr Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Vyas Ramasubramani (https://github.com/vyasr) - Jake Hemstad (https://github.com/jrhemstad) URL: https://github.com/rapidsai/cudf/pull/10642 --- .../hash/concurrent_unordered_multimap.cuh | 592 ------------------ cpp/tests/CMakeLists.txt | 2 +- cpp/tests/hash_map/multimap_test.cu | 95 --- 3 files changed, 1 insertion(+), 688 deletions(-) delete mode 100644 cpp/src/hash/concurrent_unordered_multimap.cuh delete mode 100644 cpp/tests/hash_map/multimap_test.cu diff --git a/cpp/src/hash/concurrent_unordered_multimap.cuh b/cpp/src/hash/concurrent_unordered_multimap.cuh deleted file mode 100644 index aa5b8db393f..00000000000 --- a/cpp/src/hash/concurrent_unordered_multimap.cuh +++ /dev/null @@ -1,592 +0,0 @@ -/* - * Copyright (c) 2017-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef CONCURRENT_UNORDERED_MULTIMAP_CUH -#define CONCURRENT_UNORDERED_MULTIMAP_CUH - -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include - -/** - * Does support concurrent insert, but not concurrent insert and probing. - * - * @note The user is responsible for the following stream semantics: - * - Either the same stream should be used to create the map as is used by the kernels that access - * it, or - * - the stream used to create the map should be synchronized before it is accessed from a different - * stream or from host code. - * - * TODO: - * - add constructor that takes pointer to hash_table to avoid allocations - */ -template , - typename Equality = equal_to, - typename Allocator = managed_allocator>, - bool count_collisions = false> -class concurrent_unordered_multimap { - public: - using hasher = Hasher; - using key_equal = Equality; - using allocator_type = Allocator; - using key_type = Key; - using value_type = thrust::pair; - using mapped_type = Element; - using iterator = cycle_iterator_adapter; - using const_iterator = const cycle_iterator_adapter; - - private: - union pair2longlong { - unsigned long long int longlong; - value_type pair; - }; - - public: - /** - * @brief Factory to construct a new concurrent unordered multimap. 
- * - * Returns a `std::unique_ptr` to a new concurrent unordered multimap object. - * The map is non-owning and trivially copyable and should be passed by value - * into kernels. The `unique_ptr` contains a custom deleter that will free the - * map's contents. - * - * @note The implementation of this multimap uses sentinel values to - * indicate an entry in the hash table that is empty, i.e., if a hash bucket - * is empty, the pair residing there will be equal to (unused_key, - * unused_element). As a result, attempting to insert a key equal to - * `unused_key` results in undefined behavior. - * - * @note All allocations, kernels and copies in the constructor take place - * on stream but the constructor does not synchronize the stream. It is the user's - * responsibility to synchronize or use the same stream to access the map. - * - * @param capacity The maximum number of pairs the map may hold. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param init Indicates if the map should be initialized with the unused - * key/values - * @param hash_function The hash function to use for hashing keys - * @param equal The equality comparison function for comparing if two keys are - * equal - * @param allocator The allocator to use for allocation of the map's storage - */ - static auto create(size_type capacity, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - const bool init = true, - const Hasher& hash_function = hasher(), - const Equality& equal = key_equal(), - const allocator_type& allocator = allocator_type()) - { - CUDF_FUNC_RANGE(); - using Self = concurrent_unordered_multimap; - - // Note: need `(*p).destroy` instead of `p->destroy` here - // due to compiler bug: https://github.com/rapidsai/cudf/pull/5692 - auto deleter = [stream](Self* p) { (*p).destroy(stream); }; - - return std::unique_ptr>{ - new Self(capacity, init, hash_function, equal, allocator, stream), deleter}; - } - - /** - * @brief Frees the contents of the map and destroys the map object. - * - * This function is invoked as the deleter of the `std::unique_ptr` returned - * from the `create()` factory function. - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ - void destroy(rmm::cuda_stream_view stream = rmm::cuda_stream_default) - { - m_allocator.deallocate(m_hashtbl_values, m_hashtbl_capacity, stream); - delete this; - } - - /** - * @brief Returns an iterator to the first element in the map - * - * @note When using the managed allocator, host code that calls this function - * should ensure the stream used for `create()` is appropriately synchronized. - * - * @note When called in a device code, user should make sure that it should - * either be running on the same stream as create(), or the accessing stream - * should be appropriately synchronized with the creating stream. - * - * @returns iterator to the first element in the map. - */ - __host__ __device__ iterator begin() - { - return iterator(m_hashtbl_values, m_hashtbl_values + m_hashtbl_size, m_hashtbl_values); - } - - /** - * @brief Returns a constant iterator to the first element in the map - * - * @note When using the managed allocator, host code that calls this function - * should ensure the stream used for `create()` is appropriately synchronized. 
- * - * @note When called in a device code, user should make sure that it should - * either be running on the same stream as create(), or the accessing stream - * should be appropriately synchronized with the creating stream. - * - * @returns constant iterator to the first element in the map. - */ - __host__ __device__ const_iterator begin() const - { - return const_iterator(m_hashtbl_values, m_hashtbl_values + m_hashtbl_size, m_hashtbl_values); - } - - /** - * @brief Returns an iterator to the one past the last element in the map - * - * @note When using the managed allocator, host code that calls this function - * should ensure the stream used for `create()` is appropriately synchronized. - * - * @note When called in a device code, user should make sure that it should - * either be running on the same stream as create(), or the accessing stream - * should be appropriately synchronized with the creating stream. - * - * @returns iterator to the one past the last element in the map. - */ - __host__ __device__ iterator end() - { - return iterator( - m_hashtbl_values, m_hashtbl_values + m_hashtbl_size, m_hashtbl_values + m_hashtbl_size); - } - - /** - * @brief Returns a constant iterator to the one past the last element in the map - * - * @note When using the managed allocator, host code that calls this function - * should ensure the stream used for `create()` is appropriately synchronized. - * - * @note When called in a device code, user should make sure that it should - * either be running on the same stream as create(), or the accessing stream - * should be appropriately synchronized with the creating stream. - * - * @returns constant iterator to the one past the last element in the map. - */ - __host__ __device__ const_iterator end() const - { - return const_iterator( - m_hashtbl_values, m_hashtbl_values + m_hashtbl_size, m_hashtbl_values + m_hashtbl_size); - } - - __forceinline__ static constexpr __host__ __device__ key_type get_unused_key() - { - return unused_key; - } - - /** - * @brief Computes a hash value for a key - * - * @param[in] the_key The key to compute a hash for - * @tparam hash_value_type The datatype of the hash value - * - * @returns The hash value for the key - */ - template - __forceinline__ __host__ __device__ hash_value_type get_hash(const key_type& the_key) const - { - return m_hf(the_key); - } - - /** - * @brief Computes the destination hash map partition for a key - * - * @param[in] the_key The key to search for - * @param[in] num_parts The total number of partitions in the partitioned - * hash table - * @param[in] precomputed_hash A flag indicating whether or not a precomputed - * hash value is passed in - * @param[in] precomputed_hash_value A precomputed hash value to use for - * determining the write location of the key into the hash map instead of - * computing the the hash value directly from the key - * @tparam hash_value_type The datatype of the hash value - * - * @returns The destination hash table partition for the specified key - */ - template - __forceinline__ __host__ __device__ int get_partition( - const key_type& the_key, - const int num_parts = 1, - bool precomputed_hash = false, - hash_value_type precomputed_hash_value = 0) const - { - hash_value_type hash_value{0}; - - // If a precomputed hash value has been passed in, then use it to determine - // the location of the key - if (true == precomputed_hash) { - hash_value = precomputed_hash_value; - } - // Otherwise, compute the hash value from the key - else { - hash_value = m_hf(the_key); - } - - 
size_type hash_tbl_idx = hash_value % m_hashtbl_size; - - const size_type partition_size = m_hashtbl_size / num_parts; - - int dest_part = hash_tbl_idx / partition_size; - // Note that if m_hashtbl_size % num_parts != 0 then dest_part can be - // num_parts for the last few elements and we remap that to the - // num_parts-1 partition - if (dest_part == num_parts) dest_part = num_parts - 1; - - return dest_part; - } - - /** - * @brief Inserts a (key, value) pair into the hash map - * - * @param[in] x The (key, value) pair to insert - * @param[in] precomputed_hash A flag indicating whether or not a precomputed - * hash value is passed in - * @param[in] precomputed_hash_value A precomputed hash value to use for - * determining the write location of the key into the hash map instead of - * computing the the hash value directly from the key - * @param[in] keys_are_equal An optional functor for comparing if two keys are - * equal - * @tparam hash_value_type The datatype of the hash value - * @tparam comparison_type The type of the key comparison functor - * - * @returns An iterator to the newly inserted (key, value) pair - */ - template - __forceinline__ __device__ iterator insert(const value_type& x, - bool precomputed_hash = false, - hash_value_type precomputed_hash_value = 0, - comparison_type keys_are_equal = key_equal()) - { - const size_type hashtbl_size = m_hashtbl_size; - value_type* hashtbl_values = m_hashtbl_values; - - hash_value_type hash_value{0}; - - // If a precomputed hash value has been passed in, then use it to determine - // the write location of the new key - if (true == precomputed_hash) { - hash_value = precomputed_hash_value; - } - // Otherwise, compute the hash value from the new key - else { - hash_value = m_hf(x.first); - } - - size_type hash_tbl_idx = hash_value % hashtbl_size; - - value_type* it = 0; - - size_type attempt_counter{0}; - - while (0 == it) { - value_type* tmp_it = hashtbl_values + hash_tbl_idx; - - if (std::numeric_limits::is_integer && - std::numeric_limits::is_integer && - sizeof(unsigned long long int) == sizeof(value_type)) { - pair2longlong converter = {0ull}; - converter.pair = thrust::make_pair(unused_key, unused_element); - const unsigned long long int unused = converter.longlong; - converter.pair = x; - const unsigned long long int value = converter.longlong; - const unsigned long long int old_val = - atomicCAS(reinterpret_cast(tmp_it), unused, value); - if (old_val == unused) { - it = tmp_it; - } else if (count_collisions) { - atomicAdd(&m_collisions, 1); - } - } else { - const key_type old_key = atomicCAS(&(tmp_it->first), unused_key, x.first); - - if (keys_are_equal(unused_key, old_key)) { - (m_hashtbl_values + hash_tbl_idx)->second = x.second; - it = tmp_it; - } else if (count_collisions) { - atomicAdd(&m_collisions, 1); - } - } - - hash_tbl_idx = (hash_tbl_idx + 1) % hashtbl_size; - - attempt_counter++; - if (attempt_counter > hashtbl_size) { - printf("Attempted to insert to multimap but the map is full!\n"); - return this->end(); - } - } - - return iterator(m_hashtbl_values, m_hashtbl_values + hashtbl_size, it); - } - - /** - * @brief Inserts a (key, value) pair into the hash map partition. This - * is useful when building the hash table in multiple passes, one - * contiguous partition at a time, or when building the hash table - * distributed between multiple devices. 
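As a worked example of the routing `get_partition()` performs for such a partitioned build: with `m_hashtbl_size = 10` and `num_parts = 3`, `partition_size = 10 / 3 = 3`, so a hash landing at table index 9 computes `dest_part = 9 / 3 = 3`; the final clamp remaps this to `num_parts - 1 = 2`, i.e. the tail slots left over by the integer division always belong to the last partition. `insert_part()` below then returns `end()` without inserting whenever `dest_part` differs from the caller's `part`.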
- * - * @param[in] x The (key, value) pair to insert - * @param[in] part The partition number for the partitioned hash table build - * @param[in] num_parts The total number of partitions in the partitioned - * hash table - * @param[in] precomputed_hash A flag indicating whether or not a precomputed - * hash value is passed in - * @param[in] precomputed_hash_value A precomputed hash value to use for - * determining the write location of the key into the hash map instead of - * computing the the hash value directly from the key - * @param[in] keys_are_equal An optional functor for comparing if two keys are - * equal - * @tparam hash_value_type The datatype of the hash value - * @tparam comparison_type The type of the key comparison functor - * - * @returns An iterator to the newly inserted (key, value) pair - */ - template - __forceinline__ __device__ iterator insert_part(const value_type& x, - const int part = 0, - const int num_parts = 1, - bool precomputed_hash = false, - hash_value_type precomputed_hash_value = 0, - comparison_type keys_are_equal = key_equal()) - { - hash_value_type hash_value{0}; - - // If a precomputed hash value has been passed in, then use it to determine - // the write location of the new key - if (true == precomputed_hash) { - hash_value = precomputed_hash_value; - } - // Otherwise, compute the hash value from the new key - else { - hash_value = m_hf(x.first); - } - - // Find the destination partition index - int dest_part = get_partition(x.first, num_parts, true, hash_value); - - // Only insert if the key belongs to the specified partition - if (dest_part != part) - return end(); - else - return insert(x, true, hash_value, keys_are_equal); - } - - /** - * @brief Searches for a key in the hash map and returns an iterator to the - * first instance of the key in the map. 
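Backing up to the `insert()` implementation above: its fast path works only because a 4-byte key and 4-byte element pack into a single 64-bit word, letting one `atomicCAS` claim the slot; the fallback path CASes the key alone and writes the value separately. A self-contained sketch of the packed path (`slot_type` and `try_claim_slot` are illustrative names, assuming 32-bit key and value — not part of the removed code):

```
#include <thrust/pair.h>

// Hypothetical slot type matching the map's value_type for 4-byte key/value.
using slot_type = thrust::pair<int32_t, int32_t>;

union pair2u64 {
  unsigned long long int u64;
  slot_type pair;
};

__device__ bool try_claim_slot(slot_type* slot, slot_type empty_sentinel, slot_type desired)
{
  pair2u64 expected = {0ull};
  expected.pair     = empty_sentinel;  // the (unused_key, unused_element) pair
  pair2u64 value    = {0ull};
  value.pair        = desired;
  // A single 64-bit CAS publishes key and value together, so a concurrent
  // prober can never observe a claimed key with an unwritten value.
  return atomicCAS(reinterpret_cast<unsigned long long int*>(slot), expected.u64, value.u64) ==
         expected.u64;
}
```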
- * - * @param[in] the_key The key to search for - * @param[in] precomputed_hash A flag indicating whether or not a precomputed - * hash value is passed in - * @param[in] precomputed_hash_value A precomputed hash value to use for - * determining the write location of the key into the hash map instead of - * computing the the hash value directly from the key - * @param[in] keys_are_equal An optional functor for comparing if two keys are - * equal - * @tparam hash_value_type The datatype of the hash value - * @tparam comparison_type The type of the key comparison functor - * - * @returns An iterator to the first instance of the key in the map - */ - template - __forceinline__ __host__ __device__ const_iterator - find(const key_type& the_key, - bool precomputed_hash = false, - hash_value_type precomputed_hash_value = 0, - comparison_type keys_are_equal = key_equal()) const - { - hash_value_type hash_value{0}; - - // If a precomputed hash value has been passed in, then use it to determine - // the location of the key - if (true == precomputed_hash) { - hash_value = precomputed_hash_value; - } - // Otherwise, compute the hash value from the key - else { - hash_value = m_hf(the_key); - } - - size_type hash_tbl_idx = hash_value % m_hashtbl_size; - - value_type* begin_ptr = 0; - - size_type counter = 0; - while (0 == begin_ptr) { - value_type* tmp_ptr = m_hashtbl_values + hash_tbl_idx; - const key_type tmp_val = tmp_ptr->first; - if (keys_are_equal(the_key, tmp_val)) { - begin_ptr = tmp_ptr; - break; - } - if (keys_are_equal(unused_key, tmp_val) || (counter > m_hashtbl_size)) { - begin_ptr = m_hashtbl_values + m_hashtbl_size; - break; - } - hash_tbl_idx = (hash_tbl_idx + 1) % m_hashtbl_size; - ++counter; - } - - return const_iterator(m_hashtbl_values, m_hashtbl_values + m_hashtbl_size, begin_ptr); - } - - void assign_async(const concurrent_unordered_multimap& other, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) - { - m_collisions = other.m_collisions; - if (other.m_hashtbl_size <= m_hashtbl_capacity) { - m_hashtbl_size = other.m_hashtbl_size; - } else { - m_allocator.deallocate(m_hashtbl_values, m_hashtbl_capacity, stream); - m_hashtbl_capacity = other.m_hashtbl_size; - m_hashtbl_size = other.m_hashtbl_size; - - m_hashtbl_values = m_allocator.allocate(m_hashtbl_capacity, stream); - } - CUDF_CUDA_TRY(cudaMemcpyAsync(m_hashtbl_values, - other.m_hashtbl_values, - m_hashtbl_size * sizeof(value_type), - cudaMemcpyDefault, - stream.value())); - } - - void clear_async(rmm::cuda_stream_view stream = rmm::cuda_stream_default) - { - constexpr int block_size = 128; - init_hashtbl<<<((m_hashtbl_size - 1) / block_size) + 1, block_size, 0, stream.value()>>>( - m_hashtbl_values, m_hashtbl_size, unused_key, unused_element); - if (count_collisions) m_collisions = 0; - } - - [[nodiscard]] unsigned long long get_num_collisions() const { return m_collisions; } - - void print() - { - for (size_type i = 0; i < m_hashtbl_size; ++i) { - std::cout << i << ": " << m_hashtbl_values[i].first << "," << m_hashtbl_values[i].second - << std::endl; - } - } - - void prefetch(const int dev_id, rmm::cuda_stream_view stream = rmm::cuda_stream_default) - { - cudaPointerAttributes hashtbl_values_ptr_attributes; - cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values); - - if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) { - CUDF_CUDA_TRY(cudaMemPrefetchAsync( - m_hashtbl_values, m_hashtbl_size * sizeof(value_type), dev_id, stream.value())); - } - } - - 
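Taken together, `create()`, `insert()`, and `find()` imply the following host-side usage pattern, per the stream-semantics notes at the top of this file. This is a hedged sketch only — `multimap_t`, `build_kernel`, and `probe_kernel` are hypothetical names, not part of the removed code:

```
// Illustrative only: assumes multimap_t is an instantiation of the class above.
auto stream = rmm::cuda_stream_default;
auto map    = multimap_t::create(capacity, stream);  // allocates + initializes on `stream`

// Concurrent inserts are supported, but not inserts concurrent with probing.
build_kernel<<<grid, block, 0, stream.value()>>>(*map, d_pairs, num_pairs);
stream.synchronize();  // required before probing from host or a different stream

probe_kernel<<<grid, block, 0, stream.value()>>>(*map, d_keys, num_keys);
```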
concurrent_unordered_multimap() = delete; - concurrent_unordered_multimap(concurrent_unordered_multimap const&) = default; - concurrent_unordered_multimap(concurrent_unordered_multimap&&) = default; - concurrent_unordered_multimap& operator=(concurrent_unordered_multimap const&) = default; - concurrent_unordered_multimap& operator=(concurrent_unordered_multimap&&) = default; - ~concurrent_unordered_multimap() = default; - - private: - hasher m_hf; - key_equal m_equal; - allocator_type m_allocator; - size_type m_hashtbl_size; - size_type m_hashtbl_capacity; - value_type* m_hashtbl_values; - unsigned long long m_collisions; - - /** - * @brief Private constructor used by `create` factory function. - * - * Allocates memory and optionally fills the hash map with unused - * keys/values - * - * @param[in] n The size of the hash table (the number of key-value pairs) - * @param[in] init Initialize the hash table with the unused keys/values - * @param[in] hash_function An optional hashing function - * @param[in] equal An optional functor for comparing if two keys are equal - * @param[in] a An optional functor for allocating the hash table memory - * @param[in] stream CUDA stream used for device memory operations and kernel launches. - */ - explicit concurrent_unordered_multimap(size_type n, - const bool init = true, - const Hasher& hash_function = hasher(), - const Equality& equal = key_equal(), - const allocator_type& a = allocator_type(), - rmm::cuda_stream_view stream = rmm::cuda_stream_default) - : m_hf(hash_function), - m_equal(equal), - m_allocator(a), - m_hashtbl_size(n), - m_hashtbl_capacity(n), - m_collisions(0) - { - m_hashtbl_values = m_allocator.allocate(m_hashtbl_capacity, stream); - constexpr int block_size = 128; - { - cudaPointerAttributes hashtbl_values_ptr_attributes; - cudaError_t status = - cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values); - - if (cudaSuccess == status && isPtrManaged(hashtbl_values_ptr_attributes)) { - int dev_id = 0; - CUDF_CUDA_TRY(cudaGetDevice(&dev_id)); - CUDF_CUDA_TRY(cudaMemPrefetchAsync( - m_hashtbl_values, m_hashtbl_size * sizeof(value_type), dev_id, stream.value())); - } - } - - if (init) { - init_hashtbl<<<((m_hashtbl_size - 1) / block_size) + 1, block_size, 0, stream.value()>>>( - m_hashtbl_values, m_hashtbl_size, unused_key, unused_element); - CUDF_CHECK_CUDA(stream.value()); - } - } -}; - -#endif // CONCURRENT_UNORDERED_MULTIMAP_CUH diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b28aac659d9..1ed921d1f08 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -138,7 +138,7 @@ ConfigureTest( # ################################################################################################## # * hash_map tests -------------------------------------------------------------------------------- -ConfigureTest(HASH_MAP_TEST hash_map/map_test.cu hash_map/multimap_test.cu) +ConfigureTest(HASH_MAP_TEST hash_map/map_test.cu) # ################################################################################################## # * quantiles tests ------------------------------------------------------------------------------- diff --git a/cpp/tests/hash_map/multimap_test.cu b/cpp/tests/hash_map/multimap_test.cu deleted file mode 100644 index b8f35b4d404..00000000000 --- a/cpp/tests/hash_map/multimap_test.cu +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2018-2022, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include -#include - -#include - -#include - -#include - -#include - -// This is necessary to do a parametrized typed-test over multiple template -// arguments -template -struct KeyValueTypes { - using key_type = Key; - using value_type = Value; -}; - -// A new instance of this class will be created for each *TEST(MultimapTest, -// ...) Put all repeated stuff for each test here -template -class MultimapTest : public cudf::test::BaseFixture { - public: - using key_type = typename T::key_type; - using value_type = typename T::value_type; - using size_type = int; - - using multimap_type = - concurrent_unordered_multimap::max(), - std::numeric_limits::max(), - default_hash, - equal_to, - default_allocator>>; - - std::unique_ptr> the_map; - - const key_type unused_key = std::numeric_limits::max(); - const value_type unused_value = std::numeric_limits::max(); - - const size_type size; - - MultimapTest(const size_type hash_table_size = 100) - : the_map(multimap_type::create(hash_table_size)), size(hash_table_size) - { - rmm::cuda_stream_default.synchronize(); - } - - ~MultimapTest() override {} -}; - -// Google Test can only do a parameterized typed-test over a single type, so we -// have to nest multiple types inside of the KeyValueTypes struct above -// KeyValueTypes implies key_type = type1, value_type = type2 -// This list is the types across which Google Test will run our tests -using Implementations = ::testing::Types, - KeyValueTypes, - KeyValueTypes, - KeyValueTypes, - KeyValueTypes, - KeyValueTypes>; - -TYPED_TEST_SUITE(MultimapTest, Implementations); - -TYPED_TEST(MultimapTest, InitialState) -{ - using key_type = typename TypeParam::key_type; - using value_type = typename TypeParam::value_type; - - auto begin = this->the_map->begin(); - auto end = this->the_map->end(); - EXPECT_NE(begin, end); -} From 64a811e7b0051e3d405f9714e9cce936a70cd64b Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 12 Apr 2022 13:44:36 -0700 Subject: [PATCH 02/33] Add missing APIs to documentation. (#10643) This adds a bunch of missing methods to the documentation and removes methods that no longer exist. When building, Sphinx issues warnings like this one, which indicates that a method isn't documented: ``` .../cudf/docs/cudf/source/api_docs/api/cudf.Series.pct_change.rst: WARNING: document isn't included in any toctree ``` and this one, which indicates that a documented method no longer exists: ``` WARNING: [autosummary] failed to import cudf.Series.ceil. Possible hints: * ModuleNotFoundError: No module named 'cudf.Series' * AttributeError: type object 'Series' has no attribute 'ceil' * ImportError: ``` This PR doesn't fix all of the warnings/errors, but it is a good chunk of them. 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/10643 --- docs/cudf/source/api_docs/dataframe.rst | 30 ++++++++------------- docs/cudf/source/api_docs/index_objects.rst | 5 ---- docs/cudf/source/api_docs/series.rst | 26 +++++------------- python/cudf/cudf/utils/ioutils.py | 4 +-- 4 files changed, 18 insertions(+), 47 deletions(-) diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 7a7c9c195b2..1aa1ea8beac 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -54,7 +54,7 @@ Indexing, iteration DataFrame.iloc DataFrame.insert DataFrame.__iter__ - DataFrame.iteritems + DataFrame.items DataFrame.keys DataFrame.iterrows DataFrame.itertuples @@ -65,9 +65,6 @@ Indexing, iteration DataFrame.mask DataFrame.query -For more information on ``.at``, ``.iat``, ``.loc``, and -``.iloc``, see the :ref:`indexing documentation `. - Binary operator functions ~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: @@ -84,6 +81,7 @@ Binary operator functions DataFrame.floordiv DataFrame.mod DataFrame.pow + DataFrame.dot DataFrame.radd DataFrame.rsub DataFrame.rmul @@ -121,6 +119,7 @@ Computations / descriptive stats .. autosummary:: :toctree: api/ + DataFrame.abs DataFrame.all DataFrame.any DataFrame.clip @@ -132,12 +131,15 @@ Computations / descriptive stats DataFrame.cumprod DataFrame.cumsum DataFrame.describe + DataFrame.diff DataFrame.kurt DataFrame.kurtosis DataFrame.max DataFrame.mean + DataFrame.median DataFrame.min DataFrame.mode + DataFrame.pct_change DataFrame.prod DataFrame.product DataFrame.quantile @@ -148,6 +150,7 @@ Computations / descriptive stats DataFrame.sum DataFrame.std DataFrame.var + DataFrame.nunique Reindexing / selection / label manipulation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -159,7 +162,9 @@ Reindexing / selection / label manipulation DataFrame.drop DataFrame.drop_duplicates DataFrame.equals + DataFrame.first DataFrame.head + DataFrame.last DataFrame.reindex DataFrame.rename DataFrame.reset_index @@ -180,6 +185,7 @@ Missing data handling DataFrame.dropna DataFrame.fillna + DataFrame.interpolate DataFrame.isna DataFrame.isnull DataFrame.nans_to_nulls @@ -220,27 +226,13 @@ Combining / comparing / joining / merging DataFrame.merge DataFrame.update -Numerical operations -~~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - DataFrame.acos - DataFrame.asin - DataFrame.atan - DataFrame.cos - DataFrame.exp - DataFrame.log - DataFrame.sin - DataFrame.sqrt - DataFrame.tan - Time Series-related ~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ DataFrame.shift + DataFrame.resample Serialization / IO / conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index b7b358e38be..6f5affd0ecd 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -35,7 +35,6 @@ Properties Index.size Index.values - Modifying and computations ~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: @@ -151,7 +150,6 @@ Numeric Index UInt64Index Float64Index - .. _api.categoricalindex: CategoricalIndex @@ -205,7 +203,6 @@ MultiIndex MultiIndex - MultiIndex constructors ~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: @@ -271,7 +268,6 @@ Time/date components DatetimeIndex.quarter DatetimeIndex.isocalendar - Time-specific operations ~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autosummary:: @@ -315,5 +311,4 @@ Conversion :toctree: api/ TimedeltaIndex.to_series - TimedeltaIndex.round TimedeltaIndex.to_frame diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 376acf1694b..95aa71919e4 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -28,6 +28,7 @@ Attributes Series.nullmask Series.null_count Series.size + Series.T Series.memory_usage Series.has_nulls Series.empty @@ -59,9 +60,6 @@ Indexing, iteration Series.iteritems Series.keys -For more information on ``.at``, ``.iat``, ``.loc``, and -``.iloc``, see the :ref:`indexing documentation `. - Binary operator functions ------------------------- .. autosummary:: @@ -94,6 +92,7 @@ Binary operator functions Series.ne Series.eq Series.product + Series.dot Function application, GroupBy & window -------------------------------------- @@ -118,7 +117,6 @@ Computations / descriptive stats Series.all Series.any Series.autocorr - Series.ceil Series.clip Series.corr Series.count @@ -131,7 +129,6 @@ Computations / descriptive stats Series.diff Series.digitize Series.factorize - Series.floor Series.kurt Series.max Series.mean @@ -140,6 +137,7 @@ Computations / descriptive stats Series.mode Series.nlargest Series.nsmallest + Series.pct_change Series.prod Series.quantile Series.rank @@ -166,8 +164,10 @@ Reindexing / selection / label manipulation Series.drop Series.drop_duplicates Series.equals + Series.first Series.head Series.isin + Series.last Series.reindex Series.rename Series.reset_index @@ -215,27 +215,13 @@ Combining / comparing / joining / merging Series.append Series.update -Numerical operations -~~~~~~~~~~~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - Series.acos - Series.asin - Series.atan - Series.cos - Series.exp - Series.log - Series.sin - Series.sqrt - Series.tan - Time Series-related ------------------- .. autosummary:: :toctree: api/ Series.shift + Series.resample Accessors --------- diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index c3031fc8d8d..6ef44d9b1d6 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -259,7 +259,6 @@ See Also -------- cudf.read_parquet -cudf.read_orc """ doc_to_parquet = docfmt_partial(docstring=_docstring_to_parquet) @@ -413,8 +412,7 @@ See Also -------- -cudf.read_parquet -cudf.DataFrame.to_parquet +cudf.DataFrame.to_orc """.format( remote_data_sources=_docstring_remote_sources ) From c9e16c72cd6734b0036c4225dc59310356eab5ea Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 12 Apr 2022 15:51:25 -0700 Subject: [PATCH 03/33] Simplify preprocessing of arguments for DataFrame binops (#10563) This PR simplifies the preprocessing of the rhs of binary operations for DataFrame, streamlining it to a single path that can be more easily combined with that of other types in the future. 
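As a concrete example of the streamlined path: `gdf + pd.Series([1.0, 2.0], index=["x", "y"])` is now normalized up front into the plain mapping `{"x": 1.0, "y": 2.0}`, after which scalars, sequences, Series, and DataFrames all funnel through the same `{column name: (left, right, reflect, fill_value)}` operand construction instead of four separate branches.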
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/10563 --- python/cudf/cudf/core/dataframe.py | 125 +++++++++---------- python/cudf/cudf/core/frame.py | 12 +- python/cudf/cudf/core/single_column_frame.py | 12 ++ python/cudf/cudf/tests/test_dataframe.py | 81 +++++++++++- 4 files changed, 154 insertions(+), 76 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b3beb553187..277fd5aae57 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -10,7 +10,7 @@ import sys import warnings from collections import defaultdict -from collections.abc import Iterable, Sequence +from collections.abc import Iterable, Mapping, Sequence from typing import ( Any, Dict, @@ -1854,86 +1854,75 @@ def _make_operands_and_index_for_binop( ], Optional[BaseIndex], ]: - lhs, rhs = self, other - - if _is_scalar_or_zero_d_array(rhs): - rhs = [rhs] * lhs._num_columns - - # For columns that exist in rhs but not lhs, we swap the order so that - # we can always assume that left has a binary operator. This - # implementation assumes that binary operations between a column and - # NULL are always commutative, even for binops (like subtraction) that - # are normally anticommutative. - # TODO: The above should no longer be necessary once we switch to - # properly invoking the operator since we can then rely on reflection. - if isinstance(rhs, Sequence): - # TODO: Consider validating sequence length (pandas does). - operands = { - name: (left, right, reflect, fill_value) - for right, (name, left) in zip(rhs, lhs._data.items()) - } - elif isinstance(rhs, DataFrame): + # Check built-in types first for speed. + if isinstance(other, (list, dict, Sequence, Mapping)): + warnings.warn( + "Binary operations between host objects such as " + f"{type(other)} and cudf.DataFrame are deprecated and will be " + "removed in a future release. Please convert it to a cudf " + "object before performing the operation.", + FutureWarning, + ) + if len(other) != self._num_columns: + raise ValueError( + "Other is of the wrong length. Expected " + f"{self._num_columns}, got {len(other)}" + ) + + lhs, rhs = self._data, other + index = self._index + fill_requires_key = False + left_default: Any = False + + if _is_scalar_or_zero_d_array(other): + rhs = {name: other for name in self._data} + elif isinstance(other, (list, Sequence)): + rhs = {name: o for (name, o) in zip(self._data, other)} + elif isinstance(other, Series): + rhs = dict(zip(other.index.values_host, other.values_host)) + # For keys in right but not left, perform binops between NaN (not + # NULL!) and the right value (result is NaN). + left_default = as_column(np.nan, length=len(self)) + elif isinstance(other, DataFrame): if ( not can_reindex and fn in cudf.utils.utils._EQUALITY_OPS and ( - not lhs._data.to_pandas_index().equals( - rhs._data.to_pandas_index() + not self.index.equals(other.index) + or not self._data.to_pandas_index().equals( + other._data.to_pandas_index() ) - or not lhs.index.equals(rhs.index) ) ): raise ValueError( "Can only compare identically-labeled DataFrame objects" ) + new_lhs, new_rhs = _align_indices(self, other) + index = new_lhs._index + lhs, rhs = new_lhs._data, new_rhs._data + fill_requires_key = True + # For DataFrame-DataFrame ops, always default to operating against + # the fill value. 
+ left_default = fill_value + + if not isinstance(rhs, (dict, Mapping)): + return NotImplemented, None - lhs, rhs = _align_indices(lhs, rhs) - - operands = { - name: ( - lcol, - rhs._data[name] - if name in rhs._data - else (fill_value or None), - reflect, - fill_value if name in rhs._data else None, - ) - for name, lcol in lhs._data.items() - } - for name, col in rhs._data.items(): - if name not in lhs._data: - operands[name] = ( - col, - (fill_value or None), - not reflect, - None, - ) - elif isinstance(rhs, Series): - # Note: This logic will need updating if any of the user-facing - # binop methods (e.g. DataFrame.add) ever support axis=0/rows. - right_dict = dict(zip(rhs.index.values_host, rhs.values_host)) - left_cols = lhs._column_names - # mypy thinks lhs._column_names is a List rather than a Tuple, so - # we have to ignore the type check. - result_cols = left_cols + tuple( # type: ignore - col for col in right_dict if col not in left_cols + operands = { + k: ( + v, + rhs.get(k, fill_value), + reflect, + fill_value if (not fill_requires_key or k in rhs) else None, ) - operands = {} - for col in result_cols: - if col in left_cols: - left = lhs._data[col] - right = right_dict[col] if col in right_dict else None - else: - # We match pandas semantics here by performing binops - # between a NaN (not NULL!) column and the actual values, - # which results in nans, the pandas output. - left = as_column(np.nan, length=lhs._num_rows) - right = right_dict[col] - operands[col] = (left, right, reflect, fill_value) - else: - return NotImplemented, None + for k, v in lhs.items() + } - return operands, lhs._index + if left_default is not False: + for k, v in rhs.items(): + if k not in lhs: + operands[k] = (left_default, v, reflect, None) + return operands, index @_cudf_nvtx_annotate def update( diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 1382ebfd8ee..5185fb05cb4 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -2475,7 +2475,10 @@ def _colwise_binop( ) in operands.items(): output_mask = None if fill_value is not None: - if isinstance(right_column, ColumnBase): + left_is_column = isinstance(left_column, ColumnBase) + right_is_column = isinstance(right_column, ColumnBase) + + if left_is_column and right_is_column: # If both columns are nullable, pandas semantics dictate # that nulls that are present in both left_column and # right_column are not filled. @@ -2489,9 +2492,14 @@ def _colwise_binop( left_column = left_column.fillna(fill_value) elif right_column.nullable: right_column = right_column.fillna(fill_value) - else: + elif left_is_column: if left_column.nullable: left_column = left_column.fillna(fill_value) + elif right_is_column: + if right_column.nullable: + right_column = right_column.fillna(fill_value) + else: + assert False, "At least one operand must be a column." # TODO: Disable logical and binary operators between columns that # are not numerical using the new binops mixin. 
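A concrete instance of the fill rule above: with `fill_value=0`, `left = [1, null, null]`, and `right = [null, null, 2]`, only the positions where exactly one operand is null are filled, so an add yields `[1, null, 2]` — the row where both sides are null stays null, matching pandas semantics.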
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 4fcd846e7bc..003f8ea7fdb 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -3,6 +3,7 @@ from __future__ import annotations +import warnings from typing import Any, Dict, Optional, Tuple, Type, TypeVar, Union import cupy @@ -337,6 +338,17 @@ def _make_operands_for_binop( if isinstance(other, SingleColumnFrame): other = other._column elif not _is_scalar_or_zero_d_array(other): + if not hasattr(other, "__cuda_array_interface__"): + # TODO: When this deprecated behavior is removed, also change + # the above conditional to stop checking for pd.Series and + # pd.Index since we only need to support SingleColumnFrame. + warnings.warn( + f"Binary operations between host objects such as " + f"{type(other)} and {type(self)} are deprecated and will " + "be removed in a future release. Please convert it to a " + "cudf object before performing the operation.", + FutureWarning, + ) # Non-scalar right operands are valid iff they convert to columns. try: other = as_column(other) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 303c245777c..a7fad792bd0 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -8,6 +8,8 @@ import re import string import textwrap +import warnings +from contextlib import contextmanager from copy import copy import cupy @@ -2017,6 +2019,15 @@ def test_dataframe_min_count_ops(data, ops, skipna, min_count): ) +@contextmanager +def _hide_host_other_warning(other): + if isinstance(other, (dict, list)): + with pytest.warns(FutureWarning): + yield + else: + yield + + @pytest.mark.parametrize( "binop", [ @@ -2034,12 +2045,70 @@ def test_dataframe_min_count_ops(data, ops, skipna, min_count): operator.ne, ], ) -def test_binops_df(pdf, gdf, binop): - pdf = pdf + 1.0 - gdf = gdf + 1.0 - d = binop(pdf, pdf) - g = binop(gdf, gdf) - assert_eq(d, g) +@pytest.mark.parametrize( + "other", + [ + 1.0, + [1.0], + [1.0, 2.0], + [1.0, 2.0, 3.0], + {"x": 1.0}, + {"x": 1.0, "y": 2.0}, + {"x": 1.0, "y": 2.0, "z": 3.0}, + {"x": 1.0, "z": 3.0}, + pd.Series([1.0]), + pd.Series([1.0, 2.0]), + pd.Series([1.0, 2.0, 3.0]), + pd.Series([1.0], index=["x"]), + pd.Series([1.0, 2.0], index=["x", "y"]), + pd.Series([1.0, 2.0, 3.0], index=["x", "y", "z"]), + pd.DataFrame({"x": [1.0]}), + pd.DataFrame({"x": [1.0], "y": [2.0]}), + pd.DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]}), + ], +) +def test_binops_df(pdf, gdf, binop, other): + # Avoid 1**NA cases: https://github.com/pandas-dev/pandas/issues/29997 + pdf[pdf == 1.0] = 2 + gdf[gdf == 1.0] = 2 + try: + with warnings.catch_warnings(record=True) as w: + d = binop(pdf, other) + except Exception: + if isinstance(other, (pd.Series, pd.DataFrame)): + other = cudf.from_pandas(other) + + # TODO: When we remove support for binary operations with lists and + # dicts, those cases should all be checked in a `pytest.raises` block + # that returns before we enter this try-except. 
+ with _hide_host_other_warning(other): + assert_exceptions_equal( + lfunc=binop, + rfunc=binop, + lfunc_args_and_kwargs=([pdf, other], {}), + rfunc_args_and_kwargs=([gdf, other], {}), + compare_error_message=False, + ) + else: + if isinstance(other, (pd.Series, pd.DataFrame)): + other = cudf.from_pandas(other) + with _hide_host_other_warning(other): + g = binop(gdf, other) + try: + assert_eq(d, g) + except AssertionError: + # Currently we will not match pandas for equality/inequality + # operators when there are columns that exist in a Series but not + # the DataFrame because pandas returns True/False values whereas we + # return NA. However, this reindexing is deprecated in pandas so we + # opt not to add support. + if w and "DataFrame vs Series comparisons is deprecated" in str(w): + pass + + +def test_binops_df_invalid(gdf): + with pytest.raises(TypeError): + gdf + np.array([1, 2]) @pytest.mark.parametrize("binop", [operator.and_, operator.or_, operator.xor]) From 0ea6f8ee649579618caa990c38515acdbf9d3775 Mon Sep 17 00:00:00 2001 From: Devavret Makkar Date: Wed, 13 Apr 2022 13:16:42 +0530 Subject: [PATCH 04/33] List element Equality comparator (#10289) This PR implements equality comparator for LIST columns. This only supports "self" comparison for now, meaning the two rows to be compared should belong to the same table. A comparator that works on rows of two different tables will be implemented in another PR. This works only on "sanitized" list columns. See #10291 for details. This will partially support #10186. Authors: - Devavret Makkar (https://github.com/devavret) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Vyas Ramasubramani (https://github.com/vyasr) - Mike Wilson (https://github.com/hyperbolic2346) - Jake Hemstad (https://github.com/jrhemstad) - Jordan Jacobelli (https://github.com/Ethyling) URL: https://github.com/rapidsai/cudf/pull/10289 --- conda/recipes/libcudf/meta.yaml | 1 + cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/reduction/rank.cpp | 64 ++++ .../cudf/column/column_device_view.cuh | 92 ++++-- cpp/include/cudf/detail/iterator.cuh | 47 ++- cpp/include/cudf/detail/utilities/column.hpp | 84 +++++ cpp/include/cudf/lists/list_device_view.cuh | 33 +- .../cudf/lists/lists_column_device_view.cuh | 57 ++-- .../structs/structs_column_device_view.cuh | 68 ++++ .../cudf/table/experimental/row_operators.cuh | 299 +++++++++++++++++- cpp/src/io/parquet/writer_impl.cu | 79 +---- cpp/src/reductions/scan/rank_scan.cu | 25 +- cpp/src/table/row_operators.cu | 214 +++++++++++-- cpp/tests/CMakeLists.txt | 1 + cpp/tests/reductions/list_rank_test.cpp | 228 +++++++++++++ 15 files changed, 1106 insertions(+), 188 deletions(-) create mode 100644 cpp/benchmarks/reduction/rank.cpp create mode 100644 cpp/include/cudf/detail/utilities/column.hpp create mode 100644 cpp/include/cudf/structs/structs_column_device_view.cuh create mode 100644 cpp/tests/reductions/list_rank_test.cpp diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index fdd9011ae34..0806bb964cf 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -110,6 +110,7 @@ outputs: - test -f $PREFIX/include/cudf/detail/transpose.hpp - test -f $PREFIX/include/cudf/detail/unary.hpp - test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp + - test -f $PREFIX/include/cudf/detail/utilities/column.hpp - test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp - test -f 
$PREFIX/include/cudf/detail/utilities/int_fastdiv.h - test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index d863e6e05a9..26bb10da69f 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -181,7 +181,7 @@ ConfigureBench( REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/minmax.cpp reduction/reduce.cpp reduction/scan.cpp ) -ConfigureNVBench(REDUCTION_NVBENCH reduction/segment_reduce.cu) +ConfigureNVBench(REDUCTION_NVBENCH reduction/segment_reduce.cu reduction/rank.cpp) # ################################################################################################## # * reduction benchmark --------------------------------------------------------------------------- diff --git a/cpp/benchmarks/reduction/rank.cpp b/cpp/benchmarks/reduction/rank.cpp new file mode 100644 index 00000000000..5e2848d7f0b --- /dev/null +++ b/cpp/benchmarks/reduction/rank.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include + +template +static void nvbench_reduction_scan(nvbench::state& state, nvbench::type_list) +{ + cudf::rmm_pool_raii pool_raii; + + auto const dtype = cudf::type_to_id(); + + double const null_frequency = state.get_float64("null_frequency"); + size_t const size = state.get_int64("data_size"); + + data_profile table_data_profile; + table_data_profile.set_distribution_params(dtype, distribution_id::UNIFORM, 0, 5); + table_data_profile.set_null_frequency(null_frequency); + + auto const table = create_random_table({dtype}, table_size_bytes{size / 2}, table_data_profile); + + auto const new_tbl = cudf::repeat(table->view(), 2); + cudf::column_view input(new_tbl->view().column(0)); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + auto result = cudf::detail::inclusive_dense_rank_scan( + input, stream_view, rmm::mr::get_current_device_resource()); + }); +} + +using data_type = nvbench::type_list; + +NVBENCH_BENCH_TYPES(nvbench_reduction_scan, NVBENCH_TYPE_AXES(data_type)) + .set_name("rank_scan") + .add_float64_axis("null_frequency", {0, 0.1, 0.5, 0.9}) + .add_int64_axis("data_size", + { + 10000, // 10k + 100000, // 100k + 1000000, // 1M + 10000000, // 10M + 100000000, // 100M + }); \ No newline at end of file diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index ec3795238b0..070ca80858b 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -111,7 +111,7 @@ class alignas(16) column_device_view_base { */ template or is_rep_layout_compatible())> - __host__ __device__ T const* head() const noexcept + [[nodiscard]] CUDF_HOST_DEVICE T const* head() const noexcept { return static_cast(_data); } @@ -132,7 +132,7 @@ class 
alignas(16) column_device_view_base { * @return T const* Typed pointer to underlying data, including the offset */ template ())> - __host__ __device__ T const* data() const noexcept + [[nodiscard]] CUDF_HOST_DEVICE T const* data() const noexcept { return head() + _offset; } @@ -140,12 +140,12 @@ class alignas(16) column_device_view_base { /** * @brief Returns the number of elements in the column. */ - [[nodiscard]] __host__ __device__ size_type size() const noexcept { return _size; } + [[nodiscard]] CUDF_HOST_DEVICE size_type size() const noexcept { return _size; } /** * @brief Returns the element type */ - [[nodiscard]] __host__ __device__ data_type type() const noexcept { return _type; } + [[nodiscard]] CUDF_HOST_DEVICE data_type type() const noexcept { return _type; } /** * @brief Indicates whether the column can contain null elements, i.e., if it @@ -156,7 +156,7 @@ class alignas(16) column_device_view_base { * @return true The bitmask is allocated * @return false The bitmask is not allocated */ - [[nodiscard]] __host__ __device__ bool nullable() const noexcept { return nullptr != _null_mask; } + [[nodiscard]] CUDF_HOST_DEVICE bool nullable() const noexcept { return nullptr != _null_mask; } /** * @brief Returns raw pointer to the underlying bitmask allocation. @@ -165,7 +165,7 @@ class alignas(16) column_device_view_base { * * @note If `null_count() == 0`, this may return `nullptr`. */ - [[nodiscard]] __host__ __device__ bitmask_type const* null_mask() const noexcept + [[nodiscard]] CUDF_HOST_DEVICE bitmask_type const* null_mask() const noexcept { return _null_mask; } @@ -174,7 +174,7 @@ class alignas(16) column_device_view_base { * @brief Returns the index of the first element relative to the base memory * allocation, i.e., what is returned from `head()`. */ - [[nodiscard]] __host__ __device__ size_type offset() const noexcept { return _offset; } + [[nodiscard]] CUDF_HOST_DEVICE size_type offset() const noexcept { return _offset; } /** * @brief Returns whether the specified element holds a valid value (i.e., not @@ -269,11 +269,11 @@ class alignas(16) column_device_view_base { size_type _offset{}; ///< Index position of the first element. ///< Enables zero-copy slicing - column_device_view_base(data_type type, - size_type size, - void const* data, - bitmask_type const* null_mask, - size_type offset) + CUDF_HOST_DEVICE column_device_view_base(data_type type, + size_type size, + void const* data, + bitmask_type const* null_mask, + size_type offset) : _type{type}, _size{size}, _data{data}, _null_mask{null_mask}, _offset{offset} { } @@ -329,6 +329,33 @@ class alignas(16) column_device_view : public detail::column_device_view_base { */ column_device_view(column_view column, void* h_ptr, void* d_ptr); + /** + * @brief Get a new column_device_view which is a slice of this column. + * + * Example: + * @code{.cpp} + * // column = column_device_view([1, 2, 3, 4, 5, 6, 7]) + * auto c = column.slice(1, 3); + * // c = column_device_view([2, 3, 4]) + * auto c1 = column.slice(2, 3); + * // c1 = column_device_view([3, 4, 5]) + * @endcode + * + * @param offset The index of the first element in the slice + * @param size The number of elements in the slice + */ + [[nodiscard]] CUDF_HOST_DEVICE column_device_view slice(size_type offset, + size_type size) const noexcept + { + return column_device_view{this->type(), + size, + this->head(), + this->null_mask(), + this->offset() + offset, + d_children, + this->num_child_columns()}; + } + /** * @brief Returns reference to element at the specified index. 
* @@ -346,7 +373,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * @param element_index Position of the desired element */ template ())> - __device__ T element(size_type element_index) const noexcept + [[nodiscard]] __device__ T element(size_type element_index) const noexcept { return data()[element_index]; } @@ -365,9 +392,8 @@ class alignas(16) column_device_view : public detail::column_device_view_base { template )> __device__ T element(size_type element_index) const noexcept { - size_type index = element_index + offset(); // account for this view's _offset - const int32_t* d_offsets = - d_children[strings_column_view::offsets_column_index].data(); + size_type index = element_index + offset(); // account for this view's _offset + const auto* d_offsets = d_children[strings_column_view::offsets_column_index].data(); const char* d_strings = d_children[strings_column_view::chars_column_index].data(); size_type offset = d_offsets[index]; return string_view{d_strings + offset, d_offsets[index + 1] - offset}; @@ -763,11 +789,37 @@ class alignas(16) column_device_view : public detail::column_device_view_base { * * @return The number of child columns */ - [[nodiscard]] __host__ __device__ size_type num_child_columns() const noexcept + [[nodiscard]] CUDF_HOST_DEVICE size_type num_child_columns() const noexcept { return _num_children; } + private: + /** + * @brief Creates an instance of this class using pre-existing device memory pointers to data, + * nullmask, and offset. + * + * @param type The type of the column + * @param size The number of elements in the column + * @param data Pointer to the device memory containing the data + * @param null_mask Pointer to the device memory containing the null bitmask + * @param offset The index of the first element in the column + * @param children Pointer to the device memory containing child data + * @param num_children The number of child columns + */ + CUDF_HOST_DEVICE column_device_view(data_type type, + size_type size, + void const* data, + bitmask_type const* null_mask, + size_type offset, + column_device_view* children, + size_type num_children) + : column_device_view_base(type, size, data, null_mask, offset), + d_children(children), + _num_children(num_children) + { + } + protected: column_device_view* d_children{}; ///< Array of `column_device_view` ///< objects in device memory. @@ -852,7 +904,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view */ template or is_rep_layout_compatible())> - __host__ __device__ T* head() const noexcept + CUDF_HOST_DEVICE T* head() const noexcept { return const_cast(detail::column_device_view_base::head()); } @@ -870,7 +922,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * @return T* Typed pointer to underlying data, including the offset */ template ())> - __host__ __device__ T* data() const noexcept + CUDF_HOST_DEVICE T* data() const noexcept { return const_cast(detail::column_device_view_base::data()); } @@ -912,7 +964,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view * * @note If `null_count() == 0`, this may return `nullptr`. 
*/ - [[nodiscard]] __host__ __device__ bitmask_type* null_mask() const noexcept + [[nodiscard]] CUDF_HOST_DEVICE bitmask_type* null_mask() const noexcept { return const_cast(detail::column_device_view_base::null_mask()); } diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 4442af8fab1..7a83298c72a 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -67,7 +67,8 @@ namespace detail { * @return A transform iterator that applies `f` to a counting iterator */ template -inline auto make_counting_transform_iterator(cudf::size_type start, UnaryFunction f) +CUDF_HOST_DEVICE inline auto make_counting_transform_iterator(cudf::size_type start, + UnaryFunction f) { return thrust::make_transform_iterator(thrust::make_counting_iterator(start), f); } @@ -117,26 +118,42 @@ struct null_replaced_value_accessor { /** * @brief validity accessor of column with null bitmask - * A unary functor returns validity at `id`. - * `operator() (cudf::size_type id)` computes validity flag at `id` - * This functor is only allowed for nullable columns. + * A unary functor that returns validity at index `i`. * - * @throws cudf::logic_error if the column is not nullable. + * @tparam safe If false, the accessor with throw logic_error if the column is not nullable. If + * true, the accessor checks for nullability and if col is not nullable, returns true. */ +template struct validity_accessor { column_device_view const col; /** * @brief constructor + * + * @throws cudf::logic_error if not safe and `col` does not have a validity bitmask + * * @param[in] _col column device view of cudf column */ - validity_accessor(column_device_view const& _col) : col{_col} + CUDF_HOST_DEVICE validity_accessor(column_device_view const& _col) : col{_col} { - // verify valid is non-null, otherwise, is_valid() will crash - CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); + if constexpr (not safe) { + // verify col is nullable, otherwise, is_valid_nocheck() will crash +#if defined(__CUDA_ARCH__) + cudf_assert(_col.nullable() && "Unexpected non-nullable column."); +#else + CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); +#endif + } } - __device__ inline bool operator()(cudf::size_type i) const { return col.is_valid_nocheck(i); } + __device__ inline bool operator()(cudf::size_type i) const + { + if constexpr (safe) { + return col.is_valid(i); + } else { + return col.is_valid_nocheck(i); + } + } }; /** @@ -289,16 +306,20 @@ auto make_pair_rep_iterator(column_device_view const& column) * * Dereferencing the returned iterator for element `i` will return the validity * of `column[i]` - * This iterator is only allowed for nullable columns. + * This iterator is only allowed for nullable columns if `safe` = false + * When safe = true, if the column is not nullable then the validity is always true. * - * @throws cudf::logic_error if the column is not nullable. + * @throws cudf::logic_error if the column is not nullable when safe = false * + * @tparam safe If false, the accessor with throw logic_error if the column is not nullable. If + * true, the accessor checks for nullability and if col is not nullable, returns true. * @param column The column to iterate * @return auto Iterator that returns validities of column elements. 
*/ -auto inline make_validity_iterator(column_device_view const& column) +template +CUDF_HOST_DEVICE auto inline make_validity_iterator(column_device_view const& column) { - return make_counting_transform_iterator(cudf::size_type{0}, validity_accessor{column}); + return make_counting_transform_iterator(cudf::size_type{0}, validity_accessor{column}); } /** diff --git a/cpp/include/cudf/detail/utilities/column.hpp b/cpp/include/cudf/detail/utilities/column.hpp new file mode 100644 index 00000000000..7d22bbd60af --- /dev/null +++ b/cpp/include/cudf/detail/utilities/column.hpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +namespace cudf::detail { + +struct linked_column_view; + +using LinkedColPtr = std::shared_ptr; +using LinkedColVector = std::vector; + +/** + * @brief column_view with the added member pointer to the parent of this column. + * + */ +struct linked_column_view : public column_view_base { + linked_column_view(linked_column_view const&) = delete; + linked_column_view& operator=(linked_column_view const&) = delete; + + linked_column_view(column_view const& col) : linked_column_view(nullptr, col) {} + + linked_column_view(linked_column_view* parent, column_view const& col) + : column_view_base(col), parent(parent) + { + children.reserve(col.num_children()); + std::transform( + col.child_begin(), col.child_end(), std::back_inserter(children), [&](column_view const& c) { + return std::make_shared(this, c); + }); + } + + operator column_view() const + { + auto child_it = thrust::make_transform_iterator( + children.begin(), [](auto const& c) { return static_cast(*c); }); + return column_view(this->type(), + this->size(), + this->head(), + this->null_mask(), + UNKNOWN_NULL_COUNT, + this->offset(), + std::vector(child_it, child_it + children.size())); + } + + linked_column_view* parent; //!< Pointer to parent of this column. 
Nullptr if root + LinkedColVector children; +}; + +/** + * @brief Converts all column_views of a table into linked_column_views + * + * @param table table of columns to convert + * @return Vector of converted linked_column_views + */ +inline LinkedColVector table_to_linked_columns(table_view const& table) +{ + LinkedColVector result; + result.reserve(table.num_columns()); + std::transform(table.begin(), table.end(), std::back_inserter(result), [&](column_view const& c) { + return std::make_shared(c); + }); + + return result; +} + +} // namespace cudf::detail \ No newline at end of file diff --git a/cpp/include/cudf/lists/list_device_view.cuh b/cpp/include/cudf/lists/list_device_view.cuh index ae0a247f005..5cc1e3d166b 100644 --- a/cpp/include/cudf/lists/list_device_view.cuh +++ b/cpp/include/cudf/lists/list_device_view.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include #include @@ -293,22 +294,34 @@ class list_device_view { * */ struct list_size_functor { - column_device_view const d_column; - CUDF_HOST_DEVICE inline list_size_functor(column_device_view const& d_col) : d_column(d_col) + detail::lists_column_device_view const d_column; + CUDF_HOST_DEVICE inline list_size_functor(detail::lists_column_device_view const& d_col) + : d_column(d_col) { -#if defined(__CUDA_ARCH__) - cudf_assert(d_col.type().id() == type_id::LIST && "Only list type column is supported"); -#else - CUDF_EXPECTS(d_col.type().id() == type_id::LIST, "Only list type column is supported"); -#endif } __device__ inline size_type operator()(size_type idx) { if (d_column.is_null(idx)) return size_type{0}; - auto d_offsets = - d_column.child(lists_column_view::offsets_column_index).data() + d_column.offset(); - return d_offsets[idx + 1] - d_offsets[idx]; + return d_column.offset_at(idx + 1) - d_column.offset_at(idx); } }; +/** + * @brief Makes an iterator that returns size of the list by row index + * + * Example: + * For a list_column_device_view with 3 rows, `l = {[1, 2, 3], [4, 5], [6, 7, 8, 9]}`, + * @code{.cpp} + * auto it = make_list_size_iterator(l); + * assert(it[0] == 3); + * assert(it[1] == 2); + * assert(it[2] == 4); + * @endcode + * + */ +CUDF_HOST_DEVICE auto inline make_list_size_iterator(detail::lists_column_device_view const& c) +{ + return detail::make_counting_transform_iterator(0, list_size_functor{c}); +} + } // namespace cudf diff --git a/cpp/include/cudf/lists/lists_column_device_view.cuh b/cpp/include/cudf/lists/lists_column_device_view.cuh index e48707ec298..06c20933a70 100644 --- a/cpp/include/cudf/lists/lists_column_device_view.cuh +++ b/cpp/include/cudf/lists/lists_column_device_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,67 +25,70 @@ namespace cudf { namespace detail { /** - * @brief Given a column-device-view, an instance of this class provides a + * @brief Given a column_device_view, an instance of this class provides a * wrapper on this compound column for list operations. * Analogous to list_column_view. 
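 *
 * Example (a minimal sketch; `d_col` is assumed to be a LIST-typed
 * column_device_view, used from device code):
 * @code{.cpp}
 * auto lists = cudf::detail::lists_column_device_view{d_col};
 * auto child = lists.sliced_child();  // child with this view's offset and size applied
 * auto len0  = lists.offset_at(1) - lists.offset_at(0);  // length of row 0
 * @endcode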
*/ -class lists_column_device_view { +class lists_column_device_view : private column_device_view { public: + lists_column_device_view() = delete; ~lists_column_device_view() = default; lists_column_device_view(lists_column_device_view const&) = default; lists_column_device_view(lists_column_device_view&&) = default; lists_column_device_view& operator=(lists_column_device_view const&) = default; lists_column_device_view& operator=(lists_column_device_view&&) = default; - lists_column_device_view(column_device_view const& underlying_) : underlying(underlying_) + CUDF_HOST_DEVICE lists_column_device_view(column_device_view const& underlying_) + : column_device_view(underlying_) { +#ifdef __CUDA_ARCH__ + cudf_assert(underlying_.type().id() == type_id::LIST and + "lists_column_device_view only supports lists"); +#else CUDF_EXPECTS(underlying_.type().id() == type_id::LIST, "lists_column_device_view only supports lists"); +#endif } - /** - * @brief Fetches number of rows in the lists column - */ - [[nodiscard]] CUDF_HOST_DEVICE inline cudf::size_type size() const { return underlying.size(); } + using column_device_view::is_null; + using column_device_view::nullable; + using column_device_view::offset; + using column_device_view::size; /** * @brief Fetches the offsets column of the underlying list column. */ [[nodiscard]] __device__ inline column_device_view offsets() const { - return underlying.child(lists_column_view::offsets_column_index); + return column_device_view::child(lists_column_view::offsets_column_index); } /** - * @brief Fetches the child column of the underlying list column. + * @brief Fetches the list offset value at a given row index while taking column offset into + * account. */ - [[nodiscard]] __device__ inline column_device_view child() const + [[nodiscard]] __device__ inline size_type offset_at(size_type idx) const { - return underlying.child(lists_column_view::child_column_index); + return offsets().size() > 0 ? offsets().element(offset() + idx) : 0; } /** - * @brief Indicates whether the list column is nullable. - */ - [[nodiscard]] __device__ inline bool nullable() const { return underlying.nullable(); } - - /** - * @brief Indicates whether the row (i.e. list) at the specified - * index is null. + * @brief Fetches the child column of the underlying list column. */ - [[nodiscard]] __device__ inline bool is_null(size_type idx) const + [[nodiscard]] __device__ inline column_device_view child() const { - return underlying.is_null(idx); + return column_device_view::child(lists_column_view::child_column_index); } /** - * @brief Fetches the offset of the underlying column_device_view, - * in case it is a sliced/offset column. + * @brief Fetches the child column of the underlying list column with offset and size applied */ - [[nodiscard]] __device__ inline size_type offset() const { return underlying.offset(); } - - private: - column_device_view underlying; + [[nodiscard]] __device__ inline column_device_view sliced_child() const + { + auto start = offset_at(0); + auto end = offset_at(size()); + return child().slice(start, end - start); + } }; } // namespace detail diff --git a/cpp/include/cudf/structs/structs_column_device_view.cuh b/cpp/include/cudf/structs/structs_column_device_view.cuh new file mode 100644 index 00000000000..09bbb46a93c --- /dev/null +++ b/cpp/include/cudf/structs/structs_column_device_view.cuh @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace cudf { + +namespace detail { + +/** + * @brief Given a column_device_view, an instance of this class provides a + * wrapper on this compound column for struct operations. + * Analogous to struct_column_view. + */ +class structs_column_device_view : private column_device_view { + public: + structs_column_device_view() = delete; + ~structs_column_device_view() = default; + structs_column_device_view(structs_column_device_view const&) = default; + structs_column_device_view(structs_column_device_view&&) = default; + structs_column_device_view& operator=(structs_column_device_view const&) = default; + structs_column_device_view& operator=(structs_column_device_view&&) = default; + + CUDF_HOST_DEVICE structs_column_device_view(column_device_view const& underlying_) + : column_device_view(underlying_) + { +#ifdef __CUDA_ARCH__ + cudf_assert(underlying_.type().id() == type_id::STRUCT and + "structs_column_device_view only supports structs"); +#else + CUDF_EXPECTS(underlying_.type().id() == type_id::STRUCT, + "structs_column_device_view only supports structs"); +#endif + } + + using column_device_view::child; + using column_device_view::is_null; + using column_device_view::nullable; + using column_device_view::offset; + using column_device_view::size; + + /** + * @brief Fetches the child column of the underlying struct column. + */ + [[nodiscard]] __device__ inline column_device_view sliced_child(size_type idx) const + { + return child(idx).slice(offset(), size()); + } +}; + +} // namespace detail + +} // namespace cudf diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 0fb1ad7ca68..88e31744fdf 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -17,15 +17,20 @@ #pragma once #include +#include #include #include +#include +#include #include +#include #include #include #include #include #include +#include #include #include @@ -172,13 +177,11 @@ class device_row_comparator { template () and - not std::is_same_v)> - __device__ cuda::std::pair operator()(size_type const lhs_element_index, - size_type const rhs_element_index) + not std::is_same_v), + typename... Args> + __device__ cuda::std::pair operator()(Args...) { - // TODO: make this CUDF_UNREACHABLE - cudf_assert(false && "Attempted to compare elements of uncomparable types."); - return cuda::std::make_pair(weak_ordering::LESS, std::numeric_limits::max()); + CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); } template )> @@ -424,6 +427,290 @@ class self_comparator { }; } // namespace lexicographic + +namespace equality { + +template +class device_row_comparator { + friend class self_comparator; + + public: + /** + * @brief Checks whether the row at `lhs_index` in the `lhs` table is equal to the row at + * `rhs_index` in the `rhs` table. 
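+ *
+ * A minimal sketch of device-side use (illustrative; `comp` is assumed to be a
+ * device_row_comparator obtained from self_comparator::device_comparator()):
+ * @code{.cpp}
+ * bool rows_equal = comp(row_i, row_j);
+ * @endcode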
+ * + * @param lhs_index The index of row in the `lhs` table to examine + * @param rhs_index The index of the row in the `rhs` table to examine + * @return `true` if row from the `lhs` table is equal to the row in the `rhs` table + */ + __device__ bool operator()(size_type const lhs_index, size_type const rhs_index) const noexcept + { + auto equal_elements = [=](column_device_view l, column_device_view r) { + return cudf::type_dispatcher( + l.type(), element_comparator{nulls, l, r, nulls_are_equal}, lhs_index, rhs_index); + }; + + return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements); + } + + private: + /** + * @brief Construct a function object for performing equality comparison between the rows of two + * tables. + * + * @param has_nulls Indicates if either input table contains columns with nulls. + * @param lhs The first table + * @param rhs The second table (may be the same table as `lhs`) + * @param nulls_are_equal Indicates if two null elements are treated as equivalent + */ + device_row_comparator(Nullate has_nulls, + table_device_view lhs, + table_device_view rhs, + null_equality nulls_are_equal = null_equality::EQUAL) noexcept + : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal} + { + } + + /** + * @brief Performs an equality comparison between two elements in two columns. + * + * @tparam Nullate A cudf::nullate type describing how to check for nulls. + */ + class element_comparator { + public: + /** + * @brief Construct type-dispatched function object for comparing equality + * between two elements. + * + * @note `lhs` and `rhs` may be the same. + * + * @param has_nulls Indicates if either input column contains nulls. + * @param lhs The column containing the first element + * @param rhs The column containing the second element (may be the same as lhs) + * @param nulls_are_equal Indicates if two null elements are treated as equivalent + */ + __device__ element_comparator(Nullate has_nulls, + column_device_view lhs, + column_device_view rhs, + null_equality nulls_are_equal = null_equality::EQUAL) noexcept + : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal} + { + } + + /** + * @brief Compares the specified elements for equality. + * + * @param lhs_element_index The index of the first element + * @param rhs_element_index The index of the second element + * @return True if lhs and rhs are equal or if both lhs and rhs are null and nulls are + * configured to be considered equal (`nulls_are_equal` == `null_equality::EQUAL`) + */ + template ())> + __device__ bool operator()(size_type const lhs_element_index, + size_type const rhs_element_index) const noexcept + { + if (nulls) { + bool const lhs_is_null{lhs.is_null(lhs_element_index)}; + bool const rhs_is_null{rhs.is_null(rhs_element_index)}; + if (lhs_is_null and rhs_is_null) { + return nulls_are_equal == null_equality::EQUAL; + } else if (lhs_is_null != rhs_is_null) { + return false; + } + } + + return equality_compare(lhs.element(lhs_element_index), + rhs.element(rhs_element_index)); + } + + template () and + not cudf::is_nested()), + typename... Args> + __device__ bool operator()(Args...) 
+ { + CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); + } + + template ())> + __device__ bool operator()(size_type const lhs_element_index, + size_type const rhs_element_index) const noexcept + { + column_device_view lcol = lhs.slice(lhs_element_index, 1); + column_device_view rcol = rhs.slice(rhs_element_index, 1); + while (is_nested(lcol.type())) { + if (nulls) { + auto lvalid = detail::make_validity_iterator(lcol); + auto rvalid = detail::make_validity_iterator(rcol); + if (nulls_are_equal == null_equality::UNEQUAL) { + if (thrust::any_of( + thrust::seq, lvalid, lvalid + lcol.size(), thrust::logical_not()) or + thrust::any_of( + thrust::seq, rvalid, rvalid + rcol.size(), thrust::logical_not())) { + return false; + } + } else { + if (not thrust::equal(thrust::seq, lvalid, lvalid + lcol.size(), rvalid)) { + return false; + } + } + } + if (lcol.type().id() == type_id::STRUCT) { + if (lcol.num_child_columns() == 0) { return true; } + lcol = detail::structs_column_device_view(lcol).sliced_child(0); + rcol = detail::structs_column_device_view(rcol).sliced_child(0); + } else if (lcol.type().id() == type_id::LIST) { + auto l_list_col = detail::lists_column_device_view(lcol); + auto r_list_col = detail::lists_column_device_view(rcol); + + auto lsizes = make_list_size_iterator(l_list_col); + auto rsizes = make_list_size_iterator(r_list_col); + if (not thrust::equal(thrust::seq, lsizes, lsizes + lcol.size(), rsizes)) { + return false; + } + + lcol = l_list_col.sliced_child(); + rcol = r_list_col.sliced_child(); + if (lcol.size() != rcol.size()) { return false; } + } + } + + auto comp = + column_comparator{element_comparator{nulls, lcol, rcol, nulls_are_equal}, lcol.size()}; + return type_dispatcher(lcol.type(), comp); + } + + private: + /** + * @brief Serially compare two columns for equality. + * + * When we want to get the equivalence of two columns by serially comparing all elements in a + * one column with the corresponding elements in the other column, this saves us from type + * dispatching for each individual element in the range + */ + struct column_comparator { + element_comparator const comp; + size_type const size; + + /** + * @brief Serially compare two columns for equality. + * + * @return True if ALL elements compare equal, false otherwise + */ + template ())> + __device__ bool operator()() const noexcept + { + return thrust::all_of(thrust::seq, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + size, + [=](auto i) { return comp.template operator()(i, i); }); + } + + template ()), + typename... Args> + __device__ bool operator()(Args...) const noexcept + { + CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types."); + } + }; + + column_device_view const lhs; + column_device_view const rhs; + Nullate const nulls; + null_equality const nulls_are_equal; + }; + + table_device_view const lhs; + table_device_view const rhs; + Nullate const nulls; + null_equality const nulls_are_equal; +}; + +struct preprocessed_table { + /** + * @brief Preprocess table for use with row equality comparison or row hashing + * + * Sets up the table for use with row equality comparison or row hashing. The resulting + * preprocessed table can be passed to the constructor of `equality::self_comparator` to + * avoid preprocessing again. + * + * @param table The table to preprocess + * @param stream The cuda stream to use while preprocessing. 
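+ *
+ * A minimal sketch of the intended flow (host code; `input` and `stream` are
+ * illustrative names):
+ * @code{.cpp}
+ * auto preprocessed = preprocessed_table::create(input, stream);
+ * auto comparator   = self_comparator{preprocessed};  // reuse across comparators
+ * @endcode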
+ */ + static std::shared_ptr create(table_view const& table, + rmm::cuda_stream_view stream); + + private: + friend class self_comparator; + + using table_device_view_owner = + std::invoke_result_t; + + preprocessed_table(table_device_view_owner&& table, + std::vector&& null_buffers) + : _t(std::move(table)), _null_buffers(std::move(null_buffers)) + { + } + + /** + * @brief Implicit conversion operator to a `table_device_view` of the preprocessed table. + * + * @return table_device_view + */ + operator table_device_view() { return *_t; } + + table_device_view_owner _t; + std::vector _null_buffers; +}; + +class self_comparator { + public: + /** + * @brief Construct an owning object for performing equality comparisons between two rows of the + * same table. + * + * @param t The table to compare + * @param stream The stream to construct this object on. Not the stream that will be used for + * comparisons using this object. + */ + self_comparator(table_view const& t, rmm::cuda_stream_view stream) + : d_t(preprocessed_table::create(t, stream)) + { + } + + /** + * @brief Construct an owning object for performing equality comparisons between two rows of the + * same table. + * + * This constructor allows independently constructing a `preprocessed_table` and sharing it among + * multiple comparators. + * + * @param t A table preprocessed for equality comparison + */ + self_comparator(std::shared_ptr t) : d_t{std::move(t)} {} + + /** + * @brief Get the comparison operator to use on the device + * + * Returns a binary callable, `F`, with signature `bool F(size_t, size_t)`. + * + * `F(i,j)` returns true if and only if row `i` compares equal to row `j`. + * + * @tparam Nullate Optional, A cudf::nullate type describing how to check for nulls. + */ + template + device_row_comparator device_comparator(Nullate nullate = {}) const + { + return device_row_comparator(nullate, *d_t, *d_t); + } + + private: + std::shared_ptr d_t; +}; + +} // namespace equality + } // namespace row } // namespace experimental } // namespace cudf diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 70a594423c9..cb1acb4d9ec 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -190,55 +191,6 @@ struct aggregate_writer_metadata { uint32_t column_order_listsize = 0; }; -struct linked_column_view; - -using LinkedColPtr = std::shared_ptr; -using LinkedColVector = std::vector; - -/** - * @brief column_view with the added member pointer to the parent of this column. - * - */ -struct linked_column_view : public column_view { - // TODO(cp): we are currently keeping all column_view children info multiple times - once for each - // copy of this object. Options: - // 1. Inherit from column_view_base. Only lose out on children vector. That is not needed. - // 2. Don't inherit at all. 
make linked_column_view keep a reference wrapper to its column_view - linked_column_view(column_view const& col) : column_view(col), parent(nullptr) - { - for (auto child_it = col.child_begin(); child_it < col.child_end(); ++child_it) { - children.push_back(std::make_shared(this, *child_it)); - } - } - - linked_column_view(linked_column_view* parent, column_view const& col) - : column_view(col), parent(parent) - { - for (auto child_it = col.child_begin(); child_it < col.child_end(); ++child_it) { - children.push_back(std::make_shared(this, *child_it)); - } - } - - linked_column_view* parent; //!< Pointer to parent of this column. Nullptr if root - LinkedColVector children; -}; - -/** - * @brief Converts all column_views of a table into linked_column_views - * - * @param table table of columns to convert - * @return Vector of converted linked_column_views - */ -LinkedColVector input_table_to_linked_columns(table_view const& table) -{ - LinkedColVector result; - for (column_view const& col : table) { - result.emplace_back(std::make_shared(col)); - } - - return result; -} - /** * @brief Extends SchemaElement to add members required in constructing parquet_column_view * @@ -250,7 +202,7 @@ LinkedColVector input_table_to_linked_columns(table_view const& table) * supported types */ struct schema_tree_node : public SchemaElement { - LinkedColPtr leaf_column; + cudf::detail::LinkedColPtr leaf_column; statistics_dtype stats_dtype; int32_t ts_scale; @@ -262,7 +214,7 @@ struct schema_tree_node : public SchemaElement { struct leaf_schema_fn { schema_tree_node& col_schema; - LinkedColPtr const& col; + cudf::detail::LinkedColPtr const& col; column_in_metadata const& col_meta; bool timestamp_is_int96; @@ -494,7 +446,7 @@ struct leaf_schema_fn { } }; -inline bool is_col_nullable(LinkedColPtr const& col, +inline bool is_col_nullable(cudf::detail::LinkedColPtr const& col, column_in_metadata const& col_meta, bool single_write_mode) { @@ -520,10 +472,11 @@ inline bool is_col_nullable(LinkedColPtr const& col, * Recursively traverses through linked_columns and corresponding metadata to construct schema tree. * The resulting schema tree is stored in a vector in pre-order traversal order. */ -std::vector construct_schema_tree(LinkedColVector const& linked_columns, - table_input_metadata& metadata, - bool single_write_mode, - bool int96_timestamps) +std::vector construct_schema_tree( + cudf::detail::LinkedColVector const& linked_columns, + table_input_metadata& metadata, + bool single_write_mode, + bool int96_timestamps) { std::vector schema; schema_tree_node root{}; @@ -534,8 +487,8 @@ std::vector construct_schema_tree(LinkedColVector const& linke root.parent_idx = -1; // root schema has no parent schema.push_back(std::move(root)); - std::function add_schema = - [&](LinkedColPtr const& col, column_in_metadata& col_meta, size_t parent_idx) { + std::function add_schema = + [&](cudf::detail::LinkedColPtr const& col, column_in_metadata& col_meta, size_t parent_idx) { bool col_nullable = is_col_nullable(col, col_meta, single_write_mode); if (col->type().id() == type_id::STRUCT) { @@ -545,7 +498,7 @@ std::vector construct_schema_tree(LinkedColVector const& linke col_nullable ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED; struct_schema.name = (schema[parent_idx].name == "list") ? 
"element" : col_meta.get_name(); - struct_schema.num_children = col->num_children(); + struct_schema.num_children = col->children.size(); struct_schema.parent_idx = parent_idx; schema.push_back(std::move(struct_schema)); @@ -553,7 +506,7 @@ std::vector construct_schema_tree(LinkedColVector const& linke // for (auto child_it = col->children.begin(); child_it < col->children.end(); child_it++) { // add_schema(*child_it, struct_node_index); // } - CUDF_EXPECTS(col->num_children() == static_cast(col_meta.num_children()), + CUDF_EXPECTS(col->children.size() == static_cast(col_meta.num_children()), "Mismatch in number of child columns between input table and metadata"); for (size_t i = 0; i < col->children.size(); ++i) { add_schema(col->children[i], col_meta.child(i), struct_node_index); @@ -592,7 +545,7 @@ std::vector construct_schema_tree(LinkedColVector const& linke // "col_name" : { "key_value" : { "key", "value" } } // verify the List child structure is a struct - auto const& struct_col = col->child(lists_column_view::child_column_index); + column_view struct_col = *col->children[lists_column_view::child_column_index]; CUDF_EXPECTS(struct_col.type().id() == type_id::STRUCT, "Map should be a List of struct"); CUDF_EXPECTS(struct_col.num_children() == 2, "Map should be a List of struct with two children only but found " + @@ -740,7 +693,7 @@ parquet_column_view::parquet_column_view(schema_tree_node const& schema_node, // For list columns, we still need to retain the offset child column. auto children = (parent.type().id() == type_id::LIST) - ? std::vector{parent.child(lists_column_view::offsets_column_index), + ? std::vector{*parent.children[lists_column_view::offsets_column_index], single_inheritance_cudf_col} : std::vector{single_inheritance_cudf_col}; @@ -1221,7 +1174,7 @@ void writer::impl::write(table_view const& table, std::vector co add_default_name(table_meta->column_metadata[i], "_col" + std::to_string(i)); } - auto vec = input_table_to_linked_columns(table); + auto vec = table_to_linked_columns(table); auto schema_tree = construct_schema_tree(vec, *table_meta, single_write_mode, int96_timestamps); // Construct parquet_column_views from the schema tree leaf nodes. 
std::vector parquet_columns; diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index 67b4b594f2e..521f8e2d06f 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -38,7 +38,6 @@ namespace { * @tparam value_resolver flag value resolver with boolean first and row number arguments * @tparam scan_operator scan function ran on the flag values * @param order_by input column to generate ranks for - * @param has_nulls if the order_by column has nested nulls * @param resolver flag value resolver * @param scan_op scan operation ran on the flag results * @param stream CUDA stream used for device memory operations and kernel launches @@ -47,28 +46,22 @@ namespace { */ template std::unique_ptr rank_generator(column_view const& order_by, - bool has_nulls, value_resolver resolver, scan_operator scan_op, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const flattened = cudf::structs::detail::flatten_nested_columns( - table_view{{order_by}}, {}, {}, structs::detail::column_nullability::MATCH_INCOMING); - auto const d_flat_order = table_device_view::create(flattened, stream); - row_equality_comparator comparator( - nullate::DYNAMIC{has_nulls}, *d_flat_order, *d_flat_order, null_equality::EQUAL); - auto ranks = make_fixed_width_column(data_type{type_to_id()}, - flattened.flattened_columns().num_rows(), - mask_state::UNALLOCATED, - stream, - mr); + auto comp = cudf::experimental::row::equality::self_comparator(table_view{{order_by}}, stream); + auto const device_comparator = + comp.device_comparator(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))}); + auto ranks = make_fixed_width_column( + data_type{type_to_id()}, order_by.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); thrust::tabulate(rmm::exec_policy(stream), mutable_ranks.begin(), mutable_ranks.end(), - [comparator, resolver] __device__(size_type row_index) { + [comparator = device_comparator, resolver] __device__(size_type row_index) { return resolver(row_index == 0 || !comparator(row_index, row_index - 1), row_index); }); @@ -87,11 +80,8 @@ std::unique_ptr inclusive_dense_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(order_by), - "Unsupported list type in dense_rank scan."); return rank_generator( order_by, - has_nested_nulls(table_view{{order_by}}), [] __device__(bool const unequal, size_type const) { return unequal ? 1 : 0; }, DeviceSum{}, stream, @@ -106,7 +96,6 @@ std::unique_ptr inclusive_rank_scan(column_view const& order_by, "Unsupported list type in rank scan."); return rank_generator( order_by, - has_nested_nulls(table_view{{order_by}}), [] __device__(bool unequal, auto row_index) { return unequal ? 
row_index + 1 : 0; }, DeviceMax{}, stream, diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index a0400133c68..408d4e51425 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -17,7 +17,9 @@ #include #include #include +#include #include +#include #include #include @@ -28,6 +30,59 @@ namespace experimental { namespace { +/** + * @brief Applies the offsets of struct column onto its children + * + * @param c The column whose children are to be sliced + * @return Children of `c` with offsets applied + */ +std::vector slice_children(column_view const& c) +{ + if (c.type().id() == type_id::STRUCT) { + std::vector sliced_children; + sliced_children.reserve(c.num_children()); + auto struct_col = structs_column_view(c); + for (size_type i = 0; i < struct_col.num_children(); ++i) { + auto sliced = struct_col.get_sliced_child(i); + // We cannot directly use the output of `structs_column_view::get_sliced_child` because we + // must first traverse its children recursively to push offsets all the way down to the leaf + // children. + sliced_children.emplace_back(sliced.type(), + sliced.size(), + sliced.head(), + sliced.null_mask(), + sliced.null_count(), + sliced.offset(), + slice_children(sliced)); + } + return sliced_children; + } + return {c.child_begin(), c.child_end()}; +}; + +/** + * @brief Applies the offsets of struct columns in a table onto their children. + * + * Given a table, this replaces any struct columns with similar struct columns that have their + * offsets applied to their children. Structs that are children of list columns are not affected. + * + */ +table_view pushdown_struct_offsets(table_view table) +{ + std::vector cols; + cols.reserve(table.num_columns()); + std::transform(table.begin(), table.end(), std::back_inserter(cols), [&](column_view const& c) { + return column_view(c.type(), + c.size(), + c.head(), + c.null_mask(), + c.null_count(), + c.offset(), + slice_children(c)); + }); + return table_view(cols); +} + /** * @brief Decompose all struct columns in a table * @@ -39,33 +94,60 @@ namespace { * non-decomposed table, which are pruned during decomposition. * * For example, if the original table has a column `Struct, decimal>`, + * * S1 * / \ * S2 d * / \ * i f + * * then after decomposition, we get three columns: * `Struct>`, `float`, and `decimal`. - * 0 2 1 <- depths - * S1 - * | - * S2 d - * | - * i f + * + * 0 2 1 <- depths + * S1 + * | + * S2 d + * | + * i f + * * The depth of the first column is 0 because it contains all its parent levels, while the depth * of the second column is 2 because two of its parent struct levels were pruned. * - * Similarly, a struct column of type Struct> is decomposed as follows + * Similarly, a struct column of type Struct> is decomposed as follows + * * S1 * / \ * i S2 * / \ * f d * - * 0 1 2 <- depths - * S1 S2 d - * | | - * i f + * 0 1 2 <- depths + * S1 S2 d + * | | + * i f + * + * When list columns are present, the decomposition is performed similarly to pure structs but list + * parent columns are NOT pruned + * + * For example, if the original table has a column `List>`, + * + * L + * | + * S + * / \ + * i f + * + * after decomposition, we get two columns + * + * L L + * | | + * S f + * | + * i + * + * The list parents are still needed to define the range of elements in the leaf that belong to the + * same row. * * @param table The table whose struct columns to decompose. 
* @param column_order The per-column order if using output with lexicographic comparison @@ -77,26 +159,34 @@ auto decompose_structs(table_view table, host_span column_order = {}, host_span null_precedence = {}) { + auto sliced = pushdown_struct_offsets(table); + auto linked_columns = detail::table_to_linked_columns(sliced); + std::vector verticalized_columns; std::vector new_column_order; std::vector new_null_precedence; std::vector verticalized_col_depths; - for (size_type col_idx = 0; col_idx < table.num_columns(); ++col_idx) { - auto const& col = table.column(col_idx); - if (is_nested(col.type())) { + for (size_t col_idx = 0; col_idx < linked_columns.size(); ++col_idx) { + detail::linked_column_view const* col = linked_columns[col_idx].get(); + if (is_nested(col->type())) { // convert and insert - std::vector> flattened; - std::function*, int)> recursive_child = - [&](column_view const& c, std::vector* branch, int depth) { + std::vector> flattened; + std::function*, int)> + recursive_child = [&](detail::linked_column_view const* c, + std::vector* branch, + int depth) { branch->push_back(c); - if (c.type().id() == type_id::STRUCT) { - for (int child_idx = 0; child_idx < c.num_children(); ++child_idx) { - auto scol = structs_column_view(c); + if (c->type().id() == type_id::LIST) { + recursive_child( + c->children[lists_column_view::child_column_index].get(), branch, depth + 1); + } else if (c->type().id() == type_id::STRUCT) { + for (size_t child_idx = 0; child_idx < c->children.size(); ++child_idx) { if (child_idx > 0) { verticalized_col_depths.push_back(depth + 1); branch = &flattened.emplace_back(); } - recursive_child(scol.get_sliced_child(child_idx), branch, depth + 1); + recursive_child(c->children[child_idx].get(), branch, depth + 1); } } }; @@ -105,17 +195,39 @@ auto decompose_structs(table_view table, recursive_child(col, &branch, 0); for (auto const& branch : flattened) { - column_view curr_col = branch.back(); + column_view temp_col = *branch.back(); for (auto it = branch.crbegin() + 1; it < branch.crend(); ++it) { - curr_col = column_view(it->type(), - it->size(), + auto const& prev_col = *(*it); + auto children = + (prev_col.type().id() == type_id::LIST) + ? 
std::vector{*prev_col + .children[lists_column_view::offsets_column_index], + temp_col} + : std::vector{temp_col}; + temp_col = column_view(prev_col.type(), + prev_col.size(), nullptr, - it->null_mask(), + prev_col.null_mask(), UNKNOWN_NULL_COUNT, - it->offset(), - {curr_col}); + prev_col.offset(), + std::move(children)); + } + // Traverse upward and include any list columns in the ancestors + for (detail::linked_column_view* parent = branch.front()->parent; parent; + parent = parent->parent) { + if (parent->type().id() == type_id::LIST) { + // Include this parent + temp_col = column_view( + parent->type(), + parent->size(), + nullptr, // list has no data of its own + nullptr, // If we're going through this then nullmask is already in another branch + UNKNOWN_NULL_COUNT, + parent->offset(), + {*parent->children[lists_column_view::offsets_column_index], temp_col}); + } } - verticalized_columns.push_back(curr_col); + verticalized_columns.push_back(temp_col); } if (not column_order.empty()) { new_column_order.insert(new_column_order.end(), flattened.size(), column_order[col_idx]); @@ -125,7 +237,7 @@ auto decompose_structs(table_view table, new_null_precedence.end(), flattened.size(), null_precedence[col_idx]); } } else { - verticalized_columns.push_back(col); + verticalized_columns.push_back(*col); verticalized_col_depths.push_back(0); if (not column_order.empty()) { new_column_order.push_back(column_order[col_idx]); } if (not null_precedence.empty()) { new_null_precedence.push_back(null_precedence[col_idx]); } @@ -137,6 +249,8 @@ auto decompose_structs(table_view table, std::move(verticalized_col_depths)); } +using column_checker_fn_t = std::function; + /** * @brief Check a table for compatibility with lexicographic comparison * @@ -145,7 +259,7 @@ auto decompose_structs(table_view table, void check_lex_compatibility(table_view const& input) { // Basically check if there's any LIST hiding anywhere in the table - std::function check_column = [&](column_view const& c) { + column_checker_fn_t check_column = [&](column_view const& c) { CUDF_EXPECTS(c.type().id() != type_id::LIST, "Cannot lexicographic compare a table with a LIST column"); if (not is_nested(c.type())) { @@ -162,6 +276,28 @@ void check_lex_compatibility(table_view const& input) } } +/** + * @brief Check a table for compatibility with equality comparison + * + * Checks whether a given table contains columns of non-equality comparable types. 
+ */ +void check_eq_compatibility(table_view const& input) +{ + column_checker_fn_t check_column = [&](column_view const& c) { + if (not is_nested(c.type())) { + CUDF_EXPECTS(is_equality_comparable(c.type()), + "Cannot compare equality for a table with a column of type " + + jit::get_type_name(c.type())); + } + for (auto child = c.child_begin(); child < c.child_end(); ++child) { + check_column(*child); + } + }; + for (column_view const& c : input) { + check_column(c); + } +} + } // namespace namespace row { @@ -189,6 +325,24 @@ std::shared_ptr preprocessed_table::create( } } // namespace lexicographic + +namespace equality { + +std::shared_ptr preprocessed_table::create(table_view const& t, + rmm::cuda_stream_view stream) +{ + check_eq_compatibility(t); + + auto null_pushed_table = structs::detail::superimpose_parent_nulls(t, stream); + auto [verticalized_lhs, _, __, ___] = decompose_structs(std::get<0>(null_pushed_table)); + + auto d_t = table_device_view_owner(table_device_view::create(verticalized_lhs, stream)); + return std::shared_ptr( + new preprocessed_table(std::move(d_t), std::move(std::get<1>(null_pushed_table)))); +} + +} // namespace equality + } // namespace row } // namespace experimental } // namespace cudf diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 1ed921d1f08..e016f47616b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -156,6 +156,7 @@ ConfigureTest( reductions/reduction_tests.cpp reductions/scan_tests.cpp reductions/segmented_reduction_tests.cpp + reductions/list_rank_test.cpp reductions/tdigest_tests.cu ) diff --git a/cpp/tests/reductions/list_rank_test.cpp b/cpp/tests/reductions/list_rank_test.cpp new file mode 100644 index 00000000000..d263677f23b --- /dev/null +++ b/cpp/tests/reductions/list_rank_test.cpp @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include "benchmarks/common/generate_input.hpp" +#include +#include +#include + +struct ListRankScanTest : public cudf::test::BaseFixture { + inline void test_ungrouped_rank_scan(cudf::column_view const& input, + cudf::column_view const& expect_vals, + std::unique_ptr const& agg, + cudf::null_policy null_handling) + { + auto col_out = cudf::scan(input, agg, cudf::scan_type::INCLUSIVE, null_handling); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT( + expect_vals, col_out->view(), cudf::test::debug_output_level::ALL_ERRORS); + } +}; + +TEST_F(ListRankScanTest, BasicList) +{ + using lcw = cudf::test::lists_column_wrapper; + auto const col = lcw{{}, {}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}}; + + auto const expected_dense_vals = + cudf::test::fixed_width_column_wrapper{1, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 9}; + this->test_ungrouped_rank_scan(col, + expected_dense_vals, + cudf::make_dense_rank_aggregation(), + cudf::null_policy::INCLUDE); +} + +TEST_F(ListRankScanTest, DeepList) +{ + using lcw = cudf::test::lists_column_wrapper; + lcw col{ + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, + {{1, 2, 3}, {}, {4, 5}, {}, {0, 6, 0}}, + {{1, 2, 3}, {}, {4, 5}, {0, 6, 0}}, + {{7, 8}, {}}, + lcw{lcw{}, lcw{}, lcw{}}, + lcw{lcw{}}, + lcw{lcw{}}, + lcw{lcw{}}, + lcw{lcw{}, lcw{}, lcw{}}, + lcw{lcw{}, lcw{}, lcw{}}, + {lcw{10}}, + {lcw{10}}, + {{13, 14}, {15}}, + {{13, 14}, {16}}, + lcw{}, + lcw{lcw{}}, + }; + + { // Non-sliced + auto const expected_dense_vals = cudf::test::fixed_width_column_wrapper{ + 1, 1, 2, 3, 4, 5, 5, 5, 6, 6, 7, 7, 8, 9, 10, 11}; + this->test_ungrouped_rank_scan(col, + expected_dense_vals, + cudf::make_dense_rank_aggregation(), + cudf::null_policy::INCLUDE); + } + + { // sliced + auto sliced_col = cudf::slice(col, {3, 12})[0]; + auto const expected_dense_vals = + cudf::test::fixed_width_column_wrapper{1, 2, 3, 3, 3, 4, 4, 5, 5}; + this->test_ungrouped_rank_scan(sliced_col, + expected_dense_vals, + cudf::make_dense_rank_aggregation(), + cudf::null_policy::INCLUDE); + } +} + +TEST_F(ListRankScanTest, ListOfStruct) +{ + // Constructing a list of struct of two elements + // 0. [] == + // 1. [] != + // 2. Null == + // 3. Null != + // 4. [Null, Null] != + // 5. [Null] == + // 6. [Null] == + // 7. [Null] != + // 8. [{Null, Null}] != + // 9. [{1,'a'}, {2,'b'}] != + // 10. [{0,'a'}, {2,'b'}] != + // 11. [{0,'a'}, {2,'c'}] == + // 12. [{0,'a'}, {2,'c'}] != + // 13. [{0,Null}] == + // 14. [{0,Null}] != + // 15. [{Null, 0}] == + // 16. 
[{Null, 0}] + + auto col1 = cudf::test::fixed_width_column_wrapper{ + {-1, -1, 0, 2, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, 0, 1, 2}, + {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0}}; + auto col2 = cudf::test::strings_column_wrapper{ + {"x", "x", "a", "a", "b", "b", "a", "b", "a", "b", "a", "c", "a", "c", "a", "c", "b", "b"}, + {1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1}}; + auto struc = cudf::test::structs_column_wrapper{ + {col1, col2}, {0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}; + + auto offsets = cudf::test::fixed_width_column_wrapper{ + 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18}; + + auto list_nullmask = std::vector{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + auto nullmask_buf = + cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); + auto list_column = cudf::column_view(cudf::data_type(cudf::type_id::LIST), + 17, + nullptr, + static_cast(nullmask_buf.data()), + cudf::UNKNOWN_NULL_COUNT, + 0, + {offsets, struc}); + + { // Non-sliced + auto expect = cudf::test::fixed_width_column_wrapper{ + 1, 1, 2, 2, 3, 4, 4, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10}; + + this->test_ungrouped_rank_scan(list_column, + expect, + cudf::make_dense_rank_aggregation(), + cudf::null_policy::INCLUDE); + } + + { // Sliced + auto sliced_col = cudf::slice(list_column, {3, 15})[0]; + auto expect = + cudf::test::fixed_width_column_wrapper{1, 2, 3, 3, 3, 4, 5, 6, 7, 7, 8, 8}; + + this->test_ungrouped_rank_scan(sliced_col, + expect, + cudf::make_dense_rank_aggregation(), + cudf::null_policy::INCLUDE); + } +} + +TEST_F(ListRankScanTest, ListOfEmptyStruct) +{ + // [] + // [] + // Null + // Null + // [Null, Null] + // [Null, Null] + // [Null, Null] + // [Null] + // [Null] + // [{}] + // [{}] + // [{}, {}] + // [{}, {}] + + auto struct_validity = std::vector{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}; + auto struct_validity_buffer = + cudf::test::detail::make_null_mask(struct_validity.begin(), struct_validity.end()); + auto struct_col = + cudf::make_structs_column(14, {}, cudf::UNKNOWN_NULL_COUNT, std::move(struct_validity_buffer)); + + auto offsets = cudf::test::fixed_width_column_wrapper{ + 0, 0, 0, 0, 0, 2, 4, 6, 7, 8, 9, 10, 12, 14}; + auto list_nullmask = std::vector{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + auto list_validity_buffer = + cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); + auto list_column = cudf::make_lists_column(13, + offsets.release(), + std::move(struct_col), + cudf::UNKNOWN_NULL_COUNT, + std::move(list_validity_buffer)); + + auto expect = + cudf::test::fixed_width_column_wrapper{1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6}; + + this->test_ungrouped_rank_scan(*list_column, + expect, + cudf::make_dense_rank_aggregation(), + cudf::null_policy::INCLUDE); +} + +TEST_F(ListRankScanTest, EmptyDeepList) +{ + // List>, where all lists are empty + // [] + // [] + // Null + // Null + + // Internal empty list + auto list1 = cudf::test::lists_column_wrapper{}; + + auto offsets = cudf::test::fixed_width_column_wrapper{0, 0, 0, 0, 0}; + auto list_nullmask = std::vector{1, 1, 0, 0}; + auto list_validity_buffer = + cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end()); + auto list_column = cudf::make_lists_column(4, + offsets.release(), + list1.release(), + cudf::UNKNOWN_NULL_COUNT, + std::move(list_validity_buffer)); + + auto expect = cudf::test::fixed_width_column_wrapper{1, 1, 2, 2}; + + this->test_ungrouped_rank_scan(*list_column, + expect, + cudf::make_dense_rank_aggregation(), + 
cudf::null_policy::INCLUDE); +} From dd7143a3526f4c4d2f4e165cd3562ad1ad98fc39 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Apr 2022 21:03:08 +0000 Subject: [PATCH 05/33] Bump hadoop-common from 3.1.4 to 3.2.3 in /java (#10645) Bumps hadoop-common from 3.1.4 to 3.2.3. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.hadoop:hadoop-common&package-manager=maven&previous-version=3.1.4&new-version=3.2.3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authors: - https://github.com/apps/dependabot Approvers: - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/10645 --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 8eccd652a46..e2efed19636 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -147,7 +147,7 @@ org.apache.hadoop hadoop-common - 3.1.4 + 3.2.3 test From c72868e9f19fe496486773f69e5558f90f999216 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 13 Apr 2022 16:35:37 -0500 Subject: [PATCH 06/33] Remove implementation details from `apply` docstrings (#10651) Removes some unnecessary implementation detail from `apply` docstrings and updates them where necessary. Authors: - https://github.com/brandon-b-miller Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Ashwin Srinath (https://github.com/shwina) URL: https://github.com/rapidsai/cudf/pull/10651 --- python/cudf/cudf/core/dataframe.py | 25 +++++++++++++------------ python/cudf/cudf/core/series.py | 16 +++++++++++----- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 277fd5aae57..7c209086663 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3564,12 +3564,13 @@ def apply( ): """ Apply a function along an axis of the DataFrame. - - Designed to mimic `pandas.DataFrame.apply`. Applies a user - defined function row wise over a dataframe, with true null - handling. Works with UDFs using `core.udf.pipeline.nulludf` - and returns a single series. Uses numba to jit compile the - function to PTX via LLVM. + ``apply`` relies on Numba to JIT compile ``func``. + Thus the allowed operations within ``func`` are limited + to the ones specified + [here](https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html). + For more information, see the cuDF guide + to user defined functions found + [here](https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html). Parameters ---------- @@ -3590,7 +3591,7 @@ def apply( Examples -------- - Simple function of a single variable which could be NA + Simple function of a single variable which could be NA: >>> def f(row): ... if row['a'] is cudf.NA: @@ -3606,7 +3607,7 @@ def apply( dtype: int64 Function of multiple variables will operate in - a null aware manner + a null aware manner: >>> def f(row): ... return row['a'] - row['b'] @@ -3622,7 +3623,7 @@ def apply( 3 dtype: int64 - Functions may conditionally return NA as in pandas + Functions may conditionally return NA as in pandas: >>> def f(row): ... if row['a'] + row['b'] > 3: @@ -3641,7 +3642,7 @@ def apply( dtype: int64 Mixed types are allowed, but will return the common - type, rather than object as in pandas + type, rather than object as in pandas: >>> def f(row): ... return row['a'] + row['b'] @@ -3658,7 +3659,7 @@ def apply( Functions may also return scalar values, however the result will be promoted to a safe type regardless of - the data + the data: >>> def f(row): ... if row['a'] > 3: @@ -3675,7 +3676,7 @@ def apply( 2 5.0 dtype: float64 - Ops against N columns are supported generally + Ops against N columns are supported generally: >>> def f(row): ... 
v, w, x, y, z = (
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 5bf52ed7520..6e15c03e6b4 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2021,9 +2021,15 @@ def _return_sentinel_series():
     def apply(self, func, convert_dtype=True, args=(), **kwargs):
         """
         Apply a scalar function to the values of a Series.
+        Similar to ``pandas.Series.apply``.

-        Similar to `pandas.Series.apply. Applies a user
-        defined function elementwise over a series.
+        ``apply`` relies on Numba to JIT compile ``func``.
+        Thus the allowed operations within ``func`` are limited
+        to the ones specified
+        [here](https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html).
+        For more information, see the cuDF guide to
+        user defined functions found
+        [here](https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html).

         Parameters
         ----------
@@ -2061,7 +2067,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
         2    4
         dtype: int64

-        Apply a basic function to a series with nulls
+        Apply a basic function to a series with nulls:

         >>> sr = cudf.Series([1,cudf.NA,3])
         >>> def f(x):
@@ -2073,7 +2079,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
         dtype: int64

         Use a function that does something conditionally,
-        based on if the value is or is not null
+        based on if the value is or is not null:

         >>> sr = cudf.Series([1,cudf.NA,3])
         >>> def f(x):
@@ -2091,7 +2097,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
         as derived from the UDFs logic. Note that this means
         the common type will be returned even if such data
         is passed that would not result in any values of that
-        dtype.
+        dtype:

         >>> sr = cudf.Series([1,cudf.NA,3])
         >>> def f(x):

From ce56bc3abad18943c972311305085843df7af36b Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Wed, 13 Apr 2022 16:35:37 -0500
Subject: [PATCH 07/33] Add `cudf.DataFrame.applymap` (#10542)

Naive implementation of `DataFrame.applymap` that just calls `apply` in a loop over columns. This could theoretically be made much faster within our framework. It requires at worst `N` compilations and `M` kernel launches, where `N` is the number of distinct dtypes in the data and `M` is the total number of columns. As an improvement, we could instead launch a single kernel that populates the entire output data. That would still suffer from the compilation bottleneck, however, since the function must be compiled in order for an output dtype to be determined, and this will need to be done for each distinct dtype within the data.
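A minimal usage sketch (illustrative data; this mirrors the tests added in this PR):

```python
import cudf

df = cudf.DataFrame({"a": [1, 2, 3], "b": [1.5, cudf.NA, 3.5]})
# elementwise UDF; with na_action="ignore", nulls are propagated to the
# output without ever being passed to the function
out = df.applymap(lambda x: x + 1, na_action="ignore")
```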
Part of https://github.com/rapidsai/cudf/issues/10169 Authors: - https://github.com/brandon-b-miller - Bradley Dice (https://github.com/bdice) Approvers: - Bradley Dice (https://github.com/bdice) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/10542 --- docs/cudf/source/api_docs/dataframe.rst | 1 + python/cudf/cudf/core/dataframe.py | 64 +++++++++++++++++++ python/cudf/cudf/tests/test_applymap.py | 44 ++++++++++++- .../dask_cudf/tests/test_applymap.py | 29 +++++++++ .../dask_cudf/dask_cudf/tests/test_binops.py | 13 ++-- python/dask_cudf/dask_cudf/tests/utils.py | 21 ++++++ 6 files changed, 162 insertions(+), 10 deletions(-) create mode 100644 python/dask_cudf/dask_cudf/tests/test_applymap.py create mode 100644 python/dask_cudf/dask_cudf/tests/utils.py diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 1aa1ea8beac..1d600acfef1 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -105,6 +105,7 @@ Function application, GroupBy & window :toctree: api/ DataFrame.apply + DataFrame.applymap DataFrame.apply_chunks DataFrame.apply_rows DataFrame.pipe diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 7c209086663..2b2c09fa2a0 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -13,6 +13,7 @@ from collections.abc import Iterable, Mapping, Sequence from typing import ( Any, + Callable, Dict, List, MutableMapping, @@ -25,6 +26,7 @@ ) import cupy +import numba import numpy as np import pandas as pd import pyarrow as pa @@ -3708,6 +3710,68 @@ def apply( return self._apply(func, _get_row_kernel, *args, **kwargs) + def applymap( + self, + func: Callable[[Any], Any], + na_action: Union[str, None] = None, + **kwargs, + ) -> DataFrame: + + """ + Apply a function to a Dataframe elementwise. + + This method applies a function that accepts and returns a scalar + to every element of a DataFrame. + + Parameters + ---------- + func : callable + Python function, returns a single value from a single value. + na_action : {None, 'ignore'}, default None + If 'ignore', propagate NaN values, without passing them to func. + + Returns + ------- + DataFrame + Transformed DataFrame. + """ + + if kwargs: + raise NotImplementedError( + "DataFrame.applymap does not yet support **kwargs." + ) + + if na_action not in {"ignore", None}: + raise ValueError( + f"na_action must be 'ignore' or None. 
Got {repr(na_action)}" + ) + + if na_action == "ignore": + devfunc = numba.cuda.jit(device=True)(func) + + # promote to a null-ignoring function + # this code is never run in python, it only + # exists to provide numba with the correct + # bytecode to generate the equivalent PTX + # as a null-ignoring version of the function + def _func(x): # pragma: no cover + if x is cudf.NA: + return cudf.NA + else: + return devfunc(x) + + else: + _func = func + + # TODO: naive implementation + # this could be written as a single kernel + result = {} + for name, col in self._data.items(): + apply_sr = Series._from_data({None: col}) + result[name] = apply_sr.apply(_func) + + return DataFrame._from_data(result, index=self.index) + @_cudf_nvtx_annotate @applyutils.doc_apply() def apply_rows( diff --git a/python/cudf/cudf/tests/test_applymap.py b/python/cudf/cudf/tests/test_applymap.py index bd322a28a08..c8a9b5d03f5 100644 --- a/python/cudf/cudf/tests/test_applymap.py +++ b/python/cudf/cudf/tests/test_applymap.py @@ -6,7 +6,7 @@ import numpy as np import pytest -from cudf import Series +from cudf import NA, DataFrame, Series from cudf.testing import _utils as utils @@ -58,3 +58,45 @@ def test_applymap_change_out_dtype(): expect = np.array(data, dtype=float) got = out.to_numpy() np.testing.assert_array_equal(expect, got) + + +@pytest.mark.parametrize( + "data", + [ + {"a": [1, 2, 3], "b": [4, 5, 6]}, + {"a": [1, 2, 3], "b": [1.0, 2.0, 3.0]}, + {"a": [1, 2, 3], "b": [True, False, True]}, + {"a": [1, NA, 2], "b": [NA, 4, NA]}, + ], +) +@pytest.mark.parametrize( + "func", + [ + lambda x: x + 1, + lambda x: x - 0.5, + lambda x: 2 if x is NA else 2 + (x + 1) / 4.1, + lambda x: 42, + ], +) +@pytest.mark.parametrize("na_action", [None, "ignore"]) +def test_applymap_dataframe(data, func, na_action): + gdf = DataFrame(data) + pdf = gdf.to_pandas(nullable=True) + + expect = pdf.applymap(func, na_action=na_action) + got = gdf.applymap(func, na_action=na_action) + + utils.assert_eq(expect, got, check_dtype=False) + + +def test_applymap_raise_cases(): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + def f(x, some_kwarg=0): + return x + some_kwarg + + with pytest.raises(NotImplementedError): + df.applymap(f, some_kwarg=1) + + with pytest.raises(ValueError): + df.applymap(f, na_action="some_invalid_option") diff --git a/python/dask_cudf/dask_cudf/tests/test_applymap.py b/python/dask_cudf/dask_cudf/tests/test_applymap.py new file mode 100644 index 00000000000..929f00ec296 --- /dev/null +++ b/python/dask_cudf/dask_cudf/tests/test_applymap.py @@ -0,0 +1,29 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +import pytest +from pandas import NA + +from dask import dataframe as dd + +from dask_cudf.tests.utils import _make_random_frame + + +@pytest.mark.parametrize( + "func", + [ + lambda x: x + 1, + lambda x: x - 0.5, + lambda x: 2 if x is NA else 2 + (x + 1) / 4.1, + lambda x: 42, + ], +) +@pytest.mark.parametrize("has_na", [True, False]) +def test_applymap_basic(func, has_na): + size = 2000 + pdf, dgdf = _make_random_frame(size, include_na=False) + + dpdf = dd.from_pandas(pdf, npartitions=dgdf.npartitions) + + expect = dpdf.applymap(func) + got = dgdf.applymap(func) + dd.assert_eq(expect, got, check_dtype=False) diff --git a/python/dask_cudf/dask_cudf/tests/test_binops.py b/python/dask_cudf/dask_cudf/tests/test_binops.py index 64b7cc85971..87bd401accd 100644 --- a/python/dask_cudf/dask_cudf/tests/test_binops.py +++ b/python/dask_cudf/dask_cudf/tests/test_binops.py @@ -1,3 +1,5 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. 
+ import operator import numpy as np @@ -8,6 +10,8 @@ import cudf +from dask_cudf.tests.utils import _make_random_frame + def _make_empty_frame(npartitions=2): df = pd.DataFrame({"x": [], "y": []}) @@ -16,15 +20,6 @@ def _make_empty_frame(npartitions=2): return dgf -def _make_random_frame(nelem, npartitions=2): - df = pd.DataFrame( - {"x": np.random.random(size=nelem), "y": np.random.random(size=nelem)} - ) - gdf = cudf.DataFrame.from_pandas(df) - dgf = dd.from_pandas(gdf, npartitions=npartitions) - return df, dgf - - def _make_random_frame_float(nelem, npartitions=2): df = pd.DataFrame( { diff --git a/python/dask_cudf/dask_cudf/tests/utils.py b/python/dask_cudf/dask_cudf/tests/utils.py new file mode 100644 index 00000000000..88a2116fb0a --- /dev/null +++ b/python/dask_cudf/dask_cudf/tests/utils.py @@ -0,0 +1,21 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +import numpy as np +import pandas as pd + +import dask.dataframe as dd + +import cudf + + +def _make_random_frame(nelem, npartitions=2, include_na=False): + df = pd.DataFrame( + {"x": np.random.random(size=nelem), "y": np.random.random(size=nelem)} + ) + + if include_na: + df["x"][::2] = pd.NA + + gdf = cudf.DataFrame.from_pandas(df) + dgf = dd.from_pandas(gdf, npartitions=npartitions) + return df, dgf From 489e41f2f4eddcbc24dcd5e38624e2693b596d21 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 13 Apr 2022 15:43:11 -0700 Subject: [PATCH 08/33] Deprecate various functions that don't need to be defined for Index. (#10647) This PR adds deprecations for various `Frame` methods that shouldn't actually be defined for `Index` objects. These methods can all be moved down to `IndexedFrame` in 22.08. This change contributes to making a clean distinction between `Frame` and `IndexedFrame`. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/10647 --- python/cudf/cudf/core/frame.py | 45 +++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 5185fb05cb4..806cdf14c71 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -917,6 +917,12 @@ def scatter_by_map( ------- A list of cudf.DataFrame objects. """ + if not isinstance(self, cudf.DataFrame): + warnings.warn( + f"{self.__class__.__name__}.scatter_by_map is deprecated and " + "will be removed.", + FutureWarning, + ) # map_index might be a column name or array, # make it a Column @@ -1095,6 +1101,8 @@ def fillna( elif method == "backfill": method = "bfill" + # TODO: This logic should be handled in different subclasses since + # different Frames support different types of values. if isinstance(value, cudf.Series): value = value.reindex(self._data.names) elif isinstance(value, cudf.DataFrame): @@ -1209,6 +1217,11 @@ def interpolate( some or all ``NaN`` values """ + if isinstance(self, cudf.BaseIndex): + warnings.warn( + "Index.interpolate is deprecated and will be removed.", + FutureWarning, + ) if method in {"pad", "ffill"} and limit_direction != "forward": raise ValueError( @@ -1320,6 +1333,12 @@ def rank( same type as caller Return a Series or DataFrame with data ranks as values. 
""" + if isinstance(self, cudf.BaseIndex): + warnings.warn( + "Index.rank is deprecated and will be removed.", + FutureWarning, + ) + if method not in {"average", "min", "max", "first", "dense"}: raise KeyError(method) @@ -1355,6 +1374,12 @@ def rank( @_cudf_nvtx_annotate def shift(self, periods=1, freq=None, axis=0, fill_value=None): """Shift values by `periods` positions.""" + if isinstance(self, cudf.BaseIndex): + warnings.warn( + "Index.shift is deprecated and will be removed.", + FutureWarning, + ) + axis = self._get_axis_from_axis_arg(axis) if axis != 0: raise ValueError("Only axis=0 is supported.") @@ -1747,6 +1772,12 @@ def replace( 3 3 8 d 4 4 9 e """ + if isinstance(self, cudf.BaseIndex): + warnings.warn( + "Index.replace is deprecated and will be removed.", + FutureWarning, + ) + if limit is not None: raise NotImplementedError("limit parameter is not implemented yet") @@ -2309,6 +2340,12 @@ def scale(self): 4 0.043478 dtype: float64 """ + if isinstance(self, cudf.BaseIndex): + warnings.warn( + "Index.scale is deprecated and will be removed.", + FutureWarning, + ) + vmin = self.min() vmax = self.max() scaled = (self - vmin) / (vmax - vmin) @@ -3365,6 +3402,12 @@ def _scan(self, op, axis=None, skipna=True): 2 6 24 3 10 34 """ + if isinstance(self, cudf.BaseIndex): + warnings.warn( + f"Index.{op} is deprecated and will be removed.", + FutureWarning, + ) + cast_to_int = op in ("cumsum", "cumprod") skipna = True if skipna is None else skipna @@ -3402,7 +3445,7 @@ def _scan(self, op, axis=None, skipna=True): # TODO: This will work for Index because it's passing self._index # (which is None), but eventually we may want to remove that parameter # for Index._from_data and simplify. - return self._from_data(results, index=self._index) + return self._from_data(results, self._index) @_cudf_nvtx_annotate @ioutils.doc_to_json() From 03e84ef40b3b7d184091beab1e9a6ed10d638410 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 13 Apr 2022 19:46:48 -0700 Subject: [PATCH 09/33] Update pinning to allow newer CMake versions. (#10646) CMake 3.23.1 contains the bug fixes that we need to make use of CMake 3.23, so now we can update the pinnings to just avoid 3.23.0. 
Authors:
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- https://github.com/jakirkham
- Jordan Jacobelli (https://github.com/Ethyling)
- Robert Maynard (https://github.com/robertmaynard)

URL: https://github.com/rapidsai/cudf/pull/10646
---
 conda/environments/cudf_dev_cuda11.5.yml | 2 +-
 conda/recipes/cudf_kafka/meta.yaml | 2 +-
 conda/recipes/libcudf/conda_build_config.yaml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index 6bea7b2623b..bdde007e33e 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -12,7 +12,7 @@ dependencies:
 - clang-tools=11.1.0
 - cupy>=9.5.0,<11.0.0a0
 - rmm=22.06.*
- - cmake>=3.20.1,<3.23
+ - cmake>=3.20.1,!=3.23.0
 - cmake_setuptools>=0.1.3
 - python>=3.7,<3.9
 - numba>=0.54
diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml
index 9e77d44c15d..7d7b5d65cce 100644
--- a/conda/recipes/cudf_kafka/meta.yaml
+++ b/conda/recipes/cudf_kafka/meta.yaml
@@ -25,7 +25,7 @@ build:
 requirements:
 build:
- - cmake >=3.20.1,<3.23
+ - cmake >=3.20.1,!=3.23.0
 host:
 - python
 - cython >=0.29,<0.30
diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml
index 397feab067e..b598a157196 100644
--- a/conda/recipes/libcudf/conda_build_config.yaml
+++ b/conda/recipes/libcudf/conda_build_config.yaml
@@ -1,5 +1,5 @@
 cmake_version:
- - ">=3.20.1,<3.23"
+ - ">=3.20.1,!=3.23.0"
 gtest_version:
 - "=1.10.0"

From 22a6679b5f5d36530a59c4cbfe1acfd530c2711b Mon Sep 17 00:00:00 2001
From: Alfred Xu
Date: Thu, 14 Apr 2022 16:09:52 +0800
Subject: [PATCH 10/33] Improve cudf::cuda_error (#10630)

Closes #10553

Improves `cudf::cuda_error` in two aspects:
1. Adds a `cudaError_t` member to `cudf::cuda_error` and a corresponding `error_code()` function that returns the error code.
2. Introduces `fatal_cuda_error`, derived from `cudf::cuda_error`, to represent fatal (sticky) CUDA errors on the device.

Authors:
- Alfred Xu (https://github.com/sperlingxx)

Approvers:
- Karthikeyan (https://github.com/karthikeyann)
- Jake Hemstad (https://github.com/jrhemstad)

URL: https://github.com/rapidsai/cudf/pull/10630
---
 cpp/include/cudf/utilities/error.hpp | 43 +++++++++++++++++++-------
 cpp/include/cudf_test/cudf_gtest.hpp | 5 ++-
 cpp/tests/error/error_handling_test.cu | 1 +
 3 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/cpp/include/cudf/utilities/error.hpp b/cpp/include/cudf/utilities/error.hpp
index 8be1a7e3a32..8f6190bbaf7 100644
--- a/cpp/include/cudf/utilities/error.hpp
+++ b/cpp/include/cudf/utilities/error.hpp
@@ -46,7 +46,20 @@ struct logic_error : public std::logic_error {
 * @brief Exception thrown when a CUDA error is encountered.
*/ struct cuda_error : public std::runtime_error { - cuda_error(std::string const& message) : std::runtime_error(message) {} + cuda_error(std::string const& message, cudaError_t const& error) + : std::runtime_error(message), _cudaError(error) + { + } + + public: + cudaError_t error_code() const { return _cudaError; } + + protected: + cudaError_t _cudaError; +}; + +struct fatal_cuda_error : public cuda_error { + using cuda_error::cuda_error; }; /** @} */ @@ -101,9 +114,20 @@ namespace detail { inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int line) { - throw cudf::cuda_error(std::string{"CUDA error encountered at: " + std::string{file} + ":" + - std::to_string(line) + ": " + std::to_string(error) + " " + - cudaGetErrorName(error) + " " + cudaGetErrorString(error)}); + // Calls cudaGetLastError twice. It is nearly certain that a fatal error occurred if the second + // call doesn't return with cudaSuccess. + cudaGetLastError(); + auto const last = cudaGetLastError(); + auto const msg = std::string{"CUDA error encountered at: " + std::string{file} + ":" + + std::to_string(line) + ": " + std::to_string(error) + " " + + cudaGetErrorName(error) + " " + cudaGetErrorString(error)}; + // Call cudaDeviceSynchronize to ensure `last` did not result from an asynchronous error. + // between two calls. + if (error == last && last == cudaDeviceSynchronize()) { + throw fatal_cuda_error{"Fatal " + msg, error}; + } else { + throw cuda_error{msg, error}; + } } } // namespace detail } // namespace cudf @@ -115,13 +139,10 @@ inline void throw_cuda_error(cudaError_t error, const char* file, unsigned int l * cudaSuccess, invokes cudaGetLastError() to clear the error and throws an * exception detailing the CUDA error that occurred */ -#define CUDF_CUDA_TRY(call) \ - do { \ - cudaError_t const status = (call); \ - if (cudaSuccess != status) { \ - cudaGetLastError(); \ - cudf::detail::throw_cuda_error(status, __FILE__, __LINE__); \ - } \ +#define CUDF_CUDA_TRY(call) \ + do { \ + cudaError_t const status = (call); \ + if (cudaSuccess != status) { cudf::detail::throw_cuda_error(status, __FILE__, __LINE__); } \ } while (0); /** diff --git a/cpp/include/cudf_test/cudf_gtest.hpp b/cpp/include/cudf_test/cudf_gtest.hpp index d078bf90a8a..7bd704a288d 100644 --- a/cpp/include/cudf_test/cudf_gtest.hpp +++ b/cpp/include/cudf_test/cudf_gtest.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -120,6 +120,9 @@ struct TypeList> { #define CUDA_EXPECT_THROW_MESSAGE(x, msg) \ EXPECT_THROW_MESSAGE(x, cudf::cuda_error, "CUDA error encountered at:", msg) +#define FATAL_CUDA_EXPECT_THROW_MESSAGE(x, msg) \ + EXPECT_THROW_MESSAGE(x, cudf::fatal_cuda_error, "Fatal CUDA error encountered at:", msg) + /** * @brief test macro to be expected as no exception. * The testing is same with EXPECT_NO_THROW() in gtest. 
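For illustration, a minimal sketch of how calling code might consume the new exception types from error.hpp above (hypothetical usage, not part of this patch; `synchronize_and_report` is an invented name):

    #include <cudf/utilities/error.hpp>

    #include <cuda_runtime_api.h>

    #include <iostream>

    void synchronize_and_report()
    {
      try {
        CUDF_CUDA_TRY(cudaDeviceSynchronize());
      } catch (cudf::fatal_cuda_error const& e) {
        // Sticky (fatal) error: the CUDA context is corrupted and the
        // process cannot recover; rethrow after logging.
        std::cerr << "fatal: " << e.what() << " (code " << e.error_code() << ")\n";
        throw;
      } catch (cudf::cuda_error const& e) {
        // Non-fatal error: error_code() exposes the underlying cudaError_t.
        std::cerr << "cuda error: " << e.what() << " (code " << e.error_code() << ")\n";
      }
    }

Note the catch order: since `fatal_cuda_error` derives from `cuda_error`, the more specific handler must come first.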
diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu
index 4327a8b694b..bde8ccc6de7 100644
--- a/cpp/tests/error/error_handling_test.cu
+++ b/cpp/tests/error/error_handling_test.cu
@@ -39,6 +39,7 @@ TEST(CudaTryTest, Error)
 CUDA_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorLaunchFailure),
 "cudaErrorLaunchFailure unspecified launch failure");
 }
+
 TEST(CudaTryTest, Success) { EXPECT_NO_THROW(CUDF_CUDA_TRY(cudaSuccess)); }
 TEST(CudaTryTest, TryCatch)

From ac27757092e9ba2bc0656b6a7dfbc79ce8b5e76a Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 14 Apr 2022 08:10:26 -0400
Subject: [PATCH 11/33] Cleanup libcudf strings regex classes (#10573)

Refactors some of the internal libcudf regex classes used for executing regex on strings. This is the first part of a set of changes to reduce the kernel launch memory size for the regex code. A follow-on PR will change the stack-based state management to a device-memory approach. The changes here are isolated to help ease the review process in the next PR. Most of the code has been moved or refactored, along with general cleanup like adding consts and removing some unnecessary pass-by-reference/pointer usage. None of the calling routines currently require changes, and no behavior has changed.

Authors:
- David Wendt (https://github.com/davidwendt)

Approvers:
- Robert Maynard (https://github.com/robertmaynard)
- Jake Hemstad (https://github.com/jrhemstad)

URL: https://github.com/rapidsai/cudf/pull/10573
---
 cpp/src/strings/regex/regcomp.cpp | 37 +++++++++++++++---
 cpp/src/strings/regex/regcomp.h | 2 +-
 cpp/src/strings/regex/regex.cuh | 109 ++++-----
 cpp/src/strings/regex/regex.inl | 352 ++++++++++++++----------------
 cpp/src/strings/regex/regexec.cu | 130 +++++------
 5 files changed, 313 insertions(+), 317 deletions(-)

diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp
index 6f36658523b..829230d0842 100644
--- a/cpp/src/strings/regex/regcomp.cpp
+++ b/cpp/src/strings/regex/regcomp.cpp
@@ -16,6 +16,7 @@
 #include
+#include
 #include
 #include
@@ -58,6 +59,37 @@ const std::array escapable_chars{
 {'.', '-', '+', '*', '\\', '?', '^', '$', '|', '{', '}', '(', ')', '[', ']', '<', '>', '"', '~', '\'', '`', '_', '@', '=', ';', ':', '!', '#', '%', '&', ',', '/', ' '}};
+/**
+ * @brief Converts UTF-8 string into fixed-width 32-bit character vector.
+ *
+ * No character conversion occurs.
+ * Each UTF-8 character is promoted into a 32-bit value.
+ * The last entry in the returned vector will be a 0 value.
+ * The fixed-width vector makes it easier to compile and faster to execute.
+ *
+ * @param pattern Regular expression encoded with UTF-8.
+ * @return Fixed-width 32-bit character vector.
+ */ +std::vector string_to_char32_vector(std::string_view pattern) +{ + size_type size = static_cast(pattern.size()); + size_type count = std::count_if(pattern.cbegin(), pattern.cend(), [](char ch) { + return is_begin_utf8_char(static_cast(ch)); + }); + std::vector result(count + 1); + char32_t* output_ptr = result.data(); + const char* input_ptr = pattern.data(); + for (size_type idx = 0; idx < size; ++idx) { + char_utf8 output_character = 0; + size_type ch_width = to_char_utf8(input_ptr, output_character); + input_ptr += ch_width; + idx += ch_width - 1; + *output_ptr++ = output_character; + } + result[count] = 0; // last entry set to 0 + return result; +} + } // namespace int32_t reprog::add_inst(int32_t t) @@ -838,10 +870,11 @@ class regex_compiler { }; // Convert pattern into program -reprog reprog::create_from(const char32_t* pattern, regex_flags const flags) +reprog reprog::create_from(std::string_view pattern, regex_flags const flags) { reprog rtn; - regex_compiler compiler(pattern, flags, rtn); + auto pattern32 = string_to_char32_vector(pattern); + regex_compiler compiler(pattern32.data(), flags, rtn); // for debugging, it can be helpful to call rtn.print(flags) here to dump // out the instructions that have been created from the given pattern return rtn; diff --git a/cpp/src/strings/regex/regcomp.h b/cpp/src/strings/regex/regcomp.h index 18735d0f980..798b43830b4 100644 --- a/cpp/src/strings/regex/regcomp.h +++ b/cpp/src/strings/regex/regcomp.h @@ -92,7 +92,7 @@ class reprog { * @brief Parses the given regex pattern and compiles * into a list of chained instructions. */ - static reprog create_from(const char32_t* pattern, regex_flags const flags); + static reprog create_from(std::string_view pattern, regex_flags const flags); int32_t add_inst(int32_t type); int32_t add_inst(reinst inst); diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh index b172ceae2a6..bcdd15bceda 100644 --- a/cpp/src/strings/regex/regex.cuh +++ b/cpp/src/strings/regex/regex.cuh @@ -25,7 +25,6 @@ #include #include -#include #include namespace cudf { @@ -35,9 +34,7 @@ class string_view; namespace strings { namespace detail { -struct reljunk; -struct reinst; -class reprog; +struct relist; using match_pair = thrust::pair; using match_result = thrust::optional; @@ -65,19 +62,18 @@ constexpr int32_t RX_LARGE_INSTS = (RX_STACK_LARGE / 11); * * This class holds the unique data for any regex CCLASS instruction. */ -class reclass_device { - public: +struct alignas(16) reclass_device { int32_t builtins{}; int32_t count{}; - char32_t* literals{}; + char32_t const* literals{}; - __device__ bool is_match(char32_t ch, const uint8_t* flags); + __device__ inline bool is_match(char32_t const ch, uint8_t const* flags) const; }; /** * @brief Regex program of instructions/data for a specific regex pattern. * - * Once create, this find/extract methods are used to evaluating the regex instructions + * Once created, the find/extract methods are used to evaluate the regex instructions * against a single string. */ class reprog_device { @@ -132,15 +128,7 @@ class reprog_device { /** * @brief Returns the number of regex instructions. */ - [[nodiscard]] __host__ __device__ int32_t insts_counts() const { return _insts_count; } - - /** - * @brief Returns true if this is an empty program. 
- */ - [[nodiscard]] __device__ bool is_empty() const - { - return insts_counts() == 0 || get_inst(0)->type == END; - } + [[nodiscard]] CUDF_HOST_DEVICE int32_t insts_counts() const { return _insts_count; } /** * @brief Returns the number of regex groups found in the expression. @@ -151,19 +139,9 @@ class reprog_device { } /** - * @brief Returns the regex instruction object for a given index. - */ - [[nodiscard]] __device__ inline reinst* get_inst(int32_t idx) const; - - /** - * @brief Returns the regex class object for a given index. - */ - [[nodiscard]] __device__ inline reclass_device get_class(int32_t idx) const; - - /** - * @brief Returns the start-instruction-ids vector. + * @brief Returns true if this is an empty program. */ - [[nodiscard]] __device__ inline int32_t* startinst_ids() const; + [[nodiscard]] __device__ inline bool is_empty() const; /** * @brief Does a find evaluation using the compiled expression on the given string. @@ -180,9 +158,9 @@ class reprog_device { */ template __device__ inline int32_t find(int32_t idx, - string_view const& d_str, - int32_t& begin, - int32_t& end); + string_view const d_str, + cudf::size_type& begin, + cudf::size_type& end) const; /** * @brief Does an extract evaluation using the compiled expression on the given string. @@ -192,8 +170,8 @@ class reprog_device { * the matched section. * * @tparam stack_size One of the `RX_STACK_` values based on the `insts_count`. - * @param idx The string index used for mapping the state memory for this string in global memory - * (if necessary). + * @param idx The string index used for mapping the state memory for this string in global + * memory (if necessary). * @param d_str The string to search. * @param begin Position index to begin the search. If found, returns the position found * in the string. @@ -204,34 +182,65 @@ class reprog_device { */ template __device__ inline match_result extract(cudf::size_type idx, - string_view const& d_str, + string_view const d_str, cudf::size_type begin, cudf::size_type end, - cudf::size_type group_id); + cudf::size_type const group_id) const; private: - int32_t _startinst_id, _num_capturing_groups; - int32_t _insts_count, _starts_count, _classes_count; - const uint8_t* _codepoint_flags{}; // table of character types - reinst* _insts{}; // array of regex instructions - int32_t* _startinst_ids{}; // array of start instruction ids - reclass_device* _classes{}; // array of regex classes - void* _relists_mem{}; // runtime relist memory for regexec + struct reljunk { + relist* __restrict__ list1; + relist* __restrict__ list2; + int32_t starttype{}; + char32_t startchar{}; + + __device__ inline reljunk(relist* list1, relist* list2, reinst const inst); + __device__ inline void swaplist(); + }; + + /** + * @brief Returns the regex instruction object for a given id. + */ + __device__ inline reinst get_inst(int32_t id) const; + + /** + * @brief Returns the regex class object for a given id. + */ + __device__ inline reclass_device get_class(int32_t id) const; /** * @brief Executes the regex pattern on the given string. 
*/ - __device__ inline int32_t regexec( - string_view const& d_str, reljunk& jnk, int32_t& begin, int32_t& end, int32_t group_id = 0); + __device__ inline int32_t regexec(string_view const d_str, + reljunk jnk, + cudf::size_type& begin, + cudf::size_type& end, + cudf::size_type const group_id = 0) const; /** * @brief Utility wrapper to setup state memory structures for calling regexec */ template - __device__ inline int32_t call_regexec( - int32_t idx, string_view const& d_str, int32_t& begin, int32_t& end, int32_t group_id = 0); - - reprog_device(reprog&); // must use create() + __device__ inline int32_t call_regexec(int32_t idx, + string_view const d_str, + cudf::size_type& begin, + cudf::size_type& end, + cudf::size_type const group_id = 0) const; + + reprog_device(reprog&); + + int32_t _startinst_id; // first instruction id + int32_t _num_capturing_groups; // instruction groups + int32_t _insts_count; // number of instructions + int32_t _starts_count; // number of start-insts ids + int32_t _classes_count; // number of classes + + uint8_t const* _codepoint_flags{}; // table of character types + reinst const* _insts{}; // array of regex instructions + int32_t const* _startinst_ids{}; // array of start instruction ids + reclass_device const* _classes{}; // array of regex classes + + void* _relists_mem{}; // runtime relist memory for regexec() }; } // namespace detail diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index 01e773960e4..9fe4440d7ec 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,16 +17,9 @@ #include #include -#include +#include #include -#include -#include -#include -#include -#include -#include - namespace cudf { namespace strings { namespace detail { @@ -40,95 +33,102 @@ namespace detail { * reflected here. The regexec function updates and manages this state data. */ struct alignas(8) relist { - int16_t size{}; - int16_t listsize{}; - int32_t reserved; - int2* ranges{}; // pair per instruction - int16_t* inst_ids{}; // one per instruction - u_char* mask{}; // bit per instruction - - CUDF_HOST_DEVICE inline static int32_t data_size_for(int32_t insts) + /** + * @brief Compute the memory size for the state data. + */ + constexpr inline static std::size_t data_size_for(int32_t insts) { - return ((sizeof(ranges[0]) + sizeof(inst_ids[0])) * insts) + ((insts + 7) / 8); + return ((sizeof(ranges[0]) + sizeof(inst_ids[0])) * insts) + + cudf::util::div_rounding_up_unsafe(insts, 8); } - CUDF_HOST_DEVICE inline static int32_t alloc_size(int32_t insts) + /** + * @brief Compute the aligned memory allocation size. 
+ */ + constexpr inline static std::size_t alloc_size(int32_t insts) { - int32_t size = sizeof(relist); - size += data_size_for(insts); - size = ((size + 7) / 8) * 8; // align it too - return size; + return cudf::util::round_up_unsafe(data_size_for(insts) + sizeof(relist), + sizeof(ranges[0])); } - CUDF_HOST_DEVICE inline relist() {} + struct alignas(16) restate { + int2 range; + int32_t inst_id; + int32_t reserved; + }; - CUDF_HOST_DEVICE inline relist(int16_t insts, u_char* data = nullptr) : listsize(insts) + __device__ __forceinline__ relist(int16_t insts, u_char* data = nullptr) + : masksize(cudf::util::div_rounding_up_unsafe(insts, 8)) { auto ptr = data == nullptr ? reinterpret_cast(this) + sizeof(relist) : data; ranges = reinterpret_cast(ptr); - ptr += listsize * sizeof(ranges[0]); + ptr += insts * sizeof(ranges[0]); inst_ids = reinterpret_cast(ptr); - ptr += listsize * sizeof(inst_ids[0]); + ptr += insts * sizeof(inst_ids[0]); mask = ptr; reset(); } - CUDF_HOST_DEVICE inline void reset() + __device__ __forceinline__ void reset() { - memset(mask, 0, (listsize + 7) / 8); + memset(mask, 0, masksize); size = 0; } - __device__ inline bool activate(int32_t i, int32_t begin, int32_t end) + __device__ __forceinline__ bool activate(int32_t id, int32_t begin, int32_t end) { - if (readMask(i)) return false; - writeMask(true, i); - inst_ids[size] = static_cast(i); + if (readMask(id)) { return false; } + writeMask(id); + inst_ids[size] = static_cast(id); ranges[size] = int2{begin, end}; ++size; return true; } - __device__ inline void writeMask(bool v, int32_t pos) + __device__ __forceinline__ restate get_state(int16_t idx) const { - u_char uc = 1 << (pos & 7); - if (v) - mask[pos >> 3] |= uc; - else - mask[pos >> 3] &= ~uc; + return restate{ranges[idx], inst_ids[idx]}; } - __device__ inline bool readMask(int32_t pos) + __device__ __forceinline__ int16_t get_size() const { return size; } + + private: + int16_t size{}; + int16_t const masksize; + int32_t reserved; + int2* __restrict__ ranges; // pair per instruction + int16_t* __restrict__ inst_ids; // one per instruction + u_char* __restrict__ mask; // bit per instruction + + __device__ __forceinline__ void writeMask(int32_t pos) const { - u_char uc = mask[pos >> 3]; - return static_cast((uc >> (pos & 7)) & 1); + u_char const uc = 1 << (pos & 7); + mask[pos >> 3] |= uc; } -}; -/** - * @brief This manages the two relist instances required by the regexec function. 
- */ -struct reljunk { - relist* list1; - relist* list2; - int32_t starttype{}; - char32_t startchar{}; - - __host__ __device__ reljunk(relist* list1, relist* list2, int32_t stype, char32_t schar) - : list1(list1), list2(list2) + __device__ __forceinline__ bool readMask(int32_t pos) const { - if (starttype == CHAR || starttype == BOL) { - starttype = stype; - startchar = schar; - } + u_char const uc = mask[pos >> 3]; + return static_cast((uc >> (pos & 7)) & 1); } }; -__device__ inline void swaplist(relist*& l1, relist*& l2) +__device__ __forceinline__ reprog_device::reljunk::reljunk(relist* list1, + relist* list2, + reinst const inst) + : list1(list1), list2(list2) +{ + if (inst.type == CHAR || inst.type == BOL) { + starttype = inst.type; + startchar = inst.u1.c; + } +} + +__device__ __forceinline__ void reprog_device::reljunk::swaplist() { - relist* tmp = l1; - l1 = l2; - l2 = tmp; + auto tmp = list1; + list1 = list2; + list2 = tmp; } /** @@ -138,15 +138,13 @@ __device__ inline void swaplist(relist*& l1, relist*& l2) * @param codepoint_flags Used for mapping a character to type for builtin classes. * @return true if the character matches */ -__device__ inline bool reclass_device::is_match(char32_t ch, const uint8_t* codepoint_flags) +__device__ __forceinline__ bool reclass_device::is_match(char32_t const ch, + uint8_t const* codepoint_flags) const { - if (thrust::any_of(thrust::seq, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(count), - [ch, this] __device__(int i) { - return ((ch >= literals[i * 2]) && (ch <= literals[(i * 2) + 1])); - })) - return true; + for (int i = 0; i < count; ++i) { + if ((ch >= literals[i * 2]) && (ch <= literals[(i * 2) + 1])) { return true; } + } + if (!builtins) return false; uint32_t codept = utf8_to_codepoint(ch); if (codept > 0x00FFFF) return false; @@ -167,20 +165,18 @@ __device__ inline bool reclass_device::is_match(char32_t ch, const uint8_t* code return false; } -__device__ inline reinst* reprog_device::get_inst(int32_t idx) const +__device__ __forceinline__ reinst reprog_device::get_inst(int32_t id) const { return _insts[id]; } + +__device__ __forceinline__ reclass_device reprog_device::get_class(int32_t id) const { - assert((idx >= 0) && (idx < _insts_count)); - return _insts + idx; + return _classes[id]; } -__device__ inline reclass_device reprog_device::get_class(int32_t idx) const +__device__ __forceinline__ bool reprog_device::is_empty() const { - assert((idx >= 0) && (idx < _classes_count)); - return _classes[idx]; + return insts_counts() == 0 || get_inst(0).type == END; } -__device__ inline int32_t* reprog_device::startinst_ids() const { return _startinst_ids; } - /** * @brief Evaluate a specific string against regex pattern compiled to this instance. * @@ -195,35 +191,36 @@ __device__ inline int32_t* reprog_device::startinst_ids() const { return _starti * @param group_id Index of the group to match in a multi-group regex pattern. 
* @return >0 if match found */ -__device__ inline int32_t reprog_device::regexec( - string_view const& dstr, reljunk& jnk, int32_t& begin, int32_t& end, int32_t group_id) +__device__ __forceinline__ int32_t reprog_device::regexec(string_view const dstr, + reljunk jnk, + cudf::size_type& begin, + cudf::size_type& end, + cudf::size_type const group_id) const { - int32_t match = 0; - auto checkstart = jnk.starttype; - auto pos = begin; - auto eos = end; - char32_t c = 0; - auto last_character = false; + int32_t match = 0; + auto pos = begin; + auto eos = end; + char_utf8 c = 0; + auto checkstart = jnk.starttype != 0; + auto last_character = false; + string_view::const_iterator itr = string_view::const_iterator(dstr, pos); jnk.list1->reset(); do { - /* fast check for first char */ + // fast check for first CHAR or BOL if (checkstart) { + auto startchar = static_cast(jnk.startchar); switch (jnk.starttype) { - case CHAR: { - auto fidx = dstr.find(static_cast(jnk.startchar), pos); - if (fidx < 0) return match; - pos = fidx; - break; - } - case BOL: { + case BOL: if (pos == 0) break; - if (jnk.startchar != '^') return match; + if (jnk.startchar != '^') { return match; } --pos; - int fidx = dstr.find(static_cast('\n'), pos); - if (fidx < 0) return match; // update begin/end values? - pos = fidx + 1; + startchar = static_cast('\n'); + case CHAR: { + auto const fidx = dstr.find(startchar, pos); + if (fidx < 0) { return match; } + pos = fidx + (jnk.starttype == BOL); break; } } @@ -231,128 +228,114 @@ __device__ inline int32_t reprog_device::regexec( } if (((eos < 0) || (pos < eos)) && match == 0) { - int32_t i = 0; - auto ids = startinst_ids(); - while (ids[i] >= 0) - jnk.list1->activate(ids[i++], (group_id == 0 ? pos : -1), -1); + auto ids = _startinst_ids; + while (*ids >= 0) + jnk.list1->activate(*ids++, (group_id == 0 ? pos : -1), -1); } - last_character = (pos >= dstr.length()); + last_character = itr.byte_offset() >= dstr.size_bytes(); - c = static_cast(last_character ? 0 : *itr); + c = last_character ? 
0 : *itr; - // expand LBRA, RBRA, BOL, EOL, BOW, NBOW, and OR + // expand the non-character types like: LBRA, RBRA, BOL, EOL, BOW, NBOW, and OR bool expanded = false; do { jnk.list2->reset(); expanded = false; - for (int16_t i = 0; i < jnk.list1->size; i++) { - auto inst_id = static_cast(jnk.list1->inst_ids[i]); - int2& range = jnk.list1->ranges[i]; - const reinst* inst = get_inst(inst_id); + for (int16_t i = 0; i < jnk.list1->get_size(); i++) { + auto state = jnk.list1->get_state(i); + auto range = state.range; + auto const inst = get_inst(state.inst_id); int32_t id_activate = -1; - switch (inst->type) { + switch (inst.type) { case CHAR: case ANY: case ANYNL: case CCLASS: case NCCLASS: - case END: id_activate = inst_id; break; + case END: id_activate = state.inst_id; break; case LBRA: - if (inst->u1.subid == group_id) range.x = pos; - id_activate = inst->u2.next_id; + if (inst.u1.subid == group_id) range.x = pos; + id_activate = inst.u2.next_id; expanded = true; break; case RBRA: - if (inst->u1.subid == group_id) range.y = pos; - id_activate = inst->u2.next_id; + if (inst.u1.subid == group_id) range.y = pos; + id_activate = inst.u2.next_id; expanded = true; break; case BOL: - if ((pos == 0) || - ((inst->u1.c == '^') && (dstr[pos - 1] == static_cast('\n')))) { - id_activate = inst->u2.next_id; + if ((pos == 0) || ((inst.u1.c == '^') && (dstr[pos - 1] == '\n'))) { + id_activate = inst.u2.next_id; expanded = true; } break; case EOL: - if (last_character || (c == '\n' && inst->u1.c == '$')) { - id_activate = inst->u2.next_id; - expanded = true; - } - break; - case BOW: { - auto codept = utf8_to_codepoint(c); - auto last_c = static_cast(pos ? dstr[pos - 1] : 0); - auto last_codept = utf8_to_codepoint(last_c); - bool cur_alphaNumeric = (codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[codept]); - bool last_alphaNumeric = - (last_codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[last_codept]); - if (cur_alphaNumeric != last_alphaNumeric) { - id_activate = inst->u2.next_id; + if (last_character || (c == '\n' && inst.u1.c == '$')) { + id_activate = inst.u2.next_id; expanded = true; } break; - } + case BOW: case NBOW: { - auto codept = utf8_to_codepoint(c); - auto last_c = static_cast(pos ? dstr[pos - 1] : 0); - auto last_codept = utf8_to_codepoint(last_c); - bool cur_alphaNumeric = (codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[codept]); - bool last_alphaNumeric = + auto const codept = utf8_to_codepoint(c); + auto const last_c = pos > 0 ? 
dstr[pos - 1] : 0; + auto const last_codept = utf8_to_codepoint(last_c); + + bool const cur_alphaNumeric = + (codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[codept]); + bool const last_alphaNumeric = (last_codept < 0x010000) && IS_ALPHANUM(_codepoint_flags[last_codept]); - if (cur_alphaNumeric == last_alphaNumeric) { - id_activate = inst->u2.next_id; + if ((cur_alphaNumeric == last_alphaNumeric) != (inst.type == BOW)) { + id_activate = inst.u2.next_id; expanded = true; } break; } case OR: - jnk.list2->activate(inst->u1.right_id, range.x, range.y); - id_activate = inst->u2.left_id; + jnk.list2->activate(inst.u1.right_id, range.x, range.y); + id_activate = inst.u2.left_id; expanded = true; break; } if (id_activate >= 0) jnk.list2->activate(id_activate, range.x, range.y); } - swaplist(jnk.list1, jnk.list2); + jnk.swaplist(); } while (expanded); - // execute + // execute instructions bool continue_execute = true; jnk.list2->reset(); - for (int16_t i = 0; continue_execute && i < jnk.list1->size; i++) { - auto inst_id = static_cast(jnk.list1->inst_ids[i]); - int2& range = jnk.list1->ranges[i]; - const reinst* inst = get_inst(inst_id); + for (int16_t i = 0; continue_execute && i < jnk.list1->get_size(); i++) { + auto const state = jnk.list1->get_state(i); + auto const range = state.range; + auto const inst = get_inst(state.inst_id); int32_t id_activate = -1; - switch (inst->type) { + switch (inst.type) { case CHAR: - if (inst->u1.c == c) id_activate = inst->u2.next_id; + if (inst.u1.c == c) id_activate = inst.u2.next_id; break; case ANY: - if (c != '\n') id_activate = inst->u2.next_id; + if (c != '\n') id_activate = inst.u2.next_id; break; - case ANYNL: id_activate = inst->u2.next_id; break; + case ANYNL: id_activate = inst.u2.next_id; break; + case NCCLASS: case CCLASS: { - reclass_device cls = get_class(inst->u1.cls_id); - if (cls.is_match(c, _codepoint_flags)) id_activate = inst->u2.next_id; - break; - } - case NCCLASS: { - reclass_device cls = get_class(inst->u1.cls_id); - if (!cls.is_match(c, _codepoint_flags)) id_activate = inst->u2.next_id; + auto const cls = get_class(inst.u1.cls_id); + if (cls.is_match(static_cast(c), _codepoint_flags) == (inst.type == CCLASS)) { + id_activate = inst.u2.next_id; + } break; } case END: match = 1; begin = range.x; end = group_id == 0 ? pos : range.y; - + // done with execute continue_execute = false; break; } @@ -362,18 +345,18 @@ __device__ inline int32_t reprog_device::regexec( ++pos; ++itr; - swaplist(jnk.list1, jnk.list2); - checkstart = jnk.list1->size > 0 ? 
0 : 1; - } while (!last_character && (jnk.list1->size > 0 || match == 0)); + jnk.swaplist(); + checkstart = jnk.list1->get_size() == 0; + } while (!last_character && (!checkstart || !match)); return match; } template -__device__ inline int32_t reprog_device::find(int32_t idx, - string_view const& dstr, - int32_t& begin, - int32_t& end) +__device__ __forceinline__ int32_t reprog_device::find(int32_t idx, + string_view const dstr, + cudf::size_type& begin, + cudf::size_type& end) const { int32_t rtn = call_regexec(idx, dstr, begin, end); if (rtn <= 0) begin = end = -1; @@ -381,11 +364,11 @@ __device__ inline int32_t reprog_device::find(int32_t idx, } template -__device__ inline match_result reprog_device::extract(cudf::size_type idx, - string_view const& dstr, - cudf::size_type begin, - cudf::size_type end, - cudf::size_type group_id) +__device__ __forceinline__ match_result reprog_device::extract(cudf::size_type idx, + string_view const dstr, + cudf::size_type begin, + cudf::size_type end, + cudf::size_type const group_id) const { end = begin + 1; return call_regexec(idx, dstr, begin, end, group_id + 1) > 0 @@ -394,28 +377,29 @@ __device__ inline match_result reprog_device::extract(cudf::size_type idx, } template -__device__ inline int32_t reprog_device::call_regexec( - int32_t idx, string_view const& dstr, int32_t& begin, int32_t& end, int32_t group_id) +__device__ __forceinline__ int32_t reprog_device::call_regexec(int32_t idx, + string_view const dstr, + cudf::size_type& begin, + cudf::size_type& end, + cudf::size_type const group_id) const { u_char data1[stack_size], data2[stack_size]; - auto const stype = get_inst(_startinst_id)->type; - auto const schar = get_inst(_startinst_id)->u1.c; - relist list1(static_cast(_insts_count), data1); relist list2(static_cast(_insts_count), data2); - reljunk jnk(&list1, &list2, stype, schar); + reljunk jnk(&list1, &list2, get_inst(_startinst_id)); return regexec(dstr, jnk, begin, end, group_id); } template <> -__device__ inline int32_t reprog_device::call_regexec( - int32_t idx, string_view const& dstr, int32_t& begin, int32_t& end, int32_t group_id) +__device__ __forceinline__ int32_t +reprog_device::call_regexec(int32_t idx, + string_view const dstr, + cudf::size_type& begin, + cudf::size_type& end, + cudf::size_type const group_id) const { - auto const stype = get_inst(_startinst_id)->type; - auto const schar = get_inst(_startinst_id)->u1.c; - auto const relists_size = relist::alloc_size(_insts_count); auto* listmem = reinterpret_cast(_relists_mem); // beginning of relist buffer; listmem += (idx * relists_size * 2); // two relist ptrs in reljunk: @@ -423,7 +407,7 @@ __device__ inline int32_t reprog_device::call_regexec( auto* list1 = new (listmem) relist(static_cast(_insts_count)); auto* list2 = new (listmem + relists_size) relist(static_cast(_insts_count)); - reljunk jnk(list1, list2, stype, schar); + reljunk jnk(list1, list2, get_inst(_startinst_id)); return regexec(dstr, jnk, begin, end, group_id); } diff --git a/cpp/src/strings/regex/regexec.cu b/cpp/src/strings/regex/regexec.cu index 3bcf55cf069..70d6079972a 100644 --- a/cpp/src/strings/regex/regexec.cu +++ b/cpp/src/strings/regex/regexec.cu @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -27,39 +28,6 @@ namespace cudf { namespace strings { namespace detail { -namespace { -/** - * @brief Converts UTF-8 string into fixed-width 32-bit character vector. - * - * No character conversion occurs. - * Each UTF-8 character is promoted into a 32-bit value. 
- * The last entry in the returned vector will be a 0 value. - * The fixed-width vector makes it easier to compile and faster to execute. - * - * @param pattern Regular expression encoded with UTF-8. - * @return Fixed-width 32-bit character vector. - */ -std::vector string_to_char32_vector(std::string const& pattern) -{ - size_type size = static_cast(pattern.size()); - size_type count = std::count_if(pattern.cbegin(), pattern.cend(), [](char ch) { - return is_begin_utf8_char(static_cast(ch)); - }); - std::vector result(count + 1); - char32_t* output_ptr = result.data(); - const char* input_ptr = pattern.data(); - for (size_type idx = 0; idx < size; ++idx) { - char_utf8 output_character = 0; - size_type ch_width = to_char_utf8(input_ptr, output_character); - input_ptr += ch_width; - idx += ch_width - 1; - *output_ptr++ = output_character; - } - result[count] = 0; // last entry set to 0 - return result; -} - -} // namespace // Copy reprog primitive values reprog_device::reprog_device(reprog& prog) @@ -89,75 +57,76 @@ std::unique_ptr> reprog_devic size_type strings_count, rmm::cuda_stream_view stream) { - std::vector pattern32 = string_to_char32_vector(pattern); // compile pattern into host object - reprog h_prog = reprog::create_from(pattern32.data(), flags); + reprog h_prog = reprog::create_from(pattern, flags); + // compute size to hold all the member data - auto insts_count = h_prog.insts_count(); - auto classes_count = h_prog.classes_count(); - auto starts_count = h_prog.starts_count(); - // compute size of each section; make sure each is aligned appropriately - auto insts_size = - cudf::util::round_up_safe(insts_count * sizeof(_insts[0]), sizeof(size_t)); - auto startids_size = - cudf::util::round_up_safe(starts_count * sizeof(_startinst_ids[0]), sizeof(size_t)); - auto classes_size = - cudf::util::round_up_safe(classes_count * sizeof(_classes[0]), sizeof(size_t)); - for (int32_t idx = 0; idx < classes_count; ++idx) + auto const insts_count = h_prog.insts_count(); + auto const classes_count = h_prog.classes_count(); + auto const starts_count = h_prog.starts_count(); + + // compute size of each section + auto insts_size = insts_count * sizeof(_insts[0]); + auto startids_size = starts_count * sizeof(_startinst_ids[0]); + auto classes_size = classes_count * sizeof(_classes[0]); + for (auto idx = 0; idx < classes_count; ++idx) classes_size += static_cast((h_prog.class_at(idx).literals.size()) * sizeof(char32_t)); - size_t memsize = insts_size + startids_size + classes_size; - size_t rlm_size = 0; - // check memory size needed for executing regex - if (insts_count > RX_LARGE_INSTS) { - auto relist_alloc_size = relist::alloc_size(insts_count); - rlm_size = relist_alloc_size * 2L * strings_count; // reljunk has 2 relist ptrs - } + // make sure each section is aligned for the subsequent section's data type + auto const memsize = cudf::util::round_up_safe(insts_size, sizeof(_startinst_ids[0])) + + cudf::util::round_up_safe(startids_size, sizeof(_classes[0])) + + cudf::util::round_up_safe(classes_size, sizeof(char32_t)); + + // allocate memory to store all the prog data in a flat contiguous buffer + std::vector h_buffer(memsize); // copy everything into here; + auto h_ptr = h_buffer.data(); // this is our running host ptr; + auto d_buffer = new rmm::device_buffer(memsize, stream); // output device memory; + auto d_ptr = reinterpret_cast(d_buffer->data()); // running device pointer - // allocate memory to store prog data - std::vector h_buffer(memsize); - u_char* h_ptr = h_buffer.data(); // 
running pointer - auto* d_buffer = new rmm::device_buffer(memsize, stream); - u_char* d_ptr = reinterpret_cast(d_buffer->data()); // running device pointer // put everything into a flat host buffer first reprog_device* d_prog = new reprog_device(h_prog); - // copy the instructions array first (fixed-size structs) - reinst* insts = reinterpret_cast(h_ptr); - memcpy(insts, h_prog.insts_data(), insts_size); - h_ptr += insts_size; // next section + + // copy the instructions array first (fixed-sized structs) + memcpy(h_ptr, h_prog.insts_data(), insts_size); d_prog->_insts = reinterpret_cast(d_ptr); + + // point to the end for the next section + insts_size = cudf::util::round_up_safe(insts_size, sizeof(_startinst_ids[0])); + h_ptr += insts_size; d_ptr += insts_size; - // copy the startinst_ids next (ints) - int32_t* startinst_ids = reinterpret_cast(h_ptr); - memcpy(startinst_ids, h_prog.starts_data(), startids_size); - h_ptr += startids_size; // next section + // copy the startinst_ids next + memcpy(h_ptr, h_prog.starts_data(), startids_size); d_prog->_startinst_ids = reinterpret_cast(d_ptr); + + // next section; align the size for next data type + startids_size = cudf::util::round_up_safe(startids_size, sizeof(_classes[0])); + h_ptr += startids_size; d_ptr += startids_size; // copy classes into flat memory: [class1,class2,...][char32 arrays] - reclass_device* classes = reinterpret_cast(h_ptr); - d_prog->_classes = reinterpret_cast(d_ptr); + auto classes = reinterpret_cast(h_ptr); + d_prog->_classes = reinterpret_cast(d_ptr); // get pointer to the end to handle variable length data - u_char* h_end = h_ptr + (classes_count * sizeof(reclass_device)); - u_char* d_end = d_ptr + (classes_count * sizeof(reclass_device)); + auto h_end = h_ptr + (classes_count * sizeof(reclass_device)); + auto d_end = d_ptr + (classes_count * sizeof(reclass_device)); // place each class and append the variable length data for (int32_t idx = 0; idx < classes_count; ++idx) { reclass& h_class = h_prog.class_at(idx); - reclass_device d_class; - d_class.builtins = h_class.builtins; - d_class.count = h_class.literals.size() / 2; - d_class.literals = reinterpret_cast(d_end); - memcpy(classes++, &d_class, sizeof(d_class)); + reclass_device d_class{h_class.builtins, + static_cast(h_class.literals.size() / 2), + reinterpret_cast(d_end)}; + *classes++ = d_class; memcpy(h_end, h_class.literals.c_str(), h_class.literals.size() * sizeof(char32_t)); h_end += h_class.literals.size() * sizeof(char32_t); d_end += h_class.literals.size() * sizeof(char32_t); } + // initialize the rest of the elements - d_prog->_insts_count = insts_count; - d_prog->_starts_count = starts_count; - d_prog->_classes_count = classes_count; d_prog->_codepoint_flags = codepoint_flags; + // allocate execute memory if needed rmm::device_buffer* d_relists{}; - if (rlm_size > 0) { + if (insts_count > RX_LARGE_INSTS) { + // two relist state structures are needed for execute per string + auto const rlm_size = relist::alloc_size(insts_count) * 2 * strings_count; d_relists = new rmm::device_buffer(rlm_size, stream); d_prog->_relists_mem = d_relists->data(); } @@ -165,7 +134,8 @@ std::unique_ptr> reprog_devic // copy flat prog to device memory CUDF_CUDA_TRY(cudaMemcpyAsync( d_buffer->data(), h_buffer.data(), memsize, cudaMemcpyHostToDevice, stream.value())); - // + + // build deleter to cleanup device memory auto deleter = [d_buffer, d_relists](reprog_device* t) { t->destroy(); delete d_buffer; From f7c35d56cdfb7af842b54255029b7481ca9b6d94 Mon Sep 17 00:00:00 2001 From: 
martinfalisse <45781926+martinfalisse@users.noreply.github.com> Date: Thu, 14 Apr 2022 20:27:51 +0200 Subject: [PATCH 12/33] Add support for numeric_only in DataFrame._reduce (#10629) Add support for numeric_only in DataFrame._reduce, this way can use df.mean(numeric_only=True), etc. Resolves https://github.com/rapidsai/cudf/issues/2067. Also partially addresses https://github.com/rapidsai/cudf/issues/9009. Authors: - https://github.com/martinfalisse Approvers: - Michael Wang (https://github.com/isVoid) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/10629 --- python/cudf/cudf/core/dataframe.py | 25 +++--- python/cudf/cudf/core/single_column_frame.py | 4 +- python/cudf/cudf/tests/test_dataframe.py | 54 +++++++++++++ python/cudf/cudf/tests/test_stats.py | 83 +++++++++++++++++--- 4 files changed, 145 insertions(+), 21 deletions(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 2b2c09fa2a0..ae60cd91fac 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -5180,26 +5180,33 @@ def _reduce( if level is not None: raise NotImplementedError("level parameter is not implemented yet") - if numeric_only not in (None, True): - raise NotImplementedError( - "numeric_only parameter is not implemented yet" + source = self + if numeric_only: + numeric_cols = ( + name + for name in self._data.names + if is_numeric_dtype(self._data[name]) ) - axis = self._get_axis_from_axis_arg(axis) + source = self._get_columns_by_label(numeric_cols) + if source.empty: + return Series(index=cudf.StringIndex([])) + + axis = source._get_axis_from_axis_arg(axis) if axis == 0: try: result = [ - getattr(self._data[col], op)(**kwargs) - for col in self._data.names + getattr(source._data[col], op)(**kwargs) + for col in source._data.names ] except AttributeError: - raise TypeError(f"cannot perform {op} with type {self.dtype}") + raise TypeError(f"Not all column dtypes support op {op}") return Series._from_data( - {None: result}, as_index(self._data.names) + {None: result}, as_index(source._data.names) ) elif axis == 1: - return self._apply_cupy_method_axis_1(op, **kwargs) + return source._apply_cupy_method_axis_1(op, **kwargs) @_cudf_nvtx_annotate def _scan( diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index 003f8ea7fdb..addc823e7f1 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -52,9 +52,9 @@ def _reduce( if level is not None: raise NotImplementedError("level parameter is not implemented yet") - if numeric_only not in (None, True): + if numeric_only: raise NotImplementedError( - "numeric_only parameter is not implemented yet" + f"Series.{op} does not implement numeric_only" ) try: return getattr(self._column, op)(**kwargs) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index a7fad792bd0..13ab0b35822 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9230,3 +9230,57 @@ def test_dataframe_pct_change(data, periods, fill_method): expected = pdf.pct_change(periods=periods, fill_method=fill_method) assert_eq(expected, actual) + + +def test_mean_timeseries(): + gdf = cudf.datasets.timeseries() + pdf = gdf.to_pandas() + + expected = pdf.mean(numeric_only=True) + actual = gdf.mean(numeric_only=True) + + assert_eq(expected, actual) + + with 
pytest.raises(TypeError): + gdf.mean() + + +@pytest.mark.parametrize( + "data", + [ + { + "a": [1, 2, 3, 4, 5], + "b": ["a", "b", "c", "d", "e"], + "c": [1.0, 2.0, 3.0, 4.0, 5.0], + } + ], +) +def test_std_different_dtypes(data): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + expected = pdf.std(numeric_only=True) + actual = gdf.std(numeric_only=True) + + assert_eq(expected, actual) + + with pytest.raises(TypeError): + gdf.std() + + +@pytest.mark.parametrize( + "data", + [ + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": ["v", "n", "k", "l", "m", "i", "y", "r", "w"], + "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"], + } + ], +) +def test_empty_numeric_only(data): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + expected = pdf.prod(numeric_only=True) + actual = gdf.prod(numeric_only=True) + assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py index 977a01952db..08f662f0ba7 100644 --- a/python/cudf/cudf/tests/test_stats.py +++ b/python/cudf/cudf/tests/test_stats.py @@ -239,13 +239,10 @@ def test_misc_quantiles(data, q): cudf.Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]), cudf.Series([]), cudf.Series([-3]), - randomdata( - nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} - ), ], ) @pytest.mark.parametrize("null_flag", [False, True]) -def test_kurtosis(data, null_flag): +def test_kurtosis_series(data, null_flag): pdata = data.to_pandas() if null_flag and len(data) > 2: @@ -262,8 +259,13 @@ def test_kurtosis(data, null_flag): expected = pdata.kurt() np.testing.assert_array_almost_equal(got, expected) + got = data.kurt(numeric_only=False) + got = got if np.isscalar(got) else got.to_numpy() + expected = pdata.kurt(numeric_only=False) + np.testing.assert_array_almost_equal(got, expected) + with pytest.raises(NotImplementedError): - data.kurt(numeric_only=False) + data.kurt(numeric_only=True) @pytest.mark.parametrize( @@ -280,13 +282,10 @@ def test_kurtosis(data, null_flag): cudf.Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]), cudf.Series([]), cudf.Series([-3]), - randomdata( - nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} - ), ], ) @pytest.mark.parametrize("null_flag", [False, True]) -def test_skew(data, null_flag): +def test_skew_series(data, null_flag): pdata = data.to_pandas() if null_flag and len(data) > 2: @@ -298,8 +297,13 @@ def test_skew(data, null_flag): got = got if np.isscalar(got) else got.to_numpy() np.testing.assert_array_almost_equal(got, expected) + got = data.skew(numeric_only=False) + expected = pdata.skew(numeric_only=False) + got = got if np.isscalar(got) else got.to_numpy() + np.testing.assert_array_almost_equal(got, expected) + with pytest.raises(NotImplementedError): - data.skew(numeric_only=False) + data.skew(numeric_only=True) @pytest.mark.parametrize("dtype", params_dtypes) @@ -541,3 +545,62 @@ def test_cov_corr_invalid_dtypes(gsr): rfunc_args_and_kwargs=([gsr],), compare_error_message=False, ) + + +@pytest.mark.parametrize( + "data", + [ + randomdata( + nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} + ), + ], +) +@pytest.mark.parametrize("null_flag", [False, True]) +def test_kurtosis_df(data, null_flag): + pdata = data.to_pandas() + + if null_flag and len(data) > 2: + data.iloc[[0, 2]] = None + pdata.iloc[[0, 2]] = None + + got = data.kurtosis() + got = got if np.isscalar(got) else got.to_numpy() + expected = pdata.kurtosis() + np.testing.assert_array_almost_equal(got, expected) + 
+ got = data.kurt() + got = got if np.isscalar(got) else got.to_numpy() + expected = pdata.kurt() + np.testing.assert_array_almost_equal(got, expected) + + got = data.kurt(numeric_only=True) + got = got if np.isscalar(got) else got.to_numpy() + expected = pdata.kurt(numeric_only=True) + np.testing.assert_array_almost_equal(got, expected) + + +@pytest.mark.parametrize( + "data", + [ + randomdata( + nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str} + ), + ], +) +@pytest.mark.parametrize("null_flag", [False, True]) +def test_skew_df(data, null_flag): + pdata = data.to_pandas() + + if null_flag and len(data) > 2: + data.iloc[[0, 2]] = None + pdata.iloc[[0, 2]] = None + + got = data.skew() + expected = pdata.skew() + got = got if np.isscalar(got) else got.to_numpy() + np.testing.assert_array_almost_equal(got, expected) + + got = data.skew(numeric_only=True) + expected = pdata.skew(numeric_only=True) + got = got if np.isscalar(got) else got.to_numpy() + np.testing.assert_array_almost_equal(got, expected) From 77fa49eddf1c961277ec5e0fb3616433f2a46ea4 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 14 Apr 2022 14:13:06 -0700 Subject: [PATCH 13/33] Clean up C++ includes to use <> instead of "". (#10658) This PR cleans up some C++ includes to use `#include <...>` instead of `#include "..."` where appropriate. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Karthikeyan (https://github.com/karthikeyann) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/10658 --- cpp/benchmarks/io/orc/orc_writer.cpp | 2 +- cpp/benchmarks/sort/rank.cpp | 2 +- cpp/benchmarks/string/convert_durations.cpp | 15 +++++++-------- cpp/include/cudf/detail/reduction_functions.hpp | 2 +- cpp/libcudf_kafka/src/kafka_callback.cpp | 2 +- cpp/libcudf_kafka/src/kafka_consumer.cpp | 2 +- cpp/src/merge/merge.cu | 2 +- cpp/src/structs/structs_column_view.cpp | 4 ++-- .../binaryop/binop-compiled-fixed_point-test.cpp | 2 +- cpp/tests/hash_map/map_test.cu | 2 +- cpp/tests/iterator/value_iterator_test_strings.cu | 10 ++++++---- cpp/tests/partitioning/partition_test.cpp | 10 +++++----- 12 files changed, 28 insertions(+), 27 deletions(-) diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index 525c13af5c0..f61dac7677b 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include "cudf/io/types.hpp" #include #include @@ -23,6 +22,7 @@ #include #include +#include // to enable, run cmake with -DBUILD_BENCHMARKS=ON diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp index 22acb241f0b..c3c77ebd52f 100644 --- a/cpp/benchmarks/sort/rank.cpp +++ b/cpp/benchmarks/sort/rank.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "cudf/column/column_view.hpp" +#include #include #include diff --git a/cpp/benchmarks/string/convert_durations.cpp b/cpp/benchmarks/string/convert_durations.cpp index dc9a1e991b2..8af111d9a63 100644 --- a/cpp/benchmarks/string/convert_durations.cpp +++ b/cpp/benchmarks/string/convert_durations.cpp @@ -13,25 +13,24 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - -#include - +#include #include #include +#include #include #include #include #include +#include + +#include +#include + #include #include -#include "../fixture/benchmark_fixture.hpp" -#include "../synchronization/synchronization.hpp" -#include "cudf/column/column_view.hpp" -#include "cudf/wrappers/durations.hpp" - class DurationsToString : public cudf::benchmark { }; template diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp index 3a6113e66ce..317e4d0cf47 100644 --- a/cpp/include/cudf/detail/reduction_functions.hpp +++ b/cpp/include/cudf/detail/reduction_functions.hpp @@ -17,9 +17,9 @@ #pragma once #include +#include #include -#include "cudf/lists/lists_column_view.hpp" #include namespace cudf { diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index 6b98747c145..79a40640627 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "cudf_kafka/kafka_callback.hpp" +#include #include diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 49e89a56e60..2ddaa9892da 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "cudf_kafka/kafka_consumer.hpp" +#include #include diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 01a94457b69..9c94a6220d6 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,6 @@ #include #include -#include "cudf/utilities/traits.hpp" #include #include diff --git a/cpp/src/structs/structs_column_view.cpp b/cpp/src/structs/structs_column_view.cpp index db9496f18be..681f13386ff 100644 --- a/cpp/src/structs/structs_column_view.cpp +++ b/cpp/src/structs/structs_column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include "cudf/utilities/error.hpp" #include #include +#include namespace cudf { diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp index 64462669f90..28df893aff1 100644 --- a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp @@ -20,13 +20,13 @@ #include #include #include +#include #include #include #include #include -#include "cudf/utilities/error.hpp" #include #include diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu index d69aee57756..f42549514e6 100644 --- a/cpp/tests/hash_map/map_test.cu +++ b/cpp/tests/hash_map/map_test.cu @@ -23,12 +23,12 @@ #include #include +#include #include #include #include -#include "rmm/exec_policy.hpp" #include #include #include diff --git a/cpp/tests/iterator/value_iterator_test_strings.cu b/cpp/tests/iterator/value_iterator_test_strings.cu index 5bddbfbd4aa..9aa18eb844f 100644 --- a/cpp/tests/iterator/value_iterator_test_strings.cu +++ b/cpp/tests/iterator/value_iterator_test_strings.cu @@ -12,10 +12,12 @@ * or implied. See the License for the specific language governing permissions and limitations under * the License. */ -#include "cudf/detail/utilities/vector_factories.hpp" -#include "rmm/cuda_stream_view.hpp" -#include "rmm/device_uvector.hpp" -#include +#include "iterator_tests.cuh" + +#include + +#include +#include #include #include diff --git a/cpp/tests/partitioning/partition_test.cpp b/cpp/tests/partitioning/partition_test.cpp index 785af409c4c..014a19e93a9 100644 --- a/cpp/tests/partitioning/partition_test.cpp +++ b/cpp/tests/partitioning/partition_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,16 +13,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include #include #include #include #include #include -#include "cudf/sorting.hpp" +#include +#include +#include +#include template class PartitionTest : public cudf::test::BaseFixture { From 14a32619a5b1c0eff49588b141f8ef2eb754cadf Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 14 Apr 2022 14:40:20 -0700 Subject: [PATCH 14/33] Improve User Guide docs (#10663) This PR makes some minor improvements to the cuDF user guide and some docstrings. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/10663 --- docs/cudf/source/basics/basics.rst | 58 ++++++++++--------- docs/cudf/source/basics/internals.rst | 4 +- .../cudf/source/basics/io-gds-integration.rst | 24 ++++---- .../source/basics/io-nvcomp-integration.rst | 4 +- python/cudf/cudf/core/cut.py | 46 ++++++++++----- python/cudf/cudf/core/groupby/groupby.py | 21 +++---- python/cudf/cudf/core/single_column_frame.py | 4 +- 7 files changed, 91 insertions(+), 70 deletions(-) diff --git a/docs/cudf/source/basics/basics.rst b/docs/cudf/source/basics/basics.rst index 60a65558033..9b8983fba49 100644 --- a/docs/cudf/source/basics/basics.rst +++ b/docs/cudf/source/basics/basics.rst @@ -15,36 +15,40 @@ The following table lists all of cudf types. For methods requiring dtype argumen .. rst-class:: special-table .. 
table:: - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Kind of Data | Data Type | Scalar | String Aliases | - +========================+==================+=====================================================================================+=============================================+ - | Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, np.uint16_, | ``'int8'``, ``'int16'``, ``'int32'``, | - | | | np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, | - | | | | ``'uint32'``, ``'uint64'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Float | | np.float32_, np.float64_ | ``'float32'``, ``'float64'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Strings | | `str `_ | ``'string'``, ``'object'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, | - | | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``,| - | (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'``| - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Categorical | CategoricalDtype | (none) | ``'category'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Boolean | | np.bool_ | ``'bool'`` | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Decimal | Decimal32Dtype, | (none) | (none) | - | | Decimal64Dtype, | | | - | | Decimal128Dtype | | | - +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Kind of Data | Data Type | Scalar | String Aliases | + +=================+==================+==============================================================+==============================================+ + | Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, | ``'int8'``, ``'int16'``, ``'int32'``, | + | | | np.uint16_, np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, | + | | | | ``'uint32'``, ``'uint64'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Float | | np.float32_, np.float64_ | ``'float32'``, ``'float64'`` | + 
+-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Strings | | `str `_ | ``'string'``, ``'object'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, | + | | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``, | + | (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Categorical | CategoricalDtype | (none) | ``'category'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Boolean | | np.bool_ | ``'bool'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Decimal | Decimal32Dtype, | (none) | (none) | + | | Decimal64Dtype, | | | + | | Decimal128Dtype | | | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Lists | ListDtype | list | ``'list'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ + | Structs | StructDtype | dict | ``'struct'`` | + +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+ **Note: All dtypes above are Nullable** -.. _np.int8: -.. _np.int16: +.. _np.int8: +.. _np.int16: .. _np.int32: .. _np.int64: .. _np.uint8: diff --git a/docs/cudf/source/basics/internals.rst b/docs/cudf/source/basics/internals.rst index 60b63c6fab8..96ef40d51e6 100644 --- a/docs/cudf/source/basics/internals.rst +++ b/docs/cudf/source/basics/internals.rst @@ -54,7 +54,7 @@ As another example, the ``StringColumn`` backing the Series 2. No mask buffer as there are no nulls in the Series 3. Two children columns: - - A column of 8-bit characters + - A column of UTF-8 characters ``['d', 'o', 'y', 'o', 'u', h' ... 
'?']``
    - A column of "offsets" to the characters column (in this case,
      ``[0, 2, 5, 9, 12, 19]``)

@@ -172,7 +172,7 @@ Selecting columns by index:
     >>> ca.select_by_index(1)
     ColumnAccessor(OrderedColumnDict([('y', )]), multiindex=False, level_names=(None,))
     >>> ca.select_by_index([0, 1])
-    ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) 
+    ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,))
     >>> ca.select_by_index(slice(1, 3))
     ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,))

diff --git a/docs/cudf/source/basics/io-gds-integration.rst b/docs/cudf/source/basics/io-gds-integration.rst
index 71c114e9149..5ff07ac29c5 100644
--- a/docs/cudf/source/basics/io-gds-integration.rst
+++ b/docs/cudf/source/basics/io-gds-integration.rst
@@ -1,14 +1,14 @@
 GPUDirect Storage Integration
 =============================

-Many IO APIs can use GPUDirect Storage (GDS) library to optimize IO operations. 
-GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU. 
-GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer. 
+Many IO APIs can use GPUDirect Storage (GDS) library to optimize IO operations.
+GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU.
+GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer.
 The SDK is available for download `here `_.
 GDS is also included in CUDA Toolkit 11.4 and higher.

-Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``. 
-This variable also controls the GDS compatibility mode. 
+Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``.
+This variable also controls the GDS compatibility mode.

 There are three valid values for the environment variable:

@@ -20,17 +20,17 @@ If no value is set, behavior will be the same as the "GDS" option.

 This environment variable also affects how cuDF treats GDS errors.
 When ``LIBCUDF_CUFILE_POLICY`` is set to "GDS" and a GDS API call fails for any reason, cuDF falls back to the internal implementation with bounce buffers.
-When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on), 
+When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on),
 cuDF throws an exception to propagate the error to the user.
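As a usage sketch (the file name and policy value here are illustrative assumptions, and the
claim that the policy is read once when the cuFile integration is first initialized should be
verified against the implementation), the variable can also be set programmatically before the
first cuIO call:

.. code-block:: cpp

    #include <cstdlib>
    #include <cudf/io/parquet.hpp>

    int main()
    {
      // Equivalent to running `LIBCUDF_CUFILE_POLICY=OFF ./app`;
      // must happen before any cuIO call reads the policy.
      setenv("LIBCUDF_CUFILE_POLICY", "OFF", /*overwrite=*/1);

      auto const opts = cudf::io::parquet_reader_options::builder(
                          cudf::io::source_info{"example.parquet"})
                          .build();
      auto result = cudf::io::read_parquet(opts);  // uses the selected policy
      return 0;
    }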
Operations that support the use of GPUDirect Storage: -- `read_avro` -- `read_parquet` -- `read_orc` -- `to_csv` -- `to_parquet` -- `to_orc` +- :py:func:`cudf.read_avro` +- :py:func:`cudf.read_parquet` +- :py:func:`cudf.read_orc` +- :py:meth:`cudf.DataFrame.to_csv` +- :py:meth:`cudf.DataFrame.to_parquet` +- :py:meth:`cudf.DataFrame.to_orc` Several parameters that can be used to tune the performance of GDS-enabled I/O are exposed through environment variables: diff --git a/docs/cudf/source/basics/io-nvcomp-integration.rst b/docs/cudf/source/basics/io-nvcomp-integration.rst index 521833e2afd..fc24e0c15f4 100644 --- a/docs/cudf/source/basics/io-nvcomp-integration.rst +++ b/docs/cudf/source/basics/io-nvcomp-integration.rst @@ -1,14 +1,14 @@ nvCOMP Integration ============================= -Some types of compression/decompression can be performed using either `nvCOMP library `_ or the internal implementation. +Some types of compression/decompression can be performed using either the `nvCOMP library `_ or the internal implementation. Which implementation is used by default depends on the data format and the compression type. Behavior can be influenced through environment variable ``LIBCUDF_NVCOMP_POLICY``. There are three valid values for the environment variable: -- "STABLE": Only enable the nvCOMP in places where it has been deemed stable for production use. +- "STABLE": Only enable the nvCOMP in places where it has been deemed stable for production use. - "ALWAYS": Enable all available uses of nvCOMP, including new, experimental combinations. - "OFF": Disable nvCOMP use whenever possible and use the internal implementations instead. diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 7c585602c23..915383e4852 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021-2022, NVIDIA CORPORATION. + from collections.abc import Sequence import cupy @@ -21,21 +23,27 @@ def cut( duplicates: str = "raise", ordered: bool = True, ): + """Bin values into discrete intervals. - """ - Bin values into discrete intervals. Use cut when you need to segment and sort data values into bins. This function is also useful for going from a continuous variable to a categorical variable. + Parameters ---------- x : array-like The input array to be binned. Must be 1-dimensional. bins : int, sequence of scalars, or IntervalIndex The criteria to bin by. - * int : Defines the number of equal-width bins in the - range of x. The range of x is extended by .1% on each - side to include the minimum and maximum values of x. + + * int : Defines the number of equal-width bins in the range of `x`. The + range of `x` is extended by .1% on each side to include the minimum + and maximum values of `x`. + * sequence of scalars : Defines the bin edges allowing for non-uniform + width. No extension of the range of `x` is done. + * IntervalIndex : Defines the exact bins to be used. Note that + IntervalIndex for `bins` must be non-overlapping. + right : bool, default True Indicates whether bins includes the rightmost edge or not. labels : array or False, default None @@ -66,30 +74,38 @@ def cut( For scalar or sequence bins, this is an ndarray with the computed bins. If set duplicates=drop, bins will drop non-unique bin. For an IntervalIndex bins, this is equal to bins. + Examples -------- Discretize into three equal-sized bins. + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3) CategoricalIndex([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], - ... 
(5.0, 7.0],(0.994, 3.0]], categories=[(0.994, 3.0], - ... (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category') + (5.0, 7.0], (0.994, 3.0]], categories=[(0.994, 3.0], + (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category') + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True) (CategoricalIndex([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], - ... (5.0, 7.0],(0.994, 3.0]],categories=[(0.994, 3.0], - ... (3.0, 5.0], (5.0, 7.0]],ordered=True, dtype='category'), - array([0.994, 3. , 5. , 7. ])) + (5.0, 7.0], (0.994, 3.0]], categories=[(0.994, 3.0], + (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category'), + array([0.994, 3. , 5. , 7. ])) + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), - ... 3, labels=["bad", "medium", "good"]) + ... 3, labels=["bad", "medium", "good"]) CategoricalIndex(['bad', 'good', 'medium', 'medium', 'good', 'bad'], - ... categories=['bad', 'medium', 'good'],ordered=True, - ... dtype='category') + categories=['bad', 'medium', 'good'],ordered=True, + dtype='category') + >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3, - ... labels=["B", "A", "B"], ordered=False) + ... labels=["B", "A", "B"], ordered=False) CategoricalIndex(['B', 'B', 'A', 'A', 'B', 'B'], categories=['A', 'B'], - ... ordered=False, dtype='category') + ordered=False, dtype='category') + >>> cudf.cut([0, 1, 1, 2], bins=4, labels=False) array([0, 1, 1, 3], dtype=int32) + Passing a Series as an input returns a Series with categorical dtype: + >>> s = cudf.Series(np.array([2, 4, 6, 8, 10]), ... index=['a', 'b', 'c', 'd', 'e']) >>> cudf.cut(s, 3) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 6b98e82d553..40f8eda0e4f 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -566,19 +566,20 @@ def mult(df): .. code-block:: >>> df = pd.DataFrame({ - 'a': [1, 1, 2, 2], - 'b': [1, 2, 1, 2], - 'c': [1, 2, 3, 4]}) + ... 'a': [1, 1, 2, 2], + ... 'b': [1, 2, 1, 2], + ... 'c': [1, 2, 3, 4], + ... }) >>> gdf = cudf.from_pandas(df) >>> df.groupby('a').apply(lambda x: x.iloc[[0]]) - a b c - a - 1 0 1 1 1 - 2 2 2 1 3 + a b c + a + 1 0 1 1 1 + 2 2 2 1 3 >>> gdf.groupby('a').apply(lambda x: x.iloc[[0]]) - a b c - 0 1 1 1 - 2 2 1 3 + a b c + 0 1 1 1 + 2 2 1 3 """ if not callable(function): raise TypeError(f"type {type(function)} is not callable") diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py index addc823e7f1..7fa66bd831d 100644 --- a/python/cudf/cudf/core/single_column_frame.py +++ b/python/cudf/cudf/core/single_column_frame.py @@ -81,8 +81,8 @@ def name(self, value): @property # type: ignore @_cudf_nvtx_annotate - def ndim(self): - """Get the dimensionality (always 1 for single-columned frames).""" + def ndim(self): # noqa: D401 + """Number of dimensions of the underlying data, by definition 1.""" return 1 @property # type: ignore From 6e6c325e7cb99baeecaec65aff8c97aa2450ff51 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Thu, 14 Apr 2022 18:58:48 -0500 Subject: [PATCH 15/33] Fix some docstrings formatting (#10660) This PR fixes some of the broken docstring formattings in the code-base. 
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/10660 --- docs/cudf/source/api_docs/dataframe.rst | 3 +++ docs/cudf/source/api_docs/index_objects.rst | 2 ++ docs/cudf/source/api_docs/series.rst | 2 ++ docs/cudf/source/api_docs/string_handling.rst | 1 - docs/cudf/source/conf.py | 1 + python/cudf/cudf/core/_base_index.py | 2 +- python/cudf/cudf/core/cut.py | 1 + python/cudf/cudf/core/indexed_frame.py | 2 ++ python/cudf/cudf/core/tools/numeric.py | 2 +- 9 files changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 1d600acfef1..e0ef3cb2ff0 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -149,6 +149,7 @@ Computations / descriptive stats DataFrame.round DataFrame.skew DataFrame.sum + DataFrame.sum_of_squares DataFrame.std DataFrame.var DataFrame.nunique @@ -248,9 +249,11 @@ Serialization / IO / conversion DataFrame.to_dlpack DataFrame.to_parquet DataFrame.to_csv + DataFrame.to_cupy DataFrame.to_hdf DataFrame.to_dict DataFrame.to_json + DataFrame.to_numpy DataFrame.to_pandas DataFrame.to_feather DataFrame.to_records diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 6f5affd0ecd..8e0e3bbd411 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -92,7 +92,9 @@ Conversion Index.astype Index.to_arrow + Index.to_cupy Index.to_list + Index.to_numpy Index.to_series Index.to_frame Index.to_pandas diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 95aa71919e4..d7015c9348d 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -390,10 +390,12 @@ Serialization / IO / conversion :toctree: api/ Series.to_arrow + Series.to_cupy Series.to_dlpack Series.to_frame Series.to_hdf Series.to_json + Series.to_numpy Series.to_pandas Series.to_string Series.from_arrow diff --git a/docs/cudf/source/api_docs/string_handling.rst b/docs/cudf/source/api_docs/string_handling.rst index 3087bcaa826..8d4646c47a7 100644 --- a/docs/cudf/source/api_docs/string_handling.rst +++ b/docs/cudf/source/api_docs/string_handling.rst @@ -83,7 +83,6 @@ strings and apply several methods to it. These can be accessed like rsplit startswith strip - subword_tokenize swapcase title token_count diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index dbdf8e59e6a..d65b77ef74b 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -252,6 +252,7 @@ def process_class_docstrings(app, what, name, obj, options, lines): lines[:] = lines[:cut_index] +nitpick_ignore = [("py:class", "SeriesOrIndex"),] def setup(app): diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 259a7f711c3..6fed6510484 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -118,7 +118,7 @@ def get_level_values(self, level): See Also -------- - cudf.core.multiindex.MultiIndex.get_level_values : Get values for + cudf.MultiIndex.get_level_values : Get values for a level of a MultiIndex. 
Notes diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 915383e4852..0fef6630248 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -64,6 +64,7 @@ def cut( Categorical and Series (with Categorical dtype). If True, the resulting categorical will be ordered. If False, the resulting categorical will be unordered (labels must be provided). + Returns ------- out : CategoricalIndex diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 10736948b57..ea722ec3968 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -991,6 +991,7 @@ def add_prefix(self, prefix): Examples -------- **Series** + >>> s = cudf.Series([1, 2, 3, 4]) >>> s 0 1 @@ -1006,6 +1007,7 @@ def add_prefix(self, prefix): dtype: int64 **DataFrame** + >>> df = cudf.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) >>> df A B diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index 7eea7cedaad..0273227010b 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -57,7 +57,7 @@ def to_numeric(arg, errors="raise", downcast=None): otherwise ndarray Notes - ------- + ----- An important difference from pandas is that this function does not accept mixed numeric/non-numeric type sequences. For example ``[1, 'a']``. A ``TypeError`` will be raised when such input is received, regardless of From 8f5a04451f8f61015d08c5699f0427b550afb53b Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 14 Apr 2022 17:24:37 -0700 Subject: [PATCH 16/33] Add option to drop cache in cuIO benchmarks (#10488) Dropping cache allows us to benchmark I/O times in a realistic/fair way. Cache is dropped before each iteration if `CUDF_BENCHMARK_DROP_CACHE` environment variable is set. 
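For context, a minimal sketch of the underlying mechanism, not the code in this PR (which shells out to `/sbin/sysctl vm.drop_caches=3` with a `sudo` fallback): on Linux, writing `3` to `/proc/sys/vm/drop_caches` frees the page cache plus dentries and inodes, so timed reads hit storage instead of the OS cache. The helper name below is hypothetical.

```cpp
#include <cstdlib>
#include <fstream>

// Hypothetical helper: drop the Linux page cache before a timed iteration
// when CUDF_BENCHMARK_DROP_CACHE is set in the environment.
void maybe_drop_cache()
{
  static bool const enabled = std::getenv("CUDF_BENCHMARK_DROP_CACHE") != nullptr;
  if (!enabled) { return; }

  // Needs root privileges; without them the write is silently a no-op,
  // which is why the real code falls back to `sudo /sbin/sysctl`.
  std::ofstream drop{"/proc/sys/vm/drop_caches"};
  drop << "3\n";
}
```

A run would then look like `CUDF_BENCHMARK_DROP_CACHE=1 ./CSV_READER_BENCH` (binary name illustrative).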
Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - MithunR (https://github.com/mythrocks)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/10488
---
 cpp/benchmarks/io/csv/csv_reader.cpp         |  2 ++
 cpp/benchmarks/io/cuio_common.cpp            | 28 ++++++++++++++++++++
 cpp/benchmarks/io/cuio_common.hpp            | 10 +++++++
 cpp/benchmarks/io/orc/orc_reader.cpp         |  2 ++
 cpp/benchmarks/io/parquet/parquet_reader.cpp |  2 ++
 cpp/benchmarks/io/text/multibyte_split.cpp   |  1 +
 6 files changed, 45 insertions(+)

diff --git a/cpp/benchmarks/io/csv/csv_reader.cpp b/cpp/benchmarks/io/csv/csv_reader.cpp
index c50f5220200..6f5e7160cd3 100644
--- a/cpp/benchmarks/io/csv/csv_reader.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader.cpp
@@ -52,6 +52,7 @@ void BM_csv_read_varying_input(benchmark::State& state)

   auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
+    try_drop_l3_cache();
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::read_csv(read_options);
   }
@@ -98,6 +99,7 @@ void BM_csv_read_varying_options(benchmark::State& state)
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
   auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
+    try_drop_l3_cache();
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
       // only read the header in the first chunk
diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp
index afe0cc77a4c..7d356263220 100644
--- a/cpp/benchmarks/io/cuio_common.cpp
+++ b/cpp/benchmarks/io/cuio_common.cpp
@@ -141,3 +141,31 @@ std::vector segments_in_chunk(int num_segments, int num_chunks,

   return selected_segments;
 }
+
+// Executes the command and returns stderr output
+std::string exec_cmd(std::string_view cmd)
+{
+  // Switch stderr and stdout to only capture stderr
+  auto const redirected_cmd = std::string{"( "}.append(cmd).append(" 3>&2 2>&1 1>&3) 2>/dev/null");
+  std::unique_ptr pipe(popen(redirected_cmd.c_str(), "r"), pclose);
+  CUDF_EXPECTS(pipe != nullptr, "popen() failed");
+
+  std::array buffer;
+  std::string error_out;
+  while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
+    error_out += buffer.data();
+  }
+  return error_out;
+}
+
+void try_drop_l3_cache()
+{
+  static bool is_drop_cache_enabled = std::getenv("CUDF_BENCHMARK_DROP_CACHE") != nullptr;
+  if (not is_drop_cache_enabled) { return; }
+
+  std::array drop_cache_cmds{"/sbin/sysctl vm.drop_caches=3", "sudo /sbin/sysctl vm.drop_caches=3"};
+  CUDF_EXPECTS(std::any_of(drop_cache_cmds.cbegin(),
+                           drop_cache_cmds.cend(),
+                           [](auto& cmd) { return exec_cmd(cmd).empty(); }),
+               "Failed to execute the drop cache command");
+}
diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp
index 2ed534d5333..ff900d20e6f 100644
--- a/cpp/benchmarks/io/cuio_common.hpp
+++ b/cpp/benchmarks/io/cuio_common.hpp
@@ -132,3 +132,13 @@ std::vector select_column_names(std::vector const& col
  * The segments could be Parquet row groups or ORC stripes.
  */
 std::vector segments_in_chunk(int num_segments, int num_chunks, int chunk);
+
+/**
+ * @brief Drops L3 cache if `CUDF_BENCHMARK_DROP_CACHE` environment variable is set.
+ *
+ * Has no effect if the environment variable is not set.
+ * May require sudo access to run successfully.
+ * + * @throw cudf::logic_error if the environment variable is set and the command fails + */ +void try_drop_l3_cache(); diff --git a/cpp/benchmarks/io/orc/orc_reader.cpp b/cpp/benchmarks/io/orc/orc_reader.cpp index 0fc2238a272..fc76fbe7603 100644 --- a/cpp/benchmarks/io/orc/orc_reader.cpp +++ b/cpp/benchmarks/io/orc/orc_reader.cpp @@ -60,6 +60,7 @@ void BM_orc_read_varying_input(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_orc(read_opts); } @@ -117,6 +118,7 @@ void BM_orc_read_varying_options(benchmark::State& state) cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf::size_type rows_read = 0; diff --git a/cpp/benchmarks/io/parquet/parquet_reader.cpp b/cpp/benchmarks/io/parquet/parquet_reader.cpp index 8a97fd35c31..b20534e8ac0 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader.cpp @@ -60,6 +60,7 @@ void BM_parq_read_varying_input(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer const raii(state, true); // flush_l2_cache = true, stream = 0 cudf_io::read_parquet(read_opts); } @@ -117,6 +118,7 @@ void BM_parq_read_varying_options(benchmark::State& state) cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks; auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 cudf::size_type rows_read = 0; diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index ada8856e8e5..af6c2c5e030 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -137,6 +137,7 @@ static void BM_multibyte_split(benchmark::State& state) auto mem_stats_logger = cudf::memory_stats_logger(); for (auto _ : state) { + try_drop_l3_cache(); cuda_event_timer raii(state, true); auto output = cudf::io::text::multibyte_split(*source, delim); } From b542678fda6ea40544d42e759caf3a6f8ad2b44d Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 15 Apr 2022 09:59:51 -0400 Subject: [PATCH 17/33] cuco isn't a cudf dependency when we are built shared (#10662) With the corrections in https://github.com/rapidsai/cudf/pull/10545 we didn't install the cuco headers / cmake files as they aren't needed for shared builds. But we forgot to remove the `find_package(cuco)` call from the generated cudf-config.cmake. 
Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Paul Taylor (https://github.com/trxcllnt) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/10662 --- cpp/cmake/thirdparty/get_cucollections.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake index 1639655d1e9..5232821d113 100644 --- a/cpp/cmake/thirdparty/get_cucollections.cmake +++ b/cpp/cmake/thirdparty/get_cucollections.cmake @@ -21,12 +21,14 @@ function(find_and_configure_cucollections) cuco 0.0.1 GLOBAL_TARGETS cuco::cuco BUILD_EXPORT_SET cudf-exports - INSTALL_EXPORT_SET cudf-exports CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections GIT_TAG fb58a38701f1c24ecfe07d8f1f208bbe80930da5 EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS} OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF" ) + if(NOT BUILD_SHARED_LIBS) + rapids_export_package(INSTALL cuco cudf-exports) + endif() endfunction() From 4e668f27ba741ec1065b6ae6f99c0a4608df4336 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 15 Apr 2022 09:40:49 -0500 Subject: [PATCH 18/33] Update UDF notebook in User Guide. (#10668) I noticed a couple lines I didn't expect in the UDF notebook in the User Guide while working on #10663. I didn't get these changes into that PR (had to wait for a local build to verify some things). The two changes are: - We don't require `method="cudf"` in groupby statements. - We don't need to execute `from cudf.utils import cudautils` to run this notebook. (The cell execution counts also changed. There were some cells executed multiple times the last time this notebook was executed so they got out of order - this fixes it.) 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - https://github.com/brandon-b-miller URL: https://github.com/rapidsai/cudf/pull/10668 --- .../source/user_guide/guide-to-udfs.ipynb | 152 +++++++++--------- 1 file changed, 75 insertions(+), 77 deletions(-) diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb index 41bce8b865e..0d05ddb00b4 100644 --- a/docs/cudf/source/user_guide/guide-to-udfs.ipynb +++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb @@ -138,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -148,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -160,7 +160,7 @@ "dtype: int64" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -193,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -205,7 +205,7 @@ "dtype: int64" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -218,7 +218,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -229,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -241,7 +241,7 @@ "dtype: int64" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -260,7 +260,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -274,7 +274,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -286,7 +286,7 @@ "dtype: int64" ] }, - "execution_count": 14, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -322,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -331,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -355,7 +355,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -373,7 +373,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -452,7 +452,7 @@ "4 979 982 1011 9790.0" ] }, - "execution_count": 19, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -497,7 +497,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -514,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -569,7 +569,7 @@ "2 3 6" ] }, - "execution_count": 21, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -591,7 +591,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -603,7 +603,7 @@ "dtype: int64" ] }, - "execution_count": 22, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -621,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -633,7 +633,7 @@ "dtype: object" ] }, - 
"execution_count": 23, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -658,7 +658,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -709,7 +709,7 @@ "2 3" ] }, - "execution_count": 24, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -728,7 +728,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -740,7 +740,7 @@ "dtype: int64" ] }, - "execution_count": 25, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -758,7 +758,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -813,7 +813,7 @@ "2 3 1" ] }, - "execution_count": 26, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -836,7 +836,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -848,7 +848,7 @@ "dtype: int64" ] }, - "execution_count": 27, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -866,7 +866,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -921,7 +921,7 @@ "2 3 3.14" ] }, - "execution_count": 28, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -939,7 +939,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -951,7 +951,7 @@ "dtype: float64" ] }, - "execution_count": 29, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -982,7 +982,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -1033,7 +1033,7 @@ "2 5" ] }, - "execution_count": 30, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1054,7 +1054,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -1066,7 +1066,7 @@ "dtype: float64" ] }, - "execution_count": 31, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1084,7 +1084,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1151,7 +1151,7 @@ "2 3 6 4 8 6" ] }, - "execution_count": 32, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1172,7 +1172,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -1184,7 +1184,7 @@ "dtype: float64" ] }, - "execution_count": 33, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1212,7 +1212,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -1241,7 +1241,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1312,7 +1312,7 @@ "2 3 6 4 8 6 9.0" ] }, - "execution_count": 35, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1344,7 +1344,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -1417,7 +1417,7 @@ "4 979 982 1011" ] }, - "execution_count": 36, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1443,7 +1443,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 33, 
"metadata": {}, "outputs": [ { @@ -1522,7 +1522,7 @@ "4 979 982 1011 1961.0" ] }, - "execution_count": 37, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1555,7 +1555,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1570,7 +1570,7 @@ "dtype: float64" ] }, - "execution_count": 38, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1582,7 +1582,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1591,7 +1591,7 @@ "Rolling [window=3,min_periods=3,center=False]" ] }, - "execution_count": 39, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1610,7 +1610,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -1634,7 +1634,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -1649,7 +1649,7 @@ "dtype: float64" ] }, - "execution_count": 41, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1667,7 +1667,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -1734,7 +1734,7 @@ "4 59.0 59.0" ] }, - "execution_count": 42, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1748,7 +1748,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 39, "metadata": {}, "outputs": [ { @@ -1845,7 +1845,7 @@ "9 100.0 100.0" ] }, - "execution_count": 43, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -1863,12 +1863,12 @@ "\n", "We can also apply UDFs to grouped DataFrames using `apply_grouped`. This example is also drawn and adapted from the RAPIDS [API documentation]().\n", "\n", - "First, we'll group our DataFrame based on column `b`, which is either True or False. Note that we currently need to pass `method=\"cudf\"` to use UDFs with GroupBy objects." + "First, we'll group our DataFrame based on column `b`, which is either True or False." 
] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -1947,7 +1947,7 @@ "4 -0.970850 False Sarah 0.342905" ] }, - "execution_count": 44, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1959,7 +1959,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 41, "metadata": {}, "outputs": [], "source": [ @@ -1975,7 +1975,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -2002,7 +2002,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -2132,7 +2132,7 @@ "9 -0.725581 True George 0.405245 0.271319" ] }, - "execution_count": 47, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -2162,7 +2162,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -2171,7 +2171,7 @@ "array([ 1., 2., 3., 4., 10.])" ] }, - "execution_count": 48, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -2193,7 +2193,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -2207,14 +2207,12 @@ "dtype: int32" ] }, - "execution_count": 49, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from cudf.utils import cudautils\n", - "\n", "@cuda.jit\n", "def multiply_by_5(x, out):\n", " i = cuda.grid(1)\n", @@ -2235,7 +2233,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -2244,7 +2242,7 @@ "array([ 5., 10., 15., 20., 50.])" ] }, - "execution_count": 50, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -2307,7 +2305,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.9.12" } }, "nbformat": 4, From 9e1258de32422f6f36e54bd3a2085a3c9c517a66 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 15 Apr 2022 12:13:45 -0700 Subject: [PATCH 19/33] Use `std::filesystem` for temporary directory location and deletion (#10664) Addressing a long-standing TODO. Since std::filesystem is available since C++17, use it to recursively delete temporary directories (used for benchmarks, etc.). Small step towards portable temp directory/file utilities. 
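As a small self-contained sketch of the C++17 facility this commit switches to (the directory and file names here are illustrative), `std::filesystem::remove_all` replaces the old `nftw` callback pattern with a single call:

```cpp
#include <filesystem>
#include <fstream>
#include <iostream>

int main()
{
  namespace fs = std::filesystem;
  auto const tmp = fs::temp_directory_path() / "cudf_scratch";  // illustrative name

  fs::create_directories(tmp / "nested");      // build a small tree...
  std::ofstream{tmp / "nested" / "data.bin"};  // ...containing one file

  // Recursively deletes the file, the nested directory, and the root;
  // returns the number of entries removed.
  auto const removed = fs::remove_all(tmp);
  std::cout << "removed " << removed << " entries\n";
  return 0;
}
```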
Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/10664
---
 cpp/benchmarks/text/subword.cpp          |  3 ++-
 cpp/include/cudf_test/file_utilities.hpp | 20 +++++++-------------
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/cpp/benchmarks/text/subword.cpp b/cpp/benchmarks/text/subword.cpp
index 150f578a22a..b8311324f70 100644
--- a/cpp/benchmarks/text/subword.cpp
+++ b/cpp/benchmarks/text/subword.cpp
@@ -21,6 +21,7 @@
 #include
 #include

+#include
 #include
 #include
 #include
@@ -29,7 +30,7 @@

 static std::string create_hash_vocab_file()
 {
-  std::string dir_template("/tmp");
+  std::string dir_template{std::filesystem::temp_directory_path().string()};
   if (const char* env_p = std::getenv("WORKSPACE")) dir_template = env_p;
   std::string hash_file = dir_template + "/hash_vocab.txt";
   // create a fake hashed vocab text file for this test
diff --git a/cpp/include/cudf_test/file_utilities.hpp b/cpp/include/cudf_test/file_utilities.hpp
index 4df7b6a69c8..d722b836674 100644
--- a/cpp/include/cudf_test/file_utilities.hpp
+++ b/cpp/include/cudf_test/file_utilities.hpp
@@ -18,6 +18,7 @@

 #include
 #include
+#include
 #include
 #include

@@ -34,17 +35,14 @@ class temp_directory {
  public:
   temp_directory(const std::string& base_name)
   {
-    std::string dir_template("/tmp");
-    if (const char* env_p = std::getenv("WORKSPACE")) dir_template = env_p;
+    std::string dir_template{std::filesystem::temp_directory_path().string()};
+    if (auto env_p = std::getenv("WORKSPACE")) dir_template = env_p;
+
     dir_template += "/" + base_name + ".XXXXXX";
     auto const tmpdirptr = mkdtemp(const_cast(dir_template.data()));
-    if (tmpdirptr == nullptr) CUDF_FAIL("Temporary directory creation failure: " + dir_template);
-    _path = dir_template + "/";
-  }
+    CUDF_EXPECTS(tmpdirptr != nullptr, "Temporary directory creation failure: " + dir_template);

-  static int rm_files(const char* pathname, const struct stat* sbuf, int type, struct FTW* ftwb)
-  {
-    return std::remove(pathname);
+    _path = dir_template + "/";
   }

   temp_directory& operator=(temp_directory const&) = delete;
@@ -52,11 +50,7 @@ class temp_directory {
   temp_directory& operator=(temp_directory&&) = default;
   temp_directory(temp_directory&&) = default;

-  ~temp_directory()
-  {
-    // TODO: should use std::filesystem instead, once C++17 support added
-    nftw(_path.c_str(), rm_files, 10, FTW_DEPTH | FTW_MOUNT | FTW_PHYS);
-  }
+  ~temp_directory() { std::filesystem::remove_all(std::filesystem::path{_path}); }

   /**
    * @brief Returns the path of the temporary directory

From d5a982b621dc67b1a9db6292abcc4e72e4693b36 Mon Sep 17 00:00:00 2001
From: Yunsong Wang 
Date: Fri, 15 Apr 2022 16:00:13 -0400
Subject: [PATCH 20/33] Add column field ID control in parquet writer (#10504)

Closes https://github.com/rapidsai/cudf/issues/10375
Closes https://github.com/rapidsai/cudf/issues/10376

This PR enables column `field_id` control in the parquet writer. When writing a parquet file, users can specify a column's `field_id` via `column_in_metadata.set_parquet_field_id()`. JNI bindings and unit tests are added as well.
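A usage sketch of the new option through the public C++ API (the file and column names are illustrative, and the surrounding `table_input_metadata` flow is assumed from this era of the API; the exact coverage lives in the C++ and Java tests in the diff below):

```cpp
#include <cudf/io/parquet.hpp>
#include <cudf/table/table_view.hpp>

// Assumes `tbl` is an existing single-column cudf::table_view.
void write_with_field_id(cudf::table_view const& tbl)
{
  cudf::io::table_input_metadata metadata(tbl);
  // Setters on column_in_metadata chain; 42 is an arbitrary example ID.
  metadata.column_metadata[0].set_name("col0").set_parquet_field_id(42);

  auto const opts =
    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{"out.parquet"}, tbl)
      .metadata(&metadata)
      .build();
  cudf::io::write_parquet(opts);
}
```

The ID is serialized into the Thrift `SchemaElement` (optional field 9, per the reader/writer changes below), which readers that resolve columns by ID can then use.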
Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Jason Lowe (https://github.com/jlowe) - Vukasin Milovanovic (https://github.com/vuule) - Devavret Makkar (https://github.com/devavret) URL: https://github.com/rapidsai/cudf/pull/10504 --- cpp/include/cudf/io/types.hpp | 27 +++- .../io/parquet/compact_protocol_reader.cpp | 1 + .../io/parquet/compact_protocol_reader.hpp | 24 ++++ .../io/parquet/compact_protocol_writer.cpp | 1 + cpp/src/io/parquet/parquet.hpp | 6 +- cpp/src/io/parquet/writer_impl.cu | 15 ++- cpp/tests/io/parquet_test.cpp | 10 +- .../ai/rapids/cudf/ColumnWriterOptions.java | 119 +++++++++++++++-- .../CompressionMetadataWriterOptions.java | 10 ++ java/src/main/java/ai/rapids/cudf/Table.java | 15 ++- java/src/main/native/src/TableJni.cpp | 46 +++++-- .../test/java/ai/rapids/cudf/TableTest.java | 121 ++++++++++++++++++ 12 files changed, 368 insertions(+), 27 deletions(-) diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 7e4ab5b8d9d..23ed0153f3f 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -244,6 +244,7 @@ class column_in_metadata { bool _use_int96_timestamp = false; // bool _output_as_binary = false; thrust::optional _decimal_precision; + thrust::optional _parquet_field_id; std::vector children; public: @@ -324,6 +325,18 @@ class column_in_metadata { return *this; } + /** + * @brief Set the parquet field id of this column. + * + * @param field_id The parquet field id to set + * @return this for chaining + */ + column_in_metadata& set_parquet_field_id(int32_t field_id) + { + _parquet_field_id = field_id; + return *this; + } + /** * @brief Get reference to a child of this column * @@ -379,6 +392,18 @@ class column_in_metadata { */ [[nodiscard]] uint8_t get_decimal_precision() const { return _decimal_precision.value(); } + /** + * @brief Get whether parquet field id has been set for this column. + */ + [[nodiscard]] bool is_parquet_field_id_set() const { return _parquet_field_id.has_value(); } + + /** + * @brief Get the parquet field id that was set for this column. + * @throws If parquet field id was not set for this column. + * Check using `is_parquet_field_id_set()` first. 
+ */ + [[nodiscard]] int32_t get_parquet_field_id() const { return _parquet_field_id.value(); } + /** * @brief Get the number of children of this column */ diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index 7feaa8e61b4..a1fc2edb0bb 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -156,6 +156,7 @@ bool CompactProtocolReader::read(SchemaElement* s) ParquetFieldEnum(6, s->converted_type), ParquetFieldInt32(7, s->decimal_scale), ParquetFieldInt32(8, s->decimal_precision), + ParquetFieldOptionalInt32(9, s->field_id), ParquetFieldStruct(10, s->logical_type)); return function_builder(this, op); } diff --git a/cpp/src/io/parquet/compact_protocol_reader.hpp b/cpp/src/io/parquet/compact_protocol_reader.hpp index ba48f7b127f..ddca6c37e08 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.hpp +++ b/cpp/src/io/parquet/compact_protocol_reader.hpp @@ -18,6 +18,8 @@ #include "parquet.hpp" +#include + #include #include #include @@ -137,6 +139,7 @@ class CompactProtocolReader { friend class ParquetFieldBool; friend class ParquetFieldInt8; friend class ParquetFieldInt32; + friend class ParquetFieldOptionalInt32; friend class ParquetFieldInt64; template friend class ParquetFieldStructListFunctor; @@ -216,6 +219,27 @@ class ParquetFieldInt32 { int field() { return field_val; } }; +/** + * @brief Functor to set value to optional 32 bit integer read from CompactProtocolReader + * + * @return True if field type is not int32 + */ +class ParquetFieldOptionalInt32 { + int field_val; + thrust::optional& val; + + public: + ParquetFieldOptionalInt32(int f, thrust::optional& v) : field_val(f), val(v) {} + + inline bool operator()(CompactProtocolReader* cpr, int field_type) + { + val = cpr->get_i32(); + return (field_type != ST_FLD_I32); + } + + int field() { return field_val; } +}; + /** * @brief Functor to set value to 64 bit integer read from CompactProtocolReader * diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp b/cpp/src/io/parquet/compact_protocol_writer.cpp index 927844cb1c2..176ecb6a572 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.cpp +++ b/cpp/src/io/parquet/compact_protocol_writer.cpp @@ -144,6 +144,7 @@ size_t CompactProtocolWriter::write(const SchemaElement& s) c.field_int(8, s.decimal_precision); } } + if (s.field_id) { c.field_int(9, s.field_id.value()); } auto const isset = s.logical_type.isset; // TODO: add handling for all logical types // if (isset.STRING or isset.MAP or isset.LIST or isset.ENUM or isset.DECIMAL or isset.DATE or diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index b1800640c91..ccaf3485bdf 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -18,6 +18,8 @@ #include "parquet_common.hpp" +#include + #include #include #include @@ -145,6 +147,7 @@ struct SchemaElement { int32_t num_children = 0; int32_t decimal_scale = 0; int32_t decimal_precision = 0; + thrust::optional field_id = thrust::nullopt; // The following fields are filled in later during schema initialization int max_definition_level = 0; @@ -157,7 +160,8 @@ struct SchemaElement { return type == other.type && converted_type == other.converted_type && type_length == other.type_length && repetition_type == other.repetition_type && name == other.name && num_children == other.num_children && - decimal_scale == other.decimal_scale && decimal_precision == other.decimal_precision; + decimal_scale == other.decimal_scale && 
decimal_precision == other.decimal_precision && + field_id == other.field_id; } // the parquet format is a little squishy when it comes to interpreting diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index cb1acb4d9ec..4bc084c61d0 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -491,6 +491,13 @@ std::vector construct_schema_tree( [&](cudf::detail::LinkedColPtr const& col, column_in_metadata& col_meta, size_t parent_idx) { bool col_nullable = is_col_nullable(col, col_meta, single_write_mode); + auto set_field_id = [&schema, parent_idx](schema_tree_node& s, + column_in_metadata const& col_meta) { + if (schema[parent_idx].name != "list" and col_meta.is_parquet_field_id_set()) { + s.field_id = col_meta.get_parquet_field_id(); + } + }; + if (col->type().id() == type_id::STRUCT) { // if struct, add current and recursively call for all children schema_tree_node struct_schema{}; @@ -500,6 +507,7 @@ std::vector construct_schema_tree( struct_schema.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); struct_schema.num_children = col->children.size(); struct_schema.parent_idx = parent_idx; + set_field_id(struct_schema, col_meta); schema.push_back(std::move(struct_schema)); auto struct_node_index = schema.size() - 1; @@ -524,6 +532,7 @@ std::vector construct_schema_tree( list_schema_1.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); list_schema_1.num_children = 1; list_schema_1.parent_idx = parent_idx; + set_field_id(list_schema_1, col_meta); schema.push_back(std::move(list_schema_1)); schema_tree_node list_schema_2{}; @@ -555,7 +564,10 @@ std::vector construct_schema_tree( map_schema.converted_type = ConvertedType::MAP; map_schema.repetition_type = col_nullable ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED; - map_schema.name = col_meta.get_name(); + map_schema.name = col_meta.get_name(); + if (col_meta.is_parquet_field_id_set()) { + map_schema.field_id = col_meta.get_parquet_field_id(); + } map_schema.num_children = 1; map_schema.parent_idx = parent_idx; schema.push_back(std::move(map_schema)); @@ -612,6 +624,7 @@ std::vector construct_schema_tree( col_schema.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); col_schema.parent_idx = parent_idx; col_schema.leaf_column = col; + set_field_id(col_schema, col_meta); schema.push_back(col_schema); } }; diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index cd0aab3caeb..3905df2b274 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -219,15 +219,21 @@ struct ParquetWriterTimestampTypeTest : public ParquetWriterTest { auto type() { return cudf::data_type{cudf::type_to_id()}; } }; +// Typed test fixture for all types +template +struct ParquetWriterSchemaTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + // Declare typed test cases // TODO: Replace with `NumericTypes` when unsigned support is added. 
Issue #5352 using SupportedTypes = cudf::test::Types; TYPED_TEST_SUITE(ParquetWriterNumericTypeTest, SupportedTypes); -using SupportedChronoTypes = cudf::test::Concat; -TYPED_TEST_SUITE(ParquetWriterChronoTypeTest, SupportedChronoTypes); +TYPED_TEST_SUITE(ParquetWriterChronoTypeTest, cudf::test::ChronoTypes); using SupportedTimestampTypes = cudf::test::Types; TYPED_TEST_SUITE(ParquetWriterTimestampTypeTest, SupportedTimestampTypes); +TYPED_TEST_SUITE(ParquetWriterSchemaTest, cudf::test::AllTypes); // Base test fixture for chunked writer tests struct ParquetChunkedWriterTest : public cudf::test::BaseFixture { diff --git a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java index 78b3d5d52ec..f3fb7de6abe 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnWriterOptions.java @@ -33,9 +33,15 @@ public class ColumnWriterOptions { private boolean isNullable; private boolean isMap = false; private String columnName; + // only for Parquet + private boolean hasParquetFieldId; + private int parquetFieldId; + private ColumnWriterOptions(AbstractStructBuilder builder) { this.columnName = builder.name; this.isNullable = builder.isNullable; + this.hasParquetFieldId = builder.hasParquetFieldId; + this.parquetFieldId = builder.parquetFieldId; this.childColumnOptions = (ColumnWriterOptions[]) builder.children.toArray(new ColumnWriterOptions[0]); } @@ -67,6 +73,10 @@ public AbstractStructBuilder(String name, boolean isNullable) { super(name, isNullable); } + public AbstractStructBuilder(String name, boolean isNullable, int parquetFieldId) { + super(name, isNullable, parquetFieldId); + } + protected AbstractStructBuilder() { super(); } @@ -84,6 +94,9 @@ public static abstract class NestedBuilder children = new ArrayList<>(); protected boolean isNullable = true; protected String name = ""; + // Parquet structure needs + protected boolean hasParquetFieldId; + protected int parquetFieldId; /** * Builder specific to build a Struct meta @@ -93,22 +106,43 @@ protected NestedBuilder(String name, boolean isNullable) { this.isNullable = isNullable; } + protected NestedBuilder(String name, boolean isNullable, int parquetFieldId) { + this.name = name; + this.isNullable = isNullable; + this.hasParquetFieldId = true; + this.parquetFieldId = parquetFieldId; + } + protected NestedBuilder() {} - protected ColumnWriterOptions withColumns(String name, boolean isNullable) { + protected ColumnWriterOptions withColumn(String name, boolean isNullable) { return new ColumnWriterOptions(name, isNullable); } + protected ColumnWriterOptions withColumn(String name, boolean isNullable, int parquetFieldId) { + return new ColumnWriterOptions(name, isNullable, parquetFieldId); + } + protected ColumnWriterOptions withDecimal(String name, int precision, boolean isNullable) { return new ColumnWriterOptions(name, false, precision, isNullable); } + protected ColumnWriterOptions withDecimal(String name, int precision, + boolean isNullable, int parquetFieldId) { + return new ColumnWriterOptions(name, false, precision, isNullable, parquetFieldId); + } + protected ColumnWriterOptions withTimestamp(String name, boolean isInt96, boolean isNullable) { return new ColumnWriterOptions(name, isInt96, UNKNOWN_PRECISION, isNullable); } + protected ColumnWriterOptions withTimestamp(String name, boolean isInt96, + boolean isNullable, int parquetFieldId) { + return new ColumnWriterOptions(name, isInt96, UNKNOWN_PRECISION, 
isNullable, parquetFieldId); + } + /** * Set the list column meta. * Lists should have only one child in ColumnVector, but the metadata expects a @@ -155,16 +189,16 @@ public T withStructColumn(StructColumnWriterOptions child) { /** * Set column name */ - public T withNonNullableColumns(String... name) { - withColumns(false, name); + public T withNonNullableColumns(String... names) { + withColumns(false, names); return (T) this; } /** * Set nullable column meta data */ - public T withNullableColumns(String... name) { - withColumns(true, name); + public T withNullableColumns(String... names) { + withColumns(true, names); return (T) this; } @@ -172,13 +206,22 @@ public T withNullableColumns(String... name) { * Set a simple child meta data * @return this for chaining. */ - public T withColumns(boolean nullable, String... name) { - for (String n : name) { - children.add(withColumns(n, nullable)); + public T withColumns(boolean nullable, String... names) { + for (String n : names) { + children.add(withColumn(n, nullable)); } return (T) this; } + /** + * Set a simple child meta data + * @return this for chaining. + */ + public T withColumn(boolean nullable, String name, int parquetFieldId) { + children.add(withColumn(name, nullable, parquetFieldId)); + return (T) this; + } + /** * Set a Decimal child meta data * @return this for chaining. @@ -188,6 +231,15 @@ public T withDecimalColumn(String name, int precision, boolean nullable) { return (T) this; } + /** + * Set a Decimal child meta data + * @return this for chaining. + */ + public T withDecimalColumn(String name, int precision, boolean nullable, int parquetFieldId) { + children.add(withDecimal(name, precision, nullable, parquetFieldId)); + return (T) this; + } + /** * Set a Decimal child meta data * @return this for chaining. @@ -206,6 +258,15 @@ public T withDecimalColumn(String name, int precision) { return (T) this; } + /** + * Set a timestamp child meta data + * @return this for chaining. + */ + public T withTimestampColumn(String name, boolean isInt96, boolean nullable, int parquetFieldId) { + children.add(withTimestamp(name, isInt96, nullable, parquetFieldId)); + return (T) this; + } + /** * Set a timestamp child meta data * @return this for chaining. 
@@ -244,6 +305,13 @@ public ColumnWriterOptions(String columnName, boolean isTimestampTypeInt96, this.columnName = columnName; } + public ColumnWriterOptions(String columnName, boolean isTimestampTypeInt96, + int precision, boolean isNullable, int parquetFieldId) { + this(columnName, isTimestampTypeInt96, precision, isNullable); + this.hasParquetFieldId = true; + this.parquetFieldId = parquetFieldId; + } + public ColumnWriterOptions(String columnName, boolean isNullable) { this.isTimestampTypeInt96 = false; this.precision = UNKNOWN_PRECISION; @@ -251,6 +319,12 @@ public ColumnWriterOptions(String columnName, boolean isNullable) { this.columnName = columnName; } + public ColumnWriterOptions(String columnName, boolean isNullable, int parquetFieldId) { + this(columnName, isNullable); + this.hasParquetFieldId = true; + this.parquetFieldId = parquetFieldId; + } + public ColumnWriterOptions(String columnName) { this(columnName, true); } @@ -302,6 +376,24 @@ int[] getFlatPrecision() { } } + boolean[] getFlatHasParquetFieldId() { + boolean[] ret = {hasParquetFieldId}; + if (childColumnOptions.length > 0) { + return getFlatBooleans(ret, (opt) -> opt.getFlatHasParquetFieldId()); + } else { + return ret; + } + } + + int[] getFlatParquetFieldId() { + int[] ret = {parquetFieldId}; + if (childColumnOptions.length > 0) { + return getFlatInts(ret, (opt) -> opt.getFlatParquetFieldId()); + } else { + return ret; + } + } + boolean[] getFlatIsNullable() { boolean[] ret = {isNullable}; if (childColumnOptions.length > 0) { @@ -418,6 +510,13 @@ public static StructBuilder structBuilder(String name, boolean isNullable) { return new StructBuilder(name, isNullable); } + /** + * Creates a StructBuilder for column called 'name' + */ + public static StructBuilder structBuilder(String name, boolean isNullable, int parquetFieldId) { + return new StructBuilder(name, isNullable, parquetFieldId); + } + /** * Creates a StructBuilder for column called 'name' */ @@ -477,6 +576,10 @@ public StructBuilder(String name, boolean isNullable) { super(name, isNullable); } + public StructBuilder(String name, boolean isNullable, int parquetFieldId) { + super(name, isNullable, parquetFieldId); + } + public StructColumnWriterOptions build() { return new StructColumnWriterOptions(this); } diff --git a/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java b/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java index 9292975d0ce..3a3b7d721b7 100644 --- a/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/CompressionMetadataWriterOptions.java @@ -41,6 +41,16 @@ int[] getFlatPrecision() { return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatPrecision()); } + @Override + boolean[] getFlatHasParquetFieldId() { + return super.getFlatBooleans(new boolean[]{}, (opt) -> opt.getFlatHasParquetFieldId()); + } + + @Override + int[] getFlatParquetFieldId() { + return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatParquetFieldId()); + } + @Override int[] getFlatNumChildren() { return super.getFlatInts(new int[]{}, (opt) -> opt.getFlatNumChildren()); diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index ff966643866..24f7d44ed28 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -289,7 +289,10 @@ private static native long writeParquetFileBegin(String[] columnNames, int statsFreq, boolean[] isInt96, int[] precisions, - boolean[] 
isMapValues, String filename) throws CudfException; + boolean[] isMapValues, + boolean[] hasParquetFieldIds, + int[] parquetFieldIds, + String filename) throws CudfException; /** * Setup everything to write parquet formatted data to a buffer. @@ -319,6 +322,8 @@ private static native long writeParquetBufferBegin(String[] columnNames, boolean[] isInt96, int[] precisions, boolean[] isMapValues, + boolean[] hasParquetFieldIds, + int[] parquetFieldIds, HostBufferConsumer consumer) throws CudfException; /** @@ -1201,6 +1206,8 @@ private ParquetTableWriter(ParquetWriterOptions options, File outputFile) { boolean[] timeInt96Values = options.getFlatIsTimeTypeInt96(); boolean[] isMapValues = options.getFlatIsMap(); int[] precisions = options.getFlatPrecision(); + boolean[] hasParquetFieldIds = options.getFlatHasParquetFieldId(); + int[] parquetFieldIds = options.getFlatParquetFieldId(); int[] flatNumChildren = options.getFlatNumChildren(); this.consumer = null; @@ -1215,6 +1222,8 @@ private ParquetTableWriter(ParquetWriterOptions options, File outputFile) { timeInt96Values, precisions, isMapValues, + hasParquetFieldIds, + parquetFieldIds, outputFile.getAbsolutePath()); } @@ -1224,6 +1233,8 @@ private ParquetTableWriter(ParquetWriterOptions options, HostBufferConsumer cons boolean[] timeInt96Values = options.getFlatIsTimeTypeInt96(); boolean[] isMapValues = options.getFlatIsMap(); int[] precisions = options.getFlatPrecision(); + boolean[] hasParquetFieldIds = options.getFlatHasParquetFieldId(); + int[] parquetFieldIds = options.getFlatParquetFieldId(); int[] flatNumChildren = options.getFlatNumChildren(); this.consumer = consumer; @@ -1238,6 +1249,8 @@ private ParquetTableWriter(ParquetWriterOptions options, HostBufferConsumer cons timeInt96Values, precisions, isMapValues, + hasParquetFieldIds, + parquetFieldIds, consumer); } diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index cebe476dd87..919958d4db2 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -673,6 +673,8 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, cudf::jni::native_jbooleanArray &is_int96, cudf::jni::native_jintArray &precisions, cudf::jni::native_jbooleanArray &is_map, + cudf::jni::native_jbooleanArray &hasParquetFieldIds, + cudf::jni::native_jintArray &parquetFieldIds, cudf::jni::native_jintArray &children, int num_children, int read_index) { int write_index = 0; for (int i = 0; i < num_children; i++, write_index++) { @@ -687,12 +689,15 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, if (is_map[read_index]) { child.set_list_column_as_map(); } + if (!parquetFieldIds.is_null() && hasParquetFieldIds[read_index]) { + child.set_parquet_field_id(parquetFieldIds[read_index]); + } column_metadata.add_child(child); int childs_children = children[read_index++]; if (childs_children > 0) { - read_index = - set_column_metadata(column_metadata.child(write_index), col_names, nullability, is_int96, - precisions, is_map, children, childs_children, read_index); + read_index = set_column_metadata(column_metadata.child(write_index), col_names, nullability, + is_int96, precisions, is_map, hasParquetFieldIds, + parquetFieldIds, children, childs_children, read_index); } } return read_index; @@ -701,12 +706,15 @@ int set_column_metadata(cudf::io::column_in_metadata &column_metadata, void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_names, jintArray &j_children, jbooleanArray 
&j_col_nullability, jbooleanArray &j_is_int96, jintArray &j_precisions, - jbooleanArray &j_is_map, cudf::io::table_input_metadata &metadata) { + jbooleanArray &j_is_map, cudf::io::table_input_metadata &metadata, + jbooleanArray &j_hasParquetFieldIds, jintArray &j_parquetFieldIds) { cudf::jni::auto_set_device(env); cudf::jni::native_jstringArray col_names(env, j_col_names); cudf::jni::native_jbooleanArray col_nullability(env, j_col_nullability); cudf::jni::native_jbooleanArray is_int96(env, j_is_int96); cudf::jni::native_jintArray precisions(env, j_precisions); + cudf::jni::native_jbooleanArray hasParquetFieldIds(env, j_hasParquetFieldIds); + cudf::jni::native_jintArray parquetFieldIds(env, j_parquetFieldIds); cudf::jni::native_jintArray children(env, j_children); cudf::jni::native_jbooleanArray is_map(env, j_is_map); @@ -729,11 +737,14 @@ void createTableMetaData(JNIEnv *env, jint num_children, jobjectArray &j_col_nam if (is_map[read_index]) { metadata.column_metadata[write_index].set_list_column_as_map(); } + if (!parquetFieldIds.is_null() && hasParquetFieldIds[read_index]) { + metadata.column_metadata[write_index].set_parquet_field_id(parquetFieldIds[read_index]); + } int childs_children = children[read_index++]; if (childs_children > 0) { - read_index = - set_column_metadata(metadata.column_metadata[write_index], cpp_names, col_nullability, - is_int96, precisions, is_map, children, childs_children, read_index); + read_index = set_column_metadata( + metadata.column_metadata[write_index], cpp_names, col_nullability, is_int96, precisions, + is_map, hasParquetFieldIds, parquetFieldIds, children, childs_children, read_index); } } } @@ -1539,7 +1550,8 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin( JNIEnv *env, jclass, jobjectArray j_col_names, jint j_num_children, jintArray j_children, jbooleanArray j_col_nullability, jobjectArray j_metadata_keys, jobjectArray j_metadata_values, jint j_compression, jint j_stats_freq, jbooleanArray j_isInt96, jintArray j_precisions, - jbooleanArray j_is_map, jobject consumer) { + jbooleanArray j_is_map, jbooleanArray j_hasParquetFieldIds, jintArray j_parquetFieldIds, + jobject consumer) { JNI_NULL_CHECK(env, j_col_names, "null columns", 0); JNI_NULL_CHECK(env, j_col_nullability, "null nullability", 0); JNI_NULL_CHECK(env, j_metadata_keys, "null metadata keys", 0); @@ -1554,7 +1566,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin( sink_info sink{data_sink.get()}; table_input_metadata metadata; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_isInt96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); @@ -1583,7 +1595,8 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin( JNIEnv *env, jclass, jobjectArray j_col_names, jint j_num_children, jintArray j_children, jbooleanArray j_col_nullability, jobjectArray j_metadata_keys, jobjectArray j_metadata_values, jint j_compression, jint j_stats_freq, jbooleanArray j_isInt96, jintArray j_precisions, - jbooleanArray j_is_map, jstring j_output_path) { + jbooleanArray j_is_map, jbooleanArray j_hasParquetFieldIds, jintArray j_parquetFieldIds, + jstring j_output_path) { JNI_NULL_CHECK(env, j_col_names, "null columns", 0); JNI_NULL_CHECK(env, j_col_nullability, "null 
nullability", 0); JNI_NULL_CHECK(env, j_metadata_keys, "null metadata keys", 0); @@ -1596,7 +1609,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin( using namespace cudf::jni; table_input_metadata metadata; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_isInt96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); @@ -1721,8 +1734,12 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin( table_input_metadata metadata; // ORC has no `j_is_int96`, but `createTableMetaData` needs a lvalue. jbooleanArray j_is_int96 = NULL; + // ORC has no `j_parquetFieldIds`, but `createTableMetaData` needs a lvalue. + jbooleanArray j_hasParquetFieldIds = NULL; + jintArray j_parquetFieldIds = NULL; + createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_is_int96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); @@ -1766,8 +1783,11 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin( table_input_metadata metadata; // ORC has no `j_is_int96`, but `createTableMetaData` needs a lvalue. jbooleanArray j_is_int96 = NULL; + // ORC has no `j_parquetFieldIds`, but `createTableMetaData` needs a lvalue. + jbooleanArray j_hasParquetFieldIds = NULL; + jintArray j_parquetFieldIds = NULL; createTableMetaData(env, j_num_children, j_col_names, j_children, j_col_nullability, j_is_int96, - j_precisions, j_is_map, metadata); + j_precisions, j_is_map, metadata, j_hasParquetFieldIds, j_parquetFieldIds); auto meta_keys = cudf::jni::native_jstringArray{env, j_metadata_keys}.as_cpp_vector(); auto meta_values = cudf::jni::native_jstringArray{env, j_metadata_values}.as_cpp_vector(); diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 7be1ca2118b..af28cfb6d6c 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -36,6 +36,7 @@ import org.apache.hadoop.fs.Path; import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.hadoop.util.HadoopInputFile; +import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; import org.junit.jupiter.api.Test; @@ -7899,6 +7900,126 @@ void testParquetWriteToFileUncompressedNoStats() throws IOException { } } + @Test + void testParquetWriteWithFieldId() throws IOException { + // field IDs are: + // c1: -1, c2: 2, c3: 3, c31: 31, c32: 32, c4: -4, c5: not specified + ColumnWriterOptions.StructBuilder sBuilder = + structBuilder("c3", true, 3) + .withColumn(true, "c31", 31) + .withColumn(true, "c32", 32); + ParquetWriterOptions options = ParquetWriterOptions.builder() + .withColumn(true, "c1", -1) + .withDecimalColumn("c2", 9, true, 2) + .withStructColumn(sBuilder.build()) + .withTimestampColumn("c4", true, true, -4) + .withColumns( true, "c5") + .build(); + + File tempFile = File.createTempFile("test-field-id", ".parquet"); + try { + HostColumnVector.StructType structType = new 
HostColumnVector.StructType( + true, + new HostColumnVector.BasicType(true, DType.STRING), + new HostColumnVector.BasicType(true, DType.STRING)); + + try (Table table0 = new Table.TestBuilder() + .column(true, false) // c1 + .decimal32Column(0, 298, 2473) // c2 + .column(structType, // c3 + new HostColumnVector.StructData("a", "b"), new HostColumnVector.StructData("a", "b")) + .timestampMicrosecondsColumn(1000L, 2000L) // c4 + .column("a", "b") // c5 + .build()) { + try (TableWriter writer = Table.writeParquetChunked(options, tempFile.getAbsoluteFile())) { + writer.write(table0); + } + } + + try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath( + new Path(tempFile.getAbsolutePath()), + new Configuration()))) { + MessageType schema = reader.getFooter().getFileMetaData().getSchema(); + assert (schema.getFields().get(0).getId().intValue() == -1); + assert (schema.getFields().get(1).getId().intValue() == 2); + assert (schema.getFields().get(2).getId().intValue() == 3); + assert (((GroupType) schema.getFields().get(2)).getFields().get(0).getId().intValue() == 31); + assert (((GroupType) schema.getFields().get(2)).getFields().get(1).getId().intValue() == 32); + assert (schema.getFields().get(3).getId().intValue() == -4); + assert (schema.getFields().get(4).getId() == null); + } + } finally { + tempFile.delete(); + } + } + + @Test + void testParquetWriteWithFieldIdNestNotSpecified() throws IOException { + // field IDs are: + // c0: no field ID + // c1: 1 + // c2: no field ID + // c21: 21 + // c22: no field ID + // c3: 3 + // c31: 31 + // c32: no field ID + // c4: 0 + ColumnWriterOptions.StructBuilder c2Builder = + structBuilder("c2", true) + .withColumn(true, "c21", 21) + .withColumns(true, "c22"); + ColumnWriterOptions.StructBuilder c3Builder = + structBuilder("c3", true, 3) + .withColumn(true, "c31", 31) + .withColumns(true, "c32"); + ParquetWriterOptions options = ParquetWriterOptions.builder() + .withColumns(true, "c0") + .withDecimalColumn("c1", 9, true, 1) + .withStructColumn(c2Builder.build()) + .withStructColumn(c3Builder.build()) + .withColumn(true, "c4", 0) + .build(); + + File tempFile = File.createTempFile("test-field-id", ".parquet"); + try { + HostColumnVector.StructType structType = new HostColumnVector.StructType( + true, + new HostColumnVector.BasicType(true, DType.STRING), + new HostColumnVector.BasicType(true, DType.STRING)); + + try (Table table0 = new Table.TestBuilder() + .column(true, false) // c0 + .decimal32Column(0, 298, 2473) // c1 + .column(structType, // c2 + new HostColumnVector.StructData("a", "b"), new HostColumnVector.StructData("a", "b")) + .column(structType, // c3 + new HostColumnVector.StructData("a", "b"), new HostColumnVector.StructData("a", "b")) + .column("a", "b") // c4 + .build()) { + try (TableWriter writer = Table.writeParquetChunked(options, tempFile.getAbsoluteFile())) { + writer.write(table0); + } + } + + try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath( + new Path(tempFile.getAbsolutePath()), + new Configuration()))) { + MessageType schema = reader.getFooter().getFileMetaData().getSchema(); + assert (schema.getFields().get(0).getId() == null); + assert (schema.getFields().get(1).getId().intValue() == 1); + assert (schema.getFields().get(2).getId() == null); + assert (((GroupType) schema.getFields().get(2)).getFields().get(0).getId().intValue() == 21); + assert (((GroupType) schema.getFields().get(2)).getFields().get(1).getId() == null); + assert (((GroupType) 
schema.getFields().get(3)).getFields().get(0).getId().intValue() == 31); + assert (((GroupType) schema.getFields().get(3)).getFields().get(1).getId() == null); + assert (schema.getFields().get(4).getId().intValue() == 0); + } + } finally { + tempFile.delete(); + } + } + /** Return a column where DECIMAL64 has been up-casted to DECIMAL128 */ private ColumnVector castDecimal64To128(ColumnView c) { DType dtype = c.getType();

From 94a5d4180b1281d4250e9f915e547789d8da3ce0 Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Fri, 15 Apr 2022 16:08:09 -0400
Subject: [PATCH 21/33] Add support for null and non-numeric types in Series.diff and DataFrame.diff (#10625)

This PR adds support for non-numeric data types (such as timestamps and datetime ranges) in `Series.diff` and `DataFrame.diff`. In `DataFrame.diff`, datetime ranges were already supported because `DataFrame.shift` works. But `Series.diff` didn't use the `Series.shift` implementation, so datetime ranges were not supported there. For example:

```python
import datetime

import numpy as np
import pandas as pd

import cudf

dti = pd.to_datetime(
    ["1/1/2018", np.datetime64("2018-01-01"),
     datetime.datetime(2018, 1, 1), datetime.datetime(2020, 1, 1)]
)
df = cudf.DataFrame({"dates": dti})
df.diff(periods=1, axis=0)
```

closes #10212.

Authors: - Matthew Murray (https://github.com/Matt711) - Bradley Dice (https://github.com/bdice)

Approvers: - Ashwin Srinath (https://github.com/shwina) - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)

URL: https://github.com/rapidsai/cudf/pull/10625
---
python/cudf/cudf/core/dataframe.py | 5 -- python/cudf/cudf/core/series.py | 52 +++++++-------------- python/cudf/cudf/tests/test_dataframe.py | 28 +++++++++--- python/cudf/cudf/tests/test_series.py | 58 ++++++++++++++++++++++++ python/cudf/cudf/utils/cudautils.py | 21 --------- 5 files changed, 96 insertions(+), 68 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index ae60cd91fac..8893b85c97c 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2660,11 +2660,6 @@ def diff(self, periods=1, axis=0): if axis != 0: raise NotImplementedError("Only axis=0 is supported.") - if not all(is_numeric_dtype(i) for i in self.dtypes): - raise NotImplementedError( - "DataFrame.diff only supports numeric dtypes" - ) - if abs(periods) > len(self): df = cudf.DataFrame._from_data( {

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 6e15c03e6b4..20ba52afccd 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -14,11 +14,11 @@ import numpy as np import pandas as pd from pandas._config import get_option +from pandas.core.dtypes.common import is_float import cudf from cudf import _lib as libcudf from cudf._lib.scalar import _is_null_host_scalar -from cudf._lib.transform import bools_to_mask from cudf._typing import ColumnLike, DataFrameOrSeries, ScalarLike from cudf.api.types import ( _is_non_decimal_numeric_dtype, @@ -42,7 +42,6 @@ arange, as_column, column, - column_empty_like, full, ) from cudf.core.column.categorical import ( @@ -64,7 +63,7 @@ ) from cudf.core.single_column_frame import SingleColumnFrame from cudf.core.udf.scalar_function import _get_scalar_kernel -from cudf.utils import cudautils, docutils +from cudf.utils import docutils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( can_convert_to_column, @@ -2969,19 +2968,22 @@ def digitize(self, bins, right=False): @_cudf_nvtx_annotate
def diff(self, periods=1): - """Calculate the difference between values at positions i and i - N in - an array and store the output in a new array. + """First discrete difference of element. + + Calculates the difference of a Series element compared with another + element in the Series (default is element in previous row). + + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, + accepts negative values. Returns ------- Series First differences of the Series. - Notes - ----- - Diff currently only supports float and integer dtype columns with - no null values. - Examples -------- >>> import cudf @@ -3028,32 +3030,12 @@ def diff(self, periods=1): 5 dtype: int64 """ - if self.has_nulls: - raise AssertionError( - "Diff currently requires columns with no null values" - ) - - if not np.issubdtype(self.dtype, np.number): - raise NotImplementedError( - "Diff currently only supports numeric dtypes" - ) - - # TODO: move this libcudf - input_col = self._column - output_col = column_empty_like(input_col) - output_mask = column_empty_like(input_col, dtype="bool") - if output_col.size > 0: - cudautils.gpu_diff.forall(output_col.size)( - input_col, output_col, output_mask, periods - ) - - output_col = column.build_column( - data=output_col.data, - dtype=output_col.dtype, - mask=bools_to_mask(output_mask), - ) + if not is_integer(periods): + if not (is_float(periods) and periods.is_integer()): + raise ValueError("periods must be an integer") + periods = int(periods) - return Series(output_col, name=self.name, index=self.index) + return self - self.shift(periods=periods) @copy_docstring(SeriesGroupBy) @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 13ab0b35822..07261534777 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9098,7 +9098,7 @@ def test_groupby_cov_for_pandas_bug_case(): ], ) @pytest.mark.parametrize("periods", (-5, -1, 0, 1, 5)) -def test_diff_dataframe_numeric_dtypes(data, periods): +def test_diff_numeric_dtypes(data, periods): gdf = cudf.DataFrame(data) pdf = gdf.to_pandas() @@ -9137,7 +9137,7 @@ def test_diff_decimal_dtypes(precision, scale, dtype): ) -def test_diff_dataframe_invalid_axis(): +def test_diff_invalid_axis(): gdf = cudf.DataFrame(np.array([1.123, 2.343, 5.890, 0.0])) with pytest.raises(NotImplementedError, match="Only axis=0 is supported."): gdf.diff(periods=1, axis=1) @@ -9152,16 +9152,30 @@ def test_diff_dataframe_invalid_axis(): "string_col": ["a", "b", "c", "d", "e"], }, ["a", "b", "c", "d", "e"], - [np.nan, None, np.nan, None], ], ) -def test_diff_dataframe_non_numeric_dypes(data): +def test_diff_unsupported_dtypes(data): gdf = cudf.DataFrame(data) with pytest.raises( - NotImplementedError, - match="DataFrame.diff only supports numeric dtypes", + TypeError, + match=r"unsupported operand type\(s\)", ): - gdf.diff(periods=2, axis=0) + gdf.diff() + + +def test_diff_many_dtypes(): + pdf = pd.DataFrame( + { + "dates": pd.date_range("2020-01-01", "2020-01-06", freq="D"), + "bools": [True, True, True, False, True, True], + "floats": [1.0, 2.0, 3.5, np.nan, 5.0, -1.7], + "ints": [1, 2, 3, 3, 4, 5], + "nans_nulls": [np.nan, None, None, np.nan, np.nan, None], + } + ) + gdf = cudf.from_pandas(pdf) + assert_eq(pdf.diff(), gdf.diff()) + assert_eq(pdf.diff(periods=2), gdf.diff(periods=2)) def test_dataframe_assign_cp_np_array(): diff --git a/python/cudf/cudf/tests/test_series.py 
b/python/cudf/cudf/tests/test_series.py index 6f0f77f0aa2..fccb9f680d9 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -18,6 +18,7 @@ TIMEDELTA_TYPES, assert_eq, assert_exceptions_equal, + gen_rand, ) @@ -1724,3 +1725,60 @@ def test_isin_categorical(data, values): got = gsr.isin(values) expected = psr.isin(values) assert_eq(got, expected) + + +@pytest.mark.parametrize("dtype", NUMERIC_TYPES) +@pytest.mark.parametrize("period", [-1, -5, -10, -20, 0, 1, 5, 10, 20]) +@pytest.mark.parametrize("data_empty", [False, True]) +def test_diff(dtype, period, data_empty): + if data_empty: + data = None + else: + if dtype == np.int8: + # to keep data in range + data = gen_rand(dtype, 100000, low=-2, high=2) + else: + data = gen_rand(dtype, 100000) + + gs = cudf.Series(data, dtype=dtype) + ps = pd.Series(data, dtype=dtype) + + expected_outcome = ps.diff(period) + diffed_outcome = gs.diff(period).astype(expected_outcome.dtype) + + if data_empty: + assert_eq(diffed_outcome, expected_outcome, check_index_type=False) + else: + assert_eq(diffed_outcome, expected_outcome) + + +@pytest.mark.parametrize( + "data", + [ + ["a", "b", "c", "d", "e"], + ], +) +def test_diff_unsupported_dtypes(data): + gs = cudf.Series(data) + with pytest.raises( + TypeError, + match=r"unsupported operand type\(s\)", + ): + gs.diff() + + +@pytest.mark.parametrize( + "data", + [ + pd.date_range("2020-01-01", "2020-01-06", freq="D"), + [True, True, True, False, True, True], + [1.0, 2.0, 3.5, 4.0, 5.0, -1.7], + [1, 2, 3, 3, 4, 5], + [np.nan, None, None, np.nan, np.nan, None], + ], +) +def test_diff_many_dtypes(data): + ps = pd.Series(data) + gs = cudf.from_pandas(ps) + assert_eq(ps.diff(), gs.diff()) + assert_eq(ps.diff(periods=2), gs.diff(periods=2))

diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index 4796402f14d..fb6e35f4f58 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -14,27 +14,6 @@ # -@cuda.jit -def gpu_diff(in_col, out_col, out_mask, N): - """Calculate the difference between values at positions i and i - N in an - array and store the output in a new array. - """ - i = cuda.grid(1) - - if N > 0: - if i < in_col.size: - out_col[i] = in_col[i] - in_col[i - N] - out_mask[i] = True - if i < N: - out_mask[i] = False - else: - if i <= (in_col.size + N): - out_col[i] = in_col[i] - in_col[i - N] - out_mask[i] = True - if i >= (in_col.size + N) and i < in_col.size: - out_mask[i] = False - - # Find segments

From 9409559433ae55b9c44d68ef52d13b79885a8fde Mon Sep 17 00:00:00 2001
From: Jason Lowe
Date: Mon, 18 Apr 2022 09:25:59 -0500
Subject: [PATCH 22/33] Rework JNI CMake to leverage rapids_find_package (#10649)

The JNI CMakeLists.txt has been fragile, looking for specific .a or .so libraries and header file locations, and will break again when libcudf moves to a pre-installed nvcomp 2.3 package, since it expects to find nvcomp in a very specific location today. This refactors the JNI CMakeLists.txt to leverage `rapids_find_package` to reuse the work performed in the libcudf build, and it also has the nice side-effect of avoiding the redundant pulls and builds of the Thrust and RMM repositories that happen today. Another side-effect is that the JNI will now automatically pull in the same RMM compile definitions that are used for libcudf, meaning the separate RMM logging flag for the JNI build is no longer necessary.
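A minimal sketch of the new pattern (illustrative only; it condenses the CMakeLists.txt changes in the diff below, and assumes `CUDF_CPP_BUILD_DIR` points at the libcudf build tree, as it does in the JNI build scripts):

```cmake
# Reuse libcudf's exported CMake package instead of hand-locating
# individual libraries and headers with find_library()/find_path().
set(cudf_ROOT "${CUDF_CPP_BUILD_DIR}")
rapids_find_package(cudf REQUIRED)

# Linking the imported target transitively supplies the include paths and
# compile definitions (including RMM's) that libcudf itself was built with.
target_link_libraries(cudfjni PUBLIC cudf::cudf)
```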
Similarly, it's no longer necessary to explicitly specify to the JNI build which type of Arrow library to use (i.e., static or dynamic); it will automatically use whichever Arrow library was built by libcudf.

Authors: - Jason Lowe (https://github.com/jlowe)

Approvers: - Robert (Bobby) Evans (https://github.com/revans2) - Gera Shegalov (https://github.com/gerashegalov)

URL: https://github.com/rapidsai/cudf/pull/10649
---
build.sh | 2 - java/README.md | 10 --- java/ci/build-in-docker.sh | 1 - java/pom.xml | 4 - java/src/main/native/CMakeLists.txt | 132 ++++------------------------ 5 files changed, 17 insertions(+), 132 deletions(-)

diff --git a/build.sh b/build.sh index e1d6df016dd..48182ca1a6f 100755 --- a/build.sh +++ b/build.sh @@ -148,10 +148,8 @@ function buildLibCudfJniInDocker { -DCUDF_CPP_BUILD_DIR=$workspaceRepoDir/java/target/libcudf-cmake-build \ -DCUDA_STATIC_RUNTIME=ON \ -DPER_THREAD_DEFAULT_STREAM=ON \ - -DRMM_LOGGING_LEVEL=OFF \ -DUSE_GDS=ON \ -DGPU_ARCHS=${CUDF_CMAKE_CUDA_ARCHITECTURES} \ - -DCUDF_JNI_ARROW_STATIC=ON \ -DCUDF_JNI_LIBCUDF_STATIC=ON \ -Dtest=*,!CuFileTest" }

diff --git a/java/README.md b/java/README.md index afd69df11ef..ea1b9e3e4e4 100644 --- a/java/README.md +++ b/java/README.md @@ -75,16 +75,6 @@ If you decide to build without Docker and the build script, examining the cmake settings in the [Java CI build script](ci/build-in-docker.sh) can be helpful if you are encountering difficulties during the build. -## Dynamically Linking Arrow - -Since libcudf builds by default with a dynamically linked Arrow dependency, it may be -desirable to build the Java bindings without requiring a statically-linked Arrow to avoid -rebuilding an already built libcudf.so. To do so, specify the additional command-line flag -`-DCUDF_JNI_ARROW_STATIC=OFF` when building the Java bindings with Maven. However this will -result in a jar that requires the correct Arrow version to be available in the runtime -environment, and therefore is not recommended unless you are only performing local testing -within the libcudf build environment. - ## Statically Linking the CUDA Runtime If you use the default cmake options libcudart will be dynamically linked to libcudf and libcudfjni.
diff --git a/java/ci/build-in-docker.sh b/java/ci/build-in-docker.sh index d6a193fbeaf..d21010ba30e 100755 --- a/java/ci/build-in-docker.sh +++ b/java/ci/build-in-docker.sh @@ -78,7 +78,6 @@ BUILD_ARG="-Dmaven.repo.local=\"$WORKSPACE/.m2\"\ -DPER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS\ -DCUDA_STATIC_RUNTIME=$ENABLE_CUDA_STATIC_RUNTIME\ -DCUDF_JNI_LIBCUDF_STATIC=ON\ - -DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL\ -DUSE_GDS=$ENABLE_GDS -Dtest=*,!CuFileTest" if [ "$SIGN_FILE" == true ]; then diff --git a/java/pom.xml b/java/pom.xml index e2efed19636..50b6ca59440 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -165,10 +165,8 @@ OFF OFF OFF - INFO OFF ALL - ON OFF ${project.build.directory}/cmake-build 1.7.30 @@ -386,13 +384,11 @@ - - diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 9851102d011..3a375412bbd 100755 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -41,7 +41,6 @@ option(BUILD_TESTS "Configure CMake to build tests" ON) option(PER_THREAD_DEFAULT_STREAM "Build with per-thread default stream" OFF) option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) option(USE_GDS "Build with GPUDirect Storage (GDS)/cuFile support" OFF) -option(CUDF_JNI_ARROW_STATIC "Statically link Arrow" ON) option(CUDF_JNI_LIBCUDF_STATIC "Link with libcudf.a" OFF) message(VERBOSE "CUDF_JNI: Build with NVTX support: ${USE_NVTX}") @@ -50,7 +49,6 @@ message(VERBOSE "CUDF_JNI: Configure CMake to build tests: ${BUILD_TESTS}") message(VERBOSE "CUDF_JNI: Build with per-thread default stream: ${PER_THREAD_DEFAULT_STREAM}") message(VERBOSE "CUDF_JNI: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}") message(VERBOSE "CUDF_JNI: Build with GPUDirect Storage support: ${USE_GDS}") -message(VERBOSE "CUDF_JNI: Build with static Arrow library: ${CUDF_JNI_ARROW_STATIC}") message(VERBOSE "CUDF_JNI: Link with libcudf statically: ${CUDF_JNI_LIBCUDF_STATIC}") set(CUDF_SOURCE_DIR "${PROJECT_SOURCE_DIR}/../../../../cpp") @@ -93,67 +91,16 @@ endif() rapids_cmake_build_type("Release") # ################################################################################################## -# * Thrust/CUB -# ------------------------------------------------------------------------------------ -include(${CUDF_SOURCE_DIR}/cmake/thirdparty/get_thrust.cmake) +# * nvcomp------------------------------------------------------------------------------------------ -# ################################################################################################## -# * CUDF ------------------------------------------------------------------------------------------ - -set(CUDF_INCLUDE "${PROJECT_SOURCE_DIR}/../../../../cpp/include" - "${PROJECT_SOURCE_DIR}/../../../../cpp/src/" -) - -set(CUDF_LIB_HINTS HINTS "$ENV{CUDF_ROOT}" "$ENV{CUDF_ROOT}/lib" "$ENV{CONDA_PREFIX}/lib" - "${CUDF_CPP_BUILD_DIR}" -) - -find_library(CUDF_LIB "cudf" REQUIRED HINTS ${CUDF_LIB_HINTS}) - -# ################################################################################################## -# * ZLIB ------------------------------------------------------------------------------------------ - -# find zlib -rapids_find_package(ZLIB REQUIRED) +set(nvcomp_DIR "${CUDF_CPP_BUILD_DIR}/_deps/nvcomp-build") +rapids_find_package(nvcomp REQUIRED) # ################################################################################################## -# * RMM ------------------------------------------------------------------------------------------- +# * CUDF 
------------------------------------------------------------------------------------------ -include(${CUDF_SOURCE_DIR}/cmake/thirdparty/get_rmm.cmake) - -# ################################################################################################## -# * ARROW ----------------------------------------------------------------------------------------- - -find_path(ARROW_INCLUDE "arrow" HINTS "$ENV{ARROW_ROOT}/include" - "${CUDF_CPP_BUILD_DIR}/_deps/arrow-src/cpp/src" -) - -message(STATUS "ARROW: ARROW_INCLUDE set to ${ARROW_INCLUDE}") - -if(CUDF_JNI_ARROW_STATIC) - # Find static version of Arrow lib - set(CUDF_JNI_ARROW_LIBNAME "libarrow.a") -else() - set(CUDF_JNI_ARROW_LIBNAME "arrow") -endif() - -find_library( - ARROW_LIBRARY ${CUDF_JNI_ARROW_LIBNAME} REQUIRED - HINTS "$ENV{ARROW_ROOT}/lib" "${CUDF_CPP_BUILD_DIR}/_deps/arrow-build/release" - "${CUDF_CPP_BUILD_DIR}/_deps/arrow-build/debug" -) - -if(NOT ARROW_LIBRARY) - if(CUDF_JNI_ARROW_STATIC) - message( - FATAL_ERROR "Arrow static library not found. Was libcudf built with CUDF_USE_ARROW_STATIC=ON?" - ) - else() - message(FATAL_ERROR "Arrow dynamic library not found.") - endif() -else() - message(STATUS "ARROW: ARROW_LIBRARY set to ${ARROW_LIBRARY}") -endif() +set(cudf_ROOT "${CUDF_CPP_BUILD_DIR}") +rapids_find_package(cudf REQUIRED) # ################################################################################################## # * find JNI ------------------------------------------------------------------------------------- @@ -164,27 +111,6 @@ else() message(FATAL_ERROR "JDK with JNI not found, please check your settings.") endif() -# ################################################################################################## -# * nvcomp ---------------------------------------------------------------------------------------- - -find_path(NVCOMP_INCLUDE "nvcomp" HINTS "${CUDF_CPP_BUILD_DIR}/_deps/nvcomp-src/include" - "$ENV{CONDA_PREFIX}/include" -) - -message(STATUS "NVCOMP: NVCOMP_INCLUDE set to ${NVCOMP_INCLUDE}") - -set(CUDF_JNI_NVCOMP_LIBNAME "libnvcomp.a") -find_library( - NVCOMP_LIBRARY ${CUDF_JNI_NVCOMP_LIBNAME} REQUIRED HINTS "${CUDF_CPP_BUILD_DIR}/lib" - "$ENV{CONDA_PREFIX}/lib" -) - -if(NOT NVCOMP_LIBRARY) - message(FATAL_ERROR "nvcomp static library not found.") -else() - message(STATUS "NVCOMP: NVCOMP_LIBRARY set to ${NVCOMP_LIBRARY}") -endif() - # ################################################################################################## # * GDS/cufile ------------------------------------------------------------------------------------ @@ -238,17 +164,8 @@ endif() # * include paths --------------------------------------------------------------------------------- target_include_directories( - cudfjni - PUBLIC cudf::Thrust - "${LIBCUDACXX_INCLUDE}" - "${CUDAToolkit_INCLUDE_DIRS}" - "${NVCOMP_INCLUDE}" - "${CMAKE_BINARY_DIR}/include" - "${CMAKE_SOURCE_DIR}/include" - "${CMAKE_SOURCE_DIR}/src" - "${JNI_INCLUDE_DIRS}" - "${CUDF_INCLUDE}" - "${ARROW_INCLUDE}" + cudfjni PUBLIC "${CMAKE_BINARY_DIR}/include" "${CMAKE_SOURCE_DIR}/include" + "${CMAKE_SOURCE_DIR}/src" "${JNI_INCLUDE_DIRS}" ) # ################################################################################################## @@ -291,39 +208,24 @@ if(USE_GDS) POSITION_INDEPENDENT_CODE ON INTERFACE_POSITION_INDEPENDENT_CODE ON ) - target_include_directories( - cufilejni - PUBLIC "${LIBCUDACXX_INCLUDE}" "${CUDF_INCLUDE}" - PRIVATE "${cuFile_INCLUDE_DIRS}" - ) - target_link_libraries(cufilejni PRIVATE cudfjni rmm::rmm "${cuFile_LIBRARIES}") + 
target_include_directories(cufilejni PRIVATE "${cuFile_INCLUDE_DIRS}") + target_link_libraries(cufilejni PRIVATE cudfjni "${cuFile_LIBRARIES}") endif() -# ################################################################################################## -# * rmm logging level ----------------------------------------------------------------------------- - -set(RMM_LOGGING_LEVEL - "INFO" - CACHE STRING "Choose the logging level." -) -# Set the possible values of build type for cmake-gui -set_property( - CACHE RMM_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" "OFF" -) -message(STATUS "RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'.") - -target_compile_definitions(cudfjni PUBLIC SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${RMM_LOGGING_LEVEL}) - # ################################################################################################## # * link libraries -------------------------------------------------------------------------------- -set(CUDF_LINK ${CUDF_LIB}) +set(CUDF_LINK PUBLIC cudf::cudf) if(CUDF_JNI_LIBCUDF_STATIC) - set(CUDF_LINK -Wl,--whole-archive ${CUDF_LIB} -Wl,--no-whole-archive ZLIB::ZLIB) + # Whole-link libcudf.a into the shared library but not its dependencies + set(CUDF_LINK PRIVATE -Wl,--whole-archive cudf::cudf -Wl,--no-whole-archive PUBLIC cudf::cudf) endif() +# When nvcomp is installed we need to use nvcomp::nvcomp but from the cudf build directory it will +# just be nvcomp. target_link_libraries( - cudfjni PRIVATE ${CUDF_LINK} ${NVCOMP_LIBRARY} ${ARROW_LIBRARY} rmm::rmm CUDA::cuda_driver + cudfjni ${CUDF_LINK} PRIVATE $ + $ ) # ################################################################################################## From 45c003dc70790f02b044fb06e5f95679df6600de Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 18 Apr 2022 13:39:37 -0500 Subject: [PATCH 23/33] Fix list of testing requirements in setup.py. (#10678) The list of testing requirements was missing a comma in `setup.py`. This fixes it. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/10678 --- python/cudf/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 2ec9909dd6f..a447fcfe027 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -47,7 +47,8 @@ "pytest", "pytest-benchmark", "pytest-xdist", - "hypothesis" "mimesis", + "hypothesis", + "mimesis", "fastavro>=0.22.9", "python-snappy>=0.6.0", "pyorc", From c322cbac3ef31836f3327b1c048905ebdccdeec0 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Mon, 18 Apr 2022 13:42:39 -0500 Subject: [PATCH 24/33] Standardize imports. (#10680) This PR standardizes a few imports across the cudf code base. Changes include: - Removed usage of some non-standard "two letter" names. For example, `import numpy as np` is common, but `import pyorc as po` and `import fastavro as fa` are non-standard and not the style used by their documentation. I left `import cupy as cp`, since both `import cupy` and `import cupy as cp` are prevalent in the code base (the one exception that I changed was a file that had both `import cupy` and `import cupy as cp`). 
- Avoid the pattern `from some_package import x as x` -- just write `from some_package import x` - Fixed some `cimport`s - Always use `import datetime` instead of `import datetime as dt` to avoid conflicts with the many other `dt` names in our code (including local names that had the potential to shadow/overwrite the library's name) - Use `warnings.warn` rather than `from warnings import warn` for consistency across the library - Remove some legacy Python 2 compatibility Authors: - Bradley Dice (https://github.com/bdice) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/10680 --- python/cudf/cudf/_lib/column.pyx | 4 +- python/cudf/cudf/_lib/null_mask.pyx | 5 +- python/cudf/cudf/_lib/parquet.pyx | 4 +- python/cudf/cudf/_lib/rolling.pyx | 4 +- python/cudf/cudf/core/algorithms.py | 4 +- python/cudf/cudf/core/column/datetime.py | 6 +- python/cudf/cudf/core/column/decimal.py | 4 +- python/cudf/cudf/core/column/timedelta.py | 4 +- python/cudf/cudf/core/dataframe.py | 10 +-- python/cudf/cudf/core/subword_tokenizer.py | 6 +- python/cudf/cudf/tests/test_api_types.py | 18 ++--- python/cudf/cudf/tests/test_contains.py | 8 ++- python/cudf/cudf/tests/test_dataframe.py | 5 +- python/cudf/cudf/tests/test_datetime.py | 5 +- python/cudf/cudf/tests/test_duplicates.py | 4 +- python/cudf/cudf/tests/test_hdfs.py | 6 +- python/cudf/cudf/tests/test_orc.py | 83 ++++++++++++---------- python/cudf/cudf/tests/test_scalar.py | 8 +-- python/cudf/cudf/utils/dtypes.py | 6 +- python/cudf/cudf/utils/queryutils.py | 4 +- 20 files changed, 101 insertions(+), 97 deletions(-) diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 448a22425a4..8cbadfa19a5 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -7,7 +7,7 @@ import pandas as pd import rmm import cudf -import cudf._lib as libcudfxx +import cudf._lib as libcudf from cudf.api.types import is_categorical_dtype, is_list_dtype, is_struct_dtype from cudf.core.buffer import Buffer @@ -160,7 +160,7 @@ cdef class Column: if self.base_mask is None or self.offset == 0: self._mask = self.base_mask else: - self._mask = libcudfxx.null_mask.copy_bitmask(self) + self._mask = libcudf.null_mask.copy_bitmask(self) return self._mask @property diff --git a/python/cudf/cudf/_lib/null_mask.pyx b/python/cudf/cudf/_lib/null_mask.pyx index b6e26fe594f..ce83a6f0f18 100644 --- a/python/cudf/cudf/_lib/null_mask.pyx +++ b/python/cudf/cudf/_lib/null_mask.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from enum import Enum @@ -8,9 +8,6 @@ from libcpp.utility cimport move from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer from cudf._lib.column cimport Column - -import cudf._lib as libcudfxx - from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.null_mask cimport ( bitmask_allocation_size_bytes as cpp_bitmask_allocation_size_bytes, diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx index 8cb7dd942c1..e363ea875f0 100644 --- a/python/cudf/cudf/_lib/parquet.pyx +++ b/python/cudf/cudf/_lib/parquet.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. 
# cython: boundscheck = False @@ -17,7 +17,7 @@ except ImportError: import json import numpy as np -from cython.operator import dereference +from cython.operator cimport dereference from cudf.api.types import ( is_categorical_dtype, diff --git a/python/cudf/cudf/_lib/rolling.pyx b/python/cudf/cudf/_lib/rolling.pyx index b4b3384032c..a2cb115f668 100644 --- a/python/cudf/cudf/_lib/rolling.pyx +++ b/python/cudf/cudf/_lib/rolling.pyx @@ -1,6 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. - -from __future__ import print_function +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import pandas as pd diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py index 22a5666ef3f..d13c55dfcc0 100644 --- a/python/cudf/cudf/core/algorithms.py +++ b/python/cudf/cudf/core/algorithms.py @@ -1,5 +1,5 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. -from warnings import warn +import warnings import cupy as cp import numpy as np @@ -50,7 +50,7 @@ def factorize(values, sort=False, na_sentinel=-1, size_hint=None): raise NotImplementedError("na_sentinel can not be None.") if size_hint: - warn("size_hint is not applicable for cudf.factorize") + warnings.warn("size_hint is not applicable for cudf.factorize") return_cupy_array = isinstance(values, cp.ndarray) diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index fac8af652c1..375a19f5423 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -2,7 +2,7 @@ from __future__ import annotations -import datetime as dt +import datetime import locale import re from locale import nl_langinfo @@ -237,9 +237,9 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike: if isinstance(other, (cudf.Scalar, ColumnBase, cudf.DateOffset)): return other - if isinstance(other, dt.datetime): + if isinstance(other, datetime.datetime): other = np.datetime64(other) - elif isinstance(other, dt.timedelta): + elif isinstance(other, datetime.timedelta): other = np.timedelta64(other) elif isinstance(other, pd.Timestamp): other = other.to_datetime64() diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index f10e257d359..d8ddb3d8d1a 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -1,8 +1,8 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. +import warnings from decimal import Decimal from typing import Any, Sequence, Tuple, Union, cast -from warnings import warn import cupy as cp import numpy as np @@ -43,7 +43,7 @@ def as_decimal_column( isinstance(dtype, cudf.core.dtypes.DecimalDtype) and dtype.scale < self.dtype.scale ): - warn( + warnings.warn( "cuDF truncates when downcasting decimals to a lower scale. " "To round, use Series.round() or DataFrame.round()." 
) diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 15815427aca..810624e9f4e 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -2,7 +2,7 @@ from __future__ import annotations -import datetime as dt +import datetime from typing import Any, Sequence, cast import numpy as np @@ -211,7 +211,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: def normalize_binop_value(self, other) -> ColumnBinaryOperand: if isinstance(other, (ColumnBase, cudf.Scalar)): return other - if isinstance(other, dt.timedelta): + if isinstance(other, datetime.timedelta): other = np.timedelta64(other) elif isinstance(other, pd.Timestamp): other = other.to_datetime64() diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 8893b85c97c..24aa0d01b3c 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -5596,14 +5596,14 @@ def select_dtypes(self, include=None, exclude=None): @ioutils.doc_to_parquet() def to_parquet(self, path, *args, **kwargs): """{docstring}""" - from cudf.io import parquet as pq + from cudf.io import parquet - return pq.to_parquet(self, path, *args, **kwargs) + return parquet.to_parquet(self, path, *args, **kwargs) @ioutils.doc_to_feather() def to_feather(self, path, *args, **kwargs): """{docstring}""" - from cudf.io import feather as feather + from cudf.io import feather feather.to_feather(self, path, *args, **kwargs) @@ -5623,7 +5623,7 @@ def to_csv( **kwargs, ): """{docstring}""" - from cudf.io import csv as csv + from cudf.io import csv return csv.to_csv( self, @@ -5643,7 +5643,7 @@ def to_csv( @ioutils.doc_to_orc() def to_orc(self, fname, compression=None, *args, **kwargs): """{docstring}""" - from cudf.io import orc as orc + from cudf.io import orc orc.to_orc(self, fname, compression, *args, **kwargs) diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py index 782b74ef4a6..83cceff5c4c 100644 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ b/python/cudf/cudf/core/subword_tokenizer.py @@ -1,9 +1,9 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. 
from __future__ import annotations +import warnings from typing import Union -from warnings import warn import cupy as cp @@ -186,7 +186,7 @@ def __call__( "When truncation is not True, the behaviour currently differs " "from HuggingFace as cudf always returns overflowing tokens" ) - warn(warning_msg) + warnings.warn(warning_msg) if padding != "max_length": error_msg = ( diff --git a/python/cudf/cudf/tests/test_api_types.py b/python/cudf/cudf/tests/test_api_types.py index e7cf113f604..c2cd78f88a0 100644 --- a/python/cudf/cudf/tests/test_api_types.py +++ b/python/cudf/cudf/tests/test_api_types.py @@ -3,10 +3,10 @@ import numpy as np import pandas as pd import pytest -from pandas.api import types as ptypes +from pandas.api import types as pd_types import cudf -from cudf.api import types as types +from cudf.api import types @pytest.mark.parametrize( @@ -1035,11 +1035,13 @@ def test_is_decimal_dtype(obj, expect): ), ) def test_pandas_agreement(obj): - assert types.is_categorical_dtype(obj) == ptypes.is_categorical_dtype(obj) - assert types.is_numeric_dtype(obj) == ptypes.is_numeric_dtype(obj) - assert types.is_integer_dtype(obj) == ptypes.is_integer_dtype(obj) - assert types.is_integer(obj) == ptypes.is_integer(obj) - assert types.is_string_dtype(obj) == ptypes.is_string_dtype(obj) + assert types.is_categorical_dtype(obj) == pd_types.is_categorical_dtype( + obj + ) + assert types.is_numeric_dtype(obj) == pd_types.is_numeric_dtype(obj) + assert types.is_integer_dtype(obj) == pd_types.is_integer_dtype(obj) + assert types.is_integer(obj) == pd_types.is_integer(obj) + assert types.is_string_dtype(obj) == pd_types.is_string_dtype(obj) @pytest.mark.parametrize( @@ -1115,7 +1117,7 @@ def test_pandas_agreement(obj): ), ) def test_pandas_agreement_scalar(obj): - assert types.is_scalar(obj) == ptypes.is_scalar(obj) + assert types.is_scalar(obj) == pd_types.is_scalar(obj) # TODO: Add test of interval. diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py index f06142f4cc9..15dfa111860 100644 --- a/python/cudf/cudf/tests/test_contains.py +++ b/python/cudf/cudf/tests/test_contains.py @@ -1,4 +1,6 @@ -from datetime import datetime as dt +# Copyright (c) 2019-2022, NVIDIA CORPORATION. 
+ +import datetime import numpy as np import pandas as pd @@ -41,12 +43,12 @@ def get_string_series(): testdata_all = [ ( cudf_date_series("20010101", "20020215", freq="400h"), - dt.strptime("2001-01-01", "%Y-%m-%d"), + datetime.datetime.strptime("2001-01-01", "%Y-%m-%d"), True, ), ( cudf_date_series("20010101", "20020215", freq="400h"), - dt.strptime("2000-01-01", "%Y-%m-%d"), + datetime.datetime.strptime("2000-01-01", "%Y-%m-%d"), False, ), (cudf_date_series("20010101", "20020215", freq="400h"), 20000101, False), diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 07261534777..2685524add4 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -13,7 +13,6 @@ from copy import copy import cupy -import cupy as cp import numpy as np import pandas as pd import pyarrow as pa @@ -7332,7 +7331,7 @@ def test_sample_axis_0( @pytest.mark.parametrize("replace", [True, False]) @pytest.mark.parametrize( - "random_state_lib", [cp.random.RandomState, np.random.RandomState] + "random_state_lib", [cupy.random.RandomState, np.random.RandomState] ) def test_sample_reproducibility(replace, random_state_lib): df = cudf.DataFrame({"a": cupy.arange(0, 1024)}) @@ -7384,7 +7383,7 @@ def test_oversample_without_replace(n, frac, axis): ) -@pytest.mark.parametrize("random_state", [None, cp.random.RandomState(42)]) +@pytest.mark.parametrize("random_state", [None, cupy.random.RandomState(42)]) def test_sample_unsupported_arguments(random_state): df = cudf.DataFrame({"float": [0.05, 0.2, 0.3, 0.2, 0.25]}) with pytest.raises( diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 964ac9e5457..8be338e787a 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -1,7 +1,6 @@ # Copyright (c) 2019-2022, NVIDIA CORPORATION. import datetime -import datetime as dt import operator import re @@ -219,8 +218,8 @@ def test_sort_datetime(): def test_issue_165(): df_pandas = pd.DataFrame() - start_date = dt.datetime.strptime("2000-10-21", "%Y-%m-%d") - data = [(start_date + dt.timedelta(days=x)) for x in range(6)] + start_date = datetime.datetime.strptime("2000-10-21", "%Y-%m-%d") + data = [(start_date + datetime.timedelta(days=x)) for x in range(6)] df_pandas["dates"] = data df_pandas["num"] = [1, 2, 3, 4, 5, 6] df_cudf = DataFrame.from_pandas(df_pandas) diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index e8a695570f0..a80208cfd7d 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -1,6 +1,6 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. 
-import itertools as it +import itertools import random import numpy as np @@ -280,7 +280,7 @@ def test_drop_duplicates_empty(df): @pytest.mark.parametrize("num_columns", [3, 4, 5]) def test_dataframe_drop_duplicates_numeric_method(num_columns): - comb = list(it.permutations(range(num_columns), num_columns)) + comb = list(itertools.permutations(range(num_columns), num_columns)) shuf = list(comb) random.Random(num_columns).shuffle(shuf) diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/test_hdfs.py index de4303a34a8..8730cb187b5 100644 --- a/python/cudf/cudf/tests/test_hdfs.py +++ b/python/cudf/cudf/tests/test_hdfs.py @@ -3,12 +3,12 @@ import os from io import BytesIO -import fastavro as fa +import fastavro import numpy as np import pandas as pd import pyarrow as pa import pytest -from pyarrow import orc as orc +from pyarrow import orc import cudf from cudf.testing._utils import assert_eq @@ -253,7 +253,7 @@ def test_read_avro(datadir, hdfs, test_url): got = cudf.read_avro(hd_fpath) with open(fname, mode="rb") as f: - expect = pd.DataFrame.from_records(fa.reader(f)) + expect = pd.DataFrame.from_records(fastavro.reader(f)) for col in expect.columns: expect[col] = expect[col].astype(got[col].dtype) diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 5082fb08b92..c3969bf6c14 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -11,7 +11,7 @@ import pandas as pd import pyarrow as pa import pyarrow.orc -import pyorc as po +import pyorc import pytest import cudf @@ -307,7 +307,7 @@ def test_orc_read_skiprows(tmpdir): {"a": [1, 0, 1, 0, None, 1, 1, 1, 0, None, 0, 0, 1, 1, 1, 1]}, dtype=pd.BooleanDtype(), ) - writer = po.Writer(buff, po.Struct(a=po.Boolean())) + writer = pyorc.Writer(buff, pyorc.Struct(a=pyorc.Boolean())) tuples = list( map( lambda x: (None,) if x[0] is pd.NA else x, @@ -931,29 +931,35 @@ def generate_list_struct_buff(size=100_000): buff = BytesIO() schema = { - "lvl3_list": po.Array(po.Array(po.Array(po.BigInt()))), - "lvl1_list": po.Array(po.BigInt()), - "lvl1_struct": po.Struct(**{"a": po.BigInt(), "b": po.BigInt()}), - "lvl2_struct": po.Struct( + "lvl3_list": pyorc.Array(pyorc.Array(pyorc.Array(pyorc.BigInt()))), + "lvl1_list": pyorc.Array(pyorc.BigInt()), + "lvl1_struct": pyorc.Struct( + **{"a": pyorc.BigInt(), "b": pyorc.BigInt()} + ), + "lvl2_struct": pyorc.Struct( **{ - "a": po.BigInt(), - "lvl1_struct": po.Struct( - **{"c": po.BigInt(), "d": po.BigInt()} + "a": pyorc.BigInt(), + "lvl1_struct": pyorc.Struct( + **{"c": pyorc.BigInt(), "d": pyorc.BigInt()} ), } ), - "list_nests_struct": po.Array( - po.Array(po.Struct(**{"a": po.BigInt(), "b": po.BigInt()})) + "list_nests_struct": pyorc.Array( + pyorc.Array( + pyorc.Struct(**{"a": pyorc.BigInt(), "b": pyorc.BigInt()}) + ) ), - "struct_nests_list": po.Struct( + "struct_nests_list": pyorc.Struct( **{ - "struct": po.Struct(**{"a": po.BigInt(), "b": po.BigInt()}), - "list": po.Array(po.BigInt()), + "struct": pyorc.Struct( + **{"a": pyorc.BigInt(), "b": pyorc.BigInt()} + ), + "list": pyorc.Array(pyorc.BigInt()), } ), } - schema = po.Struct(**schema) + schema = pyorc.Struct(**schema) lvl3_list = [ rd.choice( @@ -1019,7 +1025,7 @@ def generate_list_struct_buff(size=100_000): } ) - writer = po.Writer(buff, schema, stripe_size=1024) + writer = pyorc.Writer(buff, schema, stripe_size=1024) tuples = list( map( lambda x: (None,) if x[0] is pd.NA else x, @@ -1101,15 +1107,17 @@ def gen_map_buff(size=10000): buff = BytesIO() schema = { - 
"lvl1_map": po.Map(key=po.String(), value=po.BigInt()), - "lvl2_map": po.Map(key=po.String(), value=po.Array(po.BigInt())), - "lvl2_struct_map": po.Map( - key=po.String(), - value=po.Struct(**{"a": po.BigInt(), "b": po.BigInt()}), + "lvl1_map": pyorc.Map(key=pyorc.String(), value=pyorc.BigInt()), + "lvl2_map": pyorc.Map( + key=pyorc.String(), value=pyorc.Array(pyorc.BigInt()) + ), + "lvl2_struct_map": pyorc.Map( + key=pyorc.String(), + value=pyorc.Struct(**{"a": pyorc.BigInt(), "b": pyorc.BigInt()}), ), } - schema = po.Struct(**schema) + schema = pyorc.Struct(**schema) lvl1_map = [ rd.choice( @@ -1186,8 +1194,8 @@ def gen_map_buff(size=10000): "lvl2_struct_map": lvl2_struct_map, } ) - writer = po.Writer( - buff, schema, stripe_size=1024, compression=po.CompressionKind.NONE + writer = pyorc.Writer( + buff, schema, stripe_size=1024, compression=pyorc.CompressionKind.NONE ) tuples = list( map( @@ -1479,8 +1487,9 @@ def test_statistics_sum_overflow(): minint64 = np.iinfo(np.int64).min buff = BytesIO() - with po.Writer( - buff, po.Struct(a=po.BigInt(), b=po.BigInt(), c=po.BigInt()) + with pyorc.Writer( + buff, + pyorc.Struct(a=pyorc.BigInt(), b=pyorc.BigInt(), c=pyorc.BigInt()), ) as writer: writer.write((maxint64, minint64, minint64)) writer.write((1, -1, 1)) @@ -1497,20 +1506,20 @@ def test_statistics_sum_overflow(): def test_empty_statistics(): buff = BytesIO() - orc_schema = po.Struct( - a=po.BigInt(), - b=po.Double(), - c=po.String(), - d=po.Decimal(11, 2), - e=po.Date(), - f=po.Timestamp(), - g=po.Boolean(), - h=po.Binary(), - i=po.BigInt(), + orc_schema = pyorc.Struct( + a=pyorc.BigInt(), + b=pyorc.Double(), + c=pyorc.String(), + d=pyorc.Decimal(11, 2), + e=pyorc.Date(), + f=pyorc.Timestamp(), + g=pyorc.Boolean(), + h=pyorc.Binary(), + i=pyorc.BigInt(), # One column with non null value, else cudf/pyorc readers crash ) data = tuple([None] * (len(orc_schema.fields) - 1) + [1]) - with po.Writer(buff, orc_schema) as writer: + with pyorc.Writer(buff, orc_schema) as writer: writer.write(data) got = cudf.io.orc.read_orc_statistics([buff]) diff --git a/python/cudf/cudf/tests/test_scalar.py b/python/cudf/cudf/tests/test_scalar.py index e8382681820..79211456996 100644 --- a/python/cudf/cudf/tests/test_scalar.py +++ b/python/cudf/cudf/tests/test_scalar.py @@ -1,7 +1,6 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. import datetime -import datetime as dt import re from decimal import Decimal @@ -11,7 +10,6 @@ import pytest import cudf -from cudf import Scalar as pycudf_scalar from cudf._lib.copying import get_element from cudf.testing._utils import ( ALL_TYPES, @@ -297,9 +295,9 @@ def test_date_duration_scalars(value): actual = s.value - if isinstance(value, dt.datetime): + if isinstance(value, datetime.datetime): expected = np.datetime64(value) - elif isinstance(value, dt.timedelta): + elif isinstance(value, datetime.timedelta): expected = np.timedelta64(value) elif isinstance(value, pd.Timestamp): expected = value.to_datetime64() @@ -344,7 +342,7 @@ def test_scalar_invalid_implicit_conversion(cls, dtype): cls(pd.NA) except TypeError as e: with pytest.raises(TypeError, match=re.escape(str(e))): - slr = pycudf_scalar(None, dtype=dtype) + slr = cudf.Scalar(None, dtype=dtype) cls(slr) diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 4cd1738996f..35c6fdc73f8 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -1,6 +1,6 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. 
-import datetime as dt
+import datetime
 from collections import namedtuple
 from decimal import Decimal
 
@@ -259,9 +259,9 @@ def to_cudf_compatible_scalar(val, dtype=None):
     ) or cudf.api.types.is_string_dtype(dtype):
         dtype = "str"
 
-    if isinstance(val, dt.datetime):
+    if isinstance(val, datetime.datetime):
         val = np.datetime64(val)
-    elif isinstance(val, dt.timedelta):
+    elif isinstance(val, datetime.timedelta):
         val = np.timedelta64(val)
     elif isinstance(val, pd.Timestamp):
         val = val.to_datetime64()
diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py
index cdaaff6b2af..25b3d517e1c 100644
--- a/python/cudf/cudf/utils/queryutils.py
+++ b/python/cudf/cudf/utils/queryutils.py
@@ -1,7 +1,7 @@
 # Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
 import ast
-import datetime as dt
+import datetime
 from typing import Any, Dict
 
 import numpy as np
@@ -232,7 +232,7 @@ def query_execute(df, expr, callenv):
             name = name[len(ENVREF_PREFIX) :]
             try:
                 val = envdict[name]
-                if isinstance(val, dt.datetime):
+                if isinstance(val, datetime.datetime):
                     val = np.datetime64(val)
             except KeyError:
                 msg = "{!r} not defined in the calling environment"

From 6c79b5902d55bab599731a9bded7e89b9c4875c5 Mon Sep 17 00:00:00 2001
From: Bradley Dice
Date: Mon, 18 Apr 2022 18:20:38 -0500
Subject: [PATCH 25/33] Standardize usage of collections.abc. (#10679)

The codebase currently uses multiple ways of importing `collections.abc` classes in cudf. This can be problematic because the names in `collections.abc` can overlap with names in `typing`, so we need a way to disambiguate the two. This PR standardizes our imports so that `abc.` is always part of the name used to reference an abstract base class.

```python
from collections import abc

# Not "import collections.abc" or "from collections.abc import Mapping"

if isinstance(obj, abc.Mapping):
    pass
```

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/10679
---
 python/cudf/cudf/_fuzz_testing/json.py      |  2 +-
 python/cudf/cudf/_lib/csv.pyx               |  8 +++----
 python/cudf/cudf/_lib/json.pyx              | 10 ++++-----
 python/cudf/cudf/api/types.py               |  4 ++--
 python/cudf/cudf/comm/gpuarrow.py           |  5 ++---
 python/cudf/cudf/core/column/categorical.py |  6 +++--
 python/cudf/cudf/core/column_accessor.py    |  8 +++----
 python/cudf/cudf/core/cut.py                |  6 ++---
 python/cudf/cudf/core/dataframe.py          | 25 +++++++++++----------
 python/cudf/cudf/core/df_protocol.py        |  6 ++---
 python/cudf/cudf/core/groupby/groupby.py    |  6 ++---
 python/cudf/cudf/core/join/_join_helpers.py |  4 ++--
 python/cudf/cudf/core/multiindex.py         |  6 ++---
 python/cudf/cudf/core/reshape.py            |  9 ++++----
 python/cudf/cudf/core/series.py             |  2 +-
 python/cudf/cudf/testing/_utils.py          | 10 ++++-----
 16 files changed, 60 insertions(+), 57 deletions(-)

diff --git a/python/cudf/cudf/_fuzz_testing/json.py b/python/cudf/cudf/_fuzz_testing/json.py
index f850a7e79f9..29e0aeb7050 100644
--- a/python/cudf/cudf/_fuzz_testing/json.py
+++ b/python/cudf/cudf/_fuzz_testing/json.py
@@ -2,7 +2,7 @@
 
 import logging
 import random
-from collections import abc as abc
+from collections import abc
 
 import numpy as np
 
diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx
index 05ff32392fe..f1a75baa951 100644
--- a/python/cudf/cudf/_lib/csv.pyx
+++ b/python/cudf/cudf/_lib/csv.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
from libcpp cimport bool from libcpp.map cimport map @@ -19,9 +19,9 @@ import cudf from cudf._lib.cpp.types cimport size_type -import collections.abc as abc import errno import os +from collections import abc from enum import IntEnum from io import BytesIO, StringIO @@ -238,7 +238,7 @@ cdef csv_reader_options make_csv_reader_options( "`parse_dates`: dictionaries are unsupported") if not isinstance(parse_dates, abc.Iterable): raise NotImplementedError( - "`parse_dates`: non-lists are unsupported") + "`parse_dates`: an iterable is required") for col in parse_dates: if isinstance(col, str): c_parse_dates_names.push_back(str(col).encode()) @@ -279,7 +279,7 @@ cdef csv_reader_options make_csv_reader_options( ) csv_reader_options_c.set_dtypes(c_dtypes_list) csv_reader_options_c.set_parse_hex(c_hex_col_indexes) - elif isinstance(dtype, abc.Iterable): + elif isinstance(dtype, abc.Collection): c_dtypes_list.reserve(len(dtype)) for index, col_dtype in enumerate(dtype): if col_dtype in CSV_HEX_TYPE_MAP: diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index 48da83450d7..263d70afe26 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ b/python/cudf/cudf/_lib/json.pyx @@ -1,11 +1,11 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # cython: boundscheck = False -import collections.abc as abc import io import os +from collections import abc import cudf @@ -82,15 +82,15 @@ cpdef read_json(object filepaths_or_buffers, for k, v in dtype.items(): c_dtypes_map[str(k).encode()] = \ _get_cudf_data_type_from_dtype(v) - elif not isinstance(dtype, abc.Iterable): - raise TypeError("`dtype` must be 'list like' or 'dict'") - else: + elif isinstance(dtype, abc.Collection): is_list_like_dtypes = True c_dtypes_list.reserve(len(dtype)) for col_dtype in dtype: c_dtypes_list.push_back( _get_cudf_data_type_from_dtype( col_dtype)) + else: + raise TypeError("`dtype` must be 'list like' or 'dict'") cdef json_reader_options opts = move( json_reader_options.builder(make_source_info(filepaths_or_buffers)) diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py index fad2a973681..56b453dae95 100644 --- a/python/cudf/cudf/api/types.py +++ b/python/cudf/cudf/api/types.py @@ -4,7 +4,7 @@ from __future__ import annotations -from collections.abc import Sequence +from collections import abc from functools import wraps from inspect import isclass from typing import List, Union @@ -174,7 +174,7 @@ def is_list_like(obj): bool Return True if given object is list-like. """ - return isinstance(obj, (Sequence, np.ndarray)) and not isinstance( + return isinstance(obj, (abc.Sequence, np.ndarray)) and not isinstance( obj, (str, bytes) ) diff --git a/python/cudf/cudf/comm/gpuarrow.py b/python/cudf/cudf/comm/gpuarrow.py index f21eb4e4d8c..09b4cc5ffba 100644 --- a/python/cudf/cudf/comm/gpuarrow.py +++ b/python/cudf/cudf/comm/gpuarrow.py @@ -1,6 +1,5 @@ # Copyright (c) 2019-2022, NVIDIA CORPORATION. 
-from collections import OrderedDict -from collections.abc import Sequence +from collections import OrderedDict, abc import numpy as np import pandas as pd @@ -32,7 +31,7 @@ def __init__(self, source, schema=None): self._open(source, schema) -class GpuArrowReader(Sequence): +class GpuArrowReader(abc.Sequence): def __init__(self, schema, dev_ary): self._table = CudaRecordBatchStreamReader(dev_ary, schema).read_all() diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 911391ef984..7c33b9f81fe 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -3,7 +3,7 @@ from __future__ import annotations import pickle -from collections.abc import MutableSequence +from collections import abc from functools import cached_property from typing import ( TYPE_CHECKING, @@ -1379,7 +1379,9 @@ def view(self, dtype: Dtype) -> ColumnBase: ) @staticmethod - def _concat(objs: MutableSequence[CategoricalColumn]) -> CategoricalColumn: + def _concat( + objs: abc.MutableSequence[CategoricalColumn], + ) -> CategoricalColumn: # TODO: This function currently assumes it is being called from # column.concat_columns, at least to the extent that all the # preprocessing in that function has already been done. That should be diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 291e50386cc..24a2958ce97 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -3,7 +3,7 @@ from __future__ import annotations import itertools -from collections.abc import MutableMapping +from collections import abc from functools import cached_property, reduce from typing import ( TYPE_CHECKING, @@ -78,7 +78,7 @@ def _to_flat_dict(d): return {k: v for k, v in _to_flat_dict_inner(d)} -class ColumnAccessor(MutableMapping): +class ColumnAccessor(abc.MutableMapping): """ Parameters ---------- @@ -99,7 +99,7 @@ class ColumnAccessor(MutableMapping): def __init__( self, - data: Union[MutableMapping, ColumnAccessor] = None, + data: Union[abc.MutableMapping, ColumnAccessor] = None, multiindex: bool = False, level_names=None, ): @@ -213,7 +213,7 @@ def columns(self) -> Tuple[ColumnBase, ...]: return tuple(self.values()) @cached_property - def _grouped_data(self) -> MutableMapping: + def _grouped_data(self) -> abc.MutableMapping: """ If self.multiindex is True, return the underlying mapping as a nested mapping. diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 0fef6630248..2ec39043eb2 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -1,6 +1,6 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. 
-from collections.abc import Sequence +from collections import abc import cupy import numpy as np @@ -140,7 +140,7 @@ def cut( ) # bins can either be an int, sequence of scalars or an intervalIndex - if isinstance(bins, Sequence): + if isinstance(bins, abc.Sequence): if len(set(bins)) is not len(bins): if duplicates == "raise": raise ValueError( @@ -158,7 +158,7 @@ def cut( # create bins if given an int or single scalar if not isinstance(bins, pd.IntervalIndex): - if not isinstance(bins, (Sequence)): + if not isinstance(bins, (abc.Sequence)): if isinstance( x, (pd.Series, cudf.Series, np.ndarray, cupy.ndarray) ): diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 24aa0d01b3c..4ffacfa2ccc 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -9,8 +9,7 @@ import pickle import sys import warnings -from collections import defaultdict -from collections.abc import Iterable, Mapping, Sequence +from collections import abc, defaultdict from typing import ( Any, Callable, @@ -1857,7 +1856,7 @@ def _make_operands_and_index_for_binop( Optional[BaseIndex], ]: # Check built-in types first for speed. - if isinstance(other, (list, dict, Sequence, Mapping)): + if isinstance(other, (list, dict, abc.Sequence, abc.Mapping)): warnings.warn( "Binary operations between host objects such as " f"{type(other)} and cudf.DataFrame are deprecated and will be " @@ -1878,7 +1877,7 @@ def _make_operands_and_index_for_binop( if _is_scalar_or_zero_d_array(other): rhs = {name: other for name in self._data} - elif isinstance(other, (list, Sequence)): + elif isinstance(other, (list, abc.Sequence)): rhs = {name: o for (name, o) in zip(self._data, other)} elif isinstance(other, Series): rhs = dict(zip(other.index.values_host, other.values_host)) @@ -1907,7 +1906,7 @@ def _make_operands_and_index_for_binop( # the fill value. 
left_default = fill_value - if not isinstance(rhs, (dict, Mapping)): + if not isinstance(rhs, (dict, abc.Mapping)): return NotImplemented, None operands = { @@ -2961,7 +2960,9 @@ def agg(self, aggs, axis=None): if axis == 0 or axis is not None: raise NotImplementedError("axis not implemented yet") - if isinstance(aggs, Iterable) and not isinstance(aggs, (str, dict)): + if isinstance(aggs, abc.Iterable) and not isinstance( + aggs, (str, dict) + ): result = DataFrame() # TODO : Allow simultaneous pass for multi-aggregation as # a future optimization @@ -2997,13 +2998,13 @@ def agg(self, aggs, axis=None): f"'Series' object" ) result[key] = getattr(col, value)() - elif all([isinstance(val, Iterable) for val in aggs.values()]): + elif all([isinstance(val, abc.Iterable) for val in aggs.values()]): idxs = set() for val in aggs.values(): - if isinstance(val, Iterable): - idxs.update(val) - elif isinstance(val, str): + if isinstance(val, str): idxs.add(val) + elif isinstance(val, abc.Iterable): + idxs.update(val) idxs = sorted(list(idxs)) for agg in idxs: if agg is callable: @@ -3017,7 +3018,7 @@ def agg(self, aggs, axis=None): len(idxs), dtype=col.dtype, masked=True ) ans = cudf.Series(data=col_empty, index=idxs) - if isinstance(aggs.get(key), Iterable): + if isinstance(aggs.get(key), abc.Iterable): # TODO : Allow simultaneous pass for multi-aggregation # as a future optimization for agg in aggs.get(key): @@ -6157,7 +6158,7 @@ def _sample_axis_1( def _from_columns_like_self( self, columns: List[ColumnBase], - column_names: Iterable[str], + column_names: abc.Iterable[str], index_names: Optional[List[str]] = None, ) -> DataFrame: result = super()._from_columns_like_self( diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py index 4a30a78bf65..f4ce658bff3 100644 --- a/python/cudf/cudf/core/df_protocol.py +++ b/python/cudf/cudf/core/df_protocol.py @@ -1,7 +1,7 @@ # Copyright (c) 2021-2022, NVIDIA CORPORATION. -import collections import enum +from collections import abc from typing import ( Any, Dict, @@ -569,13 +569,13 @@ def get_columns(self) -> Iterable[_CuDFColumn]: ] def select_columns(self, indices: Sequence[int]) -> "_CuDFDataFrame": - if not isinstance(indices, collections.abc.Sequence): + if not isinstance(indices, abc.Sequence): raise ValueError("`indices` is not a sequence") return _CuDFDataFrame(self._df.iloc[:, indices]) def select_columns_by_name(self, names: Sequence[str]) -> "_CuDFDataFrame": - if not isinstance(names, collections.Sequence): + if not isinstance(names, abc.Sequence): raise ValueError("`names` is not a sequence") return _CuDFDataFrame( diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 40f8eda0e4f..76b0217df3b 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1,9 +1,9 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. 
-import collections import itertools import pickle import warnings +from collections import abc from functools import cached_property from typing import Any, Iterable, List, Tuple, Union @@ -1638,7 +1638,7 @@ def _handle_by_or_level(self, by=None, level=None): self._handle_series(by) elif isinstance(by, cudf.BaseIndex): self._handle_index(by) - elif isinstance(by, collections.abc.Mapping): + elif isinstance(by, abc.Mapping): self._handle_mapping(by) elif isinstance(by, Grouper): self._handle_grouper(by) @@ -1757,7 +1757,7 @@ def _is_multi_agg(aggs): Returns True if more than one aggregation is performed on any of the columns as specified in `aggs`. """ - if isinstance(aggs, collections.abc.Mapping): + if isinstance(aggs, abc.Mapping): return any(is_list_like(agg) for agg in aggs.values()) if is_list_like(aggs): return True diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py index ead0cd566d9..e1057c3b997 100644 --- a/python/cudf/cudf/core/join/_join_helpers.py +++ b/python/cudf/cudf/core/join/_join_helpers.py @@ -2,8 +2,8 @@ from __future__ import annotations -import collections import warnings +from collections import abc from typing import TYPE_CHECKING, Any, Tuple, cast import numpy as np @@ -166,7 +166,7 @@ def _match_categorical_dtypes_both( def _coerce_to_tuple(obj): - if isinstance(obj, collections.abc.Iterable) and not isinstance(obj, str): + if isinstance(obj, abc.Iterable) and not isinstance(obj, str): return tuple(obj) else: return (obj,) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 591ec582a3b..9b0b922a7d3 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -5,7 +5,7 @@ import itertools import numbers import pickle -from collections.abc import Sequence +from collections import abc from functools import cached_property from numbers import Integral from typing import Any, List, MutableMapping, Tuple, Union @@ -95,7 +95,7 @@ def __init__( if len(levels) == 0: raise ValueError("Must pass non-zero number of levels/codes") if not isinstance(codes, cudf.DataFrame) and not isinstance( - codes[0], (Sequence, np.ndarray) + codes[0], (abc.Sequence, np.ndarray) ): raise TypeError("Codes is not a Sequence of sequences") @@ -912,7 +912,7 @@ def deserialize(cls, header, frames): def __getitem__(self, index): flatten = isinstance(index, int) - if isinstance(index, (Integral, Sequence)): + if isinstance(index, (Integral, abc.Sequence)): index = np.array(index) elif isinstance(index, slice): start, stop, step = index.indices(len(self)) diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index a388e2560ee..f58c93aa0dc 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -1,6 +1,7 @@ # Copyright (c) 2018-2022, NVIDIA CORPORATION. 
 import itertools
+from collections import abc
 from typing import Dict, Optional
 
 import numpy as np
@@ -485,14 +486,14 @@ def melt(
     1  b        B      3
     2  c        B      5
     """
-    assert col_level in (None,)
+    if col_level is not None:
+        raise NotImplementedError("col_level != None is not supported yet.")
 
     # Arg cleaning
-    import collections
 
     # id_vars
     if id_vars is not None:
-        if not isinstance(id_vars, collections.abc.Sequence):
+        if not isinstance(id_vars, abc.Sequence):
             id_vars = [id_vars]
         id_vars = list(id_vars)
         missing = set(id_vars) - set(frame._column_names)
@@ -506,7 +507,7 @@ def melt(
 
     # value_vars
     if value_vars is not None:
-        if not isinstance(value_vars, collections.abc.Sequence):
+        if not isinstance(value_vars, abc.Sequence):
             value_vars = [value_vars]
             value_vars = list(value_vars)
         missing = set(value_vars) - set(frame._column_names)
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 20ba52afccd..f780b5e3895 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -6,7 +6,7 @@
 import inspect
 import pickle
 import warnings
-from collections import abc as abc
+from collections import abc
 from shutil import get_terminal_size
 from typing import Any, Dict, MutableMapping, Optional, Set, Tuple, Type, Union
 
diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 4dd9f434097..fbae7850e60 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -3,7 +3,7 @@
 import itertools
 import re
 import warnings
-from collections.abc import Mapping, Sequence
+from collections import abc
 from contextlib import contextmanager
 from decimal import Decimal
 
@@ -238,9 +238,9 @@ def _get_args_kwars_for_assert_exceptions(func_args_and_kwargs):
     else:
         if len(func_args_and_kwargs) == 1:
             func_args, func_kwargs = [], {}
-            if isinstance(func_args_and_kwargs[0], Sequence):
+            if isinstance(func_args_and_kwargs[0], abc.Sequence):
                 func_args = func_args_and_kwargs[0]
-            elif isinstance(func_args_and_kwargs[0], Mapping):
+            elif isinstance(func_args_and_kwargs[0], abc.Mapping):
                 func_kwargs = func_args_and_kwargs[0]
             else:
                 raise ValueError(
@@ -248,12 +248,12 @@ def _get_args_kwars_for_assert_exceptions(func_args_and_kwargs):
                     "either a Sequence or a Mapping"
                 )
         elif len(func_args_and_kwargs) == 2:
-            if not isinstance(func_args_and_kwargs[0], Sequence):
+            if not isinstance(func_args_and_kwargs[0], abc.Sequence):
                 raise ValueError(
                     "Positional argument at 1st position of "
                     "func_args_and_kwargs should be a sequence."
                 )
-            if not isinstance(func_args_and_kwargs[1], Mapping):
+            if not isinstance(func_args_and_kwargs[1], abc.Mapping):
                 raise ValueError(
                     "Key-word argument at 2nd position of "
                     "func_args_and_kwargs should be a dictionary mapping."

From 17d49faf19f9c5ff52a3ddeb7ddcee5545fa0f11 Mon Sep 17 00:00:00 2001
From: Alfred Xu
Date: Tue, 19 Apr 2022 09:05:28 +0800
Subject: [PATCH 26/33] Enable segmented_gather in Java package (#10669)

This PR enables the cuDF API `segmented_gather` in the Java package.
`segmented_gather` is essential for implementing Spark array functions such as
`arrays_zip` (https://github.com/NVIDIA/spark-rapids/issues/5229).
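
As a usage sketch (this example is illustrative and not part of the diff below;
the wrapper class name `SegmentedGatherExample` is invented here, while
`ColumnVector.fromLists` and the `HostColumnVector` type helpers are the
existing Java APIs exercised by the new unit test):

```java
import java.util.Arrays;

import ai.rapids.cudf.ColumnVector;
import ai.rapids.cudf.DType;
import ai.rapids.cudf.HostColumnVector.BasicType;
import ai.rapids.cudf.HostColumnVector.ListType;

public class SegmentedGatherExample {
  public static void main(String[] args) {
    // LIST<STRING> for the data, LIST<INT32> for the per-row gather maps.
    ListType strLists = new ListType(true, new BasicType(true, DType.STRING));
    ListType intLists = new ListType(false, new BasicType(false, DType.INT32));
    try (ColumnVector source = ColumnVector.fromLists(strLists,
             Arrays.asList("a", "b", "c"));
         ColumnVector gatherMap = ColumnVector.fromLists(intLists,
             Arrays.asList(2, 0, -1, 5));
         // Uses the default OutOfBoundsPolicy.NULLIFY: index 5 is out of
         // bounds for a 3-element list and becomes null; -1 counts back from
         // the end of the list.
         ColumnVector result = source.segmentedGather(gatherMap)) {
      // result holds a single list row: ["c", "a", "c", null]
    }
  }
}
```

Callers that can guarantee every index is in bounds may pass
`OutOfBoundsPolicy.DONT_CHECK` instead of relying on the `NULLIFY` default,
trading the bounds handling for undefined behavior on bad indices.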
Authors:
  - Alfred Xu (https://github.com/sperlingxx)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)

URL: https://github.com/rapidsai/cudf/pull/10669
---
 .../main/java/ai/rapids/cudf/ColumnView.java  | 28 +++++++++++++++++++
 java/src/main/native/src/ColumnViewJni.cpp    | 18 ++++++++++++
 .../java/ai/rapids/cudf/ColumnVectorTest.java | 25 +++++++++++++++++
 3 files changed, 71 insertions(+)

diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index ed3ac124216..b2c001c6737 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -1546,6 +1546,31 @@ public ColumnVector segmentedReduce(ColumnView offsets, SegmentedReductionAggreg
     }
   }
 
+  /**
+   * Segmented gather of the elements within a list element in each row of a list column.
+   * For each list of size N, valid gather map indices range over [-N, N); out-of-bounds
+   * indices produce nulls in the result.
+   * @param gatherMap ListColumnView carrying lists of integral indices that map each
+   * element of the corresponding source list to an element of the result list.
+   * @return the result.
+   */
+  public ColumnVector segmentedGather(ColumnView gatherMap) {
+    return segmentedGather(gatherMap, OutOfBoundsPolicy.NULLIFY);
+  }
+
+  /**
+   * Segmented gather of the elements within a list element in each row of a list column.
+   * @param gatherMap ListColumnView carrying lists of integral indices that map each
+   * element of the corresponding source list to an element of the result list.
+   * @param policy the OutOfBoundsPolicy: `DONT_CHECK` leads to undefined behavior;
+   * `NULLIFY` replaces out-of-bounds indices with nulls.
+   * @return the result.
+   */
+  public ColumnVector segmentedGather(ColumnView gatherMap, OutOfBoundsPolicy policy) {
+    return new ColumnVector(segmentedGather(getNativeView(), gatherMap.getNativeView(),
+        policy.equals(OutOfBoundsPolicy.NULLIFY)));
+  }
+
   /**
    * Do a reduction on the values in a list. The output type will be the type of the data column
    * of this list.
@@ -3998,6 +4023,9 @@ private static native long scan(long viewHandle, long aggregation, private static native long segmentedReduce(long dataViewHandle, long offsetsViewHandle, long aggregation, boolean includeNulls, int dtype, int scale) throws CudfException; + private static native long segmentedGather(long sourceColumnHandle, long gatherMapListHandle, + boolean isNullifyOutBounds) throws CudfException; + private static native long isNullNative(long viewHandle); private static native long isNanNative(long viewHandle); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 8c8e9b91e8d..6a294920d07 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -288,6 +289,23 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_segmentedReduce( CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_segmentedGather( + JNIEnv *env, jclass, jlong source_column, jlong gather_map_list, jboolean nullify_out_bounds) { + JNI_NULL_CHECK(env, source_column, "source column view is null", 0); + JNI_NULL_CHECK(env, gather_map_list, "gather map is null", 0); + try { + cudf::jni::auto_set_device(env); + auto const &src_col = + cudf::lists_column_view(*reinterpret_cast(source_column)); + auto const &gather_map = + cudf::lists_column_view(*reinterpret_cast(gather_map_list)); + auto out_bounds_policy = nullify_out_bounds ? cudf::out_of_bounds_policy::NULLIFY : + cudf::out_of_bounds_policy::DONT_CHECK; + return release_as_jlong(cudf::lists::segmented_gather(src_col, gather_map, out_bounds_policy)); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_scan(JNIEnv *env, jclass, jlong j_col_view, jlong j_agg, jboolean is_inclusive, jboolean include_nulls) { diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 58901d5743b..9189cd27303 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -20,6 +20,7 @@ import ai.rapids.cudf.ColumnView.FindOptions; import ai.rapids.cudf.HostColumnVector.*; +import com.google.common.collect.Lists; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -6259,4 +6260,28 @@ void testCopyWithBooleanColumnAsValidity() { }); assertTrue(x.getMessage().contains("Exemplar and validity columns must have the same size")); } + + @Test + void testSegmentedGather() { + HostColumnVector.DataType dt = new ListType(true, new BasicType(true, DType.STRING)); + try (ColumnVector source = ColumnVector.fromLists(dt, + Lists.newArrayList("a", "b", null, "c"), + null, + Lists.newArrayList(), + Lists.newArrayList(null, "A", "B", "C", "D")); + ColumnVector gatherMap = ColumnVector.fromLists( + new ListType(false, new BasicType(false, DType.INT32)), + Lists.newArrayList(-3, 0, 2, 3, 4), + Lists.newArrayList(), + Lists.newArrayList(1), + Lists.newArrayList(1, -4, 5, -1, -6)); + ColumnVector actual = source.segmentedGather(gatherMap); + ColumnVector expected = ColumnVector.fromLists(dt, + Lists.newArrayList("b", "a", null, "c", null), + null, + Lists.newArrayList((String) null), + Lists.newArrayList("A", "A", null, "D", null))) { + assertColumnsAreEqual(expected, actual); + } + } } From 9dc728a5c4edb23e5d531409a120889d319f9a98 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 19 Apr 2022 
01:34:55 -0700
Subject: [PATCH 27/33] Use Lists of Columns for Various Files (#10463)

This PR addresses many of the low-hanging fruit for
https://github.com/rapidsai/cudf/issues/10153. All APIs that previously
accepted Frames now accept lists of columns in the following files:

- hash.pyx
- interop.pyx
- join.pyx
- partitioning.pyx
- quantiles.pyx
- reshape.pyx
- search.pyx
- transform.pyx
- lists.pyx
- string/combine.pyx

This PR covers point 5 in the follow-ups to
https://github.com/rapidsai/cudf/pull/9889. Also, in `join.pyx`, the GIL was
not released when dispatching work to libcudf; this PR fixes that.

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/10463
---
 python/cudf/cudf/_lib/hash.pyx            |  32 ++---
 python/cudf/cudf/_lib/interop.pyx         |  65 ++++------
 python/cudf/cudf/_lib/join.pyx            |  51 +++-----
 python/cudf/cudf/_lib/lists.pyx           |  21 ++-
 python/cudf/cudf/_lib/partitioning.pyx    |  22 +---
 python/cudf/cudf/_lib/quantiles.pyx       |  14 +-
 python/cudf/cudf/_lib/reshape.pyx         |  23 ++--
 python/cudf/cudf/_lib/scalar.pyx          |  22 +---
 python/cudf/cudf/_lib/search.pyx          |  24 ++--
 python/cudf/cudf/_lib/strings/combine.pyx |   9 +-
 python/cudf/cudf/_lib/transform.pyx       |  20 +--
 python/cudf/cudf/core/column/column.py    |  20 +--
 python/cudf/cudf/core/column/lists.py     |   4 +-
 python/cudf/cudf/core/column/string.py    |   6 +-
 python/cudf/cudf/core/dataframe.py        |  85 ++++++++++---
 python/cudf/cudf/core/frame.py            | 148 +++++++--------------
 python/cudf/cudf/core/groupby/groupby.py  |   7 +-
 python/cudf/cudf/core/index.py            |   4 +-
 python/cudf/cudf/core/indexed_frame.py    |  58 +++++++--
 python/cudf/cudf/core/join/join.py        |  12 +-
 python/cudf/cudf/io/dlpack.py             |  13 +-
 python/cudf/cudf/tests/test_search.py     |  10 +-
 22 files changed, 299 insertions(+), 371 deletions(-)

diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx
index 301f571f5fb..8bb8ab92a48 100644
--- a/python/cudf/cudf/_lib/hash.pyx
+++ b/python/cudf/cudf/_lib/hash.pyx
@@ -14,16 +14,14 @@ from cudf._lib.cpp.hash cimport hash as cpp_hash, hash_id as cpp_hash_id
 from cudf._lib.cpp.partitioning cimport hash_partition as cpp_hash_partition
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
-def hash_partition(source_table, object columns_to_hash,
-                   int num_partitions, bool keep_index=True):
+def hash_partition(list source_columns, object columns_to_hash,
+                   int num_partitions):
     cdef vector[libcudf_types.size_type] c_columns_to_hash = columns_to_hash
     cdef int c_num_partitions = num_partitions
-    cdef table_view c_source_view = table_view_from_table(
-        source_table, not keep_index
-    )
+    cdef table_view c_source_view = table_view_from_columns(source_columns)
     cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
     with nogil:
@@ -36,27 +34,17 @@ def hash_partition(source_table, object columns_to_hash,
     )
 
     # Note that the offsets (`c_result.second`) may be empty when
-    # the original table (`source_table`) is empty. We need to
+    # the original table (`source_columns`) is empty. We need to
    # return a list of zeros in this case.
return ( - *data_from_unique_ptr( - move(c_result.first), - column_names=source_table._column_names, - index_names=( - source_table._index_names - if keep_index is True - else None - ) - - ), - list(c_result.second) if c_result.second.size() - else [0] * num_partitions + columns_from_unique_ptr(move(c_result.first)), + list(c_result.second) + if c_result.second.size() else [0] * num_partitions ) -def hash(source_table, str method, int seed=0): - cdef table_view c_source_view = table_view_from_table( - source_table, ignore_index=True) +def hash(list source_columns, str method, int seed=0): + cdef table_view c_source_view = table_view_from_columns(source_columns) cdef unique_ptr[column] c_result cdef cpp_hash_id c_hash_function if method == "murmur3": diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx index 06e287ee670..88c8b19ded0 100644 --- a/python/cudf/cudf/_lib/interop.pyx +++ b/python/cudf/cudf/_lib/interop.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import cudf @@ -20,12 +20,12 @@ from cudf._lib.cpp.interop cimport ( ) from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table +from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns def from_dlpack(dlpack_capsule): """ - Converts a DLPack Tensor PyCapsule into a cudf Frame object. + Converts a DLPack Tensor PyCapsule into a list of columns. DLPack Tensor PyCapsule is expected to have the name "dltensor". """ @@ -40,31 +40,25 @@ def from_dlpack(dlpack_capsule): cpp_from_dlpack(dlpack_tensor) ) - res = data_from_unique_ptr( - move(c_result), - column_names=range(0, c_result.get()[0].num_columns()) - ) + res = columns_from_unique_ptr(move(c_result)) dlpack_tensor.deleter(dlpack_tensor) return res -def to_dlpack(source_table): +def to_dlpack(list source_columns): """ - Converts a cudf Frame into a DLPack Tensor PyCapsule. + Converts a list of columns into a DLPack Tensor PyCapsule. DLPack Tensor PyCapsule will have the name "dltensor". """ - for column in source_table._columns: - if column.null_count: - raise ValueError( - "Cannot create a DLPack tensor with null values. \ - Input is required to have null count as zero." - ) + if any(column.null_count for column in source_columns): + raise ValueError( + "Cannot create a DLPack tensor with null values. \ + Input is required to have null count as zero." + ) cdef DLManagedTensor *dlpack_tensor - cdef table_view source_table_view = table_view_from_table( - source_table, ignore_index=True - ) + cdef table_view source_table_view = table_view_from_columns(source_columns) with nogil: dlpack_tensor = cpp_to_dlpack( @@ -110,17 +104,14 @@ cdef vector[column_metadata] gather_metadata(object metadata) except *: raise ValueError("Malformed metadata has been encountered") -def to_arrow(input_table, - object metadata, - bool keep_index=True): - """Convert from cudf Frame to PyArrow Table. +def to_arrow(list source_columns, object metadata): + """Convert a list of columns from + cudf Frame to a PyArrow Table. 
Parameters ---------- - input_table : cudf table - column_names : names for the pyarrow arrays - field_names : field names for nested type arrays - keep_index : whether index needs to be part of arrow table + source_columns : a list of columns to convert + metadata : a list of metadata, see `gather_metadata` for layout Returns ------- @@ -128,9 +119,7 @@ def to_arrow(input_table, """ cdef vector[column_metadata] cpp_metadata = gather_metadata(metadata) - cdef table_view input_table_view = ( - table_view_from_table(input_table, not keep_index) - ) + cdef table_view input_table_view = table_view_from_columns(source_columns) cdef shared_ptr[CTable] cpp_arrow_table with nogil: @@ -141,22 +130,16 @@ def to_arrow(input_table, return pyarrow_wrap_table(cpp_arrow_table) -def from_arrow( - object input_table, - object column_names=None, - object index_names=None -): - """Convert from PyArrow Table to cudf Frame. +def from_arrow(object input_table): + """Convert from PyArrow Table to a list of columns. Parameters ---------- input_table : PyArrow table - column_names : names for the cudf table data columns - index_names : names for the cudf table index columns Returns ------- - cudf Frame + A list of columns to construct Frame object """ cdef shared_ptr[CTable] cpp_arrow_table = ( pyarrow_unwrap_table(input_table) @@ -166,8 +149,4 @@ def from_arrow( with nogil: c_result = move(cpp_from_arrow(cpp_arrow_table.get()[0])) - return data_from_unique_ptr( - move(c_result), - column_names=column_names, - index_names=index_names - ) + return columns_from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx index 5921f06d36e..1baef266dab 100644 --- a/python/cudf/cudf/_lib/join.pyx +++ b/python/cudf/cudf/_lib/join.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from itertools import chain @@ -16,31 +16,25 @@ from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport data_type, size_type, type_id -from cudf._lib.utils cimport table_view_from_table +from cudf._lib.utils cimport table_view_from_columns # The functions below return the *gathermaps* that represent # the join result when joining on the keys `lhs` and `rhs`. 
-cpdef join(lhs, rhs, how=None): +cpdef join(list lhs, list rhs, how=None): cdef pair[cpp_join.gather_map_type, cpp_join.gather_map_type] c_result - cdef table_view c_lhs = table_view_from_table(lhs) - cdef table_view c_rhs = table_view_from_table(rhs) + cdef table_view c_lhs = table_view_from_columns(lhs) + cdef table_view c_rhs = table_view_from_columns(rhs) if how == "inner": - c_result = move(cpp_join.inner_join( - c_lhs, - c_rhs - )) + with nogil: + c_result = move(cpp_join.inner_join(c_lhs, c_rhs)) elif how == "left": - c_result = move(cpp_join.left_join( - c_lhs, - c_rhs - )) + with nogil: + c_result = move(cpp_join.left_join(c_lhs, c_rhs)) elif how == "outer": - c_result = move(cpp_join.full_join( - c_lhs, - c_rhs - )) + with nogil: + c_result = move(cpp_join.full_join(c_lhs, c_rhs)) else: raise ValueError(f"Invalid join type {how}") @@ -49,30 +43,23 @@ cpdef join(lhs, rhs, how=None): return left_rows, right_rows -cpdef semi_join(lhs, rhs, how=None): +cpdef semi_join(list lhs, list rhs, how=None): # left-semi and left-anti joins cdef cpp_join.gather_map_type c_result - cdef table_view c_lhs = table_view_from_table(lhs) - cdef table_view c_rhs = table_view_from_table(rhs) + cdef table_view c_lhs = table_view_from_columns(lhs) + cdef table_view c_rhs = table_view_from_columns(rhs) if how == "leftsemi": - c_result = move(cpp_join.left_semi_join( - c_lhs, - c_rhs - )) + with nogil: + c_result = move(cpp_join.left_semi_join(c_lhs, c_rhs)) elif how == "leftanti": - c_result = move(cpp_join.left_anti_join( - c_lhs, - c_rhs - )) + with nogil: + c_result = move(cpp_join.left_anti_join(c_lhs, c_rhs)) else: raise ValueError(f"Invalid join type {how}") cdef Column left_rows = _gather_map_as_column(move(c_result)) - return ( - left_rows, - None - ) + return left_rows, None cdef Column _gather_map_as_column(cpp_join.gather_map_type gather_map): diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 523686fafe6..e5a705ab603 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -42,7 +42,7 @@ from cudf.core.dtypes import ListDtype from cudf._lib.cpp.lists.contains cimport contains, index_of as cpp_index_of from cudf._lib.cpp.lists.extract cimport extract_list_element -from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table +from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns def count_elements(Column col): @@ -61,8 +61,10 @@ def count_elements(Column col): return result -def explode_outer(tbl, int explode_column_idx, bool ignore_index=False): - cdef table_view c_table_view = table_view_from_table(tbl, ignore_index) +def explode_outer( + list source_columns, int explode_column_idx +): + cdef table_view c_table_view = table_view_from_columns(source_columns) cdef size_type c_explode_column_idx = explode_column_idx cdef unique_ptr[table] c_result @@ -70,11 +72,7 @@ def explode_outer(tbl, int explode_column_idx, bool ignore_index=False): with nogil: c_result = move(cpp_explode_outer(c_table_view, c_explode_column_idx)) - return data_from_unique_ptr( - move(c_result), - column_names=tbl._column_names, - index_names=None if ignore_index else tbl._index_names - ) + return columns_from_unique_ptr(move(c_result)) def drop_list_duplicates(Column col, bool nulls_equal, bool nans_all_equal): @@ -197,18 +195,17 @@ def index_of(Column col, object py_search_key): return Column.from_unique_ptr(move(c_result)) -def concatenate_rows(tbl): +def concatenate_rows(list source_columns): cdef unique_ptr[column] 
c_result - cdef table_view c_table_view = table_view_from_table(tbl) + cdef table_view c_table_view = table_view_from_columns(source_columns) with nogil: c_result = move(cpp_concatenate_rows( c_table_view, )) - result = Column.from_unique_ptr(move(c_result)) - return result + return Column.from_unique_ptr(move(c_result)) def concatenate_list_elements(Column input_column, dropna=False): diff --git a/python/cudf/cudf/_lib/partitioning.pyx b/python/cudf/cudf/_lib/partitioning.pyx index e53667e7589..f2f5a92aca1 100644 --- a/python/cudf/cudf/_lib/partitioning.pyx +++ b/python/cudf/cudf/_lib/partitioning.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -11,21 +11,19 @@ from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.partitioning cimport partition as cpp_partition from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table +from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns from cudf._lib.stream_compaction import distinct_count as cpp_distinct_count cimport cudf._lib.cpp.types as libcudf_types -def partition(source_table, Column partition_map, - object num_partitions, bool keep_index=True): +def partition(list source_columns, Column partition_map, + object num_partitions): if num_partitions is None: num_partitions = cpp_distinct_count(partition_map, ignore_nulls=True) cdef int c_num_partitions = num_partitions - cdef table_view c_source_view = table_view_from_table( - source_table, not keep_index - ) + cdef table_view c_source_view = table_view_from_columns(source_columns) cdef column_view c_partition_map_view = partition_map.view() @@ -40,13 +38,5 @@ def partition(source_table, Column partition_map, ) return ( - *data_from_unique_ptr( - move(c_result.first), - column_names=source_table._column_names, - index_names=source_table._index_names if( - keep_index is True) - else None - - ), - list(c_result.second) + columns_from_unique_ptr(move(c_result.first)), list(c_result.second) ) diff --git a/python/cudf/cudf/_lib/quantiles.pyx b/python/cudf/cudf/_lib/quantiles.pyx index 497a71df89d..f65c29a55a8 100644 --- a/python/cudf/cudf/_lib/quantiles.pyx +++ b/python/cudf/cudf/_lib/quantiles.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -31,7 +31,7 @@ from cudf._lib.cpp.types cimport ( order_info, sorted, ) -from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table +from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns def quantile( @@ -74,14 +74,13 @@ def quantile( return Column.from_unique_ptr(move(c_result)) -def quantiles(source_table, +def quantiles(list source_columns, vector[double] q, object interp, object is_input_sorted, list column_order, list null_precedence): - cdef table_view c_input = table_view_from_table( - source_table, ignore_index=True) + cdef table_view c_input = table_view_from_columns(source_columns) cdef vector[double] c_q = q cdef interpolation c_interp = ( interp @@ -119,7 +118,4 @@ def quantiles(source_table, ) ) - return data_from_unique_ptr( - move(c_result), - column_names=source_table._column_names - ) + return columns_from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/reshape.pyx b/python/cudf/cudf/_lib/reshape.pyx index d64d0543892..29223947eea 100644 --- a/python/cudf/cudf/_lib/reshape.pyx +++ b/python/cudf/cudf/_lib/reshape.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -13,32 +13,25 @@ from cudf._lib.cpp.reshape cimport ( from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type -from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table +from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns -def interleave_columns(source_table): - cdef table_view c_view = table_view_from_table( - source_table, ignore_index=True) +def interleave_columns(list source_columns): + cdef table_view c_view = table_view_from_columns(source_columns) cdef unique_ptr[column] c_result with nogil: c_result = move(cpp_interleave_columns(c_view)) - return Column.from_unique_ptr( - move(c_result) - ) + return Column.from_unique_ptr(move(c_result)) -def tile(source_table, size_type count): +def tile(list source_columns, size_type count): cdef size_type c_count = count - cdef table_view c_view = table_view_from_table(source_table) + cdef table_view c_view = table_view_from_columns(source_columns) cdef unique_ptr[table] c_result with nogil: c_result = move(cpp_tile(c_view, c_count)) - return data_from_unique_ptr( - move(c_result), - column_names=source_table._column_names, - index_names=source_table._index_names - ) + return columns_from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 32d6cb2ea6d..a7acfa8f906 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -65,6 +65,7 @@ from cudf._lib.cpp.wrappers.timestamps cimport ( timestamp_us, ) from cudf._lib.utils cimport ( + columns_from_table_view, data_from_table_view, table_view_from_columns, table_view_from_table, @@ -361,8 +362,8 @@ cdef _set_struct_from_pydict(unique_ptr[scalar]& s, names=columns ) - data, _ = from_arrow(pyarrow_table, column_names=columns) - cdef table_view struct_view = table_view_from_columns(data.values()) + data = from_arrow(pyarrow_table) + cdef table_view struct_view = table_view_from_columns(data) s.reset( new struct_scalar(struct_view, valid) @@ -373,18 +374,10 @@ cdef _get_py_dict_from_struct(unique_ptr[scalar]& s): return cudf.NA cdef table_view struct_table_view 
= (s.get()).view() - columns = [str(i) for i in range(struct_table_view.num_columns())] + column_names = [str(i) for i in range(struct_table_view.num_columns())] - data, _ = data_from_table_view( - struct_table_view, - None, - column_names=columns - ) - to_arrow_table = cudf.core.frame.Frame( - cudf.core.column_accessor.ColumnAccessor(data) - ) - - python_dict = to_arrow(to_arrow_table, columns).to_pydict() + columns = columns_from_table_view(struct_table_view, None) + python_dict = to_arrow(columns, column_names).to_pydict() return {k: _nested_na_replace(python_dict[k])[0] for k in python_dict} @@ -415,9 +408,8 @@ cdef _get_py_list_from_list(unique_ptr[scalar]& s): cdef column_view list_col_view = (s.get()).view() cdef Column list_col = Column.from_column_view(list_col_view, None) - to_arrow_table = cudf.core.frame.Frame({"col": list_col}) - arrow_table = to_arrow(to_arrow_table, [["col", []]]) + arrow_table = to_arrow([list_col], [["col", []]]) result = arrow_table['col'].to_pylist() return _nested_na_replace(result) diff --git a/python/cudf/cudf/_lib/search.pyx b/python/cudf/cudf/_lib/search.pyx index f92ef753fc2..d5568f53231 100644 --- a/python/cudf/cudf/_lib/search.pyx +++ b/python/cudf/cudf/_lib/search.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -10,20 +10,20 @@ from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.utils cimport table_view_from_table +from cudf._lib.utils cimport table_view_from_columns def search_sorted( - table, values, side, ascending=True, na_position="last" + list source, list values, side, ascending=True, na_position="last" ): """Find indices where elements should be inserted to maintain order Parameters ---------- - table : Frame - Frame to search in - values : Frame - Frame of values to search for + source : list of columns + List of columns to search in + values : List of columns + List of value columns to search for side : str {‘left’, ‘right’} optional If ‘left’, the index of the first suitable location is given. If ‘right’, return the last such index @@ -33,10 +33,8 @@ def search_sorted( cdef vector[libcudf_types.null_order] c_null_precedence cdef libcudf_types.order c_order cdef libcudf_types.null_order c_null_order - cdef table_view c_table_data = table_view_from_table( - table, ignore_index=True) - cdef table_view c_values_data = table_view_from_table( - values, ignore_index=True) + cdef table_view c_table_data = table_view_from_columns(source) + cdef table_view c_values_data = table_view_from_columns(values) # Note: We are ignoring index columns here c_order = (libcudf_types.order.ASCENDING @@ -47,9 +45,9 @@ def search_sorted( if na_position=="last" else libcudf_types.null_order.BEFORE ) - c_column_order = vector[libcudf_types.order](table._num_columns, c_order) + c_column_order = vector[libcudf_types.order](len(source), c_order) c_null_precedence = vector[libcudf_types.null_order]( - table._num_columns, c_null_order + len(source), c_null_order ) if side == 'left': diff --git a/python/cudf/cudf/_lib/strings/combine.pyx b/python/cudf/cudf/_lib/strings/combine.pyx index 3b5ef33a668..eeb39f70728 100644 --- a/python/cudf/cudf/_lib/strings/combine.pyx +++ b/python/cudf/cudf/_lib/strings/combine.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. 
+# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -18,10 +18,10 @@ from cudf._lib.cpp.strings.combine cimport ( from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.utils cimport table_view_from_table +from cudf._lib.utils cimport table_view_from_columns -def concatenate(source_strings, +def concatenate(list source_strings, object sep, object na_rep): """ @@ -33,8 +33,7 @@ def concatenate(source_strings, cdef DeviceScalar narep = na_rep.device_value cdef unique_ptr[column] c_result - cdef table_view source_view = table_view_from_table( - source_strings, ignore_index=True) + cdef table_view source_view = table_view_from_columns(source_strings) cdef const string_scalar* scalar_separator = \ (separator.get_raw_ptr()) diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 96d25cb92c9..175150b6865 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import numpy as np from numba.np import numpy_support @@ -25,9 +25,9 @@ from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport bitmask_type, data_type, size_type, type_id from cudf._lib.types cimport underlying_type_t_type_id from cudf._lib.utils cimport ( + columns_from_unique_ptr, data_from_table_view, - data_from_unique_ptr, - table_view_from_table, + table_view_from_columns, ) @@ -123,21 +123,15 @@ def transform(Column input, op): return Column.from_unique_ptr(move(c_output)) -def table_encode(input): - cdef table_view c_input = table_view_from_table( - input, ignore_index=True) +def table_encode(list source_columns): + cdef table_view c_input = table_view_from_columns(source_columns) cdef pair[unique_ptr[table], unique_ptr[column]] c_result with nogil: c_result = move(libcudf_transform.encode(c_input)) - return ( - *data_from_unique_ptr( - move(c_result.first), - column_names=input._column_names, - ), - Column.from_unique_ptr(move(c_result.second)) - ) + return columns_from_unique_ptr( + move(c_result.first)), Column.from_unique_ptr(move(c_result.second)) def one_hot_encode(Column input_column, Column categories): diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index b2e3e42531b..5c9d8535798 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -229,13 +229,9 @@ def to_arrow(self) -> pa.Array: 4 ] """ - return libcudf.interop.to_arrow( - cudf.core.frame.Frame( - cudf.core.column_accessor.ColumnAccessor({"None": self}) - ), - [["None"]], - keep_index=False, - )["None"].chunk(0) + return libcudf.interop.to_arrow([self], [["None"]],)[ + "None" + ].chunk(0) @classmethod def from_arrow(cls, array: pa.Array) -> ColumnBase: @@ -280,12 +276,8 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase: } ) - codes = libcudf.interop.from_arrow( - indices_table, indices_table.column_names - )[0]["None"] - categories = libcudf.interop.from_arrow( - dictionaries_table, dictionaries_table.column_names - )[0]["None"] + codes = libcudf.interop.from_arrow(indices_table)[0] + categories = libcudf.interop.from_arrow(dictionaries_table)[0] return build_categorical_column( categories=categories, @@ -301,7 +293,7 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase: ): return 
cudf.core.column.IntervalColumn.from_arrow(array) - result = libcudf.interop.from_arrow(data, data.column_names)[0]["None"] + result = libcudf.interop.from_arrow(data)[0] return result._with_type_metadata(cudf_dtype_from_pa_type(array.type)) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 8578bfe8147..b383f7bc321 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -113,9 +113,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase: return NotImplemented if isinstance(other.dtype, ListDtype): if op == "__add__": - return concatenate_rows( - cudf.core.frame.Frame({0: self, 1: other}) - ) + return concatenate_rows([self, other]) else: raise NotImplementedError( "Lists concatenation for this operation is not yet" diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index d5d45c341d5..6f4a6334a1d 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -365,9 +365,7 @@ def cat(self, others=None, sep=None, na_rep=None): other_cols = _get_cols_list(self._parent, others) all_cols = [self._column] + other_cols data = libstrings.concatenate( - cudf.DataFrame( - {index: value for index, value in enumerate(all_cols)} - ), + all_cols, cudf.Scalar(sep), cudf.Scalar(na_rep, "str"), ) @@ -5531,7 +5529,7 @@ def _binaryop( return cast( "column.ColumnBase", libstrings.concatenate( - cudf.DataFrame._from_data(data={0: lhs, 1: rhs}), + [lhs, rhs], sep=cudf.Scalar(""), na_rep=cudf.Scalar(None, "str"), ), diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 4ffacfa2ccc..50255b07077 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3933,19 +3933,16 @@ def partition_by_hash(self, columns, nparts, keep_index=True): ------- partitioned: list of DataFrame """ - idx = ( - 0 - if (self._index is None or keep_index is False) - else self._index._num_columns - ) - key_indices = [self._data.names.index(k) + idx for k in columns] - output_data, output_index, offsets = libcudf.hash.hash_partition( - self, key_indices, nparts, keep_index + key_indices = [self._column_names.index(k) for k in columns] + output_columns, offsets = libcudf.hash.hash_partition( + [*self._columns], key_indices, nparts + ) + outdf = self._from_columns_like_self( + [*(self._index._columns if keep_index else ()), *output_columns], + self._column_names, + self._index_names if keep_index else None, ) - outdf = self.__class__._from_data(output_data, output_index) - outdf._copy_type_metadata(self, include_index=keep_index) - # Slice into partition return [outdf[s:e] for s, e in zip(offsets, offsets[1:] + [None])] @@ -5678,22 +5675,24 @@ def stack(self, level=-1, dropna=True): """ assert level in (None, -1) repeated_index = self.index.repeat(self.shape[1]) - name_index = cudf.DataFrame._from_data({0: self._column_names}).tile( - self.shape[0] + name_index = libcudf.reshape.tile( + [as_column(self._column_names)], self.shape[0] ) - new_index = list(repeated_index._columns) + [name_index._columns[0]] + new_index_columns = [*repeated_index._columns, *name_index] if isinstance(self._index, MultiIndex): index_names = self._index.names + [None] else: - index_names = [None] * len(new_index) + index_names = [None] * len(new_index_columns) new_index = MultiIndex.from_frame( - DataFrame(dict(zip(range(0, len(new_index)), new_index))), + DataFrame._from_data( + dict(zip(range(0, 
len(new_index_columns)), new_index_columns)) + ), names=index_names, ) # Collect datatypes and cast columns as that type common_type = np.result_type(*self.dtypes) - homogenized = DataFrame( + homogenized = DataFrame._from_data( { c: ( self._data[c].astype(common_type) @@ -5704,9 +5703,15 @@ def stack(self, level=-1, dropna=True): } ) - data_col = libcudf.reshape.interleave_columns(homogenized) + result = Series._from_data( + { + None: libcudf.reshape.interleave_columns( + [*homogenized._columns] + ) + }, + index=new_index, + ) - result = Series(data=data_col, index=new_index) if dropna: return result.dropna() else: @@ -6167,6 +6172,48 @@ def _from_columns_like_self( result._set_column_names_like(self) return result + @_cudf_nvtx_annotate + def interleave_columns(self): + """ + Interleave Series columns of a table into a single column. + + Converts the column major table `cols` into a row major column. + + Parameters + ---------- + cols : input Table containing columns to interleave. + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame({0: ['A1', 'A2', 'A3'], 1: ['B1', 'B2', 'B3']}) + >>> df + 0 1 + 0 A1 B1 + 1 A2 B2 + 2 A3 B3 + >>> df.interleave_columns() + 0 A1 + 1 B1 + 2 A2 + 3 B2 + 4 A3 + 5 B3 + dtype: object + + Returns + ------- + The interleaved columns as a single column + """ + if ("category" == self.dtypes).any(): + raise ValueError( + "interleave_columns does not support 'category' dtype." + ) + + return self._constructor_sliced._from_data( + {None: libcudf.reshape.interleave_columns([*self._columns])} + ) + def from_dataframe(df, allow_copy=False): return df_protocol.from_dataframe(df, allow_copy=allow_copy) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 806cdf14c71..d10f7c690bf 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -960,10 +960,16 @@ def scatter_by_map( f"ERROR: map_size must be >= {count} (got {map_size})." 
) - data, index, output_offsets = libcudf.partitioning.partition( - self, map_index, map_size, keep_index + partitioned_columns, output_offsets = libcudf.partitioning.partition( + [*(self._index._columns if keep_index else ()), *self._columns], + map_index, + map_size, + ) + partitioned = self._from_columns_like_self( + partitioned_columns, + column_names=self._column_names, + index_names=self._index_names if keep_index else None, ) - partitioned = self.__class__._from_data(data, index) # due to the split limitation mentioned # here: https://github.com/rapidsai/cudf/issues/4607 @@ -973,9 +979,6 @@ def scatter_by_map( result = partitioned._split(output_offsets, keep_index=keep_index) - for frame in result: - frame._copy_type_metadata(self, include_index=keep_index) - if map_size: result += [ self._empty_like(keep_index) @@ -1274,20 +1277,18 @@ def _quantiles( libcudf.types.NullOrder[key] for key in null_precedence ] - result = self.__class__._from_data( - *libcudf.quantiles.quantiles( - self, + return self._from_columns_like_self( + libcudf.quantiles.quantiles( + [*self._columns], q, interpolation, is_sorted, column_order, null_precedence, - ) + ), + column_names=self._column_names, ) - result._copy_type_metadata(self) - return result - @_cudf_nvtx_annotate def rank( self, @@ -1466,30 +1467,33 @@ def from_arrow(cls, data): dict_indices_table = pa.table(dict_indices) data = data.drop(dict_indices_table.column_names) - cudf_indices_frame, _ = libcudf.interop.from_arrow( - dict_indices_table, dict_indices_table.column_names - ) + indices_columns = libcudf.interop.from_arrow(dict_indices_table) # as dictionary size can vary, it can't be a single table cudf_dictionaries_columns = { name: ColumnBase.from_arrow(dict_dictionaries[name]) for name in dict_dictionaries.keys() } - for name, codes in cudf_indices_frame.items(): - cudf_category_frame[name] = build_categorical_column( + cudf_category_frame = { + name: build_categorical_column( cudf_dictionaries_columns[name], codes, mask=codes.base_mask, size=codes.size, ordered=dict_ordered[name], ) + for name, codes in zip( + dict_indices_table.column_names, indices_columns + ) + } # Handle non-dict arrays - cudf_non_category_frame = ( - {} - if data.num_columns == 0 - else libcudf.interop.from_arrow(data, data.column_names)[0] - ) + cudf_non_category_frame = { + name: col + for name, col in zip( + data.column_names, libcudf.interop.from_arrow(data) + ) + } result = {**cudf_non_category_frame, **cudf_category_frame} @@ -2027,76 +2031,6 @@ def notnull(self): # Alias for notnull notna = notnull - @_cudf_nvtx_annotate - def interleave_columns(self): - """ - Interleave Series columns of a table into a single column. - - Converts the column major table `cols` into a row major column. - - Parameters - ---------- - cols : input Table containing columns to interleave. - - Examples - -------- - >>> df = DataFrame([['A1', 'A2', 'A3'], ['B1', 'B2', 'B3']]) - >>> df - 0 [A1, A2, A3] - 1 [B1, B2, B3] - >>> df.interleave_columns() - 0 A1 - 1 B1 - 2 A2 - 3 B2 - 4 A3 - 5 B3 - - Returns - ------- - The interleaved columns as a single column - """ - if ("category" == self.dtypes).any(): - raise ValueError( - "interleave_columns does not support 'category' dtype." - ) - - result = self._constructor_sliced( - libcudf.reshape.interleave_columns(self) - ) - - return result - - @_cudf_nvtx_annotate - def tile(self, count): - """ - Repeats the rows from `self` DataFrame `count` times to form a - new DataFrame. 
- - Parameters - ---------- - self : input Table containing columns to interleave. - count : Number of times to tile "rows". Must be non-negative. - - Examples - -------- - >>> df = Dataframe([[8, 4, 7], [5, 2, 3]]) - >>> count = 2 - >>> df.tile(df, count) - 0 1 2 - 0 8 4 7 - 1 5 2 3 - 0 8 4 7 - 1 5 2 3 - - Returns - ------- - The table containing the tiled "rows". - """ - result = self.__class__._from_data(*libcudf.reshape.tile(self, count)) - result._copy_type_metadata(self) - return result - @_cudf_nvtx_annotate def searchsorted( self, values, side="left", ascending=True, na_position="last" @@ -2166,12 +2100,24 @@ def searchsorted( scalar_flag = True if not isinstance(values, Frame): - values = as_column(values) - if values.dtype != self.dtype: - self = self.astype(values.dtype) - values = values.as_frame() + values = [as_column(values)] + else: + values = [*values._columns] + if len(values) != len(self._data): + raise ValueError("Mismatch number of columns to search for.") + + sources = [ + col + if is_dtype_equal(col.dtype, val.dtype) + else col.astype(val.dtype) + for col, val in zip(self._columns, values) + ] outcol = libcudf.search.search_sorted( - self, values, side, ascending=ascending, na_position=na_position + sources, + values, + side, + ascending=ascending, + na_position=na_position, ) # Retrun result as cupy array if the values is non-scalar @@ -2462,10 +2408,8 @@ def _split(self, splits): @_cudf_nvtx_annotate def _encode(self): - data, index, indices = libcudf.transform.table_encode(self) - for name, col in data.items(): - data[name] = col._with_type_metadata(self._data[name].dtype) - keys = self.__class__._from_data(data, index) + columns, indices = libcudf.transform.table_encode([*self._columns]) + keys = self._from_columns_like_self(columns) return keys, indices @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 76b0217df3b..249cb7f4343 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1105,16 +1105,11 @@ def _cov_or_corr(self, func, method_name): for i in range(0, len(cols_list), num_cols) ] - def combine_columns(gb_cov_corr, ys): - list_of_columns = [gb_cov_corr._data[y] for y in ys] - frame = cudf.core.frame.Frame._from_columns(list_of_columns, ys) - return interleave_columns(frame) - # interleave: combines the correlation or covariance results for each # column-pair into a single column res = cudf.DataFrame._from_data( { - x: combine_columns(gb_cov_corr, ys) + x: interleave_columns([gb_cov_corr._data[y] for y in ys]) for ys, x in zip(cols_split, column_names) } ) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index aff13025e72..fd918f723fe 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -76,10 +76,10 @@ def _lexsorted_equal_range( sort_inds = None sort_vals = idx lower_bound = search_sorted( - sort_vals, key_as_table, side="left" + [*sort_vals._data.columns], [*key_as_table._columns], side="left" ).element_indexing(0) upper_bound = search_sorted( - sort_vals, key_as_table, side="right" + [*sort_vals._data.columns], [*key_as_table._columns], side="right" ).element_indexing(0) return lower_bound, upper_bound, sort_inds diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index ea722ec3968..ddb3082af96 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -818,7 +818,8 @@ def hash_values(self, 
method="murmur3"): # calculation, necessitating the unfortunate circular reference to the # child class here. return cudf.Series._from_data( - {None: libcudf.hash.hash(self, method)}, index=self.index + {None: libcudf.hash.hash([*self._columns], method)}, + index=self.index, ) def _gather( @@ -2690,21 +2691,52 @@ def _explode(self, explode_column: Any, ignore_index: bool): if not ignore_index and self._index is not None: explode_column_num += self._index.nlevels - data, index = libcudf.lists.explode_outer( - self, explode_column_num, ignore_index + exploded = libcudf.lists.explode_outer( + [ + *(self._index._data.columns if not ignore_index else ()), + *self._columns, + ], + explode_column_num, ) - res = self.__class__._from_data( - ColumnAccessor( - data, - multiindex=self._data.multiindex, - level_names=self._data._level_names, - ), - index=index, + + return self._from_columns_like_self( + exploded, + self._column_names, + self._index_names if not ignore_index else None, ) - if not ignore_index and self._index is not None: - res.index.names = self._index.names - return res + @_cudf_nvtx_annotate + def tile(self, count): + """Repeats the rows `count` times to form a new Frame. + + Parameters + ---------- + self : input Table containing columns to interleave. + count : Number of times to tile "rows". Must be non-negative. + + Examples + -------- + >>> import cudf + >>> df = cudf.Dataframe([[8, 4, 7], [5, 2, 3]]) + >>> count = 2 + >>> df.tile(df, count) + 0 1 2 + 0 8 4 7 + 1 5 2 3 + 0 8 4 7 + 1 5 2 3 + + Returns + ------- + The indexed frame containing the tiled "rows". + """ + return self._from_columns_like_self( + libcudf.reshape.tile( + [*self._index._columns, *self._columns], count + ), + column_names=self._column_names, + index_names=self._index_names, + ) @_cudf_nvtx_annotate @docutils.doc_apply( diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index c7e46cf0165..6a495ef8d9a 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -177,15 +177,15 @@ def __init__( ) def perform_merge(self) -> Frame: - left_join_cols = {} - right_join_cols = {} + left_join_cols = [] + right_join_cols = [] for left_key, right_key in zip(self._left_keys, self._right_keys): lcol = left_key.get(self.lhs) rcol = right_key.get(self.rhs) lcol_casted, rcol_casted = _match_join_keys(lcol, rcol, self.how) - left_join_cols[left_key.name] = lcol_casted - right_join_cols[left_key.name] = rcol_casted + left_join_cols.append(lcol_casted) + right_join_cols.append(rcol_casted) # Categorical dtypes must be cast back from the underlying codes # type that was returned by _match_join_keys. @@ -201,8 +201,8 @@ def perform_merge(self) -> Frame: right_key.set(self.rhs, rcol_casted, validate=False) left_rows, right_rows = self._joiner( - cudf.core.frame.Frame(left_join_cols), - cudf.core.frame.Frame(right_join_cols), + left_join_cols, + right_join_cols, how=self.how, ) diff --git a/python/cudf/cudf/io/dlpack.py b/python/cudf/cudf/io/dlpack.py index 00a2cb4cee2..644643db83c 100644 --- a/python/cudf/cudf/io/dlpack.py +++ b/python/cudf/cudf/io/dlpack.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. import cudf @@ -34,12 +34,13 @@ def from_dlpack(pycapsule_obj): tensor is row-major, transpose it before passing it to this function. 
""" - data, _ = libdlpack.from_dlpack(pycapsule_obj) + columns = libdlpack.from_dlpack(pycapsule_obj) + column_names = range(len(columns)) - if len(data) == 1: - return cudf.Series._from_data(data) + if len(columns) == 1: + return cudf.Series._from_columns(columns, column_names=column_names) else: - return cudf.DataFrame._from_data(data) + return cudf.DataFrame._from_columns(columns, column_names=column_names) @ioutils.doc_to_dlpack() @@ -91,4 +92,4 @@ def to_dlpack(cudf_obj): ) gdf = gdf.astype(dtype) - return libdlpack.to_dlpack(gdf) + return libdlpack.to_dlpack([*gdf._columns]) diff --git a/python/cudf/cudf/tests/test_search.py b/python/cudf/cudf/tests/test_search.py index cd029d02d79..d3433a589a7 100644 --- a/python/cudf/cudf/tests/test_search.py +++ b/python/cudf/cudf/tests/test_search.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. import cupy import numpy as np import pandas as pd @@ -73,6 +73,14 @@ def test_searchsorted_dataframe(side, multiindex): assert result == [2, 0, 4, 1] +def test_search_sorted_dataframe_unequal_number_of_columns(): + values = cudf.DataFrame({"a": [1, 0, 5, 1]}) + base = cudf.DataFrame({"a": [1, 0, 5, 1], "b": ["x", "z", "w", "a"]}) + + with pytest.raises(ValueError, match="Mismatch number of columns"): + base.searchsorted(values) + + @pytest.mark.parametrize("side", ["left", "right"]) def test_searchsorted_categorical(side): From ba1173d326fc540183dd2563cc7b4b66127cd222 Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Tue, 19 Apr 2022 18:50:04 +0530 Subject: [PATCH 28/33] cleanup benchmark includes (#10661) - remove cudf_test unnecessary includes - fix include order - remove benchmark/benchmark.h when benchmark fixture is included Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/10661 --- cpp/benchmarks/copying/contiguous_split.cu | 1 + cpp/benchmarks/copying/copy_if_else.cpp | 1 - cpp/benchmarks/hashing/hash.cpp | 1 - cpp/benchmarks/io/csv/csv_reader.cpp | 2 -- cpp/benchmarks/io/csv/csv_writer.cpp | 2 -- cpp/benchmarks/io/cuio_common.hpp | 4 ++-- cpp/benchmarks/io/orc/orc_reader.cpp | 2 -- cpp/benchmarks/io/orc/orc_writer.cpp | 2 -- cpp/benchmarks/io/parquet/parquet_reader.cpp | 2 -- cpp/benchmarks/io/parquet/parquet_writer.cpp | 2 -- .../io/parquet/parquet_writer_chunks.cpp | 11 ++--------- cpp/benchmarks/io/text/multibyte_split.cpp | 5 +---- cpp/benchmarks/iterator/iterator.cu | 9 ++++----- cpp/benchmarks/join/join_common.hpp | 4 +--- cpp/benchmarks/merge/merge.cpp | 11 ++++------- cpp/benchmarks/null_mask/set_null_mask.cpp | 2 -- cpp/benchmarks/reduction/scan.cpp | 1 - cpp/benchmarks/replace/clamp.cpp | 1 - cpp/benchmarks/replace/nans.cpp | 1 - cpp/benchmarks/sort/rank.cpp | 13 +++---------- cpp/benchmarks/sort/sort.cpp | 11 ++--------- cpp/benchmarks/sort/sort_strings.cpp | 1 - cpp/benchmarks/sort/sort_structs.cpp | 5 ++--- cpp/benchmarks/string/case.cpp | 1 - cpp/benchmarks/string/combine.cpp | 2 -- cpp/benchmarks/string/contains.cpp | 1 - cpp/benchmarks/string/convert_datetime.cpp | 1 - cpp/benchmarks/string/convert_durations.cpp | 16 ++++++---------- cpp/benchmarks/string/convert_fixed_point.cpp | 6 ++---- cpp/benchmarks/string/convert_numerics.cpp | 6 ++---- cpp/benchmarks/string/copy.cu | 2 +- cpp/benchmarks/string/extract.cpp | 3 ++- 
cpp/benchmarks/string/factory.cu | 4 ++-- cpp/benchmarks/string/filter.cpp | 4 ++-- cpp/benchmarks/string/find.cpp | 4 ++-- cpp/benchmarks/string/repeat_strings.cpp | 1 - cpp/benchmarks/string/replace.cpp | 8 ++++---- cpp/benchmarks/string/replace_re.cpp | 4 ++-- cpp/benchmarks/string/split.cpp | 4 ++-- cpp/benchmarks/string/substring.cpp | 9 ++++----- cpp/benchmarks/string/translate.cpp | 8 ++++---- cpp/benchmarks/string/url_decode.cu | 8 ++------ cpp/benchmarks/text/ngrams.cpp | 2 -- cpp/benchmarks/text/normalize.cpp | 3 --- cpp/benchmarks/text/normalize_spaces.cpp | 3 --- cpp/benchmarks/text/replace.cpp | 7 ++++--- cpp/benchmarks/text/subword.cpp | 6 +++--- cpp/benchmarks/text/tokenize.cpp | 5 ++--- .../type_dispatcher/type_dispatcher.cu | 2 -- 49 files changed, 68 insertions(+), 146 deletions(-) diff --git a/cpp/benchmarks/copying/contiguous_split.cu b/cpp/benchmarks/copying/contiguous_split.cu index 6b129a4a435..a61b18df8d1 100644 --- a/cpp/benchmarks/copying/contiguous_split.cu +++ b/cpp/benchmarks/copying/contiguous_split.cu @@ -17,6 +17,7 @@ #include #include #include + #include #include diff --git a/cpp/benchmarks/copying/copy_if_else.cpp b/cpp/benchmarks/copying/copy_if_else.cpp index 6f094aba680..6f355118f49 100644 --- a/cpp/benchmarks/copying/copy_if_else.cpp +++ b/cpp/benchmarks/copying/copy_if_else.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp index 1110b6fe9ef..9c0ef5b528d 100644 --- a/cpp/benchmarks/hashing/hash.cpp +++ b/cpp/benchmarks/hashing/hash.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/io/csv/csv_reader.cpp b/cpp/benchmarks/io/csv/csv_reader.cpp index 6f5e7160cd3..b61ba75ce6e 100644 --- a/cpp/benchmarks/io/csv/csv_reader.cpp +++ b/cpp/benchmarks/io/csv/csv_reader.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp b/cpp/benchmarks/io/csv/csv_writer.cpp index 65aa31c68dc..079df45b1d8 100644 --- a/cpp/benchmarks/io/csv/csv_writer.cpp +++ b/cpp/benchmarks/io/csv/csv_writer.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp index ff900d20e6f..8ea29684aae 100644 --- a/cpp/benchmarks/io/cuio_common.hpp +++ b/cpp/benchmarks/io/cuio_common.hpp @@ -16,12 +16,12 @@ #pragma once +#include + #include #include #include -#include - using cudf::io::io_type; #define RD_BENCHMARK_DEFINE_ALL_SOURCES(benchmark, name, type_or_group) \ diff --git a/cpp/benchmarks/io/orc/orc_reader.cpp b/cpp/benchmarks/io/orc/orc_reader.cpp index fc76fbe7603..7d6eb432b5b 100644 --- a/cpp/benchmarks/io/orc/orc_reader.cpp +++ b/cpp/benchmarks/io/orc/orc_reader.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp index f61dac7677b..4e7781b402a 100644 --- a/cpp/benchmarks/io/orc/orc_writer.cpp +++ b/cpp/benchmarks/io/orc/orc_writer.cpp @@ -14,8 +14,6 @@ * limitations under the License. 
*/ -#include - #include #include #include diff --git a/cpp/benchmarks/io/parquet/parquet_reader.cpp b/cpp/benchmarks/io/parquet/parquet_reader.cpp index b20534e8ac0..af7121d37dc 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp index d25fae42d0e..776121028ef 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp index 30ed245ed9a..e22696b9c01 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp @@ -14,21 +14,14 @@ * limitations under the License. */ -#include - -#include -#include - -#include -#include -#include - #include #include #include #include +#include #include +#include // to enable, run cmake with -DBUILD_BENCHMARKS=ON diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp index af6c2c5e030..d274f79a77c 100644 --- a/cpp/benchmarks/io/text/multibyte_split.cpp +++ b/cpp/benchmarks/io/text/multibyte_split.cpp @@ -19,10 +19,9 @@ #include #include -#include - #include +#include #include #include #include @@ -38,8 +37,6 @@ #include #include -using cudf::test::fixed_width_column_wrapper; - temp_directory const temp_dir("cudf_gbench"); enum data_chunk_source_type { diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu index 595775ddf00..5eaaec23211 100644 --- a/cpp/benchmarks/iterator/iterator.cu +++ b/cpp/benchmarks/iterator/iterator.cu @@ -14,13 +14,14 @@ * limitations under the License. */ -#include "../fixture/benchmark_fixture.hpp" -#include "../synchronization/synchronization.hpp" +#include +#include + +#include #include #include #include -#include #include @@ -31,8 +32,6 @@ #include #include -#include - #include template diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index 6ff2543cf7d..a031b4e656d 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -21,10 +21,8 @@ #include #include -#include -#include - #include +#include #include #include #include diff --git a/cpp/benchmarks/merge/merge.cpp b/cpp/benchmarks/merge/merge.cpp index 88354bcc731..82d89233a33 100644 --- a/cpp/benchmarks/merge/merge.cpp +++ b/cpp/benchmarks/merge/merge.cpp @@ -14,18 +14,15 @@ * limitations under the License. */ -#include +#include +#include +#include #include +#include #include #include -#include - -#include -#include -#include - #include #include diff --git a/cpp/benchmarks/null_mask/set_null_mask.cpp b/cpp/benchmarks/null_mask/set_null_mask.cpp index 2057951ff8d..429a62a2bfa 100644 --- a/cpp/benchmarks/null_mask/set_null_mask.cpp +++ b/cpp/benchmarks/null_mask/set_null_mask.cpp @@ -19,8 +19,6 @@ #include -#include - class SetNullmask : public cudf::benchmark { }; diff --git a/cpp/benchmarks/reduction/scan.cpp b/cpp/benchmarks/reduction/scan.cpp index aef4960789a..8c434465795 100644 --- a/cpp/benchmarks/reduction/scan.cpp +++ b/cpp/benchmarks/reduction/scan.cpp @@ -14,7 +14,6 @@ * limitations under the License. 
*/ -#include #include #include #include diff --git a/cpp/benchmarks/replace/clamp.cpp b/cpp/benchmarks/replace/clamp.cpp index d3a7415a478..e9a259d0c7b 100644 --- a/cpp/benchmarks/replace/clamp.cpp +++ b/cpp/benchmarks/replace/clamp.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/replace/nans.cpp b/cpp/benchmarks/replace/nans.cpp index e1b05bbc337..28ca798ebf0 100644 --- a/cpp/benchmarks/replace/nans.cpp +++ b/cpp/benchmarks/replace/nans.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp index c3c77ebd52f..0a5c1844c69 100644 --- a/cpp/benchmarks/sort/rank.cpp +++ b/cpp/benchmarks/sort/rank.cpp @@ -14,20 +14,13 @@ * limitations under the License. */ -#include -#include - -#include -#include -#include -#include -#include - -#include #include #include #include +#include +#include + class Rank : public cudf::benchmark { }; diff --git a/cpp/benchmarks/sort/sort.cpp b/cpp/benchmarks/sort/sort.cpp index 1a42daa5bb0..d7c33e7170e 100644 --- a/cpp/benchmarks/sort/sort.cpp +++ b/cpp/benchmarks/sort/sort.cpp @@ -14,19 +14,12 @@ * limitations under the License. */ -#include - -#include -#include -#include -#include -#include - -#include #include #include #include +#include + template class Sort : public cudf::benchmark { }; diff --git a/cpp/benchmarks/sort/sort_strings.cpp b/cpp/benchmarks/sort/sort_strings.cpp index 30a7aee043b..a58b9a4f6da 100644 --- a/cpp/benchmarks/sort/sort_strings.cpp +++ b/cpp/benchmarks/sort/sort_strings.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp index 81f7ad8a4c1..9b6c32940f5 100644 --- a/cpp/benchmarks/sort/sort_structs.cpp +++ b/cpp/benchmarks/sort/sort_structs.cpp @@ -16,11 +16,10 @@ #include -#include - -#include #include +#include + #include #include diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp index 0d74d0a6b7c..daa22d25677 100644 --- a/cpp/benchmarks/string/case.cpp +++ b/cpp/benchmarks/string/case.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/string/combine.cpp b/cpp/benchmarks/string/combine.cpp index a0cfcd15fe8..85c48e18ce1 100644 --- a/cpp/benchmarks/string/combine.cpp +++ b/cpp/benchmarks/string/combine.cpp @@ -16,7 +16,6 @@ #include "string_bench_args.hpp" -#include #include #include #include @@ -24,7 +23,6 @@ #include #include #include -#include class StringCombine : public cudf::benchmark { }; diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index 3a89b5646d7..6689e3611d1 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include diff --git a/cpp/benchmarks/string/convert_datetime.cpp b/cpp/benchmarks/string/convert_datetime.cpp index 3782fea1e36..488ce95d397 100644 --- a/cpp/benchmarks/string/convert_datetime.cpp +++ b/cpp/benchmarks/string/convert_datetime.cpp @@ -14,7 +14,6 @@ * limitations under the License. 
*/ -#include #include #include #include diff --git a/cpp/benchmarks/string/convert_durations.cpp b/cpp/benchmarks/string/convert_durations.cpp index 8af111d9a63..6e3a9e8faa9 100644 --- a/cpp/benchmarks/string/convert_durations.cpp +++ b/cpp/benchmarks/string/convert_durations.cpp @@ -13,21 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +#include +#include + +#include + #include #include #include #include -#include -#include -#include -#include - -#include - -#include -#include - #include #include diff --git a/cpp/benchmarks/string/convert_fixed_point.cpp b/cpp/benchmarks/string/convert_fixed_point.cpp index 05b87906eca..88657c409cd 100644 --- a/cpp/benchmarks/string/convert_fixed_point.cpp +++ b/cpp/benchmarks/string/convert_fixed_point.cpp @@ -14,11 +14,9 @@ * limitations under the License. */ -#include -#include - -#include #include +#include +#include #include #include diff --git a/cpp/benchmarks/string/convert_numerics.cpp b/cpp/benchmarks/string/convert_numerics.cpp index 71a23c76829..3025c32b888 100644 --- a/cpp/benchmarks/string/convert_numerics.cpp +++ b/cpp/benchmarks/string/convert_numerics.cpp @@ -14,11 +14,9 @@ * limitations under the License. */ -#include -#include - -#include #include +#include +#include #include #include diff --git a/cpp/benchmarks/string/copy.cu b/cpp/benchmarks/string/copy.cu index a8f9eb111fc..0280322a3a1 100644 --- a/cpp/benchmarks/string/copy.cu +++ b/cpp/benchmarks/string/copy.cu @@ -20,9 +20,9 @@ #include #include +#include #include #include -#include #include #include diff --git a/cpp/benchmarks/string/extract.cpp b/cpp/benchmarks/string/extract.cpp index b8d206386f5..4ff29285482 100644 --- a/cpp/benchmarks/string/extract.cpp +++ b/cpp/benchmarks/string/extract.cpp @@ -20,9 +20,10 @@ #include #include +#include + #include #include -#include #include diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu index 2e0bf4afb36..dde0b7e4424 100644 --- a/cpp/benchmarks/string/factory.cu +++ b/cpp/benchmarks/string/factory.cu @@ -16,14 +16,14 @@ #include "string_bench_args.hpp" -#include #include #include #include +#include + #include #include -#include #include #include diff --git a/cpp/benchmarks/string/filter.cpp b/cpp/benchmarks/string/filter.cpp index b39cf25bc91..064b824619e 100644 --- a/cpp/benchmarks/string/filter.cpp +++ b/cpp/benchmarks/string/filter.cpp @@ -14,17 +14,17 @@ * limitations under the License. */ -#include #include #include #include +#include + #include #include #include #include #include -#include #include #include diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index 55eb52c9b30..aaa7bd29b31 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -14,16 +14,16 @@ * limitations under the License. */ -#include #include #include #include +#include + #include #include #include #include -#include #include diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp index 9044db18522..835a437e3b5 100644 --- a/cpp/benchmarks/string/repeat_strings.cpp +++ b/cpp/benchmarks/string/repeat_strings.cpp @@ -16,7 +16,6 @@ #include "string_bench_args.hpp" -#include #include #include #include diff --git a/cpp/benchmarks/string/replace.cpp b/cpp/benchmarks/string/replace.cpp index 0a3607c64f0..10f6e2a19ed 100644 --- a/cpp/benchmarks/string/replace.cpp +++ b/cpp/benchmarks/string/replace.cpp @@ -14,20 +14,20 @@ * limitations under the License. 
*/ -#include +#include "string_bench_args.hpp" + #include #include #include +#include + #include #include #include -#include #include -#include "string_bench_args.hpp" - class StringReplace : public cudf::benchmark { }; diff --git a/cpp/benchmarks/string/replace_re.cpp b/cpp/benchmarks/string/replace_re.cpp index b9d04630837..148cbe678bd 100644 --- a/cpp/benchmarks/string/replace_re.cpp +++ b/cpp/benchmarks/string/replace_re.cpp @@ -16,14 +16,14 @@ #include "string_bench_args.hpp" -#include #include #include #include +#include + #include #include -#include class StringReplace : public cudf::benchmark { }; diff --git a/cpp/benchmarks/string/split.cpp b/cpp/benchmarks/string/split.cpp index ad25cfe54de..97eb0ba6dbf 100644 --- a/cpp/benchmarks/string/split.cpp +++ b/cpp/benchmarks/string/split.cpp @@ -14,15 +14,15 @@ * limitations under the License. */ -#include #include #include #include +#include + #include #include #include -#include #include diff --git a/cpp/benchmarks/string/substring.cpp b/cpp/benchmarks/string/substring.cpp index 2195cc56515..a18462385fc 100644 --- a/cpp/benchmarks/string/substring.cpp +++ b/cpp/benchmarks/string/substring.cpp @@ -16,21 +16,20 @@ #include "string_bench_args.hpp" -#include #include #include #include +#include + #include #include #include -#include -#include - -#include #include +#include + class StringSubstring : public cudf::benchmark { }; diff --git a/cpp/benchmarks/string/translate.cpp b/cpp/benchmarks/string/translate.cpp index 38c6ff9c701..2ed0ccceba6 100644 --- a/cpp/benchmarks/string/translate.cpp +++ b/cpp/benchmarks/string/translate.cpp @@ -16,19 +16,19 @@ #include "string_bench_args.hpp" -#include #include #include #include -#include -#include #include -#include +#include +#include #include +#include + class StringTranslate : public cudf::benchmark { }; diff --git a/cpp/benchmarks/string/url_decode.cu b/cpp/benchmarks/string/url_decode.cu index 7971d44536d..40bf2b090d4 100644 --- a/cpp/benchmarks/string/url_decode.cu +++ b/cpp/benchmarks/string/url_decode.cu @@ -14,21 +14,17 @@ * limitations under the License. */ -#include #include #include +#include + #include #include #include #include #include -#include -#include -#include -#include - #include #include #include diff --git a/cpp/benchmarks/text/ngrams.cpp b/cpp/benchmarks/text/ngrams.cpp index 157c27ae48a..b1e70517aea 100644 --- a/cpp/benchmarks/text/ngrams.cpp +++ b/cpp/benchmarks/text/ngrams.cpp @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include @@ -22,7 +21,6 @@ #include #include -#include #include diff --git a/cpp/benchmarks/text/normalize.cpp b/cpp/benchmarks/text/normalize.cpp index 2cc083f4ae8..3b58a7dd187 100644 --- a/cpp/benchmarks/text/normalize.cpp +++ b/cpp/benchmarks/text/normalize.cpp @@ -14,15 +14,12 @@ * limitations under the License. */ -#include #include #include #include #include #include -#include -#include #include diff --git a/cpp/benchmarks/text/normalize_spaces.cpp b/cpp/benchmarks/text/normalize_spaces.cpp index 3bd636d4aa9..1fe912e5740 100644 --- a/cpp/benchmarks/text/normalize_spaces.cpp +++ b/cpp/benchmarks/text/normalize_spaces.cpp @@ -14,7 +14,6 @@ * limitations under the License. 
*/ -#include #include #include #include @@ -22,8 +21,6 @@ #include #include -#include -#include #include diff --git a/cpp/benchmarks/text/replace.cpp b/cpp/benchmarks/text/replace.cpp index 3fbb6054d5c..a093cd767b3 100644 --- a/cpp/benchmarks/text/replace.cpp +++ b/cpp/benchmarks/text/replace.cpp @@ -14,17 +14,18 @@ * limitations under the License. */ -#include #include #include #include -#include -#include #include +#include + #include +#include + class TextReplace : public cudf::benchmark { }; diff --git a/cpp/benchmarks/text/subword.cpp b/cpp/benchmarks/text/subword.cpp index b8311324f70..d8357dcf92c 100644 --- a/cpp/benchmarks/text/subword.cpp +++ b/cpp/benchmarks/text/subword.cpp @@ -15,12 +15,12 @@ */ #include -#include -#include -#include #include +#include +#include + #include #include #include diff --git a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp index 4cb9c9e5271..fea1973c026 100644 --- a/cpp/benchmarks/text/tokenize.cpp +++ b/cpp/benchmarks/text/tokenize.cpp @@ -14,16 +14,15 @@ * limitations under the License. */ -#include #include #include #include #include +#include + #include #include -#include -#include #include #include diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu index aba78dad3fe..53dac455b04 100644 --- a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu +++ b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu @@ -17,8 +17,6 @@ #include #include -#include - #include #include #include From 08cd4284229cce35c6824b10ac9bcebc7ccc5514 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 19 Apr 2022 11:35:01 -0400 Subject: [PATCH 29/33] Add device_memory_resource parameter to create_string_vector_from_column (#10673) Adds the `rmm::mr::device_memory_resource` parameter to the `cudf::strings::detail::create_string_vector_from_column` function. This will be called from a new API in a later PR, and the resulting memory object will be returned to the user. Also found and removed a few related functions that are no longer necessary, and updated the callers accordingly, simplifying the logic there.
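For illustration only (this sketch is not part of the patch): a caller that owns a specific `device_memory_resource` can now route the returned vector's allocation through it. Only `create_string_vector_from_column` and its parameters come from this change (see the updated declaration in `cpp/include/cudf/strings/detail/utilities.hpp` below); the wrapper function and its name are hypothetical.

```cpp
// Hypothetical caller sketch. Only create_string_vector_from_column and its
// parameters are from this patch; the wrapper itself is illustrative.
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/strings_column_view.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

// Builds one string_view per row of `input`. The vector's device storage is
// allocated from `mr`, so the result can be handed back to a caller that owns
// that resource instead of being tied to the current device resource.
rmm::device_uvector<cudf::string_view> views_from_column(
  cudf::strings_column_view const& input,
  rmm::cuda_stream_view stream,
  rmm::mr::device_memory_resource* mr)
{
  return cudf::strings::detail::create_string_vector_from_column(input, stream, mr);
}
```

Existing callers are unaffected, since `mr` still defaults to `rmm::mr::get_current_device_resource()`.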
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/10673 --- cpp/include/cudf/strings/detail/scatter.cuh | 21 ++---- cpp/include/cudf/strings/detail/utilities.cuh | 22 ------ cpp/include/cudf/strings/detail/utilities.hpp | 31 +------- cpp/src/lists/copying/scatter_helper.cu | 40 ++-------- cpp/src/strings/utilities.cu | 75 ++++--------------- 5 files changed, 33 insertions(+), 156 deletions(-) diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh index b6aa22cc316..f167206f36b 100644 --- a/cpp/include/cudf/strings/detail/scatter.cuh +++ b/cpp/include/cudf/strings/detail/scatter.cuh @@ -15,14 +15,13 @@ */ #pragma once -#include -#include -#include -#include +#include #include #include +#include #include +#include #include #include @@ -71,17 +70,9 @@ std::unique_ptr scatter( // do the scatter thrust::scatter(rmm::exec_policy(stream), begin, end, scatter_map, target_vector.begin()); - // build offsets column - auto offsets_column = child_offsets_from_string_vector(target_vector, stream, mr); - // build chars column - auto chars_column = - child_chars_from_string_vector(target_vector, offsets_column->view(), stream, mr); - - return make_strings_column(target.size(), - std::move(offsets_column), - std::move(chars_column), - UNKNOWN_NULL_COUNT, - cudf::detail::copy_bitmask(target.parent(), stream, mr)); + // build the output column + auto sv_span = cudf::device_span(target_vector); + return make_strings_column(sv_span, string_view{nullptr, 0}, stream, mr); } } // namespace detail diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh index b9ea2d9ecff..bb7f29a4172 100644 --- a/cpp/include/cudf/strings/detail/utilities.cuh +++ b/cpp/include/cudf/strings/detail/utilities.cuh @@ -71,28 +71,6 @@ std::unique_ptr make_offsets_child_column( return offsets_column; } -/** - * @brief Creates an offsets column from a string_view iterator, and size. - * - * @tparam Iter Iterator type that returns string_view instances - * @param strings_begin Iterator to the beginning of the string_view sequence - * @param num_strings The number of string_view instances in the sequence - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return Child offsets column - */ -template -std::unique_ptr child_offsets_from_string_iterator( - Iter strings_begin, - cudf::size_type num_strings, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) -{ - auto transformer = [] __device__(string_view v) { return v.size_bytes(); }; - auto begin = thrust::make_transform_iterator(strings_begin, transformer); - return make_offsets_child_column(begin, begin + num_strings, stream, mr); -} - /** * @brief Copies input string data into a buffer and increments the pointer by the number of bytes * copied. diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp index 6424841ba86..c4f9e547148 100644 --- a/cpp/include/cudf/strings/detail/utilities.hpp +++ b/cpp/include/cudf/strings/detail/utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,36 +45,11 @@ std::unique_ptr create_chars_child_column( * * @param strings Strings column instance. * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned vector's device memory. * @return Device vector of string_views */ rmm::device_uvector create_string_vector_from_column( - cudf::strings_column_view const strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default); - -/** - * @brief Creates an offsets column from a string_view vector. - * - * @param strings Strings input data - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return Child offsets column - */ -std::unique_ptr child_offsets_from_string_vector( - cudf::device_span strings, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Creates a chars column from a string_view vector. - * - * @param strings Strings input data - * @param d_offsets Offsets vector for placing strings into column's memory. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return Child chars column - */ -std::unique_ptr child_chars_from_string_vector( - cudf::device_span strings, - column_view const& offsets, + cudf::strings_column_view const strings, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/lists/copying/scatter_helper.cu b/cpp/src/lists/copying/scatter_helper.cu index adc1b95a9e6..fecf6e1c1a1 100644 --- a/cpp/src/lists/copying/scatter_helper.cu +++ b/cpp/src/lists/copying/scatter_helper.cu @@ -21,8 +21,7 @@ #include #include #include -#include -#include +#include #include #include @@ -253,39 +252,16 @@ struct list_child_constructor { auto lists_column = actual_list_row.get_column(); auto lists_offsets_ptr = lists_column.offsets().template data(); auto child_strings_column = lists_column.child(); - auto string_offsets_ptr = - child_strings_column.child(cudf::strings_column_view::offsets_column_index) - .template data(); - auto string_chars_ptr = - child_strings_column.child(cudf::strings_column_view::chars_column_index) - .template data(); - - auto strings_offset = lists_offsets_ptr[row_index] + intra_index; - auto char_offset = string_offsets_ptr[strings_offset]; - auto char_ptr = string_chars_ptr + char_offset; - auto string_size = - string_offsets_ptr[strings_offset + 1] - string_offsets_ptr[strings_offset]; - return string_view{char_ptr, string_size}; + auto strings_offset = lists_offsets_ptr[row_index] + intra_index; + + return child_strings_column.is_null(strings_offset) + ? string_view{nullptr, 0} + : child_strings_column.template element(strings_offset); }); // string_views should now have been populated with source and target references. 
- - auto string_offsets = cudf::strings::detail::child_offsets_from_string_iterator( - string_views.begin(), string_views.size(), stream, mr); - - auto string_chars = cudf::strings::detail::child_chars_from_string_vector( - string_views, string_offsets->view(), stream, mr); - auto child_null_mask = - source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() - ? construct_child_nullmask( - list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); - - return cudf::make_strings_column(num_child_rows, - std::move(string_offsets), - std::move(string_chars), - child_null_mask.second, // Null count. - std::move(child_null_mask.first)); + auto sv_span = cudf::device_span(string_views); + return cudf::make_strings_column(sv_span, string_view{nullptr, 0}, stream, mr); } /** diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index d7cc72fdfff..a7ef2afb47f 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -28,12 +27,8 @@ #include #include -#include #include -#include -#include - -#include +#include namespace cudf { namespace strings { @@ -42,65 +37,27 @@ namespace detail { /** * @copydoc create_string_vector_from_column */ -rmm::device_uvector create_string_vector_from_column(cudf::strings_column_view strings, - rmm::cuda_stream_view stream) -{ - auto strings_column = column_device_view::create(strings.parent(), stream); - auto d_column = *strings_column; - - rmm::device_uvector strings_vector(strings.size(), stream); - string_view* d_strings = strings_vector.data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings.size(), - [d_column, d_strings] __device__(size_type idx) { - if (d_column.is_null(idx)) - d_strings[idx] = string_view(nullptr, 0); - else - d_strings[idx] = d_column.element(idx); - }); - return strings_vector; -} - -/** - * @copydoc child_offsets_from_string_vector - */ -std::unique_ptr child_offsets_from_string_vector( - cudf::device_span strings, +rmm::device_uvector create_string_vector_from_column( + cudf::strings_column_view const input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return child_offsets_from_string_iterator(strings.begin(), strings.size(), stream, mr); -} + auto d_strings = column_device_view::create(input.parent(), stream); -/** - * @copydoc child_chars_from_string_vector - */ -std::unique_ptr child_chars_from_string_vector(cudf::device_span strings, - column_view const& offsets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const d_strings = strings.data(); - auto const bytes = cudf::detail::get_value(offsets, strings.size(), stream); - auto const d_offsets = offsets.data(); - - // create column - auto chars_column = create_chars_child_column(bytes, stream, mr); - // get it's view - auto d_chars = chars_column->mutable_view().data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings.size(), - [d_strings, d_offsets, d_chars] __device__(size_type idx) { - string_view const d_str = d_strings[idx]; - memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes()); - }); - - return chars_column; + auto strings_vector = rmm::device_uvector(input.size(), stream, mr); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.size()), + 
     strings_vector.begin(),
+    [d_strings = *d_strings] __device__(size_type idx) {
+      return d_strings.is_null(idx) ? string_view{nullptr, 0} : d_strings.element(idx);
+    });
+
+  return strings_vector;
 }
 
-// std::unique_ptr create_chars_child_column(cudf::size_type total_bytes, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)

From 565f4743f797ff31afd402f715a4c57547cb6c66 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 19 Apr 2022 12:17:05 -0400
Subject: [PATCH 30/33] Split up mixed-join kernels source files (#10671)

Split up `mixed_join_kernels.cu` and `mixed_join_size_kernels.cu` to improve
overall build time. Currently these take about 30 minutes each on the gpuCI
build. Example of a recent build metrics report:
https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci-22-06/job/cudf/job/prb/job/cudf-cpu-cuda-build/CUDA=11.5/164/Build_20Metrics_20Report/

The nulls and non-nulls definitions are placed into separate source files.
The kernel source used by `mixed_join_kernels.cu` (both null and non-null) is
moved to `mixed_join_kernel.cuh`, and the nulls definition is moved to
`mixed_join_kernel_nulls.cu`. For consistency, `mixed_join_kernels.cu` is
renamed to just `mixed_join_kernel.cu` since it now contains only one
definition. The same pattern applies to `mixed_join_size_kernels.cu`, which is
split into `mixed_join_size_kernel.cuh`, `mixed_join_size_kernel_nulls.cu`,
and `mixed_join_size_kernel.cu`.

No function behavior or actual code generation has changed. The source code
has only moved into more source files to help parallelize and speed up the
build process. This improves compile time by 10% for a release build and ~25%
for a debug build.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Robert Maynard (https://github.com/robertmaynard)
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/10671
---
 cpp/CMakeLists.txt                            |  6 ++-
 cpp/src/join/mixed_join_kernel.cu             | 38 +++++++++++++++++++
 ..._join_kernels.cu => mixed_join_kernel.cuh} | 31 ++-------------
 cpp/src/join/mixed_join_kernel_nulls.cu       | 38 +++++++++++++++++++
 cpp/src/join/mixed_join_size_kernel.cu        | 36 ++++++++++++++++++
 ..._kernels.cu => mixed_join_size_kernel.cuh} | 27 -------------
 cpp/src/join/mixed_join_size_kernel_nulls.cu  | 36 ++++++++++++++++++
 7 files changed, 155 insertions(+), 57 deletions(-)
 create mode 100644 cpp/src/join/mixed_join_kernel.cu
 rename cpp/src/join/{mixed_join_kernels.cu => mixed_join_kernel.cuh} (82%)
 create mode 100644 cpp/src/join/mixed_join_kernel_nulls.cu
 create mode 100644 cpp/src/join/mixed_join_size_kernel.cu
 rename cpp/src/join/{mixed_join_size_kernels.cu => mixed_join_size_kernel.cuh} (80%)
 create mode 100644 cpp/src/join/mixed_join_size_kernel_nulls.cu

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index d9422edaa8f..dbc55827a32 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -344,10 +344,12 @@ add_library(
   src/join/join.cu
   src/join/join_utils.cu
   src/join/mixed_join.cu
-  src/join/mixed_join_kernels.cu
+  src/join/mixed_join_kernel.cu
+  src/join/mixed_join_kernel_nulls.cu
   src/join/mixed_join_kernels_semi.cu
   src/join/mixed_join_semi.cu
-  src/join/mixed_join_size_kernels.cu
+  src/join/mixed_join_size_kernel.cu
+  src/join/mixed_join_size_kernel_nulls.cu
   src/join/mixed_join_size_kernels_semi.cu
   src/join/semi_join.cu
   src/lists/contains.cu
diff --git a/cpp/src/join/mixed_join_kernel.cu b/cpp/src/join/mixed_join_kernel.cu
new file mode 100644
index 00000000000..f8912f0c7bd
--- /dev/null
+++ b/cpp/src/join/mixed_join_kernel.cu
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mixed_join_kernel.cuh"
+
+namespace cudf {
+namespace detail {
+
+template __global__ void mixed_join(
+  table_device_view left_table,
+  table_device_view right_table,
+  table_device_view probe,
+  table_device_view build,
+  row_equality const equality_probe,
+  join_kind const join_type,
+  cudf::detail::mixed_multimap_type::device_view hash_table_view,
+  size_type* join_output_l,
+  size_type* join_output_r,
+  cudf::ast::detail::expression_device_view device_expression_data,
+  cudf::size_type const* join_result_offsets,
+  bool const swap_tables);
+
+} // namespace detail
+
+} // namespace cudf
diff --git a/cpp/src/join/mixed_join_kernels.cu b/cpp/src/join/mixed_join_kernel.cuh
similarity index 82%
rename from cpp/src/join/mixed_join_kernels.cu
rename to cpp/src/join/mixed_join_kernel.cuh
index efaea841e45..f7081cc4d63 100644
--- a/cpp/src/join/mixed_join_kernels.cu
+++ b/cpp/src/join/mixed_join_kernel.cuh
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#pragma once
+
 #include
 #include
 #include
@@ -32,6 +34,7 @@
 namespace cudf {
 namespace detail {
+
 namespace cg = cooperative_groups;
 
 template
@@ -107,34 +110,6 @@ __launch_bounds__(block_size) __global__
   }
 }
 
-template __global__ void mixed_join(
-  table_device_view left_table,
-  table_device_view right_table,
-  table_device_view probe,
-  table_device_view build,
-  row_equality const equality_probe,
-  join_kind const join_type,
-  cudf::detail::mixed_multimap_type::device_view hash_table_view,
-  size_type* join_output_l,
-  size_type* join_output_r,
-  cudf::ast::detail::expression_device_view device_expression_data,
-  cudf::size_type const* join_result_offsets,
-  bool const swap_tables);
-
-template __global__ void mixed_join(
-  table_device_view left_table,
-  table_device_view right_table,
-  table_device_view probe,
-  table_device_view build,
-  row_equality const equality_probe,
-  join_kind const join_type,
-  cudf::detail::mixed_multimap_type::device_view hash_table_view,
-  size_type* join_output_l,
-  size_type* join_output_r,
-  cudf::ast::detail::expression_device_view device_expression_data,
-  cudf::size_type const* join_result_offsets,
-  bool const swap_tables);
-
 } // namespace detail
 
 } // namespace cudf
diff --git a/cpp/src/join/mixed_join_kernel_nulls.cu b/cpp/src/join/mixed_join_kernel_nulls.cu
new file mode 100644
index 00000000000..a911c62b349
--- /dev/null
+++ b/cpp/src/join/mixed_join_kernel_nulls.cu
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mixed_join_kernel.cuh"
+
+namespace cudf {
+namespace detail {
+
+template __global__ void mixed_join(
+  table_device_view left_table,
+  table_device_view right_table,
+  table_device_view probe,
+  table_device_view build,
+  row_equality const equality_probe,
+  join_kind const join_type,
+  cudf::detail::mixed_multimap_type::device_view hash_table_view,
+  size_type* join_output_l,
+  size_type* join_output_r,
+  cudf::ast::detail::expression_device_view device_expression_data,
+  cudf::size_type const* join_result_offsets,
+  bool const swap_tables);
+
+} // namespace detail
+
+} // namespace cudf
diff --git a/cpp/src/join/mixed_join_size_kernel.cu b/cpp/src/join/mixed_join_size_kernel.cu
new file mode 100644
index 00000000000..cf8236e2be2
--- /dev/null
+++ b/cpp/src/join/mixed_join_size_kernel.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mixed_join_size_kernel.cuh"
+
+namespace cudf {
+namespace detail {
+
+template __global__ void compute_mixed_join_output_size(
+  table_device_view left_table,
+  table_device_view right_table,
+  table_device_view probe,
+  table_device_view build,
+  row_equality const equality_probe,
+  join_kind const join_type,
+  cudf::detail::mixed_multimap_type::device_view hash_table_view,
+  ast::detail::expression_device_view device_expression_data,
+  bool const swap_tables,
+  std::size_t* output_size,
+  cudf::device_span matches_per_row);
+
+} // namespace detail
+} // namespace cudf
diff --git a/cpp/src/join/mixed_join_size_kernels.cu b/cpp/src/join/mixed_join_size_kernel.cuh
similarity index 80%
rename from cpp/src/join/mixed_join_size_kernels.cu
rename to cpp/src/join/mixed_join_size_kernel.cuh
index 22c71bfc33a..9eedc1a8015 100644
--- a/cpp/src/join/mixed_join_size_kernels.cu
+++ b/cpp/src/join/mixed_join_size_kernel.cuh
@@ -99,32 +99,5 @@ __launch_bounds__(block_size) __global__ void compute_mixed_join_output_size(
   if (threadIdx.x == 0) atomicAdd(output_size, block_counter);
 }
 
-template __global__ void compute_mixed_join_output_size(
-  table_device_view left_table,
-  table_device_view right_table,
-  table_device_view probe,
-  table_device_view build,
-  row_equality const equality_probe,
-  join_kind const join_type,
-  cudf::detail::mixed_multimap_type::device_view hash_table_view,
-  ast::detail::expression_device_view device_expression_data,
-  bool const swap_tables,
-  std::size_t* output_size,
-  cudf::device_span matches_per_row);
-
-template __global__ void compute_mixed_join_output_size(
-  table_device_view left_table,
-  table_device_view right_table,
-  table_device_view probe,
-  table_device_view build,
-  row_equality const equality_probe,
-  join_kind const join_type,
-  cudf::detail::mixed_multimap_type::device_view hash_table_view,
-  ast::detail::expression_device_view device_expression_data,
-  bool const swap_tables,
-  std::size_t* output_size,
-  cudf::device_span matches_per_row);
-
 } // namespace detail
-
 } // namespace cudf
diff --git a/cpp/src/join/mixed_join_size_kernel_nulls.cu b/cpp/src/join/mixed_join_size_kernel_nulls.cu
new file mode 100644
index 00000000000..f05d674b3b5
--- /dev/null
+++ b/cpp/src/join/mixed_join_size_kernel_nulls.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mixed_join_size_kernel.cuh"
+
+namespace cudf {
+namespace detail {
+
+template __global__ void compute_mixed_join_output_size(
+  table_device_view left_table,
+  table_device_view right_table,
+  table_device_view probe,
+  table_device_view build,
+  row_equality const equality_probe,
+  join_kind const join_type,
+  cudf::detail::mixed_multimap_type::device_view hash_table_view,
+  ast::detail::expression_device_view device_expression_data,
+  bool const swap_tables,
+  std::size_t* output_size,
+  cudf::device_span matches_per_row);
+
+} // namespace detail
+} // namespace cudf
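The build-time win in this patch comes from a standard CUDA/C++ technique: keep the kernel template in a header, and give each explicit instantiation its own translation unit so the compiler can build the variants in parallel. A minimal sketch of the pattern, with hypothetical file and kernel names (this is not the actual libcudf source):

// pattern.cuh - the kernel template, shared by all instantiations.
#pragma once

template <bool has_nulls>
__global__ void my_kernel(int const* in, int* out, int n)
{
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  // Hypothetical per-element work; a real kernel would branch on has_nulls.
  if (i < n) { out[i] = has_nulls ? in[i] + 1 : in[i]; }
}

// pattern_nulls.cu - this translation unit compiles only the nulls variant.
// #include "pattern.cuh"
template __global__ void my_kernel<true>(int const*, int*, int);

// pattern.cu - this translation unit compiles only the non-nulls variant.
// #include "pattern.cuh"
template __global__ void my_kernel<false>(int const*, int*, int);

Because each .cu file now holds exactly one explicit instantiation, the two expensive compilations can run concurrently, which is the source of the 10% and ~25% improvements quoted above.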
From 304711a98c5786901b6c939d3fa8ae0f174840dd Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Tue, 19 Apr 2022 13:37:28 -0400
Subject: [PATCH 31/33] Handle RuntimeError thrown by CUDA Python in `validate_setup` (#10653)

The call to `getDeviceCount()` can raise a `RuntimeError` when `libcuda.so`
is missing. We should handle that too in `validate_setup()`.

Authors:
  - Ashwin Srinath (https://github.com/shwina)

Approvers:
  - Charles Blackmon-Luca (https://github.com/charlesbluca)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/10653
---
 python/cudf/cudf/utils/gpu_utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/cudf/cudf/utils/gpu_utils.py b/python/cudf/cudf/utils/gpu_utils.py
index a722d350ef4..ab3adc1651a 100644
--- a/python/cudf/cudf/utils/gpu_utils.py
+++ b/python/cudf/cudf/utils/gpu_utils.py
@@ -55,6 +55,12 @@ def validate_setup():
             raise e
         # If there is no GPU detected, set `gpus_count` to -1
         gpus_count = -1
+    except RuntimeError as e:
+        # getDeviceCount() can raise a RuntimeError
+        # when ``libcuda.so`` is missing.
+        # We don't want this to propagate up to the user.
+        warnings.warn(str(e))
+        return
 
     if gpus_count > 0:
         # Cupy throws RunTimeException to get GPU count,
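The same failure mode exists at the driver level for native code: a missing or stub `libcuda.so` surfaces as an error code from the CUDA runtime rather than as a device count. A self-contained C++ sketch (not cudf code) of the equivalent defensive probe, downgrading any failure from `cudaGetDeviceCount` to a warning and zero usable GPUs:

#include <cuda_runtime_api.h>
#include <cstdio>

// Hypothetical helper: report the number of usable devices without ever
// aborting on a missing or broken driver installation.
int usable_device_count()
{
  int count = 0;
  cudaError_t status = cudaGetDeviceCount(&count);
  if (status != cudaSuccess) {
    // cudaErrorNoDevice, cudaErrorInsufficientDriver and similar
    // initialization failures all land here.
    std::fprintf(stderr, "GPU probe failed: %s\n", cudaGetErrorString(status));
    return 0;
  }
  return count;
}

int main()
{
  std::printf("usable GPUs: %d\n", usable_device_count());
  return 0;
}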
From 31a5f44a23135a46beee019fa21f54a695d719f9 Mon Sep 17 00:00:00 2001
From: Michael Wang
Date: Tue, 19 Apr 2022 10:47:22 -0700
Subject: [PATCH 32/33] Cython API Refactor: `transpose.pyx`, `sort.pyx` (#10675)

This PR contributes to #10153 by refactoring the Cython APIs in
`transpose.pyx` and `sort.pyx` to accept a list of columns as input. This PR
also includes several minor improvements in the code base; see comments below
for details.

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)

URL: https://github.com/rapidsai/cudf/pull/10675
---
 python/cudf/cudf/_lib/sort.pyx            | 101 ++++++++-----------
 python/cudf/cudf/_lib/transpose.pyx       |  60 ++----------
 python/cudf/cudf/_lib/utils.pyx           |   8 +-
 python/cudf/cudf/core/column/numerical.py |   4 +-
 python/cudf/cudf/core/dataframe.py        |  37 +++++--
 python/cudf/cudf/core/frame.py            | 113 +++++-----------------
 python/cudf/cudf/core/indexed_frame.py    |  87 +++++++++++++++++
 python/cudf/cudf/tests/test_dataframe.py  |  29 ------
 python/cudf/cudf/tests/test_series.py     |  29 ++++++
 9 files changed, 222 insertions(+), 246 deletions(-)

diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx
index 3aa0b35e90e..faa4279c1ca 100644
--- a/python/cudf/cudf/_lib/sort.pyx
+++ b/python/cudf/cudf/_lib/sort.pyx
@@ -1,6 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
-
-import pandas as pd
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -23,19 +21,19 @@ from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport null_order, null_policy, order
 from cudf._lib.sort cimport underlying_type_t_rank_method
-from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
 def is_sorted(
-    source_table, object ascending=None, object null_position=None
+    list source_columns, object ascending=None, object null_position=None
 ):
     """
     Checks whether the rows of a `table` are sorted in lexicographical
     order.
 
     Parameters
    ----------
-    source_table : Frame
-        Frame whose columns are to be checked for sort order
+    source_columns : list of columns
+        columns to be checked for sort order
     ascending : None or list-like of booleans
         None or list-like of boolean values indicating expected sort order of
         each column. If list-like, size of list-like must be len(columns). If
@@ -58,51 +56,39 @@
     cdef vector[null_order] null_precedence
 
     if ascending is None:
-        column_order = vector[order](
-            source_table._num_columns, order.ASCENDING
-        )
-    elif pd.api.types.is_list_like(ascending):
-        if len(ascending) != source_table._num_columns:
+        column_order = vector[order](len(source_columns), order.ASCENDING)
+    else:
+        if len(ascending) != len(source_columns):
             raise ValueError(
-                f"Expected a list-like of length {source_table._num_columns}, "
+                f"Expected a list-like of length {len(source_columns)}, "
                 f"got length {len(ascending)} for `ascending`"
             )
         column_order = vector[order](
-            source_table._num_columns, order.DESCENDING
+            len(source_columns), order.DESCENDING
        )
        for idx, val in enumerate(ascending):
            if val:
                column_order[idx] = order.ASCENDING
-    else:
-        raise TypeError(
-            f"Expected a list-like or None for `ascending`, got "
-            f"{type(ascending)}"
-        )
 
     if null_position is None:
         null_precedence = vector[null_order](
-            source_table._num_columns, null_order.AFTER
+            len(source_columns), null_order.AFTER
         )
-    elif pd.api.types.is_list_like(null_position):
-        if len(null_position) != source_table._num_columns:
+    else:
+        if len(null_position) != len(source_columns):
            raise ValueError(
-                f"Expected a list-like of length {source_table._num_columns}, "
+                f"Expected a list-like of length {len(source_columns)}, "
                f"got length {len(null_position)} for `null_position`"
            )
        null_precedence = vector[null_order](
-            source_table._num_columns, null_order.AFTER
+            len(source_columns), null_order.AFTER
        )
        for idx, val in enumerate(null_position):
            if val:
                null_precedence[idx] = null_order.BEFORE
-    else:
-        raise TypeError(
-            f"Expected a list-like or None for `null_position`, got "
-            f"{type(null_position)}"
-        )
 
     cdef bool c_result
-    cdef table_view source_table_view = table_view_from_table(source_table)
+    cdef table_view source_table_view = table_view_from_columns(source_columns)
     with nogil:
         c_result = cpp_is_sorted(
             source_table_view,
@@ -113,34 +99,34 @@
     return c_result
 
 
-def order_by(source_table, object ascending, str na_position):
+def order_by(list columns_from_table, object ascending, str na_position):
     """
-    Sorting the table ascending/descending
+    Get index to sort the table in ascending/descending order.
 
     Parameters
     ----------
-    source_table : table which will be sorted
-    ascending : list of boolean values which correspond to each column
+    columns_from_table : columns from the table which will be sorted
+    ascending : sequence of boolean values which correspond to each column
                 in source_table signifying order of each column
                 True - Ascending and False - Descending
     na_position : whether null value should show up at the "first" or "last"
                   position of **all** sorted column.
     """
-    cdef table_view source_table_view = table_view_from_table(
-        source_table, ignore_index=True
+    cdef table_view source_table_view = table_view_from_columns(
+        columns_from_table
     )
     cdef vector[order] column_order
     column_order.reserve(len(ascending))
     cdef vector[null_order] null_precedence
     null_precedence.reserve(len(ascending))
 
-    for i in ascending:
-        if i is True:
+    for asc in ascending:
+        if asc:
             column_order.push_back(order.ASCENDING)
         else:
             column_order.push_back(order.DESCENDING)
 
-        if i ^ (na_position == "first"):
+        if asc ^ (na_position == "first"):
             null_precedence.push_back(null_order.AFTER)
         else:
             null_precedence.push_back(null_order.BEFORE)
@@ -154,21 +140,21 @@
     return Column.from_unique_ptr(move(c_result))
 
 
-def digitize(source_values_table, bins, bool right=False):
+def digitize(list source_columns, list bins, bool right=False):
     """
     Return the indices of the bins to which each value in source_table
     belongs.
 
     Parameters
     ----------
-    source_table : Input table to be binned.
-    bins : Frame containing columns of bins
+    source_columns : Input columns to be binned.
+    bins : List containing columns of bins
     right : Indicating whether the intervals include the
             right or the left bin edge.
     """
 
-    cdef table_view bins_view = table_view_from_table(bins)
-    cdef table_view source_values_table_view = table_view_from_table(
-        source_values_table
+    cdef table_view bins_view = table_view_from_columns(bins)
+    cdef table_view source_table_view = table_view_from_columns(
+        source_columns
     )
     cdef vector[order] column_order = (
         vector[order](
@@ -184,11 +170,11 @@
     )
 
     cdef unique_ptr[column] c_result
-    if right is True:
+    if right:
         with nogil:
             c_result = move(lower_bound(
                 bins_view,
-                source_values_table_view,
+                source_table_view,
                 column_order,
                 null_precedence)
             )
@@ -196,7 +182,7 @@
         with nogil:
             c_result = move(upper_bound(
                 bins_view,
-                source_values_table_view,
+                source_table_view,
                 column_order,
                 null_precedence)
             )
@@ -212,15 +198,13 @@ class RankMethod(IntEnum):
     DENSE = < underlying_type_t_rank_method > rank_method.DENSE
 
 
-def rank_columns(source_table, object method, str na_option,
+def rank_columns(list source_columns, object method, str na_option,
                  bool ascending, bool pct
                  ):
     """
     Compute numerical data ranks (1 through n) of each column in the
     dataframe
     """
-    cdef table_view source_table_view = table_view_from_table(
-        source_table, ignore_index=True
-    )
+    cdef table_view source_table_view = table_view_from_columns(source_columns)
 
     cdef rank_method c_rank_method = < rank_method > (
         < underlying_type_t_rank_method > method
@@ -260,7 +244,7 @@
     cdef vector[unique_ptr[column]] c_results
     cdef column_view c_view
     cdef Column col
-    for col in source_table._columns:
+    for col in source_columns:
         c_view = col.view()
         with nogil:
             c_results.push_back(move(
@@ -274,11 +258,6 @@ def rank_columns(source_table, object method, str na_option, bool ascending, bool pct ):
             ))
 
-    cdef unique_ptr[table] c_result
-    c_result.reset(new table(move(c_results)))
-    data, _ = data_from_unique_ptr(
-        move(c_result),
-        column_names=source_table._column_names,
-        index_names=None
-    )
-    return data, source_table._index
+    return [Column.from_unique_ptr(
+        move(c_results[i])
+    ) for i in range(c_results.size())]
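These Cython wrappers are thin shims over the libcudf C++ sorting APIs (`cudf::is_sorted`, `cudf::sorted_order`, `cudf::rank`). For orientation, a hedged sketch of the equivalent direct C++ call; the signatures are paraphrased from `cudf/sorting.hpp` and the test utilities, so treat exact parameter defaults as approximate:

#include <cudf/sorting.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <memory>
#include <vector>

// Ask libcudf for the permutation that would sort a one-column table
// descending with nulls last, mirroring order_by() above.
std::unique_ptr<cudf::column> sorted_indices_example()
{
  cudf::test::fixed_width_column_wrapper<int32_t> col{5, 1, 4, 2};
  cudf::table_view input{{col}};

  std::vector<cudf::order> column_order{cudf::order::DESCENDING};
  std::vector<cudf::null_order> null_precedence{cudf::null_order::AFTER};

  // Returns an INT32 column of row indices, like the Column returned by
  // order_by() on the Python side.
  return cudf::sorted_order(input, column_order, null_precedence);
}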
diff --git a/python/cudf/cudf/_lib/transpose.pyx b/python/cudf/cudf/_lib/transpose.pyx
index 931a2702612..b9eea6169bd 100644
--- a/python/cudf/cudf/_lib/transpose.pyx
+++ b/python/cudf/cudf/_lib/transpose.pyx
@@ -1,7 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
-
-import cudf
-from cudf.api.types import is_categorical_dtype
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
@@ -9,65 +6,22 @@
 from libcpp.utility cimport move
 
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
-from cudf._lib.cpp.column.column_view cimport column_view
-from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.transpose cimport transpose as cpp_transpose
-from cudf._lib.utils cimport data_from_table_view, table_view_from_table
-
+from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns
 
-def transpose(source):
-    """Transpose index and columns.
 
-    See Also
-    --------
-    cudf.core.DataFrame.transpose
+def transpose(list source_columns):
+    """Transpose m n-row columns into n m-row columns
     """
-
-    if source._num_columns == 0:
-        return source
-
-    cats = None
-    columns = source._columns
-    dtype = columns[0].dtype
-
-    if is_categorical_dtype(dtype):
-        if any(not is_categorical_dtype(c.dtype) for c in columns):
-            raise ValueError('Columns must all have the same dtype')
-        cats = list(c.categories for c in columns)
-        cats = cudf.core.column.concat_columns(cats).unique()
-        source = cudf.core.frame.Frame(index=source._index, data=[
-            (name, col._set_categories(cats, is_unique=True).codes)
-            for name, col in source._data.items()
-        ])
-    elif any(c.dtype != dtype for c in columns):
-        raise ValueError('Columns must all have the same dtype')
-
     cdef pair[unique_ptr[column], table_view] c_result
-    cdef table_view c_input = table_view_from_table(
-        source, ignore_index=True)
+    cdef table_view c_input = table_view_from_columns(source_columns)
 
     with nogil:
         c_result = move(cpp_transpose(c_input))
 
     result_owner = Column.from_unique_ptr(move(c_result.first))
-    data, _ = data_from_table_view(
+    return columns_from_table_view(
         c_result.second,
-        owner=result_owner,
-        column_names=range(c_input.num_rows())
+        owners=[result_owner] * c_result.second.num_columns()
     )
-
-    if cats is not None:
-        data = [
-            (name, cudf.core.column.column.build_categorical_column(
-                codes=cudf.core.column.column.build_column(
-                    col.base_data, dtype=col.dtype),
-                mask=col.base_mask,
-                size=col.size,
-                categories=cats,
-                offset=col.offset,
-            ))
-            for name, col in data.items()
-        ]
-
-    return data
diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx
index 8557f430e25..643a1adca9f 100644
--- a/python/cudf/cudf/_lib/utils.pyx
+++ b/python/cudf/cudf/_lib/utils.pyx
@@ -317,10 +317,10 @@ cdef columns_from_table_view(
 ):
     """
     Given a ``cudf::table_view``, constructs a list of columns from it,
-    along with referencing an ``owner`` Python object that owns the memory
-    lifetime. ``owner`` must be either None or a list of columns. If ``owner``
-    is a list of columns, the owner of the `i`th ``cudf::column_view`` in the
-    table view is ``owners[i]``. For more about memory ownership,
+    along with referencing an owner Python object that owns the memory
+    lifetime. owner must be either None or a list of columns. If owner
+    is a list of columns, the owner of the `i`th ``cudf::column_view``
+    in the table view is ``owners[i]``. For more about memory ownership,
     see ``Column.from_column_view``.
     """
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 216faaa8250..e7b8d62f886 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -774,6 +774,4 @@ def digitize(
     if bin_col.nullable:
         raise ValueError("`bins` cannot contain null entries.")
 
-    return as_column(
-        libcudf.sort.digitize(column.as_frame(), bin_col.as_frame(), right)
-    )
+    return as_column(libcudf.sort.digitize([column], [bin_col], right))
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 50255b07077..d87cb788a7e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3194,17 +3194,42 @@ def transpose(self):
         Difference from pandas: Not supporting *copy* because default
         and only behavior is copy=True
         """
-        # Never transpose a MultiIndex - remove the existing columns and
-        # replace with a RangeIndex. Afterward, reassign.
-        columns = self.index.copy(deep=False)
+        index = self._data.to_pandas_index()
+        columns = self.index.copy(deep=False)
         if self._num_columns == 0 or self._num_rows == 0:
             return DataFrame(index=index, columns=columns)
+
+        # No column from index is transposed with libcudf.
+        source_columns = [*self._columns]
+        source_dtype = source_columns[0].dtype
+        if is_categorical_dtype(source_dtype):
+            if any(not is_categorical_dtype(c.dtype) for c in source_columns):
+                raise ValueError("Columns must all have the same dtype")
+            cats = list(c.categories for c in source_columns)
+            cats = cudf.core.column.concat_columns(cats).unique()
+            source_columns = [
+                col._set_categories(cats, is_unique=True).codes
+                for col in source_columns
+            ]
+
+        if any(c.dtype != source_columns[0].dtype for c in source_columns):
+            raise ValueError("Columns must all have the same dtype")
+
+        result_columns = libcudf.transpose.transpose(source_columns)
+
+        if is_categorical_dtype(source_dtype):
+            result_columns = [
+                codes._with_type_metadata(
+                    cudf.core.dtypes.CategoricalDtype(categories=cats)
+                )
+                for codes in result_columns
+            ]
+
         # Set the old column names as the new index
         result = self.__class__._from_data(
-            # Cython renames the columns to the range [0...ncols]
-            libcudf.transpose.transpose(self),
-            as_index(index),
+            {i: col for i, col in enumerate(result_columns)},
+            index=as_index(index),
         )
         # Set the old index as the new column names
         result.columns = columns
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index d10f7c690bf..e5863b52a5d 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1289,89 +1289,6 @@ def _quantiles(
             column_names=self._column_names,
         )
 
-    @_cudf_nvtx_annotate
-    def rank(
-        self,
-        axis=0,
-        method="average",
-        numeric_only=None,
-        na_option="keep",
-        ascending=True,
-        pct=False,
-    ):
-        """
-        Compute numerical data ranks (1 through n) along axis.
-        By default, equal values are assigned a rank that is the average of the
-        ranks of those values.
-
-        Parameters
-        ----------
-        axis : {0 or 'index'}, default 0
-            Index to direct ranking.
-        method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
-            How to rank the group of records that have the same value
-            (i.e. ties):
-            * average: average rank of the group
-            * min: lowest rank in the group
-            * max: highest rank in the group
-            * first: ranks assigned in order they appear in the array
-            * dense: like 'min', but rank always increases by 1 between groups.
-        numeric_only : bool, optional
-            For DataFrame objects, rank only numeric columns if set to True.
-        na_option : {'keep', 'top', 'bottom'}, default 'keep'
-            How to rank NaN values:
-            * keep: assign NaN rank to NaN values
-            * top: assign smallest rank to NaN values if ascending
-            * bottom: assign highest rank to NaN values if ascending.
-        ascending : bool, default True
-            Whether or not the elements should be ranked in ascending order.
-        pct : bool, default False
-            Whether or not to display the returned rankings in percentile
-            form.
-
-        Returns
-        -------
-        same type as caller
-            Return a Series or DataFrame with data ranks as values.
-        """
-        if isinstance(self, cudf.BaseIndex):
-            warnings.warn(
-                "Index.rank is deprecated and will be removed.",
-                FutureWarning,
-            )
-
-        if method not in {"average", "min", "max", "first", "dense"}:
-            raise KeyError(method)
-
-        method_enum = libcudf.sort.RankMethod[method.upper()]
-        if na_option not in {"keep", "top", "bottom"}:
-            raise ValueError(
-                "na_option must be one of 'keep', 'top', or 'bottom'"
-            )
-
-        if axis not in (0, "index"):
-            raise NotImplementedError(
-                f"axis must be `0`/`index`, "
-                f"axis={axis} is not yet supported in rank"
-            )
-
-        source = self
-        if numeric_only:
-            numeric_cols = (
-                name
-                for name in self._data.names
-                if _is_non_decimal_numeric_dtype(self._data[name])
-            )
-            source = self._get_columns_by_label(numeric_cols)
-            if source.empty:
-                return source.astype("float64")
-
-        data, index = libcudf.sort.rank_columns(
-            source, method_enum, na_option, ascending, pct
-        )
-
-        return self._from_data(data, index).astype(np.float64)
-
     @_cudf_nvtx_annotate
     def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         """Shift values by `periods` positions."""
@@ -2219,15 +2136,17 @@ def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
 
         # Get an int64 column consisting of the indices required to sort self
         # according to the columns specified in by.
-        to_sort = (
-            self
-            if by is None
-            else self._get_columns_by_label(list(by), downcast=False)
-        )
+        to_sort = [
+            *(
+                self
+                if by is None
+                else self._get_columns_by_label(list(by), downcast=False)
+            )._columns
+        ]
 
         # If given a scalar need to construct a sequence of length
         # of columns
         if np.isscalar(ascending):
-            ascending = [ascending] * to_sort._num_columns
+            ascending = [ascending] * len(to_sort)
 
         return libcudf.sort.order_by(to_sort, ascending, na_position)
 
@@ -2387,8 +2306,22 @@ def _is_sorted(self, ascending=None, null_position=None):
         Returns True, if sorted as expected by ``ascending`` and
         ``null_position``, False otherwise.
         """
+        if ascending is not None and not cudf.api.types.is_list_like(
+            ascending
+        ):
+            raise TypeError(
+                f"Expected a list-like or None for `ascending`, got "
+                f"{type(ascending)}"
+            )
+        if null_position is not None and not cudf.api.types.is_list_like(
+            null_position
+        ):
+            raise TypeError(
+                f"Expected a list-like or None for `null_position`, got "
+                f"{type(null_position)}"
+            )
         return libcudf.sort.is_sorted(
-            self, ascending=ascending, null_position=null_position
+            [*self._columns], ascending=ascending, null_position=null_position
         )
 
     @_cudf_nvtx_annotate
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index ddb3082af96..fedbaed28db 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -3577,6 +3577,93 @@ def ge(
             other=other, op="__ge__", fill_value=fill_value, can_reindex=True
         )
 
+    @_cudf_nvtx_annotate
+    def rank(
+        self,
+        axis=0,
+        method="average",
+        numeric_only=None,
+        na_option="keep",
+        ascending=True,
+        pct=False,
+    ):
+        """
+        Compute numerical data ranks (1 through n) along axis.
+
+        By default, equal values are assigned a rank that is the average of the
+        ranks of those values.
+
+        Parameters
+        ----------
+        axis : {0 or 'index'}, default 0
+            Index to direct ranking.
+        method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
+            How to rank the group of records that have the same value
+            (i.e. ties):
+            * average: average rank of the group
+            * min: lowest rank in the group
+            * max: highest rank in the group
+            * first: ranks assigned in order they appear in the array
+            * dense: like 'min', but rank always increases by 1 between groups.
+        numeric_only : bool, optional
+            For DataFrame objects, rank only numeric columns if set to True.
+        na_option : {'keep', 'top', 'bottom'}, default 'keep'
+            How to rank NaN values:
+            * keep: assign NaN rank to NaN values
+            * top: assign smallest rank to NaN values if ascending
+            * bottom: assign highest rank to NaN values if ascending.
+        ascending : bool, default True
+            Whether or not the elements should be ranked in ascending order.
+        pct : bool, default False
+            Whether or not to display the returned rankings in percentile
+            form.
+
+        Returns
+        -------
+        same type as caller
+            Return a Series or DataFrame with data ranks as values.
+        """
+        if isinstance(self, cudf.BaseIndex):
+            warnings.warn(
+                "Index.rank is deprecated and will be removed.",
+                FutureWarning,
+            )
+
+        if method not in {"average", "min", "max", "first", "dense"}:
+            raise KeyError(method)
+
+        method_enum = libcudf.sort.RankMethod[method.upper()]
+        if na_option not in {"keep", "top", "bottom"}:
+            raise ValueError(
+                "na_option must be one of 'keep', 'top', or 'bottom'"
+            )
+
+        if axis not in (0, "index"):
+            raise NotImplementedError(
+                f"axis must be `0`/`index`, "
+                f"axis={axis} is not yet supported in rank"
+            )
+
+        source = self
+        if numeric_only:
+            numeric_cols = (
+                name
+                for name in self._data.names
+                if _is_non_decimal_numeric_dtype(self._data[name])
+            )
+            source = self._get_columns_by_label(numeric_cols)
+            if source.empty:
+                return source.astype("float64")
+
+        result_columns = libcudf.sort.rank_columns(
+            [*source._columns], method_enum, na_option, ascending, pct
+        )
+
+        return self.__class__._from_data(
+            dict(zip(source._column_names, result_columns)),
+            index=source._index,
+        ).astype(np.float64)
+
 
 def _check_duplicate_level_names(specified, level_names):
     """Raise if any of `specified` has duplicates in `level_names`."""
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 2685524add4..957277d7f9b 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -2467,35 +2467,6 @@ def test_arrow_handle_no_index_name(pdf, gdf):
     assert_eq(expect, got)
 
 
-@pytest.mark.parametrize("num_rows", [1, 3, 10, 100])
-@pytest.mark.parametrize("num_bins", [1, 2, 4, 20])
-@pytest.mark.parametrize("right", [True, False])
-@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"])
-@pytest.mark.parametrize("series_bins", [True, False])
-def test_series_digitize(num_rows, num_bins, right, dtype, series_bins):
-    data = np.random.randint(0, 100, num_rows).astype(dtype)
-    bins = np.unique(np.sort(np.random.randint(2, 95, num_bins).astype(dtype)))
-    s = cudf.Series(data)
-    if series_bins:
-        s_bins = cudf.Series(bins)
-        indices = s.digitize(s_bins, right)
-    else:
-        indices = s.digitize(bins, right)
-    np.testing.assert_array_equal(
-        np.digitize(data, bins, right), indices.to_numpy()
-    )
-
-
-def test_series_digitize_invalid_bins():
-    s = cudf.Series(np.random.randint(0, 30, 80), dtype="int32")
-    bins = cudf.Series([2, None, None, 50, 90], dtype="int32")
-
-    with pytest.raises(
-        ValueError, match="`bins` cannot contain null entries."
-    ):
-        _ = s.digitize(bins)
-
-
 def test_pandas_non_contiguious():
     arr1 = np.random.sample([5000, 10])
     assert arr1.flags["C_CONTIGUOUS"] is True
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index fccb9f680d9..87fb9bff7ed 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1782,3 +1782,32 @@ def test_diff_many_dtypes(data):
     gs = cudf.from_pandas(ps)
     assert_eq(ps.diff(), gs.diff())
     assert_eq(ps.diff(periods=2), gs.diff(periods=2))
+
+
+@pytest.mark.parametrize("num_rows", [1, 100])
+@pytest.mark.parametrize("num_bins", [1, 10])
+@pytest.mark.parametrize("right", [True, False])
+@pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"])
+@pytest.mark.parametrize("series_bins", [True, False])
+def test_series_digitize(num_rows, num_bins, right, dtype, series_bins):
+    data = np.random.randint(0, 100, num_rows).astype(dtype)
+    bins = np.unique(np.sort(np.random.randint(2, 95, num_bins).astype(dtype)))
+    s = cudf.Series(data)
+    if series_bins:
+        s_bins = cudf.Series(bins)
+        indices = s.digitize(s_bins, right)
+    else:
+        indices = s.digitize(bins, right)
+    np.testing.assert_array_equal(
+        np.digitize(data, bins, right), indices.to_numpy()
+    )
+
+
+def test_series_digitize_invalid_bins():
+    s = cudf.Series(np.random.randint(0, 30, 80), dtype="int32")
+    bins = cudf.Series([2, None, None, 50, 90], dtype="int32")
+
+    with pytest.raises(
+        ValueError, match="`bins` cannot contain null entries."
+    ):
+        _ = s.digitize(bins)
From 65b1cbdeda9cab57243d0a98e646c860ef86039e Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Wed, 20 Apr 2022 03:24:43 +0530
Subject: [PATCH 33/33] add data generation to benchmark documentation (#10677)

add device data generation to benchmark documentation

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Conor Hoekstra (https://github.com/codereport)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/10677
---
 cpp/docs/BENCHMARKING.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/cpp/docs/BENCHMARKING.md b/cpp/docs/BENCHMARKING.md
index 8794c90d1db..270e7a87e85 100644
--- a/cpp/docs/BENCHMARKING.md
+++ b/cpp/docs/BENCHMARKING.md
@@ -35,6 +35,12 @@ provided in `cpp/benchmarks/synchronization/synchronization.hpp` to help with th
 can also optionally clear the GPU L2 cache in order to ensure cache hits do not artificially inflate
 performance in repeated iterations.
 
+## Data generation
+
+For generating benchmark input data, helper functions are available at [cpp/benchmarks/common/generate_input.hpp](/cpp/benchmarks/common/generate_input.hpp). The input data generation happens on device, in contrast to any `column_wrapper`, where data generation happens on the host.
+* `create_sequence_table` can generate sequence columns starting with value 0 in the first row and increasing by 1 in subsequent rows.
+* `create_random_table` can generate a table filled with random data. The random data parameters are configurable.
+
 ## What should we benchmark?
 
 In general, we should benchmark all features over a range of data sizes and types, so that we can