diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
index 07d6a0e50af..035c89f8bcc 100755
--- a/ci/benchmark/build.sh
+++ b/ci/benchmark/build.sh
@@ -75,10 +75,10 @@ conda install "rmm=$MINOR_VERSION.*" "cudatoolkit=$CUDA_REL" \
# conda install "your-pkg=1.0.0"
# Install the master version of dask, distributed, and streamz
-logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
-logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
+logger "pip install git+https://github.com/dask/distributed.git@2021.06.0 --upgrade --no-deps"
+pip install "git+https://github.com/dask/distributed.git@2021.06.0" --upgrade --no-deps
+logger "pip install git+https://github.com/dask/dask.git@2021.06.0 --upgrade --no-deps"
+pip install "git+https://github.com/dask/dask.git@2021.06.0" --upgrade --no-deps
logger "pip install git+https://github.com/python-streamz/streamz.git --upgrade --no-deps"
pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index d88fe837103..8299afa18a9 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -101,8 +101,8 @@ function install_dask {
# Install the main version of dask, distributed, and streamz
gpuci_logger "Install the main version of dask, distributed, and streamz"
set -x
- pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
- pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
+ pip install "git+https://github.com/dask/distributed.git@2021.06.0" --upgrade --no-deps
+ pip install "git+https://github.com/dask/dask.git@2021.06.0" --upgrade --no-deps
pip install "git+https://github.com/python-streamz/streamz.git" --upgrade --no-deps
set +x
}
diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml
index 44396715e02..21ced3a0022 100644
--- a/conda/environments/cudf_dev_cuda11.0.yml
+++ b/conda/environments/cudf_dev_cuda11.0.yml
@@ -60,7 +60,7 @@ dependencies:
- cachetools
- transformers
- pip:
- - git+https://github.com/dask/dask.git@main
- - git+https://github.com/dask/distributed.git@main
+ - git+https://github.com/dask/dask.git@2021.06.0
+ - git+https://github.com/dask/distributed.git@2021.06.0
- git+https://github.com/python-streamz/streamz.git
- pyorc
diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml
index 8d88fb3d583..156a028ffdb 100644
--- a/conda/environments/cudf_dev_cuda11.2.yml
+++ b/conda/environments/cudf_dev_cuda11.2.yml
@@ -60,7 +60,7 @@ dependencies:
- cachetools
- transformers
- pip:
- - git+https://github.com/dask/dask.git@main
- - git+https://github.com/dask/distributed.git@main
+ - git+https://github.com/dask/dask.git@2021.06.0
+ - git+https://github.com/dask/distributed.git@2021.06.0
- git+https://github.com/python-streamz/streamz.git
- pyorc
diff --git a/conda/recipes/dask-cudf/run_test.sh b/conda/recipes/dask-cudf/run_test.sh
index 3fc1182b33b..34d3fd632b7 100644
--- a/conda/recipes/dask-cudf/run_test.sh
+++ b/conda/recipes/dask-cudf/run_test.sh
@@ -9,11 +9,11 @@ function logger() {
}
# Install the latest version of dask and distributed
-logger "pip install git+https://github.com/dask/distributed.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/distributed.git@main" --upgrade --no-deps
+logger "pip install git+https://github.com/dask/distributed.git@2021.06.0 --upgrade --no-deps"
+pip install "git+https://github.com/dask/distributed.git@2021.06.0" --upgrade --no-deps
-logger "pip install git+https://github.com/dask/dask.git@main --upgrade --no-deps"
-pip install "git+https://github.com/dask/dask.git@main" --upgrade --no-deps
+logger "pip install git+https://github.com/dask/dask.git@2021.06.0 --upgrade --no-deps"
+pip install "git+https://github.com/dask/dask.git@2021.06.0" --upgrade --no-deps
logger "python -c 'import dask_cudf'"
python -c "import dask_cudf"
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 090f613a9d1..87a04a17b37 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -168,6 +168,7 @@ add_library(cudf
src/copying/gather.cu
src/copying/get_element.cu
src/copying/pack.cpp
+ src/copying/reverse.cu
src/copying/sample.cu
src/copying/scatter.cu
src/copying/shift.cu
diff --git a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake
index c1c29a693d5..79d3c0770a3 100644
--- a/cpp/cmake/thirdparty/CUDF_GetArrow.cmake
+++ b/cpp/cmake/thirdparty/CUDF_GetArrow.cmake
@@ -50,6 +50,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC)
"ARROW_WITH_BACKTRACE ON"
"ARROW_CXXFLAGS -w"
"ARROW_JEMALLOC OFF"
+ "ARROW_S3 ON"
# Arrow modifies CMake's GLOBAL RULE_LAUNCH_COMPILE unless this is off
"ARROW_USE_CCACHE OFF"
"ARROW_ARMV8_ARCH ${ARROW_ARMV8_ARCH}"
diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md
index f2873e31c5b..8ec111acdb2 100644
--- a/cpp/docs/DEVELOPER_GUIDE.md
+++ b/cpp/docs/DEVELOPER_GUIDE.md
@@ -342,6 +342,7 @@ namespace detail{
} // namespace detail
void external_function(...){
+  CUDF_FUNC_RANGE(); // Auto-generates an NVTX range for the lifetime of this function
detail::external_function(...);
}
```
@@ -355,6 +356,12 @@ asynchrony if and when we add an asynchronous API to libcudf.
**Note:** `cudaDeviceSynchronize()` should *never* be used.
This limits the ability to do any multi-stream/multi-threaded work with libcudf APIs.
+ ### NVTX Ranges
+
+ In order to aid in performance optimization and debugging, all compute-intensive libcudf functions should have a corresponding NVTX range.
+ libcudf provides a convenience macro, `CUDF_FUNC_RANGE()`, that automatically annotates the lifetime of the enclosing function and uses the function's name as the name of the NVTX range.
+ For more information about NVTX, see [here](https://github.com/NVIDIA/NVTX/tree/dev/cpp).
+
### Stream Creation
There may be times in implementing libcudf features where it would be advantageous to use streams
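For reference, a minimal sketch of what a function-scoped range amounts to, using the plain NVTX C API rather than libcudf's actual macro (which is built on the NVTX3 C++ API and a libcudf-specific domain):

```cpp
#include <nvToolsExt.h>

// RAII helper: the range opens on construction and closes on destruction.
struct scoped_range {
  explicit scoped_range(char const* name) { nvtxRangePushA(name); }
  ~scoped_range() { nvtxRangePop(); }
  scoped_range(scoped_range const&) = delete;
  scoped_range& operator=(scoped_range const&) = delete;
};

// Hypothetical stand-in for CUDF_FUNC_RANGE(); __func__ supplies the name.
#define FUNC_RANGE() scoped_range _func_range{__func__}

void external_function()
{
  FUNC_RANGE();  // annotates the lifetime of external_function
  // ... detail::external_function(...);
}
```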
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index c9a4eab2154..477c53535de 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -81,6 +81,36 @@ std::unique_ptr<table> gather(
out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+/**
+ * @brief Reverses the rows within a table.
+ * Creates a new table that is the reverse of @p source_table.
+ * Example:
+ * ```
+ * source = [[4,5,6], [7,8,9], [10,11,12]]
+ * return = [[6,5,4], [9,8,7], [12,11,10]]
+ * ```
+ *
+ * @param source_table Table that will be reversed
+ * @param mr Device memory resource used to allocate the returned table's device memory
+ * @return A new table whose rows are in reverse order
+ */
+std::unique_ptr<table> reverse(
+  table_view const& source_table,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Reverses the elements of a column
+ * Creates a new column that is the reverse of @p source_column.
+ * Example:
+ * ```
+ * source = [4,5,6]
+ * return = [6,5,4]
+ * ```
+ *
+ * @param source_column Column that will be reversed
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return A new column whose elements are in reverse order
+ */
+std::unique_ptr<column> reverse(
+  column_view const& source_column,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
/**
* @brief Scatters the rows of the source table into a copy of the target table
* according to a scatter map.
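A usage sketch for the two overloads declared above, assuming `tv` is an existing `cudf::table_view` and `cv` an existing `cudf::column_view`:

```cpp
#include <cudf/copying.hpp>

auto reversed_table  = cudf::reverse(tv);  // rows reversed per column: [4,5,6] -> [6,5,4]
auto reversed_column = cudf::reverse(cv);  // elements reversed:        [4,5,6] -> [6,5,4]
```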
diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh
index 410cd213618..d71a8d0ec24 100644
--- a/cpp/include/cudf/detail/scatter.cuh
+++ b/cpp/include/cudf/detail/scatter.cuh
@@ -305,7 +305,7 @@ struct column_scatterer_impl {
[](auto const& col) { return col.nullable(); });
if (child_nullable) {
auto const gather_map =
- scatter_to_gather(scatter_map_begin, scatter_map_end, source.size(), stream);
+ scatter_to_gather(scatter_map_begin, scatter_map_end, target.size(), stream);
      gather_bitmask(cudf::table_view{std::vector<cudf::column_view>{structs_src.child_begin(),
                                                                     structs_src.child_end()}},
gather_map.begin(),
diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp
index ab7a3a6fa9b..6c885a874ee 100644
--- a/cpp/include/cudf/io/datasource.hpp
+++ b/cpp/include/cudf/io/datasource.hpp
@@ -22,9 +22,13 @@
 #include <cudf/io/types.hpp>
 #include <cudf/utilities/error.hpp>
+#include <arrow/filesystem/filesystem.h>
+#include <arrow/result.h>
 #include <arrow/buffer.h>
 #include <arrow/io/file.h>
 #include <arrow/io/interfaces.h>
+#include <string>
+#include <string_view>
 #include <memory>
@@ -302,6 +306,34 @@ class arrow_io_source : public datasource {
};
public:
+ /**
+ * @brief Constructs an object from an Apache Arrow Filesystem URI
+ *
+   * @param arrow_uri Apache Arrow Filesystem URI
+ */
+ explicit arrow_io_source(std::string_view arrow_uri)
+ {
+ const std::string uri_start_delimiter = "//";
+ const std::string uri_end_delimiter = "?";
+
+    arrow::Result<std::shared_ptr<arrow::fs::FileSystem>> result =
+      arrow::fs::FileSystemFromUri(static_cast<std::string>(arrow_uri));
+    CUDF_EXPECTS(result.ok(), "Failed to generate Arrow Filesystem instance from URI.");
+    filesystem = result.ValueOrDie();
+
+ // Parse the path from the URI
+ size_t start = arrow_uri.find(uri_start_delimiter) == std::string::npos
+ ? 0
+ : arrow_uri.find(uri_start_delimiter) + uri_start_delimiter.size();
+ size_t end = arrow_uri.find(uri_end_delimiter) - start;
+ std::string_view path = arrow_uri.substr(start, end);
+
+    arrow::Result<std::shared_ptr<arrow::io::RandomAccessFile>> in_stream =
+      filesystem->OpenInputFile(static_cast<std::string>(path).c_str());
+    CUDF_EXPECTS(in_stream.ok(), "Failed to open Arrow RandomAccessFile");
+    arrow_file = in_stream.ValueOrDie();
+ }
+
/**
* @brief Constructs an object from an `arrow` source object.
*
@@ -340,6 +372,7 @@ class arrow_io_source : public datasource {
}
private:
+  std::shared_ptr<arrow::fs::FileSystem> filesystem;
   std::shared_ptr<arrow::io::RandomAccessFile> arrow_file;
};
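With `ARROW_S3 ON` in the Arrow build (see the CMake change above), this constructor lets a datasource be opened straight from a filesystem URI. A usage sketch; the bucket and key are placeholders:

```cpp
#include <cudf/io/datasource.hpp>

// Arrow resolves the filesystem ("s3", "file", ...) from the URI scheme.
auto src = cudf::io::arrow_io_source{"s3://my-bucket/path/to/data.orc"};
```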
diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 428a4195bf8..1f9ed71ce8c 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -22,6 +22,7 @@
 #include <cudf/types.hpp>
 #include <rmm/cuda_stream_view.hpp>
+#include <optional>
 #include <vector>
namespace cudf {
@@ -522,13 +523,15 @@ class hash_join {
/**
* Returns the row indices that can be used to construct the result of performing
- * an inner join between two tables. @see cudf::inner_join().
+ * an inner join between two tables. @see cudf::inner_join(). Behavior is undefined if the
+ * provided `output_size` is smaller than the actual output size.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
+ * @param output_size Optional value which allows users to specify the exact output size.
+ * @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table and columns' device
* memory.
- * @param stream CUDA stream used for device memory operations and kernel launches
*
* @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
* the result of performing an inner join between two tables with `build` and `probe`
@@ -537,19 +540,22 @@ class hash_join {
  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
            std::unique_ptr<rmm::device_uvector<size_type>>>
inner_join(cudf::table_view const& probe,
- null_equality compare_nulls = null_equality::EQUAL,
- rmm::cuda_stream_view stream = rmm::cuda_stream_default,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+ null_equality compare_nulls = null_equality::EQUAL,
+             std::optional<std::size_t> output_size = {},
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
/**
* Returns the row indices that can be used to construct the result of performing
- * a left join between two tables. @see cudf::left_join().
+ * a left join between two tables. @see cudf::left_join(). Behavior is undefined if the
+ * provided `output_size` is smaller than the actual output size.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
+ * @param output_size Optional value which allows users to specify the exact output size.
+ * @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table and columns' device
* memory.
- * @param stream CUDA stream used for device memory operations and kernel launches
*
* @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
* the result of performing a left join between two tables with `build` and `probe`
@@ -558,19 +564,22 @@ class hash_join {
  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
            std::unique_ptr<rmm::device_uvector<size_type>>>
left_join(cudf::table_view const& probe,
- null_equality compare_nulls = null_equality::EQUAL,
- rmm::cuda_stream_view stream = rmm::cuda_stream_default,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+ null_equality compare_nulls = null_equality::EQUAL,
+            std::optional<std::size_t> output_size = {},
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
/**
* Returns the row indices that can be used to construct the result of performing
- * a full join between two tables. @see cudf::full_join().
+ * a full join between two tables. @see cudf::full_join(). Behavior is undefined if the
+ * provided `output_size` is smaller than the actual output size.
*
* @param probe The probe table, from which the tuples are probed.
* @param compare_nulls Controls whether null join-key values should match or not.
+ * @param output_size Optional value which allows users to specify the exact output size.
+ * @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table and columns' device
* memory.
- * @param stream CUDA stream used for device memory operations and kernel launches
*
* @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
* the result of performing a full join between two tables with `build` and `probe`
@@ -579,9 +588,59 @@ class hash_join {
  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
            std::unique_ptr<rmm::device_uvector<size_type>>>
full_join(cudf::table_view const& probe,
- null_equality compare_nulls = null_equality::EQUAL,
- rmm::cuda_stream_view stream = rmm::cuda_stream_default,
- rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+ null_equality compare_nulls = null_equality::EQUAL,
+            std::optional<std::size_t> output_size = {},
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+
+ /**
+ * Returns the exact number of matches (rows) when performing an inner join with the specified
+ * probe table.
+ *
+ * @param probe The probe table, from which the tuples are probed.
+ * @param compare_nulls Controls whether null join-key values should match or not.
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ *
+   * @return The exact number of output rows when performing an inner join between two tables with
+   * `build` and `probe` as the join keys.
+ */
+ std::size_t inner_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls = null_equality::EQUAL,
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default) const;
+
+ /**
+ * Returns the exact number of matches (rows) when performing a left join with the specified probe
+ * table.
+ *
+ * @param probe The probe table, from which the tuples are probed.
+ * @param compare_nulls Controls whether null join-key values should match or not.
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ *
+   * @return The exact number of output rows when performing a left join between two tables with
+   * `build` and `probe` as the join keys.
+ */
+ std::size_t left_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls = null_equality::EQUAL,
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default) const;
+
+ /**
+ * Returns the exact number of matches (rows) when performing a full join with the specified probe
+ * table.
+ *
+ * @param probe The probe table, from which the tuples are probed.
+ * @param compare_nulls Controls whether null join-key values should match or not.
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the intermediate table and columns' device
+ * memory.
+ *
+   * @return The exact number of output rows when performing a full join between two tables with
+   * `build` and `probe` as the join keys.
+ */
+ std::size_t full_join_size(
+ cudf::table_view const& probe,
+ null_equality compare_nulls = null_equality::EQUAL,
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
private:
struct hash_join_impl;
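A sketch of the calling pattern implied by these declarations: compute the exact size once, then hand it back so the join can size its output buffers directly instead of counting again. Assumes existing `build`/`probe` table views, a `stream`, and an `mr`:

```cpp
cudf::hash_join hj(build, cudf::null_equality::EQUAL, stream);

// Pass 1: exact output size. Pass 2: reuse it via `output_size`.
std::size_t const join_size = hj.inner_join_size(probe, cudf::null_equality::EQUAL, stream);
auto [left_indices, right_indices] =
  hj.inner_join(probe, cudf::null_equality::EQUAL, join_size, stream, mr);
```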
diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp
index 4d29b0a5b6a..17430d3eafe 100644
--- a/cpp/include/cudf/strings/convert/convert_integers.hpp
+++ b/cpp/include/cudf/strings/convert/convert_integers.hpp
@@ -171,7 +171,7 @@ std::unique_ptr hex_to_integers(
* @code{.pseudo}
* Example:
* s = ['123', '-456', '', 'AGE', '+17EA', '0x9EF' '123ABC']
- * b = s.is_hex(s)
+ * b = is_hex(s)
* b is [true, false, false, false, false, true, true]
* @endcode
*
@@ -185,6 +185,37 @@ std::unique_ptr is_hex(
strings_column_view const& strings,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+/**
+ * @brief Returns a new strings column converting integer columns to hexadecimal
+ * characters.
+ *
+ * Any null entries will result in corresponding null entries in the output column.
+ *
+ * The output character set is '0'-'9' and 'A'-'F'. The output string width will
+ * be a multiple of 2 depending on the size of the integer type. A single leading
+ * zero is applied to the first non-zero output byte if it is less than 0x10.
+ *
+ * @code{.pseudo}
+ * Example:
+ * input = [123, -1, 0, 27, 342718233] // int32 type input column
+ * s = integers_to_hex(input)
+ * s is ['04D2', 'FFFFFFFF', '00', '1B', '146D7719']
+ * @endcode
+ *
+ * The example above shows an `INT32` type column where each integer is 4 bytes.
+ * Leading zeros are suppressed unless filling out a complete byte as in
+ * `123 -> '04D2'` instead of `000004D2` or `4D2`.
+ *
+ * @throw cudf::logic_error if the input column is not an integral type.
+ *
+ * @param input Integer column to convert to hex.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @return New strings column with hexadecimal characters.
+ */
+std::unique_ptr<column> integers_to_hex(
+ column_view const& input,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
/** @} */ // end of doxygen group
} // namespace strings
} // namespace cudf
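A usage sketch for the API declared above, assuming `input` is an existing `INT32` column view:

```cpp
#include <cudf/strings/convert/convert_integers.hpp>

auto hex = cudf::strings::integers_to_hex(input);  // [123, -1, 0] -> ['04D2', 'FFFFFFFF', '00']
```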
diff --git a/cpp/include/cudf/wrappers/timestamps.hpp b/cpp/include/cudf/wrappers/timestamps.hpp
index 275ac20048e..ac13dae6a74 100644
--- a/cpp/include/cudf/wrappers/timestamps.hpp
+++ b/cpp/include/cudf/wrappers/timestamps.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -32,19 +32,7 @@ template <typename Duration>
 using time_point = cuda::std::chrono::sys_time<Duration>;
 template <typename Duration>
-struct timestamp : time_point<Duration> {
-  // Bring over base class constructors and make them visible here
-  using time_point<Duration>::time_point;
-
-  // This is needed as __shared__ objects of this type can't be assigned in device code
-  // when the initializer list constructs subobjects with values, which is what std::time_point
-  // does.
-  constexpr timestamp() : time_point<Duration>(Duration()){};
-
-  // The inherited copy constructor will hide the auto generated copy constructor;
-  // hence, explicitly define and delegate
-  constexpr timestamp(const time_point<Duration>& other) : time_point<Duration>(other) {}
-};
+using timestamp = time_point<Duration>;
} // namespace detail
/**
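With the alias, libcudf timestamps are plain `cuda::std::chrono` time points, so the deleted workaround constructors are no longer needed. A small sketch (assuming the public `timestamp_s`/`duration_s` types):

```cpp
#include <cudf/wrappers/durations.hpp>
#include <cudf/wrappers/timestamps.hpp>

cudf::timestamp_s ts{cudf::duration_s{42}};  // 42 seconds since the epoch
auto secs = ts.time_since_epoch().count();   // 42
```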
diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu
index 809390553a4..4b11382a3f2 100644
--- a/cpp/src/copying/contiguous_split.cu
+++ b/cpp/src/copying/contiguous_split.cu
@@ -886,13 +886,15 @@ std::vector contiguous_split(cudf::table_view const& input,
size_type* offset_stack = &d_offset_stack[stack_pos];
int parent_offsets_index = src_info.parent_offsets_index;
int stack_size = 0;
+ int root_column_offset = src_info.column_offset;
while (parent_offsets_index >= 0) {
offset_stack[stack_size++] = parent_offsets_index;
+ root_column_offset = d_src_buf_info[parent_offsets_index].column_offset;
parent_offsets_index = d_src_buf_info[parent_offsets_index].parent_offsets_index;
}
- // make sure to include the -column- offset in our calculations
- int row_start = d_indices[split_index] + src_info.column_offset;
- int row_end = d_indices[split_index + 1] + src_info.column_offset;
+ // make sure to include the -column- offset on the root column in our calculation.
+ int row_start = d_indices[split_index] + root_column_offset;
+ int row_end = d_indices[split_index + 1] + root_column_offset;
while (stack_size > 0) {
stack_size--;
auto const offsets = d_src_buf_info[offset_stack[stack_size]].offsets;
@@ -923,6 +925,7 @@ std::vector contiguous_split(cudf::table_view const& input,
int const element_size = cudf::type_dispatcher(data_type{src_info.type}, size_of_helper{});
std::size_t const bytes =
static_cast(num_elements) * static_cast(element_size);
+
return dst_buf_info{_round_up_safe(bytes, 64),
num_elements,
element_size,
diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu
index 9f8e6f7bdcb..b0de9cd750e 100644
--- a/cpp/src/copying/copy.cu
+++ b/cpp/src/copying/copy.cu
@@ -195,21 +195,6 @@ std::unique_ptr<column> scatter_gather_based_if_else(Left const& lhs,
 {
   if constexpr (std::is_same<Left, column_view>::value &&
                 std::is_same<Right, column_view>::value) {
- auto const null_map_entry = size + 1; // Out of bounds index, for gather() to nullify.
-
- auto const gather_lhs = make_counting_transform_iterator(
- size_type{0}, lhs_gather_map_functor{is_left, null_map_entry});
-
- auto const lhs_gathered_columns =
- cudf::detail::gather(table_view{std::vector{lhs}},
- gather_lhs,
- gather_lhs + size,
- out_of_bounds_policy::NULLIFY,
- stream,
- mr)
- ->release();
- auto& lhs_partial_output = lhs_gathered_columns[0];
-
+    auto scatter_map_rhs = rmm::device_uvector<size_type>{static_cast<std::size_t>(size), stream};
auto const scatter_map_end = thrust::copy_if(rmm::exec_policy(stream),
thrust::make_counting_iterator(size_type{0}),
@@ -227,7 +212,7 @@ std::unique_ptr scatter_gather_based_if_else(Left const& lhs,
table_view{std::vector{scatter_src_rhs->get_column(0).view()}},
scatter_map_rhs.begin(),
scatter_map_end,
- table_view{std::vector{lhs_partial_output->view()}},
+ table_view{std::vector{lhs}},
false,
stream,
mr);
diff --git a/cpp/src/copying/reverse.cu b/cpp/src/copying/reverse.cu
new file mode 100644
index 00000000000..73a36d70f7b
--- /dev/null
+++ b/cpp/src/copying/reverse.cu
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/column/column.hpp>
+#include <cudf/copying.hpp>
+#include <cudf/detail/gather.hpp>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/table/table_view.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_buffer.hpp>
+#include <rmm/exec_policy.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/iterator/transform_iterator.h>
+#include <thrust/reverse.h>
+#include <thrust/sequence.h>
+
+namespace cudf {
+namespace detail {
+std::unique_ptr<table> reverse(table_view const& source_table,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+{
+ size_type num_rows = source_table.num_rows();
+ auto elements =
+ make_counting_transform_iterator(0, [num_rows] __device__(auto i) { return num_rows - i - 1; });
+ auto elements_end = elements + source_table.num_rows();
+
+ return gather(source_table, elements, elements_end, out_of_bounds_policy::DONT_CHECK, stream, mr);
+}
+
+std::unique_ptr<column> reverse(column_view const& source_column,
+                                rmm::cuda_stream_view stream,
+                                rmm::mr::device_memory_resource* mr)
+{
+  return std::move(
+    cudf::detail::reverse(table_view({source_column}), stream, mr)->release().front());
+}
+} // namespace detail
+
+std::unique_ptr<table> reverse(table_view const& source_table, rmm::mr::device_memory_resource* mr)
+{
+ CUDF_FUNC_RANGE();
+ return detail::reverse(source_table, rmm::cuda_stream_default, mr);
+}
+
+std::unique_ptr<column> reverse(column_view const& source_column,
+ rmm::mr::device_memory_resource* mr)
+{
+ CUDF_FUNC_RANGE();
+ return detail::reverse(source_column, rmm::cuda_stream_default, mr);
+}
+} // namespace cudf
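The gather map above is never materialized: a counting iterator composed with the transform `i -> num_rows - 1 - i` feeds `gather` directly. An equivalent standalone Thrust sketch of the same trick:

```cpp
#include <thrust/device_vector.h>
#include <thrust/gather.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>

// Maps output position i to input position n - 1 - i.
struct reverse_index {
  int n;
  __host__ __device__ int operator()(int i) const { return n - 1 - i; }
};

thrust::device_vector<int> reverse_copy(thrust::device_vector<int> const& in)
{
  int const n = static_cast<int>(in.size());
  auto map = thrust::make_transform_iterator(thrust::make_counting_iterator(0), reverse_index{n});
  thrust::device_vector<int> out(n);
  thrust::gather(map, map + n, in.begin(), out.begin());  // out[i] = in[n - 1 - i]
  return out;
}
```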
diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu
index 2aa1e2d866a..4a2330d479b 100644
--- a/cpp/src/io/orc/writer_impl.cu
+++ b/cpp/src/io/orc/writer_impl.cu
@@ -565,6 +565,8 @@ orc_streams::orc_stream_offsets orc_streams::compute_offsets(
// Everything else uses RLE
return true;
}();
+ // non-RLE and RLE streams are separated in the buffer that stores encoded data
+ // The computed offsets do not take the streams of the other type into account
if (is_rle_data) {
strm_offsets[i] = rle_data_size;
rle_data_size += (stream.length * num_rowgroups + 7) & ~7;
@@ -681,6 +683,10 @@ encoded_data writer::impl::encode_columns(const table_device_view &view,
: (((stripe_dict->num_strings + 0x1ff) >> 9) * (512 * 4 + 2));
if (stripe.id == 0) {
strm.data_ptrs[strm_type] = encoded_data.data() + stream_offsets.offsets[strm_id];
+ // Dictionary lengths are encoded as RLE, which are all stored after non-RLE data:
+ // include non-RLE data size in the offset only in that case
+ if (strm_type == gpu::CI_DATA2 && ck.encoding_kind == DICTIONARY_V2)
+ strm.data_ptrs[strm_type] += stream_offsets.non_rle_data_size;
} else {
auto const &strm_up = col_streams[stripe_dict[-dict_stride].start_chunk];
strm.data_ptrs[strm_type] =
@@ -710,7 +716,8 @@ encoded_data writer::impl::encode_columns(const table_device_view &view,
: (col_streams[rg_idx - 1].data_ptrs[strm_type] +
col_streams[rg_idx - 1].lengths[strm_type]);
} else {
- strm.lengths[strm_type] = streams[strm_id].length;
+ strm.lengths[strm_type] = streams[strm_id].length;
+ // RLE encoded streams are stored after all non-RLE streams
strm.data_ptrs[strm_type] = encoded_data.data() + stream_offsets.non_rle_data_size +
stream_offsets.offsets[strm_id] +
streams[strm_id].length * rg_idx;
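The layout these comments describe, as an illustrative helper (names assumed, not writer_impl's actual code): the encoded-data buffer holds every non-RLE stream first, then every RLE stream, and each region keeps offsets relative to its own start.

```cpp
#include <cstddef>

char* stream_start(char* encoded_data,
                   std::size_t non_rle_data_size,  // total bytes of the non-RLE region
                   std::size_t relative_offset,    // offset within the stream's own region
                   bool is_rle)
{
  // RLE streams live after the whole non-RLE region, so shift by its size.
  return is_rle ? encoded_data + non_rle_data_size + relative_offset
                : encoded_data + relative_offset;
}
```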
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 3f59bc13dda..dfe3231e897 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -84,6 +84,7 @@ struct valid_range {
* @param left_table_row_count Number of rows of left table
* @param right_table_row_count Number of rows of right table
* @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned vectors.
*
* @return Pair of vectors containing the left join indices complement
*/
@@ -208,6 +209,7 @@ std::unique_ptr<multimap_type, std::function<void(multimap_type*)>> build_join_
/**
* @brief Probes the `hash_table` built from `build_table` for tuples in `probe_table`,
* and returns the output indices of `build_table` and `probe_table` as a combined table.
+ * Behavior is undefined if the provided `output_size` is smaller than the actual output size.
*
* @tparam JoinKind The type of join to be performed.
*
@@ -215,7 +217,9 @@ std::unique_ptr<multimap_type, std::function<void(multimap_type*)>> build_join_
* @param probe_table Table of probe side columns to join.
* @param hash_table Hash table built from `build_table`.
* @param compare_nulls Controls whether null join-key values should match or not.
+ * @param output_size Optional value which allows users to specify the exact output size.
* @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned vectors.
*
* @return Join output indices vector pair.
*/
@@ -226,39 +230,52 @@ probe_join_hash_table(cudf::table_device_view build_table,
cudf::table_device_view probe_table,
multimap_type const &hash_table,
null_equality compare_nulls,
+                      std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
-  std::size_t estimated_size = estimate_join_output_size<JoinKind, multimap_type>(
-    build_table, probe_table, hash_table, compare_nulls, stream);
+ // Use the output size directly if provided. Otherwise, compute the exact output size
+ constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN)
+ ? cudf::detail::join_kind::LEFT_JOIN
+ : JoinKind;
+  std::size_t const join_size = output_size.value_or(
+    compute_join_output_size<ProbeJoinKind, multimap_type>(
+      build_table, probe_table, hash_table, compare_nulls, stream));
- // If the estimated output size is zero, return immediately
- if (estimated_size == 0) {
+ // If output size is zero, return immediately
+ if (join_size == 0) {
    return std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr),
                          std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr));
}
- // Because we are approximating the number of joined elements, our approximation
- // might be incorrect and we might have underestimated the number of joined elements.
- // As such we will need to de-allocate memory and re-allocate memory to ensure
- // that the final output is correct.
  rmm::device_scalar<size_type> write_index(0, stream);
- std::size_t join_size{0};
-
-  auto left_indices  = std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr);
-  auto right_indices = std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr);
-
- auto current_estimated_size = estimated_size;
- do {
- left_indices->resize(estimated_size, stream);
- right_indices->resize(estimated_size, stream);
- constexpr int block_size{DEFAULT_JOIN_BLOCK_SIZE};
- detail::grid_1d config(probe_table.num_rows(), block_size);
- write_index.set_value_to_zero_async(stream);
+  auto left_indices  = std::make_unique<rmm::device_uvector<size_type>>(join_size, stream, mr);
+  auto right_indices = std::make_unique<rmm::device_uvector<size_type>>(join_size, stream, mr);
- row_hash hash_probe{probe_table};
- row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
+ constexpr int block_size{DEFAULT_JOIN_BLOCK_SIZE};
+ detail::grid_1d config(probe_table.num_rows(), block_size);
+
+ row_hash hash_probe{probe_table};
+ row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
+ if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN) {
+    probe_hash_table<ProbeJoinKind, multimap_type, block_size, DEFAULT_JOIN_CACHE_SIZE>
+      <<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
+ hash_table,
+ build_table,
+ probe_table,
+ hash_probe,
+ equality,
+ left_indices->data(),
+ right_indices->data(),
+ write_index.data(),
+ join_size);
+ auto const actual_size = write_index.value(stream);
+ left_indices->resize(actual_size, stream);
+ right_indices->resize(actual_size, stream);
+ } else {
    probe_hash_table<JoinKind, multimap_type, block_size, DEFAULT_JOIN_CACHE_SIZE>
      <<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
hash_table,
@@ -269,18 +286,101 @@ probe_join_hash_table(cudf::table_device_view build_table,
left_indices->data(),
right_indices->data(),
write_index.data(),
- estimated_size);
+ join_size);
+ }
+ return std::make_pair(std::move(left_indices), std::move(right_indices));
+}
+
+/**
+ * @brief Probes the `hash_table` built from `build_table` for tuples in `probe_table` twice,
+ * and returns the output size of a full join operation between `build_table` and `probe_table`.
+ * TODO: this is a temporary solution as part of `full_join_size`. To be refactored during
+ * cuco integration.
+ *
+ * @param build_table Table of build side columns to join.
+ * @param probe_table Table of probe side columns to join.
+ * @param hash_table Hash table built from `build_table`.
+ * @param compare_nulls Controls whether null join-key values should match or not.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the intermediate vectors.
+ *
+ * @return Output size of full join.
+ */
+std::size_t get_full_join_size(cudf::table_device_view build_table,
+ cudf::table_device_view probe_table,
+ multimap_type const &hash_table,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource *mr)
+{
+  std::size_t join_size = compute_join_output_size<cudf::detail::join_kind::LEFT_JOIN, multimap_type>(
+    build_table, probe_table, hash_table, compare_nulls, stream);
- CHECK_CUDA(stream.value());
+ // If output size is zero, return immediately
+ if (join_size == 0) { return join_size; }
- join_size = write_index.value(stream);
- current_estimated_size = estimated_size;
- estimated_size *= 2;
- } while ((current_estimated_size < join_size));
+  rmm::device_scalar<size_type> write_index(0, stream);
- left_indices->resize(join_size, stream);
- right_indices->resize(join_size, stream);
- return std::make_pair(std::move(left_indices), std::move(right_indices));
+  auto left_indices  = std::make_unique<rmm::device_uvector<size_type>>(join_size, stream, mr);
+  auto right_indices = std::make_unique<rmm::device_uvector<size_type>>(join_size, stream, mr);
+
+ constexpr int block_size{DEFAULT_JOIN_BLOCK_SIZE};
+ detail::grid_1d config(probe_table.num_rows(), block_size);
+
+ row_hash hash_probe{probe_table};
+ row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
+  probe_hash_table<cudf::detail::join_kind::LEFT_JOIN, multimap_type, block_size, DEFAULT_JOIN_CACHE_SIZE>
+    <<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(hash_table,
+ build_table,
+ probe_table,
+ hash_probe,
+ equality,
+ left_indices->data(),
+ right_indices->data(),
+ write_index.data(),
+ join_size);
+  // Release the intermediate memory allocation
+ left_indices->resize(0, stream);
+
+ auto const left_table_row_count = probe_table.num_rows();
+ auto const right_table_row_count = build_table.num_rows();
+
+ std::size_t left_join_complement_size;
+
+  // If the left table is empty, then all rows of the right table should be represented in the
+  // joined indices.
+ if (left_table_row_count == 0) {
+ left_join_complement_size = right_table_row_count;
+ } else {
+ // Assume all the indices in invalid_index_map are invalid
+    auto invalid_index_map =
+      std::make_unique<rmm::device_uvector<size_type>>(right_table_row_count, stream);
+ thrust::uninitialized_fill(
+ rmm::exec_policy(stream), invalid_index_map->begin(), invalid_index_map->end(), int32_t{1});
+
+ // Functor to check for index validity since left joins can create invalid indices
+    valid_range<size_type> valid(0, right_table_row_count);
+
+    // invalid_index_map[right_indices[i]] = 0 for i = 0 to right_indices->size()
+ // Thus specifying that those locations are valid
+ thrust::scatter_if(rmm::exec_policy(stream),
+ thrust::make_constant_iterator(0),
+ thrust::make_constant_iterator(0) + right_indices->size(),
+ right_indices->begin(), // Index locations
+ right_indices->begin(), // Stencil - Check if index location is valid
+ invalid_index_map->begin(), // Output indices
+ valid); // Stencil Predicate
+
+ // Create list of indices that have been marked as invalid
+ left_join_complement_size = thrust::count_if(rmm::exec_policy(stream),
+ invalid_index_map->begin(),
+ invalid_index_map->end(),
+                                                 thrust::identity<size_type>());
+ }
+ return join_size + left_join_complement_size;
}
std::unique_ptr<cudf::table> combine_table_pair(std::unique_ptr<cudf::table> &&left,
@@ -323,33 +423,85 @@ std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
null_equality compare_nulls,
+                                      std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr) const
{
CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(probe, compare_nulls, stream, mr);
+  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
+    probe, compare_nulls, output_size, stream, mr);
}
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
null_equality compare_nulls,
+                                     std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr) const
{
CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(probe, compare_nulls, stream, mr);
+  return compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(
+    probe, compare_nulls, output_size, stream, mr);
}
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
null_equality compare_nulls,
+                                     std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr) const
{
CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::FULL_JOIN>(probe, compare_nulls, stream, mr);
+  return compute_hash_join<cudf::detail::join_kind::FULL_JOIN>(
+    probe, compare_nulls, output_size, stream, mr);
+}
+
+std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const &probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const
+{
+ CUDF_FUNC_RANGE();
+ CUDF_EXPECTS(_hash_table, "Hash table of hash join is null.");
+
+ auto build_table = cudf::table_device_view::create(_build, stream);
+ auto probe_table = cudf::table_device_view::create(probe, stream);
+
+  return cudf::detail::compute_join_output_size<cudf::detail::join_kind::INNER_JOIN>(
+    *build_table, *probe_table, *_hash_table, compare_nulls, stream);
+}
+
+std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const &probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const
+{
+ CUDF_FUNC_RANGE();
+
+ // Trivial left join case - exit early
+ if (!_hash_table) { return probe.num_rows(); }
+
+ auto build_table = cudf::table_device_view::create(_build, stream);
+ auto probe_table = cudf::table_device_view::create(probe, stream);
+
+  return cudf::detail::compute_join_output_size<cudf::detail::join_kind::LEFT_JOIN>(
+    *build_table, *probe_table, *_hash_table, compare_nulls, stream);
+}
+
+std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const &probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource *mr) const
+{
+ CUDF_FUNC_RANGE();
+
+ // Trivial left join case - exit early
+ if (!_hash_table) { return probe.num_rows(); }
+
+ auto build_table = cudf::table_device_view::create(_build, stream);
+ auto probe_table = cudf::table_device_view::create(probe, stream);
+
+ return get_full_join_size(*build_table, *probe_table, *_hash_table, compare_nulls, stream, mr);
}
template <cudf::detail::join_kind JoinKind>
@@ -357,6 +509,7 @@ std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
null_equality compare_nulls,
+                                             std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr) const
{
@@ -383,7 +536,8 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
[](const auto &b, const auto &p) { return b.type() == p.type(); }),
"Mismatch in joining column data types");
-  return probe_join_indices<JoinKind>(flattened_probe_table, compare_nulls, stream, mr);
+  return probe_join_indices<JoinKind>(
+    flattened_probe_table, compare_nulls, output_size, stream, mr);
}
template <cudf::detail::join_kind JoinKind>
@@ -391,6 +545,7 @@ std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
null_equality compare_nulls,
+                                              std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr) const
{
@@ -404,11 +559,8 @@ hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
auto build_table = cudf::table_device_view::create(_build, stream);
auto probe_table = cudf::table_device_view::create(probe, stream);
-  constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN)
-                                                      ? cudf::detail::join_kind::LEFT_JOIN
-                                                      : JoinKind;
-  auto join_indices = cudf::detail::probe_join_hash_table<ProbeJoinKind>(
-    *build_table, *probe_table, *_hash_table, compare_nulls, stream, mr);
+  auto join_indices = cudf::detail::probe_join_hash_table<JoinKind>(
+    *build_table, *probe_table, *_hash_table, compare_nulls, output_size, stream, mr);
if (JoinKind == cudf::detail::join_kind::FULL_JOIN) {
auto complement_indices = detail::get_left_join_indices_complement(
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index 8fefda9f841..f9ccbd68c74 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -39,16 +39,11 @@
namespace cudf {
namespace detail {
/**
- * @brief Gives an estimate of the size of the join output produced when
+ * @brief Calculates the exact size of the join output produced when
* joining two tables together.
*
- * If the two tables are of relatively equal size, then the returned output
- * size will be the exact output size. However, if the probe table is
- * significantly larger than the build table, then we attempt to estimate the
- * output size by using only a subset of the rows in the probe table.
- *
* @throw cudf::logic_error if JoinKind is not INNER_JOIN or LEFT_JOIN
- * @throw cudf::logic_error if the estimated size overflows cudf::size_type
+ * @throw cudf::logic_error if the exact size overflows cudf::size_type
*
* @tparam JoinKind The type of join to be performed
* @tparam multimap_type The type of the hash table
@@ -60,28 +55,21 @@ namespace detail {
* @param compare_nulls Controls whether null join-key values should match or not.
* @param stream CUDA stream used for device memory operations and kernel launches
*
- * @return An estimate of the size of the output of the join operation
+ * @return The exact size of the output of the join operation
*/
template <join_kind JoinKind, typename multimap_type>
-std::size_t estimate_join_output_size(table_device_view build_table,
- table_device_view probe_table,
- multimap_type const& hash_table,
- null_equality compare_nulls,
- rmm::cuda_stream_view stream)
+std::size_t compute_join_output_size(table_device_view build_table,
+ table_device_view probe_table,
+ multimap_type const& hash_table,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream)
{
const size_type build_table_num_rows{build_table.num_rows()};
const size_type probe_table_num_rows{probe_table.num_rows()};
- // If the probe table is significantly larger (5x) than the build table,
- // then we attempt to only use a subset of the probe table rows to compute an
- // estimate of the join output size.
- size_type probe_to_build_ratio{0};
- if (build_table_num_rows > 0) {
-    probe_to_build_ratio = static_cast<size_type>(
-      std::ceil(static_cast<double>(probe_table_num_rows) / build_table_num_rows));
- } else {
- // If the build table is empty, we know exactly how large the output
- // will be for the different types of joins and can return immediately
+ // If the build table is empty, we know exactly how large the output
+ // will be for the different types of joins and can return immediately
+ if (0 == build_table_num_rows) {
switch (JoinKind) {
// Inner join with an empty table will have no output
case join_kind::INNER_JOIN: return 0;
@@ -94,13 +82,9 @@ std::size_t estimate_join_output_size(table_device_view build_table,
}
}
- size_type sample_probe_num_rows{probe_table_num_rows};
- constexpr size_type MAX_RATIO{5};
- if (probe_to_build_ratio > MAX_RATIO) { sample_probe_num_rows = build_table_num_rows; }
-
// Allocate storage for the counter used to get the size of the join output
- std::size_t h_size_estimate{0};
-  rmm::device_scalar<std::size_t> size_estimate(0, stream);
+ std::size_t h_size{0};
+  rmm::device_scalar<std::size_t> d_size(0, stream);
CHECK_CUDA(stream.value());
@@ -116,55 +100,23 @@ std::size_t estimate_join_output_size(table_device_view build_table,
int num_sms{-1};
CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id));
- // Continue probing with a subset of the probe table until either:
- // a non-zero output size estimate is found OR
- // all of the rows in the probe table have been sampled
- do {
- sample_probe_num_rows = std::min(sample_probe_num_rows, probe_table_num_rows);
-
- size_estimate.set_value_to_zero_async(stream);
-
- row_hash hash_probe{probe_table};
- row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
- // Probe the hash table without actually building the output to simply
- // find what the size of the output will be.
-    compute_join_output_size<JoinKind, multimap_type, block_size>
-      <<<numBlocks * num_sms, block_size, 0, stream.value()>>>(hash_table,
- build_table,
- probe_table,
- hash_probe,
- equality,
- sample_probe_num_rows,
- size_estimate.data());
- CHECK_CUDA(stream.value());
-
- // Only in case subset of probe table is chosen,
- // increase the estimated output size by a factor of the ratio between the
- // probe and build tables
- if (sample_probe_num_rows < probe_table_num_rows) {
- h_size_estimate = size_estimate.value(stream) * probe_to_build_ratio;
- } else {
- h_size_estimate = size_estimate.value(stream);
- }
-
- // If the size estimate is non-zero, then we have a valid estimate and can break
- // If sample_probe_num_rows >= probe_table_num_rows, then we've sampled the entire
- // probe table, in which case the estimate is exact and we can break
- if ((h_size_estimate > 0) || (sample_probe_num_rows >= probe_table_num_rows)) { break; }
-
- // If the size estimate is zero, then double the number of sampled rows in the probe
- // table. Reduce the ratio of the number of probe rows sampled to the number of rows
- // in the build table by the same factor
- if (0 == h_size_estimate) {
- constexpr size_type GROW_RATIO{2};
- sample_probe_num_rows *= GROW_RATIO;
- probe_to_build_ratio =
-        static_cast<size_type>(std::ceil(static_cast<double>(probe_to_build_ratio) / GROW_RATIO));
- }
+ row_hash hash_probe{probe_table};
+ row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
+ // Probe the hash table without actually building the output to simply
+ // find what the size of the output will be.
+  compute_join_output_size<JoinKind, multimap_type, block_size>
+    <<<numBlocks * num_sms, block_size, 0, stream.value()>>>(hash_table,
+ build_table,
+ probe_table,
+ hash_probe,
+ equality,
+ probe_table_num_rows,
+ d_size.data());
- } while (true);
+ CHECK_CUDA(stream.value());
+ h_size = d_size.value(stream);
- return h_size_estimate;
+ return h_size;
}
/**
@@ -236,6 +188,7 @@ struct hash_join::hash_join_impl {
            std::unique_ptr<rmm::device_uvector<size_type>>>
inner_join(cudf::table_view const& probe,
null_equality compare_nulls,
+             std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const;
@@ -243,6 +196,7 @@ struct hash_join::hash_join_impl {
            std::unique_ptr<rmm::device_uvector<size_type>>>
left_join(cudf::table_view const& probe,
null_equality compare_nulls,
+            std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const;
@@ -250,22 +204,38 @@ struct hash_join::hash_join_impl {
            std::unique_ptr<rmm::device_uvector<size_type>>>
full_join(cudf::table_view const& probe,
null_equality compare_nulls,
+            std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const;
+ std::size_t inner_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const;
+
+ std::size_t left_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const;
+
+ std::size_t full_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const;
+
private:
  template <cudf::detail::join_kind JoinKind>
  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
            std::unique_ptr<rmm::device_uvector<size_type>>>
compute_hash_join(cudf::table_view const& probe,
null_equality compare_nulls,
+                    std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const;
/**
* @brief Probes the `_hash_table` built from `_build` for tuples in `probe_table`,
* and returns the output indices of `build_table` and `probe_table` as a combined table,
- * i.e. if full join is specified as the join type then left join is called.
+ * i.e. if full join is specified as the join type then left join is called. Behavior
+ * is undefined if the provided `output_size` is smaller than the actual output size.
*
* @throw cudf::logic_error if hash table is null.
*
@@ -273,6 +243,7 @@ struct hash_join::hash_join_impl {
*
* @param probe_table Table of probe side columns to join.
* @param compare_nulls Controls whether null join-key values should match or not.
+ * @param output_size Optional value which allows users to specify the exact output size.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned vectors.
*
@@ -283,6 +254,7 @@ struct hash_join::hash_join_impl {
            std::unique_ptr<rmm::device_uvector<size_type>>>
probe_join_indices(cudf::table_view const& probe,
null_equality compare_nulls,
+                     std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const;
};
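The replacement of size estimation with an exact count follows the common two-pass pattern: one pass to count, one allocation, one pass to fill. A minimal Thrust sketch of the same pattern (illustrative only, not the join kernels):

```cpp
#include <thrust/copy.h>
#include <thrust/count.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>

template <typename Pred>
thrust::device_vector<int> two_pass_filter(thrust::device_vector<int> const& in, Pred pred)
{
  auto const n = thrust::count_if(thrust::device, in.begin(), in.end(), pred);  // pass 1: exact size
  thrust::device_vector<int> out(n);                                            // one allocation
  thrust::copy_if(thrust::device, in.begin(), in.end(), out.begin(), pred);     // pass 2: fill
  return out;
}
```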
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index f2e4bab02c6..6cb04cadcac 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -50,11 +50,11 @@ inner_join(table_view const& left_input,
// build the hash map from the smaller table.
if (right.num_rows() > left.num_rows()) {
cudf::hash_join hj_obj(left, compare_nulls, stream);
- auto result = hj_obj.inner_join(right, compare_nulls, stream, mr);
+ auto result = hj_obj.inner_join(right, compare_nulls, std::nullopt, stream, mr);
return std::make_pair(std::move(result.second), std::move(result.first));
} else {
cudf::hash_join hj_obj(right, compare_nulls, stream);
- return hj_obj.inner_join(left, compare_nulls, stream, mr);
+ return hj_obj.inner_join(left, compare_nulls, std::nullopt, stream, mr);
}
}
@@ -112,7 +112,7 @@ left_join(table_view const& left_input,
table_view const right = matched.second.back();
cudf::hash_join hj_obj(right, compare_nulls, stream);
- return hj_obj.left_join(left, compare_nulls, stream, mr);
+ return hj_obj.left_join(left, compare_nulls, std::nullopt, stream, mr);
}
std::unique_ptr left_join(table_view const& left_input,
@@ -175,7 +175,7 @@ full_join(table_view const& left_input,
table_view const right = matched.second.back();
cudf::hash_join hj_obj(right, compare_nulls, stream);
- return hj_obj.full_join(left, compare_nulls, stream, mr);
+ return hj_obj.full_join(left, compare_nulls, std::nullopt, stream, mr);
}
std::unique_ptr full_join(table_view const& left_input,
@@ -234,30 +234,55 @@ std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::inner_join(cudf::table_view const& probe,
null_equality compare_nulls,
+                      std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
- return impl->inner_join(probe, compare_nulls, stream, mr);
+ return impl->inner_join(probe, compare_nulls, output_size, stream, mr);
}
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::left_join(cudf::table_view const& probe,
null_equality compare_nulls,
+                     std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
- return impl->left_join(probe, compare_nulls, stream, mr);
+ return impl->left_join(probe, compare_nulls, output_size, stream, mr);
}
std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
          std::unique_ptr<rmm::device_uvector<size_type>>>
hash_join::full_join(cudf::table_view const& probe,
null_equality compare_nulls,
+                     std::optional<std::size_t> output_size,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
- return impl->full_join(probe, compare_nulls, stream, mr);
+ return impl->full_join(probe, compare_nulls, output_size, stream, mr);
+}
+
+std::size_t hash_join::inner_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const
+{
+ return impl->inner_join_size(probe, compare_nulls, stream);
+}
+
+std::size_t hash_join::left_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream) const
+{
+ return impl->left_join_size(probe, compare_nulls, stream);
+}
+
+std::size_t hash_join::full_join_size(cudf::table_view const& probe,
+ null_equality compare_nulls,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const
+{
+ return impl->full_join_size(probe, compare_nulls, stream, mr);
}
// external APIs
diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu
index 70a6826d9eb..66b5bb98dbf 100644
--- a/cpp/src/labeling/label_bins.cu
+++ b/cpp/src/labeling/label_bins.cu
@@ -161,7 +161,7 @@ constexpr auto is_supported_bin_type()
struct bin_type_dispatcher {
  template <typename T, typename... Args>
  std::enable_if_t<not is_supported_bin_type<T>(), std::unique_ptr<column>> operator()(
- Args&&... args)
+ Args&&...)
{
CUDF_FAIL("Type not support for cudf::bin");
}
diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu
index e54651c8473..014828a6cad 100644
--- a/cpp/src/lists/contains.cu
+++ b/cpp/src/lists/contains.cu
@@ -88,7 +88,7 @@ struct lookup_functor {
cudf::mutable_column_device_view mutable_ret_bools,
cudf::mutable_column_device_view mutable_ret_validity,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr)
+ rmm::mr::device_memory_resource*)
{
thrust::for_each(
rmm::exec_policy(stream),
@@ -163,8 +163,6 @@ struct lookup_functor {
auto const d_lists = lists_column_device_view(*device_view);
auto const d_skeys = get_search_keys_device_iterable_view(search_key, stream);
- auto const lists_column_has_nulls = lists.has_nulls() || lists.child().has_nulls();
-
auto result_validity = make_fixed_width_column(
data_type{type_id::BOOL8}, lists.size(), cudf::mask_state::UNALLOCATED, stream, mr);
auto result_bools = make_fixed_width_column(
diff --git a/cpp/src/lists/interleave_columns.cu b/cpp/src/lists/interleave_columns.cu
index 222c37507c4..5da8aef5853 100644
--- a/cpp/src/lists/interleave_columns.cu
+++ b/cpp/src/lists/interleave_columns.cu
@@ -210,7 +210,6 @@ struct interleave_list_entries_fn {
rmm::mr::device_memory_resource* mr) const noexcept
{
auto const num_cols = input.num_columns();
- auto const num_rows = input.num_rows();
auto const table_dv_ptr = table_device_view::create(input);
// The output child column.
diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu
index 2cbd568a64c..1cd2f326f44 100644
--- a/cpp/src/replace/nulls.cu
+++ b/cpp/src/replace/nulls.cu
@@ -366,7 +366,7 @@ std::unique_ptr<cudf::column> replace_nulls_scalar_kernel_forwarder::operator()<
 std::unique_ptr<cudf::column> replace_nulls_policy_impl(cudf::column_view const& input,
cudf::replace_policy const& replace_policy,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource*)
+ rmm::mr::device_memory_resource* mr)
{
auto device_in = cudf::column_device_view::create(input);
auto index = thrust::make_counting_iterator(0);
@@ -392,7 +392,8 @@ std::unique_ptr<cudf::column> replace_nulls_policy_impl(cudf::column_view const&
gather_map.begin(),
gather_map.end(),
cudf::out_of_bounds_policy::DONT_CHECK,
- stream);
+ stream,
+ mr);
return std::move(output->release()[0]);
}
diff --git a/cpp/src/reshape/byte_cast.cu b/cpp/src/reshape/byte_cast.cu
index 89006651bac..5bbdb5988e7 100644
--- a/cpp/src/reshape/byte_cast.cu
+++ b/cpp/src/reshape/byte_cast.cu
@@ -34,10 +34,10 @@ struct byte_list_conversion {
*/
  template <typename T>
  std::enable_if_t<!std::is_integral<T>::value and !is_floating_point<T>(), std::unique_ptr<column>>
- operator()(column_view const& input_column,
- flip_endianness configuration,
- rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr) const
+ operator()(column_view const&,
+ flip_endianness,
+ rmm::cuda_stream_view,
+ rmm::mr::device_memory_resource*) const
{
CUDF_FAIL("Unsupported non-numeric and non-string column");
}
@@ -87,7 +87,7 @@ struct byte_list_conversion {
template <>
std::unique_ptr<column> byte_list_conversion::operator()<string_view>(
column_view const& input_column,
- flip_endianness configuration,
+ flip_endianness,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr) const
{
diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu
index 9a6df2a664c..328959732a0 100644
--- a/cpp/src/reshape/interleave_columns.cu
+++ b/cpp/src/reshape/interleave_columns.cu
@@ -34,7 +34,7 @@ struct interleave_columns_functor {
                     not std::is_same<T, cudf::string_view>::value and
                     not std::is_same<T, cudf::list_view>::value,
                   std::unique_ptr<column>>
- operator()(Args&&... args)
+ operator()(Args&&...)
{
CUDF_FAIL("Called `interleave_columns` on none-supported data type.");
}
diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu
index 379ceceaf17..cc918305349 100644
--- a/cpp/src/strings/convert/convert_datetime.cu
+++ b/cpp/src/strings/convert/convert_datetime.cu
@@ -911,14 +911,9 @@ struct dispatch_from_timestamps_fn {
d_timestamps.size(),
pfn);
}
-  template <typename T, std::enable_if_t<not cudf::is_timestamp<T>()>* = nullptr>
- void operator()(column_device_view const&,
- format_item const*,
- size_type,
- timestamp_units,
- const int32_t*,
- char* d_chars,
- rmm::cuda_stream_view stream) const
+
+  template <typename T, typename... Args>
+  std::enable_if_t<not cudf::is_timestamp<T>(), void> operator()(Args&&...) const
{
CUDF_FAIL("Only timestamps type are expected");
}
diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu
index 6923f8a24fd..82039ad7692 100644
--- a/cpp/src/strings/convert/convert_durations.cu
+++ b/cpp/src/strings/convert/convert_durations.cu
@@ -271,7 +271,7 @@ struct duration_to_string_fn : public duration_to_string_size_fn<T> {
return str;
}
- __device__ char* int_to_2digitstr(char* str, int min_digits, int8_t value)
+ __device__ char* int_to_2digitstr(char* str, int8_t value)
{
assert(value >= -99 && value <= 99);
value = std::abs(value);
@@ -287,11 +287,11 @@ struct duration_to_string_fn : public duration_to_string_size_fn<T> {
inline __device__ char* hour_12(char* ptr, duration_component const* timeparts)
{
- return int_to_2digitstr(ptr, 2, timeparts->hour % 12);
+ return int_to_2digitstr(ptr, timeparts->hour % 12);
}
inline __device__ char* hour_24(char* ptr, duration_component const* timeparts)
{
- return int_to_2digitstr(ptr, 2, timeparts->hour);
+ return int_to_2digitstr(ptr, timeparts->hour);
}
inline __device__ char* am_or_pm(char* ptr, duration_component const* timeparts)
{
@@ -301,11 +301,11 @@ struct duration_to_string_fn : public duration_to_string_size_fn {
}
inline __device__ char* minute(char* ptr, duration_component const* timeparts)
{
- return int_to_2digitstr(ptr, 2, timeparts->minute);
+ return int_to_2digitstr(ptr, timeparts->minute);
}
inline __device__ char* second(char* ptr, duration_component const* timeparts)
{
- return int_to_2digitstr(ptr, 2, timeparts->second);
+ return int_to_2digitstr(ptr, timeparts->second);
}
inline __device__ char* subsecond(char* ptr, duration_component const* timeparts)
@@ -446,11 +446,8 @@ struct dispatch_from_durations_fn {
}
// non-duration types throw an exception
- template <typename T, std::enable_if_t<not cudf::is_duration<T>()>* = nullptr>
- std::unique_ptr<column> operator()(column_view const&,
- std::string const& format,
- rmm::cuda_stream_view,
- rmm::mr::device_memory_resource*) const
+ template <typename T, typename... Args>
+ std::enable_if_t<not cudf::is_duration<T>(), std::unique_ptr<column>> operator()(Args&&...) const
{
CUDF_FAIL("Values for from_durations function must be a duration type.");
}
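
Every call site passed min_digits == 2, so the parameter is dropped and the helper always emits exactly two digits. For reference, a host-side equivalent of the simplified helper, as a sketch (the device version above is authoritative):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    // Writes |value| as exactly two decimal digits ("07", "42", ...) and
    // returns the pointer advanced past them.
    char* int_to_2digitstr(char* str, int8_t value)
    {
      assert(value >= -99 && value <= 99);
      int const v = std::abs(static_cast<int>(value));
      *str++ = static_cast<char>('0' + v / 10);
      *str++ = static_cast<char>('0' + v % 10);
      return str;
    }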
diff --git a/cpp/src/strings/convert/convert_hex.cu b/cpp/src/strings/convert/convert_hex.cu
index 48d25c1707f..7043174f5bf 100644
--- a/cpp/src/strings/convert/convert_hex.cu
+++ b/cpp/src/strings/convert/convert_hex.cu
@@ -103,8 +103,8 @@ struct dispatch_hex_to_integers_fn {
hex_to_integer_fn<IntegerType>{strings_column});
}
// non-integral types throw an exception
- template <typename IntegerType, std::enable_if_t<not std::is_integral<IntegerType>::value>* = nullptr>
- void operator()(column_device_view const&, mutable_column_view&, rmm::cuda_stream_view) const
+ template <typename T, typename... Args>
+ std::enable_if_t<not std::is_integral<T>::value, void> operator()(Args&&...) const
{
CUDF_FAIL("Output for hex_to_integers must be an integral type.");
}
@@ -118,6 +118,86 @@ void dispatch_hex_to_integers_fn::operator()<bool>(column_device_view const&,
CUDF_FAIL("Output for hex_to_integers must not be a boolean type.");
}
+/**
+ * @brief Functor to convert integers to hexadecimal strings
+ *
+ * @tparam IntegerType The specific integer type to convert from.
+ */
+template <typename IntegerType>
+struct integer_to_hex_fn {
+ column_device_view const d_column;
+ offset_type* d_offsets{};
+ char* d_chars{};
+
+ __device__ void byte_to_hex(uint8_t byte, char* hex)
+ {
+ hex[0] = [&] {
+ if (byte < 16) { return '0'; }
+ uint8_t const nibble = byte / 16;
+
+ byte = byte - (nibble * 16);
+ return static_cast<char>(nibble < 10 ? '0' + nibble : 'A' + (nibble - 10));
+ }();
+ hex[1] = byte < 10 ? '0' + byte : 'A' + (byte - 10);
+ }
+
+ __device__ void operator()(size_type idx)
+ {
+ if (d_column.is_null(idx)) {
+ if (!d_chars) { d_offsets[idx] = 0; }
+ return;
+ }
+
+ auto const value = d_column.element<IntegerType>(idx); // ex. 123456
+ auto value_bytes = reinterpret_cast<uint8_t const*>(&value); // 0x40E20100
+
+ // compute the number of output bytes
+ int bytes = sizeof(IntegerType);
+ int byte_index = sizeof(IntegerType);
+ while ((--byte_index > 0) && (value_bytes[byte_index] & 0xFF) == 0) { --bytes; }
+
+ // create output
+ byte_index = bytes - 1;
+ if (d_chars) {
+ auto d_buffer = d_chars + d_offsets[idx];
+ while (byte_index >= 0) {
+ byte_to_hex(value_bytes[byte_index], d_buffer);
+ d_buffer += 2;
+ --byte_index;
+ }
+ } else {
+ d_offsets[idx] = static_cast<offset_type>(bytes) * 2; // 2 hex characters per byte
+ }
+ }
+};
+
+struct dispatch_integers_to_hex_fn {
+ template <typename IntegerType, std::enable_if_t<std::is_integral_v<IntegerType>>* = nullptr>
+ std::unique_ptr<column> operator()(column_view const& input,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr) const
+ {
+ auto const d_column = column_device_view::create(input, stream);
+
+ auto children = cudf::strings::detail::make_strings_children(
+ integer_to_hex_fn<IntegerType>{*d_column}, input.size(), stream, mr);
+
+ return make_strings_column(input.size(),
+ std::move(children.first),
+ std::move(children.second),
+ input.null_count(),
+ cudf::detail::copy_bitmask(input, stream, mr),
+ stream,
+ mr);
+ }
+ // non-integral types throw an exception
+ template <typename T, typename... Args>
+ std::enable_if_t<not std::is_integral_v<T>, std::unique_ptr<column>> operator()(Args...) const
+ {
+ CUDF_FAIL("integers_to_hex only supports integral type columns");
+ }
+};
+
} // namespace
// This will convert a strings column into any integer column type.
@@ -183,6 +263,14 @@ std::unique_ptr<column> is_hex(strings_column_view const& strings,
return results;
}
+std::unique_ptr<column> integers_to_hex(column_view const& input,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+{
+ if (input.is_empty()) { return cudf::make_empty_column(data_type{type_id::STRING}); }
+ return type_dispatcher(input.type(), dispatch_integers_to_hex_fn{}, input, stream, mr);
+}
+
} // namespace detail
// external API
@@ -201,5 +289,12 @@ std::unique_ptr<column> is_hex(strings_column_view const& strings,
return detail::is_hex(strings, rmm::cuda_stream_default, mr);
}
+std::unique_ptr<column> integers_to_hex(column_view const& input,
+ rmm::mr::device_memory_resource* mr)
+{
+ CUDF_FUNC_RANGE();
+ return detail::integers_to_hex(input, rmm::cuda_stream_default, mr);
+}
+
} // namespace strings
} // namespace cudf
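
The new integers_to_hex kernel reads each value through its little-endian byte layout, scans from the most significant byte to trim leading zero bytes, and always keeps at least one byte, so zero renders as "00". A hedged usage sketch, assuming the declaration lands in cudf/strings/convert/convert_integers.hpp alongside hex_to_integers and is_hex:

    #include <cudf/strings/convert/convert_integers.hpp>
    #include <cudf_test/column_wrapper.hpp>

    void example()
    {
      // 123456 == 0x0001E240: the two leading zero bytes are trimmed.
      cudf::test::fixed_width_column_wrapper<int32_t> input{123456, 255, 0};
      auto hex = cudf::strings::integers_to_hex(input);  // "01E240", "FF", "00"
    }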
diff --git a/cpp/src/strings/split/split.cu b/cpp/src/strings/split/split.cu
index 5194bc6e86a..ae0ea4b90e6 100644
--- a/cpp/src/strings/split/split.cu
+++ b/cpp/src/strings/split/split.cu
@@ -124,7 +124,6 @@ struct split_tokenizer_fn : base_split_tokenizer {
* for string at `string_index`.
*
* @param idx Index of the delimiter in the chars column
- * @param column_count Number of output columns
* @param d_token_counts Token counts for each string
* @param d_positions The beginning byte position of each delimiter
* @param positions_count Number of delimiters
@@ -132,7 +131,6 @@ struct split_tokenizer_fn : base_split_tokenizer {
* @param d_all_tokens All output tokens for the strings column
*/
__device__ void process_tokens(size_type idx,
- size_type column_count,
size_type const* d_token_counts,
size_type const* d_positions,
size_type positions_count,
@@ -253,7 +251,6 @@ struct rsplit_tokenizer_fn : base_split_tokenizer {
* for string at `string_index`.
*
* @param idx Index of the delimiter in the chars column
- * @param column_count Number of output columns
* @param d_token_counts Token counts for each string
* @param d_positions The ending byte position of each delimiter
* @param positions_count Number of delimiters
@@ -261,7 +258,6 @@ struct rsplit_tokenizer_fn : base_split_tokenizer {
* @param d_all_tokens All output tokens for the strings column
*/
__device__ void process_tokens(size_type idx, // delimiter position index
- size_type column_count, // number of output columns
size_type const* d_token_counts, // token counts for each string
size_type const* d_positions, // end of each delimiter
size_type positions_count, // total number of delimiters
@@ -301,10 +297,9 @@ struct rsplit_tokenizer_fn : base_split_tokenizer {
*
* @param idx Index of a byte in the chars column.
* @param d_offsets Offsets values to locate the chars ranges.
- * @param chars_bytes Total number of characters to process.
* @return true if delimiter is found ending at position `idx`
*/
- __device__ bool is_delimiter(size_type idx, int32_t const* d_offsets, size_type chars_bytes) const
+ __device__ bool is_delimiter(size_type idx, int32_t const* d_offsets, size_type) const
{
auto delim_length = d_delimiter.size_bytes();
if (idx < delim_length - 1) return false;
@@ -524,24 +519,19 @@ std::unique_ptr<table> split_fn(strings_column_view const& strings_column,
});
// get the positions for every token using the delimiter positions
- thrust::for_each_n(rmm::exec_policy(stream),
- thrust::make_counting_iterator<size_type>(0),
- delimiter_count,
- [tokenizer,
- columns_count,
- d_token_counts,
- d_positions,
- delimiter_count,
- d_string_indices,
- d_tokens] __device__(size_type idx) {
- tokenizer.process_tokens(idx,
- columns_count,
- d_token_counts,
- d_positions,
- delimiter_count,
- d_string_indices,
- d_tokens);
- });
+ thrust::for_each_n(
+ rmm::exec_policy(stream),
+ thrust::make_counting_iterator<size_type>(0),
+ delimiter_count,
+ [tokenizer,
+ d_token_counts,
+ d_positions,
+ delimiter_count,
+ d_string_indices,
+ d_tokens] __device__(size_type idx) {
+ tokenizer.process_tokens(
+ idx, d_token_counts, d_positions, delimiter_count, d_string_indices, d_tokens);
+ });
// Create each column.
// - Each pair points to the strings for that column for each row.
@@ -609,12 +599,10 @@ struct whitespace_split_tokenizer_fn : base_whitespace_split_tokenizer {
* for string at `string_index`.
*
* @param idx Index of the string to process
- * @param column_count Number of output columns
* @param d_token_counts Token counts for each string
* @param d_all_tokens All output tokens for the strings column
*/
__device__ void process_tokens(size_type idx,
- size_type column_count,
size_type const* d_token_counts,
string_index_pair* d_all_tokens) const
{
@@ -660,12 +648,10 @@ struct whitespace_rsplit_tokenizer_fn : base_whitespace_split_tokenizer {
* for string at `string_index`.
*
* @param idx Index of the string to process
- * @param column_count Number of output columns
* @param d_token_counts Token counts for each string
* @param d_all_tokens All output tokens for the strings column
*/
__device__ void process_tokens(size_type idx, // string position index
- size_type column_count,
size_type const* d_token_counts,
string_index_pair* d_all_tokens) const
{
@@ -787,13 +773,12 @@ std::unique_ptr<table> whitespace_split_fn(size_type strings_count,
d_tokens,
d_tokens + (columns_count * strings_count),
string_index_pair{nullptr, 0});
- thrust::for_each_n(
- rmm::exec_policy(stream),
- thrust::make_counting_iterator<size_type>(0),
- strings_count,
- [tokenizer, columns_count, d_token_counts, d_tokens] __device__(size_type idx) {
- tokenizer.process_tokens(idx, columns_count, d_token_counts, d_tokens);
- });
+ thrust::for_each_n(rmm::exec_policy(stream),
+ thrust::make_counting_iterator<size_type>(0),
+ strings_count,
+ [tokenizer, d_token_counts, d_tokens] __device__(size_type idx) {
+ tokenizer.process_tokens(idx, d_token_counts, d_tokens);
+ });
// Create each column.
// - Each pair points to a string for that column for each row.
diff --git a/cpp/src/strings/substring.cu b/cpp/src/strings/substring.cu
index a74f6638a61..834bac5e1ac 100644
--- a/cpp/src/strings/substring.cu
+++ b/cpp/src/strings/substring.cu
@@ -238,7 +238,7 @@ void compute_substring_indices(column_device_view const& d_column,
size_type* start_char_pos,
size_type* end_char_pos,
rmm::cuda_stream_view stream,
- rmm::mr::device_memory_resource* mr)
+ rmm::mr::device_memory_resource*)
{
auto strings_count = d_column.size();
diff --git a/cpp/src/text/detokenize.cu b/cpp/src/text/detokenize.cu
index 061597ae817..4be081bcf93 100644
--- a/cpp/src/text/detokenize.cu
+++ b/cpp/src/text/detokenize.cu
@@ -124,7 +124,7 @@ struct token_row_offsets_fn {
// non-integral types throw an exception
template <typename T, typename... Args, std::enable_if_t<not cudf::is_index_type<T>()>* = nullptr>
- std::unique_ptr<rmm::device_uvector<int32_t>> operator()(Args&&... args) const
+ std::unique_ptr<rmm::device_uvector<int32_t>> operator()(Args&&...) const
{
CUDF_FAIL("The detokenize indices parameter must be an integer type.");
}
diff --git a/cpp/src/text/normalize.cu b/cpp/src/text/normalize.cu
index d286aa55bdb..84a7db5dd4e 100644
--- a/cpp/src/text/normalize.cu
+++ b/cpp/src/text/normalize.cu
@@ -129,7 +129,6 @@ struct codepoint_to_utf8_fn {
if (!d_chars) d_offsets[idx] = 0;
return;
}
- auto const d_str = d_strings.element<cudf::string_view>(idx);
auto const offset = d_cp_offsets[idx];
auto const count = d_cp_offsets[idx + 1] - offset; // number of code-points
auto str_cps = cp_data + offset; // code-points for this string
diff --git a/cpp/src/text/stemmer.cu b/cpp/src/text/stemmer.cu
index 77e457bbf16..a7bb03f389f 100644
--- a/cpp/src/text/stemmer.cu
+++ b/cpp/src/text/stemmer.cu
@@ -138,7 +138,7 @@ struct dispatch_is_letter_fn {
}
template <typename T, typename... Args, std::enable_if_t<not cudf::is_index_type<T>()>* = nullptr>
- std::unique_ptr<column> operator()(Args&&... args) const
+ std::unique_ptr<column> operator()(Args&&...) const
{
CUDF_FAIL("The is_letter indices parameter must be an integer type.");
}
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index fdaeb3ebdab..813c755cfff 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -186,6 +186,7 @@ ConfigureTest(CSV_TEST io/csv_test.cpp)
ConfigureTest(ORC_TEST io/orc_test.cpp)
ConfigureTest(PARQUET_TEST io/parquet_test.cpp)
ConfigureTest(JSON_TEST io/json_test.cpp)
+ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp)
###################################################################################################
# - sort tests ------------------------------------------------------------------------------------
@@ -217,7 +218,8 @@ ConfigureTest(COPYING_TEST
copying/shift_tests.cpp
copying/slice_tests.cpp
copying/split_tests.cpp
- copying/utility_tests.cpp)
+ copying/utility_tests.cpp
+ copying/reverse_tests.cpp)
###################################################################################################
# - utilities tests -------------------------------------------------------------------------------
diff --git a/cpp/tests/copying/copy_if_else_nested_tests.cpp b/cpp/tests/copying/copy_if_else_nested_tests.cpp
index 9ac34a3044e..7cd56f0ea43 100644
--- a/cpp/tests/copying/copy_if_else_nested_tests.cpp
+++ b/cpp/tests/copying/copy_if_else_nested_tests.cpp
@@ -102,6 +102,35 @@ TYPED_TEST(TypedCopyIfElseNestedTest, StructsWithNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result_column->view(), expected_result->view());
}
+TYPED_TEST(TypedCopyIfElseNestedTest, LongerStructsWithNulls)
+{
+ using T = TypeParam;
+
+ using namespace cudf;
+ using namespace cudf::test;
+
+ using ints = fixed_width_column_wrapper<T, int32_t>;
+ using structs = structs_column_wrapper;
+ using bools = fixed_width_column_wrapper<bool, int32_t>;
+
+ auto selector_column = bools{1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
+ 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0,
+ 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0}
+ .release();
+ auto lhs_child_1 =
+ ints{{27, -80, -24, 76, -56, 42, 5, 13, -69, -77, 61, -77, 72, 0, 31, 118, -30,
+ 86, 125, 0, 0, 0, 75, -49, 125, 60, 116, 118, 64, 20, -70, -18, 0, -25,
+ 22, -46, -89, -9, 27, -56, -77, 123, 0, -90, 87, -113, -37, 22, -22, -53, 73,
+ 99, 113, -2, -24, 113, 75, 6, 82, -58, 122, -123, -127, 19, -62, -24},
+ iterator_with_null_at(std::vector<size_type>{13, 19, 20, 21, 32, 42})};
+
+ auto lhs_structs_column = structs{{lhs_child_1}}.release();
+ auto result_column =
+ copy_if_else(lhs_structs_column->view(), lhs_structs_column->view(), selector_column->view());
+
+ CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result_column->view(), lhs_structs_column->view());
+}
+
TYPED_TEST(TypedCopyIfElseNestedTest, Lists)
{
using T = TypeParam;
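
For reference, copy_if_else selects row i from the first column where the boolean selector is true and from the second otherwise, so with both inputs aliased to the same structs column the result must equal the input; that invariant is exactly what the new LongerStructsWithNulls test asserts. A sketch of the call shape (public API in cudf/copying.hpp):

    // out[i] = selector[i] ? lhs[i] : rhs[i]
    auto out = cudf::copy_if_else(lhs->view(), rhs->view(), selector->view());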
diff --git a/cpp/tests/copying/pack_tests.cu b/cpp/tests/copying/pack_tests.cu
index f3b9cf25357..3f345689ce2 100644
--- a/cpp/tests/copying/pack_tests.cu
+++ b/cpp/tests/copying/pack_tests.cu
@@ -418,6 +418,35 @@ TEST_F(PackUnpackTest, NestedEmpty)
this->run_test(src_table);
}
}
+
+TEST_F(PackUnpackTest, NestedSliced)
+{
+ auto valids =
+ cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
+
+ using LCW = cudf::test::lists_column_wrapper<int>;
+
+ cudf::test::lists_column_wrapper<int> col0{ {{{1, 2, 3}, valids}, {4, 5}},
+ {{LCW{}, LCW{}, {7, 8}, LCW{}}, valids},
+ {{6, 12}},
+ {{{7, 8}, {{9, 10, 11}, valids}, LCW{}}, valids},
+ {{LCW{}, {-1, -2, -3, -4, -5}}, valids},
+ {LCW{}},
+ {{-10}, {-100, -200}} };
+
+ cudf::test::strings_column_wrapper col1{"Vimes", "Carrot", "Angua", "Cheery", "Detritus", "Slant", "Fred"};
+ cudf::test::fixed_width_column_wrapper<float> col2{ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f };
+
+ std::vector<std::unique_ptr<cudf::column>> children;
+ children.push_back(std::make_unique<cudf::column>(col2));
+ children.push_back(std::make_unique<cudf::column>(col0));
+ children.push_back(std::make_unique<cudf::column>(col1));
+ auto col3 = cudf::make_structs_column(static_cast<cudf::column_view>(col0).size(), std::move(children), 0, rmm::device_buffer{});
+
+ cudf::table_view t({col0, col1, col2, *col3});
+ this->run_test(t);
+}
+
// clang-format on
} // namespace test
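
The NestedSliced case round-trips a table that mixes lists, strings, and a structs column through pack/unpack: pack serializes the table into one contiguous device buffer and unpack rebuilds a zero-copy table_view over it. A minimal sketch of that round trip using the public API in cudf/copying.hpp:

    #include <cudf/copying.hpp>
    #include <cudf/table/table_view.hpp>

    // Pack `input` into one contiguous buffer, then view it again without copying.
    cudf::table_view round_trip(cudf::table_view const& input, cudf::packed_columns& storage)
    {
      storage = cudf::pack(input);   // single device allocation + metadata
      return cudf::unpack(storage);  // view over storage; valid while storage lives
    }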
diff --git a/cpp/tests/copying/reverse_tests.cpp b/cpp/tests/copying/reverse_tests.cpp
new file mode 100644
index 00000000000..7f911e0aa85
--- /dev/null
+++ b/cpp/tests/copying/reverse_tests.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/table_utilities.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <cudf/copying.hpp>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
+#include <cudf/types.hpp>
+#include <cudf/utilities/error.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/iterator/transform_iterator.h>
+
+template <typename T>
+class ReverseTypedTestFixture : public cudf::test::BaseFixture {
+};
+
+TYPED_TEST_CASE(ReverseTypedTestFixture, cudf::test::AllTypes);
+TYPED_TEST(ReverseTypedTestFixture, ReverseTable)
+{
+ using T = TypeParam;
+ constexpr cudf::size_type num_values{10};
+
+ auto input = cudf::test::fixed_width_column_wrapper