diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py
index d72fd95fea3..e3ff0d677ee 100644
--- a/ci/checks/copyright.py
+++ b/ci/checks/copyright.py
@@ -37,7 +37,7 @@
     re.compile(r"[.]flake8[.]cython$"),
     re.compile(r"meta[.]yaml$")
 ]
-ExemptFiles = []
+ExemptFiles = ["cpp/include/cudf_test/cxxopts.hpp"]
 
 # this will break starting at year 10000, which is probably OK :)
 CheckSimple = re.compile(
@@ -230,4 +230,4 @@ def checkCopyright_main():
 
 if __name__ == "__main__":
     import sys
-    sys.exit(checkCopyright_main())
\ No newline at end of file
+    sys.exit(checkCopyright_main())
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 11874db0506..c8b5a9d373b 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -89,6 +89,7 @@ outputs:
         - test -f $PREFIX/include/cudf/detail/hashing.hpp
         - test -f $PREFIX/include/cudf/detail/interop.hpp
         - test -f $PREFIX/include/cudf/detail/is_element_valid.hpp
+        - test -f $PREFIX/include/cudf/detail/join.hpp
         - test -f $PREFIX/include/cudf/detail/null_mask.hpp
         - test -f $PREFIX/include/cudf/detail/nvtx/nvtx3.hpp
         - test -f $PREFIX/include/cudf/detail/nvtx/ranges.hpp
@@ -168,6 +169,7 @@ outputs:
         - test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp
         - test -f $PREFIX/include/cudf/lists/detail/sorting.hpp
         - test -f $PREFIX/include/cudf/lists/detail/scatter_helper.cuh
+        - test -f $PREFIX/include/cudf/lists/detail/stream_compaction.hpp
         - test -f $PREFIX/include/cudf/lists/combine.hpp
         - test -f $PREFIX/include/cudf/lists/count_elements.hpp
         - test -f $PREFIX/include/cudf/lists/explode.hpp
@@ -178,6 +180,7 @@ outputs:
         - test -f $PREFIX/include/cudf/lists/gather.hpp
         - test -f $PREFIX/include/cudf/lists/lists_column_view.hpp
         - test -f $PREFIX/include/cudf/lists/sorting.hpp
+        - test -f $PREFIX/include/cudf/lists/stream_compaction.hpp
         - test -f $PREFIX/include/cudf/merge.hpp
         - test -f $PREFIX/include/cudf/null_mask.hpp
         - test -f $PREFIX/include/cudf/partitioning.hpp
diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile
index 6929b529728..4aa0bfca78a 100644
--- a/cpp/doxygen/Doxyfile
+++ b/cpp/doxygen/Doxyfile
@@ -892,7 +892,9 @@ EXCLUDE_PATTERNS       = */nvtx/* */detail/*
 # Note that the wildcards are matched against the file with absolute path, so to
 # exclude all test directories use the pattern */test/*
 
-EXCLUDE_SYMBOLS        = org::apache
+EXCLUDE_SYMBOLS        = org::apache \
+                         *_impl \
+                         *Impl
 
 # The EXAMPLE_PATH tag can be used to specify one or more files or directories
 # that contain example code fragments that are included (see the \include
@@ -2130,7 +2132,8 @@ INCLUDE_FILE_PATTERNS  =
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 
 PREDEFINED              = __device__= \
-                          __host__=
+                          __host__= \
+                          DOXYGEN_SHOULD_SKIP_THIS
 
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
 # tag can be used to specify a list of macro names that should be expanded. The
diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh
index 070ca80858b..0e99fb52186 100644
--- a/cpp/include/cudf/column/column_device_view.cuh
+++ b/cpp/include/cudf/column/column_device_view.cuh
@@ -1198,7 +1198,7 @@ struct optional_accessor {
   /**
    * @brief Constructor
    *
-   * @param col Column on which to iterator over its elements.
+   * @param _col Column on which to iterator over its elements.
    * @param with_nulls Indicates if the `col` should be checked for nulls.
    */
   optional_accessor(column_device_view const& _col, Nullate with_nulls)
diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh
index 336420ed840..43f09ff55f0 100644
--- a/cpp/include/cudf/table/experimental/row_operators.cuh
+++ b/cpp/include/cudf/table/experimental/row_operators.cuh
@@ -420,7 +420,7 @@ class self_comparator {
    * @brief Construct an owning object for performing a lexicographic comparison between two rows of
    * the same table.
    *
-   * @param table The table to compare
+   * @param t The table to compare
    * @param column_order Optional, host array the same length as a row that indicates the desired
    * ascending/descending order of each column in a row. If empty, it is assumed all columns are
    * sorted in ascending order.
diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp
index 5c3a6b128b5..920c5222552 100644
--- a/cpp/include/cudf/utilities/type_dispatcher.hpp
+++ b/cpp/include/cudf/utilities/type_dispatcher.hpp
@@ -156,41 +156,38 @@ constexpr bool is_fixed_point(cudf::type_id id)
   template <>                                         \
   struct id_to_type_impl<Id> {                        \
     using type = Type;                                \
-  }
+  };
 #endif
 
-/**
- * @brief Defines all of the mappings between C++ types and their corresponding
- * `cudf::type_id` values.
- */
-CUDF_TYPE_MAPPING(bool, type_id::BOOL8);
-CUDF_TYPE_MAPPING(int8_t, type_id::INT8);
-CUDF_TYPE_MAPPING(int16_t, type_id::INT16);
-CUDF_TYPE_MAPPING(int32_t, type_id::INT32);
-CUDF_TYPE_MAPPING(int64_t, type_id::INT64);
-CUDF_TYPE_MAPPING(uint8_t, type_id::UINT8);
-CUDF_TYPE_MAPPING(uint16_t, type_id::UINT16);
-CUDF_TYPE_MAPPING(uint32_t, type_id::UINT32);
-CUDF_TYPE_MAPPING(uint64_t, type_id::UINT64);
-CUDF_TYPE_MAPPING(float, type_id::FLOAT32);
-CUDF_TYPE_MAPPING(double, type_id::FLOAT64);
-CUDF_TYPE_MAPPING(cudf::string_view, type_id::STRING);
-CUDF_TYPE_MAPPING(cudf::timestamp_D, type_id::TIMESTAMP_DAYS);
-CUDF_TYPE_MAPPING(cudf::timestamp_s, type_id::TIMESTAMP_SECONDS);
-CUDF_TYPE_MAPPING(cudf::timestamp_ms, type_id::TIMESTAMP_MILLISECONDS);
-CUDF_TYPE_MAPPING(cudf::timestamp_us, type_id::TIMESTAMP_MICROSECONDS);
-CUDF_TYPE_MAPPING(cudf::timestamp_ns, type_id::TIMESTAMP_NANOSECONDS);
-CUDF_TYPE_MAPPING(cudf::duration_D, type_id::DURATION_DAYS);
-CUDF_TYPE_MAPPING(cudf::duration_s, type_id::DURATION_SECONDS);
-CUDF_TYPE_MAPPING(cudf::duration_ms, type_id::DURATION_MILLISECONDS);
-CUDF_TYPE_MAPPING(cudf::duration_us, type_id::DURATION_MICROSECONDS);
-CUDF_TYPE_MAPPING(cudf::duration_ns, type_id::DURATION_NANOSECONDS);
-CUDF_TYPE_MAPPING(dictionary32, type_id::DICTIONARY32);
-CUDF_TYPE_MAPPING(cudf::list_view, type_id::LIST);
-CUDF_TYPE_MAPPING(numeric::decimal32, type_id::DECIMAL32);
-CUDF_TYPE_MAPPING(numeric::decimal64, type_id::DECIMAL64);
-CUDF_TYPE_MAPPING(numeric::decimal128, type_id::DECIMAL128);
-CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT);
+// Defines all of the mappings between C++ types and their corresponding `cudf::type_id` values.
+CUDF_TYPE_MAPPING(bool, type_id::BOOL8)
+CUDF_TYPE_MAPPING(int8_t, type_id::INT8)
+CUDF_TYPE_MAPPING(int16_t, type_id::INT16)
+CUDF_TYPE_MAPPING(int32_t, type_id::INT32)
+CUDF_TYPE_MAPPING(int64_t, type_id::INT64)
+CUDF_TYPE_MAPPING(uint8_t, type_id::UINT8)
+CUDF_TYPE_MAPPING(uint16_t, type_id::UINT16)
+CUDF_TYPE_MAPPING(uint32_t, type_id::UINT32)
+CUDF_TYPE_MAPPING(uint64_t, type_id::UINT64)
+CUDF_TYPE_MAPPING(float, type_id::FLOAT32)
+CUDF_TYPE_MAPPING(double, type_id::FLOAT64)
+CUDF_TYPE_MAPPING(cudf::string_view, type_id::STRING)
+CUDF_TYPE_MAPPING(cudf::timestamp_D, type_id::TIMESTAMP_DAYS)
+CUDF_TYPE_MAPPING(cudf::timestamp_s, type_id::TIMESTAMP_SECONDS)
+CUDF_TYPE_MAPPING(cudf::timestamp_ms, type_id::TIMESTAMP_MILLISECONDS)
+CUDF_TYPE_MAPPING(cudf::timestamp_us, type_id::TIMESTAMP_MICROSECONDS)
+CUDF_TYPE_MAPPING(cudf::timestamp_ns, type_id::TIMESTAMP_NANOSECONDS)
+CUDF_TYPE_MAPPING(cudf::duration_D, type_id::DURATION_DAYS)
+CUDF_TYPE_MAPPING(cudf::duration_s, type_id::DURATION_SECONDS)
+CUDF_TYPE_MAPPING(cudf::duration_ms, type_id::DURATION_MILLISECONDS)
+CUDF_TYPE_MAPPING(cudf::duration_us, type_id::DURATION_MICROSECONDS)
+CUDF_TYPE_MAPPING(cudf::duration_ns, type_id::DURATION_NANOSECONDS)
+CUDF_TYPE_MAPPING(dictionary32, type_id::DICTIONARY32)
+CUDF_TYPE_MAPPING(cudf::list_view, type_id::LIST)
+CUDF_TYPE_MAPPING(numeric::decimal32, type_id::DECIMAL32)
+CUDF_TYPE_MAPPING(numeric::decimal64, type_id::DECIMAL64)
+CUDF_TYPE_MAPPING(numeric::decimal128, type_id::DECIMAL128)
+CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT)
 
 /**
  * @brief Use this specialization on `type_dispatcher` whenever you only need to operate on the
@@ -210,6 +207,12 @@ struct type_to_scalar_type_impl {
   using ScalarType = cudf::scalar;
 };
 
+/**
+ * @brief Macro used to define scalar type and scalar device type for
+ * `cudf::numeric_scalar` template class for numeric C++ types.
+ *
+ * @param Type The numeric C++ type
+ */
 #ifndef MAP_NUMERIC_SCALAR
 #define MAP_NUMERIC_SCALAR(Type)                                     \
   template <>                                                        \
@@ -230,7 +233,7 @@ MAP_NUMERIC_SCALAR(uint32_t)
 MAP_NUMERIC_SCALAR(uint64_t)
 MAP_NUMERIC_SCALAR(float)
 MAP_NUMERIC_SCALAR(double)
-MAP_NUMERIC_SCALAR(bool);
+MAP_NUMERIC_SCALAR(bool)
 
 template <>
 struct type_to_scalar_type_impl<std::string> {
@@ -281,6 +284,12 @@ struct type_to_scalar_type_impl<cudf::struct_view> {
   // using ScalarDeviceType = cudf::struct_scalar_device_view; // CALEB: TODO!
 };
 
+/**
+ * @brief Macro used to define scalar type and scalar device type for
+ * `cudf::timestamp_scalar` template class for timestamp C++ types.
+ *
+ * @param Type The timestamp C++ type
+ */
 #ifndef MAP_TIMESTAMP_SCALAR
 #define MAP_TIMESTAMP_SCALAR(Type)                                     \
   template <>                                                          \
@@ -296,6 +305,12 @@ MAP_TIMESTAMP_SCALAR(timestamp_ms)
 MAP_TIMESTAMP_SCALAR(timestamp_us)
 MAP_TIMESTAMP_SCALAR(timestamp_ns)
 
+/**
+ * @brief Macro used to define scalar type and scalar device type for
+ * `cudf::duration_scalar` template class for duration C++ types.
+ *
+ * @param Type The duration C++ type
+ */
 #ifndef MAP_DURATION_SCALAR
 #define MAP_DURATION_SCALAR(Type)                                     \
   template <>                                                         \
diff --git a/cpp/include/cudf_test/cxxopts.hpp b/cpp/include/cudf_test/cxxopts.hpp
index e3ff4fccfe3..d0fc3c7e38c 100644
--- a/cpp/include/cudf_test/cxxopts.hpp
+++ b/cpp/include/cudf_test/cxxopts.hpp
@@ -20,6 +20,8 @@ THE SOFTWARE.
 #ifndef CXXOPTS_HPP_INCLUDED
 #define CXXOPTS_HPP_INCLUDED
 
+#ifndef DOXYGEN_SHOULD_SKIP_THIS
+
 #include <cctype>
 #include <cstring>
 #include <exception>
@@ -1498,4 +1500,5 @@ inline const HelpGroupDetails& Options::group_help(const std::string& group) con
 
 }  // namespace cxxopts
 
+#endif  // DOXYGEN_SHOULD_SKIP_THIS
 #endif  // CXXOPTS_HPP_INCLUDED
diff --git a/cpp/include/nvtext/bpe_tokenize.hpp b/cpp/include/nvtext/bpe_tokenize.hpp
index 23fcd3acd03..dcd24674029 100644
--- a/cpp/include/nvtext/bpe_tokenize.hpp
+++ b/cpp/include/nvtext/bpe_tokenize.hpp
@@ -107,7 +107,7 @@ std::unique_ptr<bpe_merge_pairs> load_merge_pairs_file(
  * @throw cudf::logic_error if `separator` is invalid
  *
  * @param input Strings to encode.
- * @param merge_pairs Created by a call to @ref nvtext::load_merge_pairs_file.
+ * @param merges_pairs Created by a call to @ref nvtext::load_merge_pairs_file.
  * @param separator String used to build the output after encoding.
  *                  Default is a space.
  * @param mr Memory resource to allocate any returned objects.
diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp
index e5da4794ca3..be64a9c9bc1 100644
--- a/cpp/src/interop/dlpack.cpp
+++ b/cpp/src/interop/dlpack.cpp
@@ -148,20 +148,33 @@ std::unique_ptr<table> from_dlpack(DLManagedTensor const* managed_tensor,
     CUDF_EXPECTS(tensor.device.device_id == device_id, "DLTensor device ID must be current device");
   }
 
-  // Currently only 1D and 2D tensors are supported
-  CUDF_EXPECTS(tensor.ndim > 0 && tensor.ndim <= 2, "DLTensor must be 1D or 2D");
-
+  // We only support 1D and 2D tensors with some restrictions on layout
+  if (tensor.ndim == 1) {
+    // 1D tensors must have dense layout (strides == nullptr <=> dense row-major)
+    CUDF_EXPECTS(nullptr == tensor.strides || tensor.strides[0] == 1,
+                 "from_dlpack of 1D DLTensor only for unit-stride data");
+  } else if (tensor.ndim == 2) {
+    // 2D tensors must have column-major layout and the fastest dimension must have dense layout
+    CUDF_EXPECTS((
+                   // 1D tensor reshaped into (N, 1) is fine
+                   tensor.shape[1] == 1 && (nullptr == tensor.strides || tensor.strides[0] == 1))
+                   // General case
+                   || (nullptr != tensor.strides && tensor.strides[0] == 1 &&
+                       tensor.strides[1] >= tensor.shape[0]),
+                 "from_dlpack of 2D DLTensor only for column-major unit-stride data");
+  } else {
+    CUDF_FAIL("DLTensor must be 1D or 2D");
+  }
   CUDF_EXPECTS(tensor.shape[0] >= 0,
-               "DLTensor first dim should be of shape greater than or equal-to 0.");
+               "DLTensor first dim should be of shape greater than or equal to 0.");
   CUDF_EXPECTS(tensor.shape[0] < std::numeric_limits<size_type>::max(),
                "DLTensor first dim exceeds size supported by cudf");
   if (tensor.ndim > 1) {
     CUDF_EXPECTS(tensor.shape[1] >= 0,
-                 "DLTensor second dim should be of shape greater than or equal-to 0.");
+                 "DLTensor second dim should be of shape greater than or equal to 0.");
     CUDF_EXPECTS(tensor.shape[1] < std::numeric_limits<size_type>::max(),
                  "DLTensor second dim exceeds size supported by cudf");
   }
-
   size_t const num_columns = (tensor.ndim == 2) ? static_cast<size_t>(tensor.shape[1]) : 1;
 
   // Validate and convert data type to cudf
diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu
index cd070d28f38..d20155b4720 100644
--- a/cpp/src/io/csv/reader_impl.cu
+++ b/cpp/src/io/csv/reader_impl.cu
@@ -43,6 +43,7 @@
 #include <rmm/cuda_stream_view.hpp>
 
 #include <thrust/host_vector.h>
+#include <thrust/iterator/counting_iterator.h>
 
 #include <algorithm>
 #include <iostream>
@@ -696,37 +697,62 @@ table_with_metadata read_csv(cudf::io::datasource* source,
 
     column_flags.resize(num_actual_columns, column_parse::enabled | column_parse::inferred);
 
+    std::vector<size_t> col_loop_order(column_names.size());
+    auto unnamed_it = std::copy_if(
+      thrust::make_counting_iterator<size_t>(0),
+      thrust::make_counting_iterator<size_t>(column_names.size()),
+      col_loop_order.begin(),
+      [&column_names](auto col_idx) -> bool { return not column_names[col_idx].empty(); });
     // Rename empty column names to "Unnamed: col_index"
-    for (size_t col_idx = 0; col_idx < column_names.size(); ++col_idx) {
-      if (column_names[col_idx].empty()) {
-        column_names[col_idx] = string("Unnamed: ") + std::to_string(col_idx);
-      }
-    }
+    std::copy_if(thrust::make_counting_iterator<size_t>(0),
+                 thrust::make_counting_iterator<size_t>(column_names.size()),
+                 unnamed_it,
+                 [&column_names](auto col_idx) -> bool {
+                   auto is_empty = column_names[col_idx].empty();
+                   if (is_empty)
+                     column_names[col_idx] = string("Unnamed: ") + std::to_string(col_idx);
+                   return is_empty;
+                 });
 
     // Looking for duplicates
-    std::unordered_map<string, int> col_names_histogram;
-    for (auto& col_name : column_names) {
-      // Operator [] inserts a default-initialized value if the given key is not
-      // present
-      if (++col_names_histogram[col_name] > 1) {
-        if (reader_opts.is_enabled_mangle_dupe_cols()) {
-          // Rename duplicates of column X as X.1, X.2, ...; First appearance
-          // stays as X
-          do {
-            col_name += "." + std::to_string(col_names_histogram[col_name] - 1);
-          } while (col_names_histogram[col_name]++);
-        } else {
+    std::unordered_map<string, int> col_names_counts;
+    if (!reader_opts.is_enabled_mangle_dupe_cols()) {
+      for (auto& col_name : column_names) {
+        if (++col_names_counts[col_name] > 1) {
           // All duplicate columns will be ignored; First appearance is parsed
           const auto idx    = &col_name - column_names.data();
           column_flags[idx] = column_parse::disabled;
         }
       }
+    } else {
+      // For constant/linear search.
+      std::unordered_multiset<std::string> header(column_names.begin(), column_names.end());
+      for (auto const col_idx : col_loop_order) {
+        auto col       = column_names[col_idx];
+        auto cur_count = col_names_counts[col];
+        if (cur_count > 0) {
+          auto const old_col = col;
+          // Rename duplicates of column X as X.1, X.2, ...; First appearance stays as X
+          while (cur_count > 0) {
+            col_names_counts[old_col] = cur_count + 1;
+            col                       = old_col + "." + std::to_string(cur_count);
+            if (header.find(col) != header.end()) {
+              cur_count++;
+            } else {
+              cur_count = col_names_counts[col];
+            }
+          }
+          if (auto pos = header.find(old_col); pos != header.end()) { header.erase(pos); }
+          header.insert(col);
+          column_names[col_idx] = col;
+        }
+        col_names_counts[col] = cur_count + 1;
+      }
     }
 
-    // Update the number of columns to be processed, if some might have been
-    // removed
+    // Update the number of columns to be processed, if some might have been removed
     if (!reader_opts.is_enabled_mangle_dupe_cols()) {
-      num_active_columns = col_names_histogram.size();
+      num_active_columns = col_names_counts.size();
     }
   }
 
diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu
index 35c74178620..d698c547a61 100644
--- a/cpp/src/stream_compaction/distinct.cu
+++ b/cpp/src/stream_compaction/distinct.cu
@@ -59,7 +59,7 @@ std::unique_ptr<table> distinct(table_view const& input,
   auto keys_view = input.select(keys);
   auto preprocessed_keys =
     cudf::experimental::row::hash::preprocessed_table::create(keys_view, stream);
-  auto has_null = nullate::DYNAMIC{cudf::has_nulls(keys_view)};
+  auto const has_null = nullate::DYNAMIC{cudf::has_nested_nulls(keys_view)};
   auto const num_rows{keys_view.num_rows()};
 
   hash_map_type key_map{compute_hash_table_size(num_rows),
diff --git a/cpp/tests/interop/dlpack_test.cpp b/cpp/tests/interop/dlpack_test.cpp
index 2528c3e5a83..a722f66951f 100644
--- a/cpp/tests/interop/dlpack_test.cpp
+++ b/cpp/tests/interop/dlpack_test.cpp
@@ -208,6 +208,120 @@ TEST_F(DLPackUntypedTests, UnsupportedLanesFromDlpack)
   EXPECT_THROW(cudf::from_dlpack(tensor.get()), cudf::logic_error);
 }
 
+TEST_F(DLPackUntypedTests, UnsupportedBroadcast1DTensorFromDlpack)
+{
+  using T            = float;
+  constexpr int ndim = 1;
+  // Broadcasted (stride-0) 1D tensor
+  auto const data       = cudf::test::make_type_param_vector<T>({1});
+  int64_t shape[ndim]   = {5};
+  int64_t strides[ndim] = {0};
+
+  DLManagedTensor tensor{};
+  tensor.dl_tensor.device.device_type = kDLCPU;
+  tensor.dl_tensor.dtype              = get_dtype<T>();
+  tensor.dl_tensor.ndim               = ndim;
+  tensor.dl_tensor.byte_offset        = 0;
+  tensor.dl_tensor.shape              = shape;
+  tensor.dl_tensor.strides            = strides;
+
+  thrust::host_vector<T> host_vector(data.begin(), data.end());
+  tensor.dl_tensor.data = host_vector.data();
+
+  EXPECT_THROW(cudf::from_dlpack(&tensor), cudf::logic_error);
+}
+
+TEST_F(DLPackUntypedTests, UnsupportedStrided1DTensorFromDlpack)
+{
+  using T            = float;
+  constexpr int ndim = 1;
+  // Strided 1D tensor
+  auto const data       = cudf::test::make_type_param_vector<T>({1, 2, 3, 4});
+  int64_t shape[ndim]   = {2};
+  int64_t strides[ndim] = {2};
+
+  DLManagedTensor tensor{};
+  tensor.dl_tensor.device.device_type = kDLCPU;
+  tensor.dl_tensor.dtype              = get_dtype<T>();
+  tensor.dl_tensor.ndim               = ndim;
+  tensor.dl_tensor.byte_offset        = 0;
+  tensor.dl_tensor.shape              = shape;
+  tensor.dl_tensor.strides            = strides;
+
+  thrust::host_vector<T> host_vector(data.begin(), data.end());
+  tensor.dl_tensor.data = host_vector.data();
+
+  EXPECT_THROW(cudf::from_dlpack(&tensor), cudf::logic_error);
+}
+
+TEST_F(DLPackUntypedTests, UnsupportedImplicitRowMajor2DTensorFromDlpack)
+{
+  using T            = float;
+  constexpr int ndim = 2;
+  // Row major 2D tensor
+  auto const data     = cudf::test::make_type_param_vector<T>({1, 2, 3, 4});
+  int64_t shape[ndim] = {2, 2};
+
+  DLManagedTensor tensor{};
+  tensor.dl_tensor.device.device_type = kDLCPU;
+  tensor.dl_tensor.dtype              = get_dtype<T>();
+  tensor.dl_tensor.ndim               = ndim;
+  tensor.dl_tensor.byte_offset        = 0;
+  tensor.dl_tensor.shape              = shape;
+  tensor.dl_tensor.strides            = nullptr;
+
+  thrust::host_vector<T> host_vector(data.begin(), data.end());
+  tensor.dl_tensor.data = host_vector.data();
+
+  EXPECT_THROW(cudf::from_dlpack(&tensor), cudf::logic_error);
+}
+
+TEST_F(DLPackUntypedTests, UnsupportedExplicitRowMajor2DTensorFromDlpack)
+{
+  using T            = float;
+  constexpr int ndim = 2;
+  // Row major 2D tensor with explicit strides
+  auto const data       = cudf::test::make_type_param_vector<T>({1, 2, 3, 4});
+  int64_t shape[ndim]   = {2, 2};
+  int64_t strides[ndim] = {2, 1};
+
+  DLManagedTensor tensor{};
+  tensor.dl_tensor.device.device_type = kDLCPU;
+  tensor.dl_tensor.dtype              = get_dtype<T>();
+  tensor.dl_tensor.ndim               = ndim;
+  tensor.dl_tensor.byte_offset        = 0;
+  tensor.dl_tensor.shape              = shape;
+  tensor.dl_tensor.strides            = strides;
+
+  thrust::host_vector<T> host_vector(data.begin(), data.end());
+  tensor.dl_tensor.data = host_vector.data();
+
+  EXPECT_THROW(cudf::from_dlpack(&tensor), cudf::logic_error);
+}
+
+TEST_F(DLPackUntypedTests, UnsupportedStridedColMajor2DTensorFromDlpack)
+{
+  using T            = float;
+  constexpr int ndim = 2;
+  // Column major, but strided in fastest dimension
+  auto const data       = cudf::test::make_type_param_vector<T>({1, 2, 3, 4, 5, 6, 7, 8});
+  int64_t shape[ndim]   = {2, 2};
+  int64_t strides[ndim] = {2, 4};
+
+  DLManagedTensor tensor{};
+  tensor.dl_tensor.device.device_type = kDLCPU;
+  tensor.dl_tensor.dtype              = get_dtype<T>();
+  tensor.dl_tensor.ndim               = ndim;
+  tensor.dl_tensor.byte_offset        = 0;
+  tensor.dl_tensor.shape              = shape;
+  tensor.dl_tensor.strides            = strides;
+
+  thrust::host_vector<T> host_vector(data.begin(), data.end());
+  tensor.dl_tensor.data = host_vector.data();
+
+  EXPECT_THROW(cudf::from_dlpack(&tensor), cudf::logic_error);
+}
+
 template <typename T>
 class DLPackTimestampTests : public BaseFixture {
 };
diff --git a/cpp/tests/stream_compaction/distinct_tests.cpp b/cpp/tests/stream_compaction/distinct_tests.cpp
index 13065dcec8b..1c3e07dad2d 100644
--- a/cpp/tests/stream_compaction/distinct_tests.cpp
+++ b/cpp/tests/stream_compaction/distinct_tests.cpp
@@ -439,6 +439,46 @@ TEST_F(Distinct, SlicedStructsOfLists)
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result);
 }
 
+TEST_F(Distinct, StructWithNullElement)
+{
+  using FWCW = cudf::test::fixed_width_column_wrapper<int>;
+  using MASK = std::vector<bool>;
+
+  /*
+    `@` indicates null
+
+       /+-------------+
+       |s1{s2{a,b}, c}|
+       +--------------+
+     0 |  { {1, 1}, 2}|
+     1 |  {@{1, 1}, 2}|
+       +--------------+
+  */
+
+  auto col_a   = FWCW{1, 1};
+  auto col_b   = FWCW{1, 1};
+  auto s2_mask = MASK{1, 0};
+  auto col_c   = FWCW{2, 2};
+  auto s1_mask = MASK{1, 1};
+  auto idx     = FWCW{0, 1};
+
+  std::vector<std::unique_ptr<cudf::column>> s2_children;
+  s2_children.push_back(col_a.release());
+  s2_children.push_back(col_b.release());
+  auto s2 = cudf::test::structs_column_wrapper(std::move(s2_children), s2_mask);
+
+  std::vector<std::unique_ptr<cudf::column>> s1_children;
+  s1_children.push_back(s2.release());
+  s1_children.push_back(col_c.release());
+  auto s1 = cudf::test::structs_column_wrapper(std::move(s1_children), s1_mask);
+
+  auto input = cudf::table_view({idx, s1});
+
+  auto result        = cudf::distinct(input, {1});
+  auto sorted_result = cudf::sort_by_key(*result, result->select({0}));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(input.column(1), sorted_result->get_column(1));
+}
+
 TEST_F(Distinct, ListOfEmptyStruct)
 {
   // 0.  []             ==
diff --git a/docs/cudf/source/api_docs/general_functions.rst b/docs/cudf/source/api_docs/general_functions.rst
index a4a08a6be7f..c666bcb147d 100644
--- a/docs/cudf/source/api_docs/general_functions.rst
+++ b/docs/cudf/source/api_docs/general_functions.rst
@@ -23,6 +23,7 @@ Top-level conversions
    :toctree: api/
 
     cudf.to_numeric
+    cudf.from_dlpack
 
 Top-level dealing with datetimelike
 -----------------------------------
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 09a4754f519..dc05e8d1937 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -4519,7 +4519,6 @@ def ngrams(self, n: int = 2, separator: str = "_") -> SeriesOrIndex:
         --------
         >>> import cudf
         >>> str_series = cudf.Series(['this is my', 'favorite book'])
-        >>> str_series = cudf.Series(['this is my', 'favorite book'])
         >>> str_series.str.ngrams(2, "_")
         0    this is my_favorite book
         dtype: object
diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index acad2507292..6ddc973b1a0 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -473,20 +473,27 @@ def test_csv_reader_usecols_int_char(tmpdir, pd_mixed_dataframe):
     assert_eq(df_out, out, check_names=False)
 
 
-def test_csv_reader_mangle_dupe_cols(tmpdir):
-    buffer = "abc,ABC,abc,abcd,abc\n1,2,3,4,5\n"
-
+@pytest.mark.parametrize(
+    "buffer",
+    [
+        "abc,ABC,abc,abcd,abc\n1,2,3,4,5\n",
+        "A,A,A.1,A,A.2,A,A.4,A,A\n1,2,3.1,4,a.2,a,a.4,a,a",
+        "A,A,A.1,,Unnamed: 4,A,A.4,A,A\n1,2,3.1,4,a.2,a,a.4,a,a",
+    ],
+)
+@pytest.mark.parametrize("mangle_dupe_cols", [True, False])
+def test_csv_reader_mangle_dupe_cols(tmpdir, buffer, mangle_dupe_cols):
     # Default: mangle_dupe_cols=True
-    pd_df = pd.read_csv(StringIO(buffer))
-    cu_df = read_csv(StringIO(buffer))
+    cu_df = read_csv(StringIO(buffer), mangle_dupe_cols=mangle_dupe_cols)
+    if mangle_dupe_cols:
+        pd_df = pd.read_csv(StringIO(buffer))
+    else:
+        # Pandas does not support mangle_dupe_cols=False
+        head = buffer.split("\n")[0].split(",")
+        first_cols = np.unique(head, return_index=True)[1]
+        pd_df = pd.read_csv(StringIO(buffer), usecols=first_cols)
     assert_eq(cu_df, pd_df)
 
-    # Pandas does not support mangle_dupe_cols=False
-    cu_df = read_csv(StringIO(buffer), mangle_dupe_cols=False)
-    # check that the dupe columns were removed
-    assert len(cu_df.columns) == 3
-    np.testing.assert_array_equal(cu_df["abc"].to_numpy(), [1])
-
 
 def test_csv_reader_float_decimal(tmpdir):
     fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file12.csv")