From 2808a5c1d83a17169a204bbfe66a3e6686f038d8 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 11 Jan 2021 12:07:11 -0500
Subject: [PATCH 001/138] Add a compute_hash_join_indices that returns just the
 join indices

---
 cpp/src/join/hash_join.cu  | 28 ++++++++++++++++++++++++----
 cpp/src/join/hash_join.cuh | 10 ++++++++++
 2 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index c2c32d4165a..5fc979941b9 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -573,8 +573,8 @@ std::unique_ptr<cudf::table> hash_join::hash_join_impl::full_join(
 }
 
 template <cudf::detail::join_kind JoinKind>
-std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>>
-hash_join::hash_join_impl::compute_hash_join(
+std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>
+hash_join::hash_join_impl::compute_hash_join_indices(
   cudf::table_view const &probe,
   std::vector<size_type> const &probe_on,
   std::vector<std::pair<cudf::size_type, cudf::size_type>> const &columns_in_common,
@@ -601,7 +601,7 @@ hash_join::hash_join_impl::compute_hash_join(
                "Invalid values passed to columns_in_common");
 
   if (is_trivial_join(probe, _build, probe_on, _build_on, JoinKind)) {
-    return get_empty_joined_table(probe, _build, columns_in_common, common_columns_output_side);
+    return std::make_pair(rmm::device_vector<size_type>{}, rmm::device_vector<size_type>{});
   }
 
   auto probe_selected = probe.select(probe_on);
@@ -615,7 +615,27 @@ hash_join::hash_join_impl::compute_hash_join(
   constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN)
                                                       ? cudf::detail::join_kind::LEFT_JOIN
                                                       : JoinKind;
-  auto joined_indices = probe_join_indices<ProbeJoinKind>(probe_selected, compare_nulls, stream);
+  return probe_join_indices<ProbeJoinKind>(probe_selected, compare_nulls, stream);
+}
+
+template <cudf::detail::join_kind JoinKind>
+std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>>
+hash_join::hash_join_impl::compute_hash_join(
+  cudf::table_view const &probe,
+  std::vector<size_type> const &probe_on,
+  std::vector<std::pair<cudf::size_type, cudf::size_type>> const &columns_in_common,
+  common_columns_output_side common_columns_output_side,
+  null_equality compare_nulls,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource *mr) const
+{
+  auto joined_indices = compute_hash_join_indices<JoinKind>(
+    probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
+
+  if (is_trivial_join(probe, _build, probe_on, _build_on, JoinKind)) {
+    return get_empty_joined_table(probe, _build, columns_in_common, common_columns_output_side);
+  }
+
   return cudf::detail::construct_join_output_df<JoinKind>(
     probe, _build, joined_indices, columns_in_common, common_columns_output_side, stream, mr);
 }
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index 712d771bd73..00dfc492260 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -257,6 +257,16 @@ struct hash_join::hash_join_impl {
     rmm::mr::device_memory_resource* mr) const;
 
  private:
+  template <cudf::detail::join_kind JoinKind>
+  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> compute_hash_join_indices(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
+    common_columns_output_side common_columns_output_side,
+    null_equality compare_nulls,
+    rmm::cuda_stream_view stream,
+    rmm::mr::device_memory_resource* mr) const;
+
   /**
    * @brief Performs hash join by probing the columns provided in `probe` as per
    * the joining indices given in `probe_on` and returns a (`probe`, `_build`) table pair, which

From ef0baee062880f885b0506832e8d4fd50269c747 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 11 Jan 2021 13:06:59 -0500
Subject: [PATCH 002/138] Don't need common_columns stuff for join that returns
 a gathermap

---
 cpp/src/join/hash_join.cu  | 38 +++++++++++++++++---------------------
 cpp/src/join/hash_join.cuh |  4 +---
 2 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 5fc979941b9..109698e3752 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -574,14 +574,11 @@ std::unique_ptr<cudf::table> hash_join::hash_join_impl::full_join(
 
 template <cudf::detail::join_kind JoinKind>
 std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>
-hash_join::hash_join_impl::compute_hash_join_indices(
-  cudf::table_view const &probe,
-  std::vector<size_type> const &probe_on,
-  std::vector<std::pair<cudf::size_type, cudf::size_type>> const &columns_in_common,
-  common_columns_output_side common_columns_output_side,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource *mr) const
+hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
+                                             std::vector<size_type> const &probe_on,
+                                             null_equality compare_nulls,
+                                             rmm::cuda_stream_view stream,
+                                             rmm::mr::device_memory_resource *mr) const
 {
   CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty");
   CUDF_EXPECTS(probe.num_rows() < cudf::detail::MAX_JOIN_SIZE,
@@ -589,17 +586,6 @@ hash_join::hash_join_impl::compute_hash_join_indices(
   CUDF_EXPECTS(_build_on.size() == probe_on.size(),
                "Mismatch in number of columns to be joined on");
 
-  CUDF_EXPECTS(std::all_of(columns_in_common.begin(),
-                           columns_in_common.end(),
-                           [this, &probe_on](auto pair) {
-                             size_t p = std::find(probe_on.begin(), probe_on.end(), pair.first) -
-                                        probe_on.begin();
-                             size_t b = std::find(_build_on.begin(), _build_on.end(), pair.second) -
-                                        _build_on.begin();
-                             return (p != probe_on.size()) && (b != _build_on.size()) && (p == b);
-                           }),
-               "Invalid values passed to columns_in_common");
-
   if (is_trivial_join(probe, _build, probe_on, _build_on, JoinKind)) {
     return std::make_pair(rmm::device_vector<size_type>{}, rmm::device_vector<size_type>{});
   }
@@ -629,8 +615,18 @@ hash_join::hash_join_impl::compute_hash_join(
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource *mr) const
 {
-  auto joined_indices = compute_hash_join_indices<JoinKind>(
-    probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
+  CUDF_EXPECTS(std::all_of(columns_in_common.begin(),
+                           columns_in_common.end(),
+                           [this, &probe_on](auto pair) {
+                             size_t p = std::find(probe_on.begin(), probe_on.end(), pair.first) -
+                                        probe_on.begin();
+                             size_t b = std::find(_build_on.begin(), _build_on.end(), pair.second) -
+                                        _build_on.begin();
+                             return (p != probe_on.size()) && (b != _build_on.size()) && (p == b);
+                           }),
+               "Invalid values passed to columns_in_common");
+
+  auto joined_indices = compute_hash_join<JoinKind>(probe, probe_on, compare_nulls, stream, mr);
 
   if (is_trivial_join(probe, _build, probe_on, _build_on, JoinKind)) {
     return get_empty_joined_table(probe, _build, columns_in_common, common_columns_output_side);
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index 00dfc492260..fb2b52e62e8 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -258,11 +258,9 @@ struct hash_join::hash_join_impl {
 
  private:
   template <cudf::detail::join_kind JoinKind>
-  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> compute_hash_join_indices(
+  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> compute_hash_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
-    std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
-    common_columns_output_side common_columns_output_side,
     null_equality compare_nulls,
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;

From 18f3074bd6bacb03b1de5d74804b767707c1238a Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 11 Jan 2021 13:32:09 -0500
Subject: [PATCH 003/138] Add hash_join_impl methods that return gathermaps

---
 cpp/src/join/hash_join.cu  | 36 ++++++++++++++++++++++++++++++++++++
 cpp/src/join/hash_join.cuh | 21 +++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 109698e3752..1c19ff9d000 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -515,6 +515,18 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const &build,
   _hash_table = build_join_hash_table(_build_selected, compare_nulls, stream);
 }
 
+std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>
+hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
+                                      std::vector<size_type> const &probe_on,
+                                      null_equality compare_nulls,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource *mr) const
+{
+  CUDF_FUNC_RANGE();
+  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
+    probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>>
 hash_join::hash_join_impl::inner_join(
   cudf::table_view const &probe,
@@ -530,6 +542,18 @@ hash_join::hash_join_impl::inner_join(
     probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
 }
 
+std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>
+hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
+                                     std::vector<size_type> const &probe_on,
+                                     null_equality compare_nulls,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource *mr) const
+{
+  CUDF_FUNC_RANGE();
+  return compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(
+    probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::unique_ptr<cudf::table> hash_join::hash_join_impl::left_join(
   cudf::table_view const &probe,
   std::vector<size_type> const &probe_on,
@@ -551,6 +575,18 @@ std::unique_ptr<cudf::table> hash_join::hash_join_impl::left_join(
                                           std::move(probe_build_pair.second));
 }
 
+std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>
+hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
+                                     std::vector<size_type> const &probe_on,
+                                     null_equality compare_nulls,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource *mr) const
+{
+  CUDF_FUNC_RANGE();
+  return compute_hash_join<cudf::detail::join_kind::FULL_JOIN>(
+    probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::unique_ptr<cudf::table> hash_join::hash_join_impl::full_join(
   cudf::table_view const &probe,
   std::vector<size_type> const &probe_on,
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index fb2b52e62e8..34f6d4f262d 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -231,6 +231,13 @@ struct hash_join::hash_join_impl {
                  null_equality compare_nulls,
                  rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
+  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> inner_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls,
+    rmm::cuda_stream_view stream,
+    rmm::mr::device_memory_resource* mr) const;
+
   std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> inner_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
@@ -240,6 +247,13 @@ struct hash_join::hash_join_impl {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
+  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> left_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls,
+    rmm::cuda_stream_view stream,
+    rmm::mr::device_memory_resource* mr) const;
+
   std::unique_ptr<cudf::table> left_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
@@ -248,6 +262,13 @@ struct hash_join::hash_join_impl {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
+  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> full_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls,
+    rmm::cuda_stream_view stream,
+    rmm::mr::device_memory_resource* mr) const;
+
   std::unique_ptr<cudf::table> full_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,

From 70abf483c5a0202fbf90ee7b956060158692a11b Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 11 Jan 2021 13:52:06 -0500
Subject: [PATCH 004/138] Add overloads to public hash_join class

---
 cpp/include/cudf/join.hpp | 22 ++++++++++++++++++++++
 cpp/src/join/join.cu      | 30 ++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index b2c1296ccef..cbe984947d1 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -20,6 +20,7 @@
 #include <cudf/types.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_vector.hpp>
 
 #include <vector>
 
@@ -412,6 +413,13 @@ class hash_join {
             ///< `inner_join`.
   };
 
+  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> inner_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls         = null_equality::EQUAL,
+    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+
   /**
    * @brief Performs an inner join by probing in the internal hash table.
    *
@@ -456,6 +464,13 @@ class hash_join {
     rmm::cuda_stream_view stream                          = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
+  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> left_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls         = null_equality::EQUAL,
+    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+
   /**
    * @brief Performs a left join by probing in the internal hash table.
    *
@@ -486,6 +501,13 @@ class hash_join {
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
+  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> full_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls         = null_equality::EQUAL,
+    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+
   /**
    * @brief Performs a full join by probing in the internal hash table.
    *
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index ce27cfcd616..87bd9786f8c 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -139,6 +139,16 @@ hash_join::hash_join(cudf::table_view const& build,
 {
 }
 
+std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> hash_join::inner_join(
+  cudf::table_view const& probe,
+  std::vector<size_type> const& probe_on,
+  null_equality compare_nulls,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr) const
+{
+  return impl->inner_join(probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> hash_join::inner_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,
@@ -152,6 +162,16 @@ std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> hash_join:
     probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
 }
 
+std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> hash_join::left_join(
+  cudf::table_view const& probe,
+  std::vector<size_type> const& probe_on,
+  null_equality compare_nulls,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr) const
+{
+  return impl->left_join(probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::unique_ptr<cudf::table> hash_join::left_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,
@@ -163,6 +183,16 @@ std::unique_ptr<cudf::table> hash_join::left_join(
   return impl->left_join(probe, probe_on, columns_in_common, compare_nulls, stream, mr);
 }
 
+std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> hash_join::full_join(
+  cudf::table_view const& probe,
+  std::vector<size_type> const& probe_on,
+  null_equality compare_nulls,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr) const
+{
+  return impl->full_join(probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::unique_ptr<cudf::table> hash_join::full_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,

From 13dff676523ea8ca91d00bf462b2a54a0dd0b9bc Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 11 Jan 2021 17:25:33 -0500
Subject: [PATCH 005/138] Add top-level join APIs that return gathermaps

---
 cpp/include/cudf/join.hpp     | 25 +++++++++++++++++++++++++
 cpp/tests/join/join_tests.cpp | 12 ++++++++----
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index cbe984947d1..91627321f59 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -31,6 +31,14 @@ namespace cudf {
  * @file
  */
 
+std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> inner_join(
+  cudf::table_view const& left,
+  cudf::table_view const& right,
+  std::vector<cudf::size_type> const& left_on,
+  std::vector<cudf::size_type> const& right_on,
+  null_equality compare_nulls         = null_equality::EQUAL,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Performs an inner join on the specified columns of two
  * tables (`left`, `right`)
@@ -97,6 +105,14 @@ std::unique_ptr<cudf::table> inner_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> left_join(
+  cudf::table_view const& left,
+  cudf::table_view const& right,
+  std::vector<cudf::size_type> const& left_on,
+  std::vector<cudf::size_type> const& right_on,
+  null_equality compare_nulls         = null_equality::EQUAL,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Performs a left join (also known as left outer join) on the
  * specified columns of two tables (`left`, `right`)
@@ -165,6 +181,14 @@ std::unique_ptr<cudf::table> left_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> full_join(
+  cudf::table_view const& left,
+  cudf::table_view const& right,
+  std::vector<cudf::size_type> const& left_on,
+  std::vector<cudf::size_type> const& right_on,
+  null_equality compare_nulls         = null_equality::EQUAL,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Performs a full join (also known as full outer join) on the
  * specified columns of two tables (`left`, `right`)
@@ -232,6 +256,7 @@ std::unique_ptr<cudf::table> full_join(
   std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Performs a left semi join on the specified columns of two
  * tables (`left`, `right`)
diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index efc5330ea7d..b3b86e5cb66 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -101,7 +101,8 @@ TEST_F(JoinTest, FullJoinNoCommon)
   exp_cols.push_back(exp_col0_1.release());
   Table gold(std::move(exp_cols));
 
-  auto result            = cudf::full_join(t0, t1, {0}, {0}, {});
+  auto result =
+    cudf::full_join(t0, t1, {0}, {0}, std::vector<std::pair<cudf::size_type, cudf::size_type>>{});
   auto result_sort_order = cudf::sorted_order(result->view());
   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
@@ -131,7 +132,8 @@ TEST_F(JoinTest, LeftJoinNoNullsWithNoCommon)
   Table t0(std::move(cols0));
   Table t1(std::move(cols1));
 
-  auto result            = cudf::left_join(t0, t1, {0}, {0}, {});
+  auto result =
+    cudf::left_join(t0, t1, {0}, {0}, std::vector<std::pair<cudf::size_type, cudf::size_type>>{});
   auto result_sort_order = cudf::sorted_order(result->view());
   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
@@ -1262,7 +1264,8 @@ TEST_F(JoinDictionaryTest, LeftJoinNoNulls)
   auto g0 = cudf::table_view({col0_0, col0_1_w, col0_2});
   auto g1 = cudf::table_view({col1_0, col1_1_w, col1_2});
   {
-    auto result      = cudf::left_join(t0, t1, {0}, {0}, {});
+    auto result =
+      cudf::left_join(t0, t1, {0}, {0}, std::vector<std::pair<cudf::size_type, cudf::size_type>>{});
     auto result_view = result->view();
     auto decoded1    = cudf::dictionary::decode(result_view.column(1));
     auto decoded4    = cudf::dictionary::decode(result_view.column(4));
@@ -1273,7 +1276,8 @@ TEST_F(JoinDictionaryTest, LeftJoinNoNulls)
                                                    decoded4->view(),
                                                    result_view.column(5)});
 
-    auto gold = cudf::left_join(g0, g1, {0}, {0}, {});
+    auto gold =
+      cudf::left_join(g0, g1, {0}, {0}, std::vector<std::pair<cudf::size_type, cudf::size_type>>{});
     CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
   }
   {

From 7ed694c00f2a9bd0214be21fc8f71da51c8489c8 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 12 Jan 2021 12:47:31 -0500
Subject: [PATCH 006/138] Use device_uvector instead of device_vector in join

---
 cpp/include/cudf/join.hpp          | 14 ++---
 cpp/src/join/hash_join.cu          | 91 ++++++++++++++++--------------
 cpp/src/join/hash_join.cuh         | 18 +++---
 cpp/src/join/join.cu               |  6 +-
 cpp/src/join/join_common_utils.hpp |  4 +-
 5 files changed, 71 insertions(+), 62 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 91627321f59..2f1b1a1ab5b 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -20,7 +20,7 @@
 #include <cudf/types.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_vector.hpp>
+#include <rmm/device_uvector.hpp>
 
 #include <vector>
 
@@ -31,7 +31,7 @@ namespace cudf {
  * @file
  */
 
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> inner_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
   cudf::table_view const& left,
   cudf::table_view const& right,
   std::vector<cudf::size_type> const& left_on,
@@ -105,7 +105,7 @@ std::unique_ptr<cudf::table> inner_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> left_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
   cudf::table_view const& left,
   cudf::table_view const& right,
   std::vector<cudf::size_type> const& left_on,
@@ -181,7 +181,7 @@ std::unique_ptr<cudf::table> left_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> full_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
   cudf::table_view const& left,
   cudf::table_view const& right,
   std::vector<cudf::size_type> const& left_on,
@@ -438,7 +438,7 @@ class hash_join {
             ///< `inner_join`.
   };
 
-  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> inner_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
     null_equality compare_nulls         = null_equality::EQUAL,
@@ -489,7 +489,7 @@ class hash_join {
     rmm::cuda_stream_view stream                          = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
-  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> left_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
     null_equality compare_nulls         = null_equality::EQUAL,
@@ -526,7 +526,7 @@ class hash_join {
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
-  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> full_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
     null_equality compare_nulls         = null_equality::EQUAL,
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 1c19ff9d000..426ed873df7 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -20,9 +20,10 @@
 #include <cudf/detail/gather.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_vector.hpp>
+#include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
+#include <iostream>
 #include <numeric>
 
 namespace cudf {
@@ -90,23 +91,25 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> get_empty_joined_table
   return std::make_pair(std::move(empty_probe), std::move(empty_build));
 }
 
-VectorPair concatenate_vector_pairs(VectorPair &a, VectorPair &b)
+VectorPair concatenate_vector_pairs(VectorPair &a, VectorPair &b, rmm::cuda_stream_view stream)
 {
   CUDF_EXPECTS((a.first.size() == a.second.size()),
                "Mismatch between sizes of vectors in vector pair");
   CUDF_EXPECTS((b.first.size() == b.second.size()),
                "Mismatch between sizes of vectors in vector pair");
-  if (a.first.empty()) {
-    return b;
-  } else if (b.first.empty()) {
-    return a;
+  if (a.first.is_empty()) {
+    return std::move(b);
+  } else if (b.first.is_empty()) {
+    return std::move(a);
   }
   auto original_size = a.first.size();
-  a.first.resize(a.first.size() + b.first.size());
-  a.second.resize(a.second.size() + b.second.size());
-  thrust::copy(b.first.begin(), b.first.end(), a.first.begin() + original_size);
-  thrust::copy(b.second.begin(), b.second.end(), a.second.begin() + original_size);
-  return a;
+  a.first.resize(a.first.size() + b.first.size(), stream);
+  a.second.resize(a.second.size() + b.second.size(), stream);
+  thrust::copy(
+    rmm::exec_policy(stream), b.first.begin(), b.first.end(), a.first.begin() + original_size);
+  thrust::copy(
+    rmm::exec_policy(stream), b.second.begin(), b.second.end(), a.second.begin() + original_size);
+  return std::move(a);
 }
 
 template <typename T>
@@ -133,8 +136,8 @@ struct valid_range {
  *
  * @return Pair of vectors containing the left join indices complement
  */
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>
-get_left_join_indices_complement(rmm::device_vector<size_type> &right_indices,
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
+get_left_join_indices_complement(rmm::device_uvector<size_type> &right_indices,
                                  size_type left_table_row_count,
                                  size_type right_table_row_count,
                                  rmm::cuda_stream_view stream)
@@ -142,7 +145,7 @@ get_left_join_indices_complement(rmm::device_vector<size_type> &right_indices,
   // Get array of indices that do not appear in right_indices
 
   // Vector allocated for unmatched result
-  rmm::device_vector<size_type> right_indices_complement(right_table_row_count);
+  rmm::device_uvector<size_type> right_indices_complement(right_table_row_count, stream);
 
   // If left table is empty in a full join call then all rows of the right table
   // should be represented in the joined indices. This is an optimization since
@@ -180,11 +183,14 @@ get_left_join_indices_complement(rmm::device_vector<size_type> &right_indices,
                                               right_indices_complement.begin(),
                                               thrust::identity<size_type>()) -
                               right_indices_complement.begin();
-    right_indices_complement.resize(indices_count);
+    right_indices_complement.resize(indices_count, stream);
   }
 
-  rmm::device_vector<size_type> left_invalid_indices(right_indices_complement.size(),
-                                                     JoinNoneValue);
+  rmm::device_uvector<size_type> left_invalid_indices(right_indices_complement.size(), stream);
+  thrust::fill(rmm::exec_policy(stream),
+               left_invalid_indices.begin(),
+               left_invalid_indices.end(),
+               JoinNoneValue);
 
   return std::make_pair(std::move(left_invalid_indices), std::move(right_indices_complement));
 }
@@ -256,7 +262,7 @@ std::unique_ptr<multimap_type, std::function<void(multimap_type *)>> build_join_
  * @return Join output indices vector pair.
  */
 template <join_kind JoinKind>
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> probe_join_hash_table(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> probe_join_hash_table(
   cudf::table_device_view build_table,
   cudf::table_device_view probe_table,
   multimap_type const &hash_table,
@@ -268,7 +274,8 @@ std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> probe_jo
 
   // If the estimated output size is zero, return immediately
   if (estimated_size == 0) {
-    return std::make_pair(rmm::device_vector<size_type>{}, rmm::device_vector<size_type>{});
+    return std::make_pair(rmm::device_uvector<size_type>{0, stream},
+                          rmm::device_uvector<size_type>{0, stream});
   }
 
   // Because we are approximating the number of joined elements, our approximation
@@ -278,12 +285,12 @@ std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> probe_jo
   rmm::device_scalar<size_type> write_index(0, stream);
   size_type join_size{0};
 
-  rmm::device_vector<size_type> left_indices;
-  rmm::device_vector<size_type> right_indices;
+  rmm::device_uvector<size_type> left_indices{0, stream};
+  rmm::device_uvector<size_type> right_indices{0, stream};
   auto current_estimated_size = estimated_size;
   do {
-    left_indices.resize(estimated_size);
-    right_indices.resize(estimated_size);
+    left_indices.resize(estimated_size, stream);
+    right_indices.resize(estimated_size, stream);
 
     constexpr int block_size{DEFAULT_JOIN_BLOCK_SIZE};
     detail::grid_1d config(probe_table.num_rows(), block_size);
@@ -292,16 +299,15 @@ std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> probe_jo
     row_hash hash_probe{probe_table};
     row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
     probe_hash_table<JoinKind, multimap_type, block_size, DEFAULT_JOIN_CACHE_SIZE>
-      <<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
-        hash_table,
-        build_table,
-        probe_table,
-        hash_probe,
-        equality,
-        left_indices.data().get(),
-        right_indices.data().get(),
-        write_index.data(),
-        estimated_size);
+      <<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(hash_table,
+                                                                               build_table,
+                                                                               probe_table,
+                                                                               hash_probe,
+                                                                               equality,
+                                                                               left_indices.data(),
+                                                                               right_indices.data(),
+                                                                               write_index.data(),
+                                                                               estimated_size);
 
     CHECK_CUDA(stream.value());
 
@@ -310,8 +316,8 @@ std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> probe_jo
     estimated_size *= 2;
   } while ((current_estimated_size < join_size));
 
-  left_indices.resize(join_size);
-  right_indices.resize(join_size);
+  left_indices.resize(join_size, stream);
+  right_indices.resize(join_size, stream);
   return std::make_pair(std::move(left_indices), std::move(right_indices));
 }
 
@@ -444,7 +450,7 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> construct_join_output_
       common_table           = cudf::detail::concatenate(
         {common_from_build->view(), common_from_probe->view()}, stream, mr);
     }
-    joined_indices = concatenate_vector_pairs(complement_indices, joined_indices);
+    joined_indices = concatenate_vector_pairs(complement_indices, joined_indices, stream);
   } else {
     if (not columns_in_common.empty()) {
       common_table = detail::gather(probe.select(probe_common_col),
@@ -515,7 +521,7 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const &build,
   _hash_table = build_join_hash_table(_build_selected, compare_nulls, stream);
 }
 
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
                                       std::vector<size_type> const &probe_on,
                                       null_equality compare_nulls,
@@ -542,7 +548,7 @@ hash_join::hash_join_impl::inner_join(
     probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
 }
 
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
                                      std::vector<size_type> const &probe_on,
                                      null_equality compare_nulls,
@@ -575,7 +581,7 @@ std::unique_ptr<cudf::table> hash_join::hash_join_impl::left_join(
                                           std::move(probe_build_pair.second));
 }
 
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
                                      std::vector<size_type> const &probe_on,
                                      null_equality compare_nulls,
@@ -609,7 +615,7 @@ std::unique_ptr<cudf::table> hash_join::hash_join_impl::full_join(
 }
 
 template <cudf::detail::join_kind JoinKind>
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
                                              std::vector<size_type> const &probe_on,
                                              null_equality compare_nulls,
@@ -623,7 +629,8 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
                "Mismatch in number of columns to be joined on");
 
   if (is_trivial_join(probe, _build, probe_on, _build_on, JoinKind)) {
-    return std::make_pair(rmm::device_vector<size_type>{}, rmm::device_vector<size_type>{});
+    return std::make_pair(rmm::device_uvector<size_type>{0, stream},
+                          rmm::device_uvector<size_type>{0, stream});
   }
 
   auto probe_selected = probe.select(probe_on);
@@ -674,7 +681,7 @@ hash_join::hash_join_impl::compute_hash_join(
 
 template <cudf::detail::join_kind JoinKind>
 std::enable_if_t<JoinKind != cudf::detail::join_kind::FULL_JOIN,
-                 std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>>
+                 std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>>
 hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
                                               null_equality compare_nulls,
                                               rmm::cuda_stream_view stream) const
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index 34f6d4f262d..fed5080d2be 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -25,7 +25,7 @@
 #include <cudf/table/table_view.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_vector.hpp>
+#include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
 #include <thrust/sequence.h>
@@ -181,12 +181,12 @@ size_type estimate_join_output_size(table_device_view build_table,
  *
  * @return Join output indices vector pair
  */
-inline std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>
+inline std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 get_trivial_left_join_indices(table_view const& left, rmm::cuda_stream_view stream)
 {
-  rmm::device_vector<size_type> left_indices(left.num_rows());
+  rmm::device_uvector<size_type> left_indices(left.num_rows(), stream);
   thrust::sequence(rmm::exec_policy(stream), left_indices.begin(), left_indices.end(), 0);
-  rmm::device_vector<size_type> right_indices(left.num_rows());
+  rmm::device_uvector<size_type> right_indices(left.num_rows(), stream);
   thrust::fill(rmm::exec_policy(stream), right_indices.begin(), right_indices.end(), JoinNoneValue);
   return std::make_pair(std::move(left_indices), std::move(right_indices));
 }
@@ -231,7 +231,7 @@ struct hash_join::hash_join_impl {
                  null_equality compare_nulls,
                  rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
-  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> inner_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
     null_equality compare_nulls,
@@ -247,7 +247,7 @@ struct hash_join::hash_join_impl {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
-  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> left_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
     null_equality compare_nulls,
@@ -262,7 +262,7 @@ struct hash_join::hash_join_impl {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
-  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> full_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
     null_equality compare_nulls,
@@ -279,7 +279,7 @@ struct hash_join::hash_join_impl {
 
  private:
   template <cudf::detail::join_kind JoinKind>
-  std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> compute_hash_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> compute_hash_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
     null_equality compare_nulls,
@@ -354,7 +354,7 @@ struct hash_join::hash_join_impl {
    */
   template <cudf::detail::join_kind JoinKind>
   std::enable_if_t<JoinKind != cudf::detail::join_kind::FULL_JOIN,
-                   std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>>
+                   std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>>
   probe_join_indices(cudf::table_view const& probe,
                      null_equality compare_nulls,
                      rmm::cuda_stream_view stream) const;
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 87bd9786f8c..08af40405ec 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -139,7 +139,7 @@ hash_join::hash_join(cudf::table_view const& build,
 {
 }
 
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> hash_join::inner_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::inner_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,
   null_equality compare_nulls,
@@ -162,7 +162,7 @@ std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> hash_join:
     probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
 }
 
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> hash_join::left_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::left_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,
   null_equality compare_nulls,
@@ -183,7 +183,7 @@ std::unique_ptr<cudf::table> hash_join::left_join(
   return impl->left_join(probe, probe_on, columns_in_common, compare_nulls, stream, mr);
 }
 
-std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>> hash_join::full_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::full_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,
   null_equality compare_nulls,
diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp
index f0c158c1ef6..9965fa496aa 100644
--- a/cpp/src/join/join_common_utils.hpp
+++ b/cpp/src/join/join_common_utils.hpp
@@ -19,6 +19,8 @@
 #include <cudf/table/row_operators.cuh>
 #include <cudf/table/table_view.hpp>
 
+#include <rmm/device_uvector.hpp>
+
 #include <hash/concurrent_unordered_multimap.cuh>
 
 #include <limits>
@@ -31,7 +33,7 @@ constexpr int DEFAULT_JOIN_BLOCK_SIZE = 128;
 constexpr int DEFAULT_JOIN_CACHE_SIZE = 128;
 constexpr size_type JoinNoneValue     = -1;
 
-using VectorPair = std::pair<rmm::device_vector<size_type>, rmm::device_vector<size_type>>;
+using VectorPair = std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>;
 
 using multimap_type =
   concurrent_unordered_multimap<hash_value_type,

From 636c2eaf81919fb0de45a3ec699fbf524bf15376 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 12 Jan 2021 15:11:28 -0500
Subject: [PATCH 007/138] Undo some API changes

---
 cpp/include/cudf/join.hpp  | 45 ----------------------------------
 cpp/src/join/hash_join.cu  | 49 ++++++--------------------------------
 cpp/src/join/hash_join.cuh | 33 +++++--------------------
 cpp/src/join/join.cu       | 30 -----------------------
 4 files changed, 13 insertions(+), 144 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 2f1b1a1ab5b..2b8fc4bb2cb 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -31,14 +31,6 @@ namespace cudf {
  * @file
  */
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
-  cudf::table_view const& left,
-  cudf::table_view const& right,
-  std::vector<cudf::size_type> const& left_on,
-  std::vector<cudf::size_type> const& right_on,
-  null_equality compare_nulls         = null_equality::EQUAL,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
 /**
  * @brief Performs an inner join on the specified columns of two
  * tables (`left`, `right`)
@@ -105,14 +97,6 @@ std::unique_ptr<cudf::table> inner_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
-  cudf::table_view const& left,
-  cudf::table_view const& right,
-  std::vector<cudf::size_type> const& left_on,
-  std::vector<cudf::size_type> const& right_on,
-  null_equality compare_nulls         = null_equality::EQUAL,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
 /**
  * @brief Performs a left join (also known as left outer join) on the
  * specified columns of two tables (`left`, `right`)
@@ -181,14 +165,6 @@ std::unique_ptr<cudf::table> left_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
-  cudf::table_view const& left,
-  cudf::table_view const& right,
-  std::vector<cudf::size_type> const& left_on,
-  std::vector<cudf::size_type> const& right_on,
-  null_equality compare_nulls         = null_equality::EQUAL,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
 /**
  * @brief Performs a full join (also known as full outer join) on the
  * specified columns of two tables (`left`, `right`)
@@ -438,13 +414,6 @@ class hash_join {
             ///< `inner_join`.
   };
 
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    null_equality compare_nulls         = null_equality::EQUAL,
-    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
-
   /**
    * @brief Performs an inner join by probing in the internal hash table.
    *
@@ -489,13 +458,6 @@ class hash_join {
     rmm::cuda_stream_view stream                          = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    null_equality compare_nulls         = null_equality::EQUAL,
-    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
-
   /**
    * @brief Performs a left join by probing in the internal hash table.
    *
@@ -526,13 +488,6 @@ class hash_join {
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    null_equality compare_nulls         = null_equality::EQUAL,
-    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
-
   /**
    * @brief Performs a full join by probing in the internal hash table.
    *
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 426ed873df7..d2d26dbc3b7 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -521,18 +521,6 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const &build,
   _hash_table = build_join_hash_table(_build_selected, compare_nulls, stream);
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
-hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
-                                      std::vector<size_type> const &probe_on,
-                                      null_equality compare_nulls,
-                                      rmm::cuda_stream_view stream,
-                                      rmm::mr::device_memory_resource *mr) const
-{
-  CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
-    probe, probe_on, compare_nulls, stream, mr);
-}
-
 std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>>
 hash_join::hash_join_impl::inner_join(
   cudf::table_view const &probe,
@@ -548,18 +536,6 @@ hash_join::hash_join_impl::inner_join(
     probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
-hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
-                                     std::vector<size_type> const &probe_on,
-                                     null_equality compare_nulls,
-                                     rmm::cuda_stream_view stream,
-                                     rmm::mr::device_memory_resource *mr) const
-{
-  CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(
-    probe, probe_on, compare_nulls, stream, mr);
-}
-
 std::unique_ptr<cudf::table> hash_join::hash_join_impl::left_join(
   cudf::table_view const &probe,
   std::vector<size_type> const &probe_on,
@@ -581,18 +557,6 @@ std::unique_ptr<cudf::table> hash_join::hash_join_impl::left_join(
                                           std::move(probe_build_pair.second));
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
-hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
-                                     std::vector<size_type> const &probe_on,
-                                     null_equality compare_nulls,
-                                     rmm::cuda_stream_view stream,
-                                     rmm::mr::device_memory_resource *mr) const
-{
-  CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::FULL_JOIN>(
-    probe, probe_on, compare_nulls, stream, mr);
-}
-
 std::unique_ptr<cudf::table> hash_join::hash_join_impl::full_join(
   cudf::table_view const &probe,
   std::vector<size_type> const &probe_on,
@@ -616,11 +580,11 @@ std::unique_ptr<cudf::table> hash_join::hash_join_impl::full_join(
 
 template <cudf::detail::join_kind JoinKind>
 std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
-hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
-                                             std::vector<size_type> const &probe_on,
-                                             null_equality compare_nulls,
-                                             rmm::cuda_stream_view stream,
-                                             rmm::mr::device_memory_resource *mr) const
+hash_join::hash_join_impl::compute_hash_join_indices(cudf::table_view const &probe,
+                                                     std::vector<size_type> const &probe_on,
+                                                     null_equality compare_nulls,
+                                                     rmm::cuda_stream_view stream,
+                                                     rmm::mr::device_memory_resource *mr) const
 {
   CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty");
   CUDF_EXPECTS(probe.num_rows() < cudf::detail::MAX_JOIN_SIZE,
@@ -669,7 +633,8 @@ hash_join::hash_join_impl::compute_hash_join(
                            }),
                "Invalid values passed to columns_in_common");
 
-  auto joined_indices = compute_hash_join<JoinKind>(probe, probe_on, compare_nulls, stream, mr);
+  auto joined_indices =
+    compute_hash_join_indices<JoinKind>(probe, probe_on, compare_nulls, stream, mr);
 
   if (is_trivial_join(probe, _build, probe_on, _build_on, JoinKind)) {
     return get_empty_joined_table(probe, _build, columns_in_common, common_columns_output_side);
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index fed5080d2be..a222a932edd 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -231,13 +231,6 @@ struct hash_join::hash_join_impl {
                  null_equality compare_nulls,
                  rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
-
   std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> inner_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
@@ -247,13 +240,6 @@ struct hash_join::hash_join_impl {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
-
   std::unique_ptr<cudf::table> left_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
@@ -262,13 +248,6 @@ struct hash_join::hash_join_impl {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
-
   std::unique_ptr<cudf::table> full_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
@@ -279,12 +258,12 @@ struct hash_join::hash_join_impl {
 
  private:
   template <cudf::detail::join_kind JoinKind>
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> compute_hash_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
+  compute_hash_join_indices(cudf::table_view const& probe,
+                            std::vector<size_type> const& probe_on,
+                            null_equality compare_nulls,
+                            rmm::cuda_stream_view stream,
+                            rmm::mr::device_memory_resource* mr) const;
 
   /**
    * @brief Performs hash join by probing the columns provided in `probe` as per
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 08af40405ec..ce27cfcd616 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -139,16 +139,6 @@ hash_join::hash_join(cudf::table_view const& build,
 {
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::inner_join(
-  cudf::table_view const& probe,
-  std::vector<size_type> const& probe_on,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr) const
-{
-  return impl->inner_join(probe, probe_on, compare_nulls, stream, mr);
-}
-
 std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> hash_join::inner_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,
@@ -162,16 +152,6 @@ std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> hash_join:
     probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::left_join(
-  cudf::table_view const& probe,
-  std::vector<size_type> const& probe_on,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr) const
-{
-  return impl->left_join(probe, probe_on, compare_nulls, stream, mr);
-}
-
 std::unique_ptr<cudf::table> hash_join::left_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,
@@ -183,16 +163,6 @@ std::unique_ptr<cudf::table> hash_join::left_join(
   return impl->left_join(probe, probe_on, columns_in_common, compare_nulls, stream, mr);
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::full_join(
-  cudf::table_view const& probe,
-  std::vector<size_type> const& probe_on,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr) const
-{
-  return impl->full_join(probe, probe_on, compare_nulls, stream, mr);
-}
-
 std::unique_ptr<cudf::table> hash_join::full_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,

From b79da68d8f8b551f95eec4a35ac054b9c8b6842f Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 13 Jan 2021 07:39:53 -0500
Subject: [PATCH 008/138] Add join_result

---
 cpp/include/cudf/join.hpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 2b8fc4bb2cb..ccf983bc72f 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -31,6 +31,27 @@ namespace cudf {
  * @file
  */
 
+/**
+ * @brief The result of a `join`.
+ *
+ * A `join_result` holds two columns containing the
+ * left and right gathermaps.
+ */  // TODO: explain this better
+struct join_result {
+  column_view left_indices;   /// < The left gathermap
+  column_view right_indices;  /// < The right gathermap
+  std::unique_ptr<rmm::device_buffer> left_buf;
+  std::unique_ptr<rmm::device_buffer> right_buf;
+};
+
+std::unique_ptr<cudf::table> inner_join(
+  cudf::table_view const& left,
+  cudf::table_view const& right,
+  std::vector<cudf::size_type> const& left_on,
+  std::vector<cudf::size_type> const& right_on,
+  null_equality compare_nulls         = null_equality::EQUAL,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Performs an inner join on the specified columns of two
  * tables (`left`, `right`)

From 380aa5908b9e6cab466cd581f83cf23d5c0605e6 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 13 Jan 2021 08:08:16 -0500
Subject: [PATCH 009/138] Add APIs that return join_result

---
 cpp/include/cudf/join.hpp | 69 ++++++++++++++++++++++++++++++++++++--
 cpp/src/join/join.cu      | 70 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 137 insertions(+), 2 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index ccf983bc72f..b644ee5c049 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -36,7 +36,7 @@ namespace cudf {
  *
  * A `join_result` holds two columns containing the
  * left and right gathermaps.
- */  // TODO: explain this better
+ */ // TODO: explain this better
 struct join_result {
   column_view left_indices;   /// < The left gathermap
   column_view right_indices;  /// < The right gathermap
@@ -44,7 +44,12 @@ struct join_result {
   std::unique_ptr<rmm::device_buffer> right_buf;
 };
 
-std::unique_ptr<cudf::table> inner_join(
+/**
+ * @brief Performs  an inner join on the specified columns of two
+ * tables (`left`, `right`), and returns the row indices corresponding
+ * to the result.
+ */ // TODO: explain this better
+join_result inner_join(
   cudf::table_view const& left,
   cudf::table_view const& right,
   std::vector<cudf::size_type> const& left_on,
@@ -118,6 +123,18 @@ std::unique_ptr<cudf::table> inner_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Performs a left join on the specified columns of two
+ * tables (`left`, `right`), and returns the row indices corresponding
+ * to the result.
+ */ // TODO: explain this better
+join_result left_join(cudf::table_view const& left,
+                      cudf::table_view const& right,
+                      std::vector<cudf::size_type> const& left_on,
+                      std::vector<cudf::size_type> const& right_on,
+                      null_equality compare_nulls         = null_equality::EQUAL,
+                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Performs a left join (also known as left outer join) on the
  * specified columns of two tables (`left`, `right`)
@@ -186,6 +203,18 @@ std::unique_ptr<cudf::table> left_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Performs a left join on the specified columns of two
+ * tables (`left`, `right`), and returns the row indices corresponding
+ * to the result.
+ */ // TODO: explain this better
+join_result full_join(cudf::table_view const& left,
+                      cudf::table_view const& right,
+                      std::vector<cudf::size_type> const& left_on,
+                      std::vector<cudf::size_type> const& right_on,
+                      null_equality compare_nulls         = null_equality::EQUAL,
+                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Performs a full join (also known as full outer join) on the
  * specified columns of two tables (`left`, `right`)
@@ -435,6 +464,18 @@ class hash_join {
             ///< `inner_join`.
   };
 
+  /**
+   * @brief Performs  an inner join on the specified columns of two
+   * tables (`left`, `right`), and returns the row indices corresponding
+   * to the result.
+   */ // TODO: explain this better
+  join_result inner_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls         = null_equality::EQUAL,
+    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+
   /**
    * @brief Performs an inner join by probing in the internal hash table.
    *
@@ -479,6 +520,18 @@ class hash_join {
     rmm::cuda_stream_view stream                          = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
+  /**
+   * @brief Performs a left join on the specified columns of two
+   * tables (`left`, `right`), and returns the row indices corresponding
+   * to the result.
+   */ // TODO: explain this better
+  join_result left_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls         = null_equality::EQUAL,
+    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+
   /**
    * @brief Performs a left join by probing in the internal hash table.
    *
@@ -509,6 +562,18 @@ class hash_join {
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
+  /**
+   * @brief Performs a full join on the specified columns of two
+   * tables (`left`, `right`), and returns the row indices corresponding
+   * to the result.
+   */ // TODO: explain this better
+  join_result full_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls         = null_equality::EQUAL,
+    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+
   /**
    * @brief Performs a full join by probing in the internal hash table.
    *
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index ce27cfcd616..488adb45920 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -26,6 +26,17 @@
 namespace cudf {
 namespace detail {
 
+join_result inner_join(table_view const& left_input,
+                       table_view const& right_input,
+                       std::vector<size_type> const& left_on,
+                       std::vector<size_type> const& right_on,
+                       null_equality compare_nulls,
+                       rmm::cuda_stream_view stream,
+                       rmm::mr::device_memory_resource* mr)
+{
+  return cudf::join_result{};
+}
+
 std::unique_ptr<table> inner_join(
   table_view const& left_input,
   table_view const& right_input,
@@ -79,6 +90,17 @@ std::unique_ptr<table> inner_join(
   }
 }
 
+join_result left_join(table_view const& left_input,
+                      table_view const& right_input,
+                      std::vector<size_type> const& left_on,
+                      std::vector<size_type> const& right_on,
+                      null_equality compare_nulls,
+                      rmm::cuda_stream_view stream,
+                      rmm::mr::device_memory_resource* mr)
+{
+  return cudf::join_result{};
+}
+
 std::unique_ptr<table> left_join(
   table_view const& left_input,
   table_view const& right_input,
@@ -103,6 +125,18 @@ std::unique_ptr<table> left_join(
   return hj_obj.left_join(left, left_on, columns_in_common, compare_nulls, stream, mr);
 }
 
+join_result full_join(table_view const& left_input,
+                      table_view const& right_input,
+                      std::vector<size_type> const& left_on,
+                      std::vector<size_type> const& right_on,
+                      std::vector<std::pair<size_type, size_type>> const& columns_in_common,
+                      null_equality compare_nulls,
+                      rmm::cuda_stream_view stream,
+                      rmm::mr::device_memory_resource* mr)
+{
+  return cudf::join_result{};
+}
+
 std::unique_ptr<table> full_join(
   table_view const& left_input,
   table_view const& right_input,
@@ -176,6 +210,18 @@ std::unique_ptr<cudf::table> hash_join::full_join(
 
 // external APIs
 
+join_result inner_join(table_view const& left,
+                       table_view const& right,
+                       std::vector<size_type> const& left_on,
+                       std::vector<size_type> const& right_on,
+                       null_equality compare_nulls,
+                       rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::inner_join(
+    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
+}
+
 std::unique_ptr<table> inner_join(
   table_view const& left,
   table_view const& right,
@@ -190,6 +236,18 @@ std::unique_ptr<table> inner_join(
     left, right, left_on, right_on, columns_in_common, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
+join_result left_join(table_view const& left,
+                      table_view const& right,
+                      std::vector<size_type> const& left_on,
+                      std::vector<size_type> const& right_on,
+                      null_equality compare_nulls,
+                      rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::left_join(
+    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
+}
+
 std::unique_ptr<table> left_join(
   table_view const& left,
   table_view const& right,
@@ -204,6 +262,18 @@ std::unique_ptr<table> left_join(
     left, right, left_on, right_on, columns_in_common, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
+join_result full_join(table_view const& left,
+                      table_view const& right,
+                      std::vector<size_type> const& left_on,
+                      std::vector<size_type> const& right_on,
+                      null_equality compare_nulls,
+                      rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::full_join(
+    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
+}
+
 std::unique_ptr<table> full_join(
   table_view const& left,
   table_view const& right,

From 3cbb2b47a5e5967462d54a71d2312bb4c0249c40 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 13 Jan 2021 08:08:29 -0500
Subject: [PATCH 010/138] Remove column_in_common

---
 cpp/src/join/join.cu | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 488adb45920..717cd4b824e 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -129,7 +129,6 @@ join_result full_join(table_view const& left_input,
                       table_view const& right_input,
                       std::vector<size_type> const& left_on,
                       std::vector<size_type> const& right_on,
-                      std::vector<std::pair<size_type, size_type>> const& columns_in_common,
                       null_equality compare_nulls,
                       rmm::cuda_stream_view stream,
                       rmm::mr::device_memory_resource* mr)

From 53ae7c90e0e6e45696d067fb48f7326043529b93 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 14 Jan 2021 17:52:41 -0500
Subject: [PATCH 011/138] Add an inner join API that returns gathermaps

---
 cpp/src/join/hash_join.cu  | 35 +++++++++++++++++++++++++++++++++++
 cpp/src/join/hash_join.cuh | 13 +++++++++++++
 cpp/src/join/join.cu       | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index d2d26dbc3b7..74037ed91c0 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -521,6 +521,17 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const &build,
   _hash_table = build_join_hash_table(_build_selected, compare_nulls, stream);
 }
 
+join_result hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
+                                                  std::vector<size_type> const &probe_on,
+                                                  null_equality compare_nulls,
+                                                  rmm::cuda_stream_view stream,
+                                                  rmm::mr::device_memory_resource *mr) const
+{
+  CUDF_FUNC_RANGE();
+  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
+    probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>>
 hash_join::hash_join_impl::inner_join(
   cudf::table_view const &probe,
@@ -611,6 +622,30 @@ hash_join::hash_join_impl::compute_hash_join_indices(cudf::table_view const &pro
   return probe_join_indices<ProbeJoinKind>(probe_selected, compare_nulls, stream);
 }
 
+template <cudf::detail::join_kind JoinKind>
+join_result hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
+                                                         std::vector<size_type> const &probe_on,
+                                                         null_equality compare_nulls,
+                                                         rmm::cuda_stream_view stream,
+                                                         rmm::mr::device_memory_resource *mr) const
+{
+  auto join_indices =
+    compute_hash_join_indices<JoinKind>(probe, probe_on, compare_nulls, stream, mr);
+  auto left_map  = cudf::column_view(cudf::data_type(type_to_id<cudf::size_type>()),
+                                    join_indices.second.size(),
+                                    join_indices.second.data(),
+                                    nullptr,
+                                    0);
+  auto right_map = cudf::column_view(cudf::data_type(type_to_id<cudf::size_type>()),
+                                     join_indices.second.size(),
+                                     join_indices.second.data(),
+                                     nullptr,
+                                     0);
+  auto left_buf  = std::make_unique<rmm::device_buffer>(join_indices.first.release());
+  auto right_buf = std::make_unique<rmm::device_buffer>(join_indices.first.release());
+  return join_result{left_map, right_map, std::move(left_buf), std::move(right_buf)};
+}
+
 template <cudf::detail::join_kind JoinKind>
 std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>>
 hash_join::hash_join_impl::compute_hash_join(
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index a222a932edd..41247dcbecb 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -231,6 +231,12 @@ struct hash_join::hash_join_impl {
                  null_equality compare_nulls,
                  rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
+  join_result inner_join(cudf::table_view const& probe,
+                         std::vector<size_type> const& probe_on,
+                         null_equality compare_nulls,
+                         rmm::cuda_stream_view stream,
+                         rmm::mr::device_memory_resource* mr) const;
+
   std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> inner_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
@@ -265,6 +271,13 @@ struct hash_join::hash_join_impl {
                             rmm::cuda_stream_view stream,
                             rmm::mr::device_memory_resource* mr) const;
 
+  template <cudf::detail::join_kind JoinKind>
+  join_result compute_hash_join(cudf::table_view const& probe,
+                                std::vector<size_type> const& probe_on,
+                                null_equality compare_nulls,
+                                rmm::cuda_stream_view stream,
+                                rmm::mr::device_memory_resource* mr) const;
+
   /**
    * @brief Performs hash join by probing the columns provided in `probe` as per
    * the joining indices given in `probe_on` and returns a (`probe`, `_build`) table pair, which
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 717cd4b824e..8808473f4c3 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -34,7 +34,27 @@ join_result inner_join(table_view const& left_input,
                        rmm::cuda_stream_view stream,
                        rmm::mr::device_memory_resource* mr)
 {
-  return cudf::join_result{};
+  // Make sure any dictionary columns have matched key sets.
+  // This will return any new dictionary columns created as well as updated table_views.
+  auto matched = cudf::dictionary::detail::match_dictionaries(
+    {left_input.select(left_on), right_input.select(right_on)},
+    stream,
+    rmm::mr::get_current_device_resource());  // temporary objects returned
+
+  // now rebuild the table views with the updated ones
+  auto const left  = scatter_columns(matched.second.front(), left_on, left_input);
+  auto const right = scatter_columns(matched.second.back(), right_on, right_input);
+
+  // For `inner_join`, we can freely choose either the `left` or `right` table to use for
+  // building/probing the hash map. Because building is typically more expensive than probing, we
+  // build the hash map from the smaller table.
+  if (right.num_rows() > left.num_rows()) {
+    cudf::hash_join hj_obj(left, left_on, compare_nulls, stream);
+    return hj_obj.inner_join(right, right_on, compare_nulls, stream, mr);
+  } else {
+    cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
+    return hj_obj.inner_join(left, left_on, compare_nulls, stream, mr);
+  }
 }
 
 std::unique_ptr<table> inner_join(
@@ -172,6 +192,15 @@ hash_join::hash_join(cudf::table_view const& build,
 {
 }
 
+join_result hash_join::inner_join(cudf::table_view const& probe,
+                                  std::vector<size_type> const& probe_on,
+                                  null_equality compare_nulls,
+                                  rmm::cuda_stream_view stream,
+                                  rmm::mr::device_memory_resource* mr) const
+{
+  return impl->inner_join(probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> hash_join::inner_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,

From fde172b9224f15997e02b4b015abd601b0d9d1da Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 14 Jan 2021 18:03:19 -0500
Subject: [PATCH 012/138] Add remaining APIs to return gathermaps

---
 cpp/src/join/hash_join.cu  | 22 +++++++++++++++++++
 cpp/src/join/hash_join.cuh | 12 +++++++++++
 cpp/src/join/join.cu       | 44 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 74037ed91c0..363a4e8c1ca 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -547,6 +547,17 @@ hash_join::hash_join_impl::inner_join(
     probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
 }
 
+join_result hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
+                                                 std::vector<size_type> const &probe_on,
+                                                 null_equality compare_nulls,
+                                                 rmm::cuda_stream_view stream,
+                                                 rmm::mr::device_memory_resource *mr) const
+{
+  CUDF_FUNC_RANGE();
+  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
+    probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::unique_ptr<cudf::table> hash_join::hash_join_impl::left_join(
   cudf::table_view const &probe,
   std::vector<size_type> const &probe_on,
@@ -568,6 +579,17 @@ std::unique_ptr<cudf::table> hash_join::hash_join_impl::left_join(
                                           std::move(probe_build_pair.second));
 }
 
+join_result hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
+                                                 std::vector<size_type> const &probe_on,
+                                                 null_equality compare_nulls,
+                                                 rmm::cuda_stream_view stream,
+                                                 rmm::mr::device_memory_resource *mr) const
+{
+  CUDF_FUNC_RANGE();
+  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
+    probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::unique_ptr<cudf::table> hash_join::hash_join_impl::full_join(
   cudf::table_view const &probe,
   std::vector<size_type> const &probe_on,
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index 41247dcbecb..7bc49bf8683 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -246,6 +246,12 @@ struct hash_join::hash_join_impl {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
+  join_result left_join(cudf::table_view const& probe,
+                        std::vector<size_type> const& probe_on,
+                        null_equality compare_nulls,
+                        rmm::cuda_stream_view stream,
+                        rmm::mr::device_memory_resource* mr) const;
+
   std::unique_ptr<cudf::table> left_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
@@ -254,6 +260,12 @@ struct hash_join::hash_join_impl {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
+  join_result full_join(cudf::table_view const& probe,
+                        std::vector<size_type> const& probe_on,
+                        null_equality compare_nulls,
+                        rmm::cuda_stream_view stream,
+                        rmm::mr::device_memory_resource* mr) const;
+
   std::unique_ptr<cudf::table> full_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 8808473f4c3..75bda62d809 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -118,7 +118,18 @@ join_result left_join(table_view const& left_input,
                       rmm::cuda_stream_view stream,
                       rmm::mr::device_memory_resource* mr)
 {
-  return cudf::join_result{};
+  // Make sure any dictionary columns have matched key sets.
+  // This will return any new dictionary columns created as well as updated table_views.
+  auto matched = cudf::dictionary::detail::match_dictionaries(
+    {left_input.select(left_on), right_input.select(right_on)},  // these should match
+    stream,
+    rmm::mr::get_current_device_resource());  // temporary objects returned
+  // now rebuild the table views with the updated ones
+  table_view const left  = scatter_columns(matched.second.front(), left_on, left_input);
+  table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
+
+  cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
+  return hj_obj.left_join(left, left_on, compare_nulls, stream, mr);
 }
 
 std::unique_ptr<table> left_join(
@@ -153,7 +164,18 @@ join_result full_join(table_view const& left_input,
                       rmm::cuda_stream_view stream,
                       rmm::mr::device_memory_resource* mr)
 {
-  return cudf::join_result{};
+  // Make sure any dictionary columns have matched key sets.
+  // This will return any new dictionary columns created as well as updated table_views.
+  auto matched = cudf::dictionary::detail::match_dictionaries(
+    {left_input.select(left_on), right_input.select(right_on)},  // these should match
+    stream,
+    rmm::mr::get_current_device_resource());  // temporary objects returned
+  // now rebuild the table views with the updated ones
+  table_view const left  = scatter_columns(matched.second.front(), left_on, left_input);
+  table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
+
+  cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
+  return hj_obj.full_join(left, left_on, compare_nulls, stream, mr);
 }
 
 std::unique_ptr<table> full_join(
@@ -214,6 +236,15 @@ std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> hash_join:
     probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
 }
 
+join_result hash_join::left_join(cudf::table_view const& probe,
+                                 std::vector<size_type> const& probe_on,
+                                 null_equality compare_nulls,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr) const
+{
+  return impl->left_join(probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::unique_ptr<cudf::table> hash_join::left_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,
@@ -225,6 +256,15 @@ std::unique_ptr<cudf::table> hash_join::left_join(
   return impl->left_join(probe, probe_on, columns_in_common, compare_nulls, stream, mr);
 }
 
+join_result hash_join::full_join(cudf::table_view const& probe,
+                                 std::vector<size_type> const& probe_on,
+                                 null_equality compare_nulls,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr) const
+{
+  return impl->full_join(probe, probe_on, compare_nulls, stream, mr);
+}
+
 std::unique_ptr<cudf::table> hash_join::full_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,

From 4a286dd164a01f0605e5ddf07de1aee2a416c925 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 18 Jan 2021 15:16:22 -0500
Subject: [PATCH 013/138] Add gathermap join test

---
 cpp/src/join/hash_join.cu     |  6 +++---
 cpp/tests/join/join_tests.cpp | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 363a4e8c1ca..b8f896a49b4 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -654,8 +654,8 @@ join_result hash_join::hash_join_impl::compute_hash_join(cudf::table_view const
   auto join_indices =
     compute_hash_join_indices<JoinKind>(probe, probe_on, compare_nulls, stream, mr);
   auto left_map  = cudf::column_view(cudf::data_type(type_to_id<cudf::size_type>()),
-                                    join_indices.second.size(),
-                                    join_indices.second.data(),
+                                    join_indices.first.size(),
+                                    join_indices.first.data(),
                                     nullptr,
                                     0);
   auto right_map = cudf::column_view(cudf::data_type(type_to_id<cudf::size_type>()),
@@ -664,7 +664,7 @@ join_result hash_join::hash_join_impl::compute_hash_join(cudf::table_view const
                                      nullptr,
                                      0);
   auto left_buf  = std::make_unique<rmm::device_buffer>(join_indices.first.release());
-  auto right_buf = std::make_unique<rmm::device_buffer>(join_indices.first.release());
+  auto right_buf = std::make_unique<rmm::device_buffer>(join_indices.second.release());
   return join_result{left_map, right_map, std::move(left_buf), std::move(right_buf)};
 }
 
diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index b3b86e5cb66..785a414d418 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -1429,4 +1429,37 @@ TEST_F(JoinDictionaryTest, FullJoinWithNulls)
   CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
 }
 
+TEST_F(JoinTest, InnerJoinNoNullsGathermap)
+{
+  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+  strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"});
+  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols0.push_back(col0_2.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+  cols1.push_back(col1_2.release());
+
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
+
+  auto result          = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
+  auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.left_indices}));
+  auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.right_indices}));
+  auto lmap_sorted     = cudf::gather(cudf::table_view({result.left_indices}), *lmap_sort_order);
+  auto rmap_sorted     = cudf::gather(cudf::table_view({result.right_indices}), *rmap_sort_order);
+
+  column_wrapper<int32_t> lmap_gold{{0, 2, 4}};
+  column_wrapper<int32_t> rmap_gold{{1, 1, 4}};
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From c756db91175031f523bf9c03dbabbf0c346b1a10 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 18 Jan 2021 18:30:09 -0500
Subject: [PATCH 014/138] Replace -1 with INT_MIN

---
 cpp/src/join/join_common_utils.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp
index 9965fa496aa..917bcb9bdd5 100644
--- a/cpp/src/join/join_common_utils.hpp
+++ b/cpp/src/join/join_common_utils.hpp
@@ -31,7 +31,7 @@ constexpr size_type MAX_JOIN_SIZE{std::numeric_limits<size_type>::max()};
 
 constexpr int DEFAULT_JOIN_BLOCK_SIZE = 128;
 constexpr int DEFAULT_JOIN_CACHE_SIZE = 128;
-constexpr size_type JoinNoneValue     = -1;
+constexpr size_type JoinNoneValue     = std::numeric_limits<size_type>::min();
 
 using VectorPair = std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>;
 

From 6a3d23e8fd89e14af9cc131cb005df8c110ba424 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 20 Jan 2021 09:16:11 -0500
Subject: [PATCH 015/138] Make join_result columns instead of column_views

---
 cpp/include/cudf/join.hpp     |  6 ++----
 cpp/src/join/hash_join.cu     | 25 ++++++++++++-------------
 cpp/tests/join/join_tests.cpp | 10 ++++++----
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index b644ee5c049..42c42e2ce7e 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -38,10 +38,8 @@ namespace cudf {
  * left and right gathermaps.
  */ // TODO: explain this better
 struct join_result {
-  column_view left_indices;   /// < The left gathermap
-  column_view right_indices;  /// < The right gathermap
-  std::unique_ptr<rmm::device_buffer> left_buf;
-  std::unique_ptr<rmm::device_buffer> right_buf;
+  std::unique_ptr<cudf::column> left_indices;   /// < The left gathermap
+  std::unique_ptr<cudf::column> right_indices;  /// < The left gathermap
 };
 
 /**
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index b8f896a49b4..ca875f99bef 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -653,19 +653,18 @@ join_result hash_join::hash_join_impl::compute_hash_join(cudf::table_view const
 {
   auto join_indices =
     compute_hash_join_indices<JoinKind>(probe, probe_on, compare_nulls, stream, mr);
-  auto left_map  = cudf::column_view(cudf::data_type(type_to_id<cudf::size_type>()),
-                                    join_indices.first.size(),
-                                    join_indices.first.data(),
-                                    nullptr,
-                                    0);
-  auto right_map = cudf::column_view(cudf::data_type(type_to_id<cudf::size_type>()),
-                                     join_indices.second.size(),
-                                     join_indices.second.data(),
-                                     nullptr,
-                                     0);
-  auto left_buf  = std::make_unique<rmm::device_buffer>(join_indices.first.release());
-  auto right_buf = std::make_unique<rmm::device_buffer>(join_indices.second.release());
-  return join_result{left_map, right_map, std::move(left_buf), std::move(right_buf)};
+  auto join_size = join_indices.first.size();
+  auto left_map  = std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
+                                                 join_size,
+                                                 join_indices.first.release(),
+                                                 rmm::device_buffer{},
+                                                 0);
+  auto right_map = std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
+                                                  join_size,
+                                                  join_indices.second.release(),
+                                                  rmm::device_buffer{},
+                                                  0);
+  return join_result{std::move(left_map), std::move(right_map)};
 }
 
 template <cudf::detail::join_kind JoinKind>
diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index 785a414d418..5b5b4253961 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -1451,10 +1451,12 @@ TEST_F(JoinTest, InnerJoinNoNullsGathermap)
   Table t1(std::move(cols1));
 
   auto result          = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
-  auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.left_indices}));
-  auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.right_indices}));
-  auto lmap_sorted     = cudf::gather(cudf::table_view({result.left_indices}), *lmap_sort_order);
-  auto rmap_sorted     = cudf::gather(cudf::table_view({result.right_indices}), *rmap_sort_order);
+  auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.left_indices->view()}));
+  auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.right_indices->view()}));
+  auto lmap_sorted =
+    cudf::gather(cudf::table_view({result.left_indices->view()}), *lmap_sort_order);
+  auto rmap_sorted =
+    cudf::gather(cudf::table_view({result.right_indices->view()}), *rmap_sort_order);
 
   column_wrapper<int32_t> lmap_gold{{0, 2, 4}};
   column_wrapper<int32_t> rmap_gold{{1, 1, 4}};

From 5dfc2a0cdfaceb742b7a230176d031312e1b4da7 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 20 Jan 2021 09:41:37 -0500
Subject: [PATCH 016/138] Replace join_result with a pair of columns

---
 cpp/include/cudf/join.hpp     |  45 ++++++-------
 cpp/src/join/hash_join.cu     |  47 ++++++++------
 cpp/src/join/hash_join.cuh    |  44 +++++++------
 cpp/src/join/join.cu          | 117 ++++++++++++++++++----------------
 cpp/tests/join/join_tests.cpp |  10 ++-
 5 files changed, 135 insertions(+), 128 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 42c42e2ce7e..9de03031a37 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -31,23 +31,12 @@ namespace cudf {
  * @file
  */
 
-/**
- * @brief The result of a `join`.
- *
- * A `join_result` holds two columns containing the
- * left and right gathermaps.
- */ // TODO: explain this better
-struct join_result {
-  std::unique_ptr<cudf::column> left_indices;   /// < The left gathermap
-  std::unique_ptr<cudf::column> right_indices;  /// < The left gathermap
-};
-
 /**
  * @brief Performs  an inner join on the specified columns of two
  * tables (`left`, `right`), and returns the row indices corresponding
  * to the result.
  */ // TODO: explain this better
-join_result inner_join(
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
   cudf::table_view const& left,
   cudf::table_view const& right,
   std::vector<cudf::size_type> const& left_on,
@@ -126,12 +115,13 @@ std::unique_ptr<cudf::table> inner_join(
  * tables (`left`, `right`), and returns the row indices corresponding
  * to the result.
  */ // TODO: explain this better
-join_result left_join(cudf::table_view const& left,
-                      cudf::table_view const& right,
-                      std::vector<cudf::size_type> const& left_on,
-                      std::vector<cudf::size_type> const& right_on,
-                      null_equality compare_nulls         = null_equality::EQUAL,
-                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
+  cudf::table_view const& left,
+  cudf::table_view const& right,
+  std::vector<cudf::size_type> const& left_on,
+  std::vector<cudf::size_type> const& right_on,
+  null_equality compare_nulls         = null_equality::EQUAL,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
  * @brief Performs a left join (also known as left outer join) on the
@@ -206,12 +196,13 @@ std::unique_ptr<cudf::table> left_join(
  * tables (`left`, `right`), and returns the row indices corresponding
  * to the result.
  */ // TODO: explain this better
-join_result full_join(cudf::table_view const& left,
-                      cudf::table_view const& right,
-                      std::vector<cudf::size_type> const& left_on,
-                      std::vector<cudf::size_type> const& right_on,
-                      null_equality compare_nulls         = null_equality::EQUAL,
-                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
+  cudf::table_view const& left,
+  cudf::table_view const& right,
+  std::vector<cudf::size_type> const& left_on,
+  std::vector<cudf::size_type> const& right_on,
+  null_equality compare_nulls         = null_equality::EQUAL,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
  * @brief Performs a full join (also known as full outer join) on the
@@ -467,7 +458,7 @@ class hash_join {
    * tables (`left`, `right`), and returns the row indices corresponding
    * to the result.
    */ // TODO: explain this better
-  join_result inner_join(
+  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
     null_equality compare_nulls         = null_equality::EQUAL,
@@ -523,7 +514,7 @@ class hash_join {
    * tables (`left`, `right`), and returns the row indices corresponding
    * to the result.
    */ // TODO: explain this better
-  join_result left_join(
+  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
     null_equality compare_nulls         = null_equality::EQUAL,
@@ -565,7 +556,7 @@ class hash_join {
    * tables (`left`, `right`), and returns the row indices corresponding
    * to the result.
    */ // TODO: explain this better
-  join_result full_join(
+  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
     null_equality compare_nulls         = null_equality::EQUAL,
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index ca875f99bef..e7c8f1959d3 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -521,11 +521,12 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const &build,
   _hash_table = build_join_hash_table(_build_selected, compare_nulls, stream);
 }
 
-join_result hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
-                                                  std::vector<size_type> const &probe_on,
-                                                  null_equality compare_nulls,
-                                                  rmm::cuda_stream_view stream,
-                                                  rmm::mr::device_memory_resource *mr) const
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
+hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
+                                      std::vector<size_type> const &probe_on,
+                                      null_equality compare_nulls,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource *mr) const
 {
   CUDF_FUNC_RANGE();
   return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
@@ -547,11 +548,12 @@ hash_join::hash_join_impl::inner_join(
     probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
 }
 
-join_result hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
-                                                 std::vector<size_type> const &probe_on,
-                                                 null_equality compare_nulls,
-                                                 rmm::cuda_stream_view stream,
-                                                 rmm::mr::device_memory_resource *mr) const
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
+hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
+                                     std::vector<size_type> const &probe_on,
+                                     null_equality compare_nulls,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource *mr) const
 {
   CUDF_FUNC_RANGE();
   return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
@@ -579,11 +581,12 @@ std::unique_ptr<cudf::table> hash_join::hash_join_impl::left_join(
                                           std::move(probe_build_pair.second));
 }
 
-join_result hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
-                                                 std::vector<size_type> const &probe_on,
-                                                 null_equality compare_nulls,
-                                                 rmm::cuda_stream_view stream,
-                                                 rmm::mr::device_memory_resource *mr) const
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
+hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
+                                     std::vector<size_type> const &probe_on,
+                                     null_equality compare_nulls,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource *mr) const
 {
   CUDF_FUNC_RANGE();
   return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
@@ -645,11 +648,12 @@ hash_join::hash_join_impl::compute_hash_join_indices(cudf::table_view const &pro
 }
 
 template <cudf::detail::join_kind JoinKind>
-join_result hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
-                                                         std::vector<size_type> const &probe_on,
-                                                         null_equality compare_nulls,
-                                                         rmm::cuda_stream_view stream,
-                                                         rmm::mr::device_memory_resource *mr) const
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
+hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
+                                             std::vector<size_type> const &probe_on,
+                                             null_equality compare_nulls,
+                                             rmm::cuda_stream_view stream,
+                                             rmm::mr::device_memory_resource *mr) const
 {
   auto join_indices =
     compute_hash_join_indices<JoinKind>(probe, probe_on, compare_nulls, stream, mr);
@@ -664,7 +668,8 @@ join_result hash_join::hash_join_impl::compute_hash_join(cudf::table_view const
                                                   join_indices.second.release(),
                                                   rmm::device_buffer{},
                                                   0);
-  return join_result{std::move(left_map), std::move(right_map)};
+  return std::make_pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>(
+    std::move(left_map), std::move(right_map));
 }
 
 template <cudf::detail::join_kind JoinKind>
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index 7bc49bf8683..463902604c9 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -231,11 +231,12 @@ struct hash_join::hash_join_impl {
                  null_equality compare_nulls,
                  rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
-  join_result inner_join(cudf::table_view const& probe,
-                         std::vector<size_type> const& probe_on,
-                         null_equality compare_nulls,
-                         rmm::cuda_stream_view stream,
-                         rmm::mr::device_memory_resource* mr) const;
+  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls,
+    rmm::cuda_stream_view stream,
+    rmm::mr::device_memory_resource* mr) const;
 
   std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> inner_join(
     cudf::table_view const& probe,
@@ -246,11 +247,12 @@ struct hash_join::hash_join_impl {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
-  join_result left_join(cudf::table_view const& probe,
-                        std::vector<size_type> const& probe_on,
-                        null_equality compare_nulls,
-                        rmm::cuda_stream_view stream,
-                        rmm::mr::device_memory_resource* mr) const;
+  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls,
+    rmm::cuda_stream_view stream,
+    rmm::mr::device_memory_resource* mr) const;
 
   std::unique_ptr<cudf::table> left_join(
     cudf::table_view const& probe,
@@ -260,11 +262,12 @@ struct hash_join::hash_join_impl {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
-  join_result full_join(cudf::table_view const& probe,
-                        std::vector<size_type> const& probe_on,
-                        null_equality compare_nulls,
-                        rmm::cuda_stream_view stream,
-                        rmm::mr::device_memory_resource* mr) const;
+  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls,
+    rmm::cuda_stream_view stream,
+    rmm::mr::device_memory_resource* mr) const;
 
   std::unique_ptr<cudf::table> full_join(
     cudf::table_view const& probe,
@@ -284,11 +287,12 @@ struct hash_join::hash_join_impl {
                             rmm::mr::device_memory_resource* mr) const;
 
   template <cudf::detail::join_kind JoinKind>
-  join_result compute_hash_join(cudf::table_view const& probe,
-                                std::vector<size_type> const& probe_on,
-                                null_equality compare_nulls,
-                                rmm::cuda_stream_view stream,
-                                rmm::mr::device_memory_resource* mr) const;
+  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> compute_hash_join(
+    cudf::table_view const& probe,
+    std::vector<size_type> const& probe_on,
+    null_equality compare_nulls,
+    rmm::cuda_stream_view stream,
+    rmm::mr::device_memory_resource* mr) const;
 
   /**
    * @brief Performs hash join by probing the columns provided in `probe` as per
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 75bda62d809..61a0a85ef50 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -26,13 +26,14 @@
 namespace cudf {
 namespace detail {
 
-join_result inner_join(table_view const& left_input,
-                       table_view const& right_input,
-                       std::vector<size_type> const& left_on,
-                       std::vector<size_type> const& right_on,
-                       null_equality compare_nulls,
-                       rmm::cuda_stream_view stream,
-                       rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
+  table_view const& left_input,
+  table_view const& right_input,
+  std::vector<size_type> const& left_on,
+  std::vector<size_type> const& right_on,
+  null_equality compare_nulls,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr)
 {
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
@@ -110,13 +111,14 @@ std::unique_ptr<table> inner_join(
   }
 }
 
-join_result left_join(table_view const& left_input,
-                      table_view const& right_input,
-                      std::vector<size_type> const& left_on,
-                      std::vector<size_type> const& right_on,
-                      null_equality compare_nulls,
-                      rmm::cuda_stream_view stream,
-                      rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
+  table_view const& left_input,
+  table_view const& right_input,
+  std::vector<size_type> const& left_on,
+  std::vector<size_type> const& right_on,
+  null_equality compare_nulls,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr)
 {
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
@@ -156,13 +158,14 @@ std::unique_ptr<table> left_join(
   return hj_obj.left_join(left, left_on, columns_in_common, compare_nulls, stream, mr);
 }
 
-join_result full_join(table_view const& left_input,
-                      table_view const& right_input,
-                      std::vector<size_type> const& left_on,
-                      std::vector<size_type> const& right_on,
-                      null_equality compare_nulls,
-                      rmm::cuda_stream_view stream,
-                      rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
+  table_view const& left_input,
+  table_view const& right_input,
+  std::vector<size_type> const& left_on,
+  std::vector<size_type> const& right_on,
+  null_equality compare_nulls,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr)
 {
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
@@ -214,11 +217,12 @@ hash_join::hash_join(cudf::table_view const& build,
 {
 }
 
-join_result hash_join::inner_join(cudf::table_view const& probe,
-                                  std::vector<size_type> const& probe_on,
-                                  null_equality compare_nulls,
-                                  rmm::cuda_stream_view stream,
-                                  rmm::mr::device_memory_resource* mr) const
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_join::inner_join(
+  cudf::table_view const& probe,
+  std::vector<size_type> const& probe_on,
+  null_equality compare_nulls,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr) const
 {
   return impl->inner_join(probe, probe_on, compare_nulls, stream, mr);
 }
@@ -236,11 +240,12 @@ std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> hash_join:
     probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
 }
 
-join_result hash_join::left_join(cudf::table_view const& probe,
-                                 std::vector<size_type> const& probe_on,
-                                 null_equality compare_nulls,
-                                 rmm::cuda_stream_view stream,
-                                 rmm::mr::device_memory_resource* mr) const
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_join::left_join(
+  cudf::table_view const& probe,
+  std::vector<size_type> const& probe_on,
+  null_equality compare_nulls,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr) const
 {
   return impl->left_join(probe, probe_on, compare_nulls, stream, mr);
 }
@@ -256,11 +261,12 @@ std::unique_ptr<cudf::table> hash_join::left_join(
   return impl->left_join(probe, probe_on, columns_in_common, compare_nulls, stream, mr);
 }
 
-join_result hash_join::full_join(cudf::table_view const& probe,
-                                 std::vector<size_type> const& probe_on,
-                                 null_equality compare_nulls,
-                                 rmm::cuda_stream_view stream,
-                                 rmm::mr::device_memory_resource* mr) const
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_join::full_join(
+  cudf::table_view const& probe,
+  std::vector<size_type> const& probe_on,
+  null_equality compare_nulls,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr) const
 {
   return impl->full_join(probe, probe_on, compare_nulls, stream, mr);
 }
@@ -278,12 +284,13 @@ std::unique_ptr<cudf::table> hash_join::full_join(
 
 // external APIs
 
-join_result inner_join(table_view const& left,
-                       table_view const& right,
-                       std::vector<size_type> const& left_on,
-                       std::vector<size_type> const& right_on,
-                       null_equality compare_nulls,
-                       rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
+  table_view const& left,
+  table_view const& right,
+  std::vector<size_type> const& left_on,
+  std::vector<size_type> const& right_on,
+  null_equality compare_nulls,
+  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::inner_join(
@@ -304,12 +311,13 @@ std::unique_ptr<table> inner_join(
     left, right, left_on, right_on, columns_in_common, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-join_result left_join(table_view const& left,
-                      table_view const& right,
-                      std::vector<size_type> const& left_on,
-                      std::vector<size_type> const& right_on,
-                      null_equality compare_nulls,
-                      rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
+  table_view const& left,
+  table_view const& right,
+  std::vector<size_type> const& left_on,
+  std::vector<size_type> const& right_on,
+  null_equality compare_nulls,
+  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::left_join(
@@ -330,12 +338,13 @@ std::unique_ptr<table> left_join(
     left, right, left_on, right_on, columns_in_common, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-join_result full_join(table_view const& left,
-                      table_view const& right,
-                      std::vector<size_type> const& left_on,
-                      std::vector<size_type> const& right_on,
-                      null_equality compare_nulls,
-                      rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
+  table_view const& left,
+  table_view const& right,
+  std::vector<size_type> const& left_on,
+  std::vector<size_type> const& right_on,
+  null_equality compare_nulls,
+  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::full_join(
diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index 5b5b4253961..e4deea9fe1a 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -1451,12 +1451,10 @@ TEST_F(JoinTest, InnerJoinNoNullsGathermap)
   Table t1(std::move(cols1));
 
   auto result          = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
-  auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.left_indices->view()}));
-  auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.right_indices->view()}));
-  auto lmap_sorted =
-    cudf::gather(cudf::table_view({result.left_indices->view()}), *lmap_sort_order);
-  auto rmap_sorted =
-    cudf::gather(cudf::table_view({result.right_indices->view()}), *rmap_sort_order);
+  auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
+  auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
+  auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}), *lmap_sort_order);
+  auto rmap_sorted     = cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
 
   column_wrapper<int32_t> lmap_gold{{0, 2, 4}};
   column_wrapper<int32_t> rmap_gold{{1, 1, 4}};

From 362829bce1b6a5cf89cb6cf9a711a9fec5cf93c5 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 20 Jan 2021 10:48:21 -0500
Subject: [PATCH 017/138] Add gathermap test for outer join

---
 cpp/src/join/hash_join.cu     |  4 +-
 cpp/tests/join/join_tests.cpp | 74 ++++++++++++++++++++++++++++++++++-
 2 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index e7c8f1959d3..2bc7396115c 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -556,7 +556,7 @@ hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
                                      rmm::mr::device_memory_resource *mr) const
 {
   CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
+  return compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(
     probe, probe_on, compare_nulls, stream, mr);
 }
 
@@ -589,7 +589,7 @@ hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
                                      rmm::mr::device_memory_resource *mr) const
 {
   CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
+  return compute_hash_join<cudf::detail::join_kind::FULL_JOIN>(
     probe, probe_on, compare_nulls, stream, mr);
 }
 
diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index e4deea9fe1a..7aaef599297 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -33,11 +33,15 @@
 #include <cudf_test/table_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
+#include <limits>
+
 template <typename T>
 using column_wrapper = cudf::test::fixed_width_column_wrapper<T>;
 using strcol_wrapper = cudf::test::strings_column_wrapper;
 using CVector        = std::vector<std::unique_ptr<cudf::column>>;
 using Table          = cudf::table;
+constexpr cudf::size_type NoneValue =
+  std::numeric_limits<cudf::size_type>::min();  // TODO: how to test if this isn't public?
 
 struct JoinTest : public cudf::test::BaseFixture {
 };
@@ -1429,7 +1433,7 @@ TEST_F(JoinDictionaryTest, FullJoinWithNulls)
   CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
 }
 
-TEST_F(JoinTest, InnerJoinNoNullsGathermap)
+TEST_F(JoinTest, InnerJoinGathermap)
 {
   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
   strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"});
@@ -1462,4 +1466,72 @@ TEST_F(JoinTest, InnerJoinNoNullsGathermap)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
 }
 
+TEST_F(JoinTest, LeftJoinGathermap)
+{
+  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
+  strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
+  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols0.push_back(col0_2.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+  cols1.push_back(col1_2.release());
+
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
+
+  auto result          = cudf::left_join(t0, t1, {0, 1}, {0, 1});
+  auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
+  auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
+  auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}), *lmap_sort_order);
+  auto rmap_sorted     = cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
+
+  column_wrapper<int32_t> lmap_gold{{0, 1, 2, 3, 4}};
+  column_wrapper<int32_t> rmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 4}};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
+}
+
+TEST_F(JoinTest, FullJoinGatherMap)
+{
+  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
+  strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
+  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}, {1, 1, 1, 0, 1}};
+  strcol_wrapper col1_1{{"s1", "s0", "s1", "s2", "s1"}};
+  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols0.push_back(col0_2.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+  cols1.push_back(col1_2.release());
+
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
+
+  auto result          = cudf::full_join(t0, t1, {0, 1}, {0, 1});
+  auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
+  auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
+  auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}), *lmap_sort_order);
+  auto rmap_sorted     = cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
+
+  column_wrapper<int32_t> lmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3}};
+  column_wrapper<int32_t> rmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3}};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From 4e4380cec9d6adc0f794bf4aec733443bf1610e9 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 20 Jan 2021 17:30:38 -0500
Subject: [PATCH 018/138] Add and pass full join gathermap test

---
 cpp/src/join/hash_join.cu     | 35 ++++++++++++++++++++---------------
 cpp/src/join/hash_join.cuh    |  7 ++-----
 cpp/tests/join/join_tests.cpp |  4 ++--
 3 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 2bc7396115c..16a453d7068 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -432,25 +432,22 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> construct_join_output_
   std::unique_ptr<table> common_table = std::make_unique<table>();
   // Construct the joined columns
   if (join_kind::FULL_JOIN == JoinKind) {
-    auto complement_indices = get_left_join_indices_complement(
-      joined_indices.second, probe.num_rows(), build.num_rows(), stream);
     if (not columns_in_common.empty()) {
       auto common_from_build = detail::gather(build.select(build_common_col),
-                                              complement_indices.second.begin(),
-                                              complement_indices.second.end(),
+                                              joined_indices.second.begin() + probe.num_rows(),
+                                              joined_indices.second.end(),
                                               bounds_policy,
                                               stream,
                                               rmm::mr::get_current_device_resource());
       auto common_from_probe = detail::gather(probe.select(probe_common_col),
                                               joined_indices.first.begin(),
-                                              joined_indices.first.end(),
+                                              joined_indices.first.begin() + probe.num_rows(),
                                               bounds_policy,
                                               stream,
                                               rmm::mr::get_current_device_resource());
       common_table           = cudf::detail::concatenate(
-        {common_from_build->view(), common_from_probe->view()}, stream, mr);
+        {common_from_probe->view(), common_from_build->view()}, stream, mr);
     }
-    joined_indices = concatenate_vector_pairs(complement_indices, joined_indices, stream);
   } else {
     if (not columns_in_common.empty()) {
       common_table = detail::gather(probe.select(probe_common_col),
@@ -641,10 +638,7 @@ hash_join::hash_join_impl::compute_hash_join_indices(cudf::table_view const &pro
                           [](const auto &b, const auto &p) { return b.type() == p.type(); }),
                "Mismatch in joining column data types");
 
-  constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN)
-                                                      ? cudf::detail::join_kind::LEFT_JOIN
-                                                      : JoinKind;
-  return probe_join_indices<ProbeJoinKind>(probe_selected, compare_nulls, stream);
+  return probe_join_indices<JoinKind>(probe_selected, compare_nulls, stream);
 }
 
 template <cudf::detail::join_kind JoinKind>
@@ -706,14 +700,13 @@ hash_join::hash_join_impl::compute_hash_join(
 }
 
 template <cudf::detail::join_kind JoinKind>
-std::enable_if_t<JoinKind != cudf::detail::join_kind::FULL_JOIN,
-                 std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>>
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
                                               null_equality compare_nulls,
                                               rmm::cuda_stream_view stream) const
 {
   // Trivial left join case - exit early
-  if (!_hash_table && JoinKind == cudf::detail::join_kind::LEFT_JOIN) {
+  if (!_hash_table && JoinKind != cudf::detail::join_kind::INNER_JOIN) {
     return get_trivial_left_join_indices(probe, stream);
   }
 
@@ -721,8 +714,20 @@ hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
 
   auto build_table = cudf::table_device_view::create(_build_selected, stream);
   auto probe_table = cudf::table_device_view::create(probe, stream);
-  return cudf::detail::probe_join_hash_table<JoinKind>(
+
+  constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN)
+                                                      ? cudf::detail::join_kind::LEFT_JOIN
+                                                      : JoinKind;
+  auto join_indices = cudf::detail::probe_join_hash_table<ProbeJoinKind>(
     *build_table, *probe_table, *_hash_table, compare_nulls, stream);
+
+  if (JoinKind == cudf::detail::join_kind::FULL_JOIN) {
+    auto complement_indices = detail::get_left_join_indices_complement(
+      join_indices.second, probe.num_rows(), _build.num_rows(), stream);
+    join_indices = detail::concatenate_vector_pairs(join_indices, complement_indices, stream);
+  }
+
+  return join_indices;
 }
 
 }  // namespace cudf
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index 463902604c9..c33029dea55 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -361,11 +361,8 @@ struct hash_join::hash_join_impl {
    * @return Join output indices vector pair.
    */
   template <cudf::detail::join_kind JoinKind>
-  std::enable_if_t<JoinKind != cudf::detail::join_kind::FULL_JOIN,
-                   std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>>
-  probe_join_indices(cudf::table_view const& probe,
-                     null_equality compare_nulls,
-                     rmm::cuda_stream_view stream) const;
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> probe_join_indices(
+    cudf::table_view const& probe, null_equality compare_nulls, rmm::cuda_stream_view stream) const;
 };
 
 }  // namespace cudf
diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index 7aaef599297..f0748e2ec29 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -1527,8 +1527,8 @@ TEST_F(JoinTest, FullJoinGatherMap)
   auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}), *lmap_sort_order);
   auto rmap_sorted     = cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
 
-  column_wrapper<int32_t> lmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3}};
-  column_wrapper<int32_t> rmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3}};
+  column_wrapper<int32_t> lmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3, 4}};
+  column_wrapper<int32_t> rmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3, 4}};
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);

From 339a13d15f74742dd916235f7bc0f90bcef63cf0 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 21 Jan 2021 11:56:18 -0500
Subject: [PATCH 019/138] Begin Python-side refactor

---
 python/cudf/cudf/_lib/cpp/join.pxd |  18 ++---
 python/cudf/cudf/_lib/join.pyx     | 112 +++--------------------------
 2 files changed, 19 insertions(+), 111 deletions(-)

diff --git a/python/cudf/cudf/_lib/cpp/join.pxd b/python/cudf/cudf/_lib/cpp/join.pxd
index 10edf370f5d..b25062d529e 100644
--- a/python/cudf/cudf/_lib/cpp/join.pxd
+++ b/python/cudf/cudf/_lib/cpp/join.pxd
@@ -4,32 +4,32 @@ from libcpp.memory cimport unique_ptr
 from libcpp.vector cimport vector
 from libcpp.pair cimport pair
 from libcpp cimport bool
+from libcpp.pair cimport pair
+from libcpp.memory cimport unique_ptr
 
+from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 
 
 cdef extern from "cudf/join.hpp" namespace "cudf" nogil:
-    cdef unique_ptr[table] inner_join(
+    cdef pair[unique_ptr[column], unique_ptr[column]] inner_join(
         const table_view left,
         const table_view right,
         const vector[int] left_on,
-        const vector[int] right_on,
-        const vector[pair[int, int]] columns_in_common
+        const vector[int] right_on
     ) except +
-    cdef unique_ptr[table] left_join(
+    cdef pair[unique_ptr[column], unique_ptr[column]] left_join(
         const table_view left,
         const table_view right,
         const vector[int] left_on,
-        const vector[int] right_on,
-        const vector[pair[int, int]] columns_in_common
+        const vector[int] right_on
     ) except +
-    cdef unique_ptr[table] full_join(
+    cdef pair[unique_ptr[column], unique_ptr[column]] full_join(
         const table_view left,
         const table_view right,
         const vector[int] left_on,
-        const vector[int] right_on,
-        const vector[pair[int, int]] columns_in_common
+        const vector[int] right_on
     ) except +
     cdef unique_ptr[table] left_semi_join(
         const table_view left,
diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 38f13b9f994..9291c42625f 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -9,12 +9,15 @@ from libcpp.vector cimport vector
 from libcpp.pair cimport pair
 from libcpp cimport bool
 
+from cudf._lib.column cimport Column
 from cudf._lib.table cimport Table, columns_from_ptr
 
+from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 cimport cudf._lib.cpp.join as cpp_join
 
+
 cpdef join(Table lhs,
            Table rhs,
            object how,
@@ -27,7 +30,6 @@ cpdef join(Table lhs,
     """
     Call libcudf++ join for full outer, inner and left joins.
     """
-
     cdef Table c_lhs = lhs
     cdef Table c_rhs = rhs
 
@@ -51,17 +53,7 @@ cpdef join(Table lhs,
     cdef vector[int] all_left_inds = range(
         lhs._num_columns + (lhs._num_indices * left_index)
     )
-    cdef vector[int] all_right_inds = range(
-        rhs._num_columns + (rhs._num_indices * right_index)
-    )
-
-    result_col_names = compute_result_col_names(lhs, rhs, how)
 
-    columns_in_common = OrderedDict()
-    cdef vector[pair[int, int]] c_columns_in_common
-
-    # keep track of where the desired index column will end up
-    result_index_pos = None
     if left_index or right_index:
         # If either true, we need to process both indices as columns
         lhs_view = c_lhs.view()
@@ -69,54 +61,27 @@ cpdef join(Table lhs,
 
         left_join_cols = list(lhs._index_names) + list(lhs._data.keys())
         right_join_cols = list(rhs._index_names) + list(rhs._data.keys())
+
         if left_index and right_index:
             # Index columns will be common, on the left, dropped from right
             # Index name is from the left
             # Both views, must take index column indices
             left_on_indices = right_on_indices = range(lhs._num_indices)
-            result_idx_positions = range(lhs._num_indices)
-            result_index_names = lhs._index_names
-
         elif left_index:
             # Joins left index columns with right 'on' columns
             left_on_indices = range(lhs._num_indices)
             right_on_indices = [
                 right_join_cols.index(on_col) for on_col in right_on
             ]
-
-            # The left index columns 'become' the new RHS columns
-            # and the right index 'survives'
-            result_idx_positions = range(
-                len(left_join_cols), len(left_join_cols) + lhs._num_indices
-            )
-            result_index_names = rhs._index_names
-
-            # but since the common columns are gathered from the left
-            # the rhs 'on' cols are returned on the left of the result
-            # rearrange the names so account for this
-            common = [None] * rhs._num_indices
-            for i in range(rhs._num_indices):
-                common[i] = result_col_names.pop(
-                    result_col_names.index(right_on[i])
-                )
-            result_col_names = common + result_col_names
         elif right_index:
             # Joins right index columns with left 'on' columns
             right_on_indices = range(rhs._num_indices)
             left_on_indices = [
                 left_join_cols.index(on_col) for on_col in left_on
             ]
-
-            # The right index columns 'become' the new LHS columns
-            # and the left index survives
-            # since they are already gathered from the left,
-            # no rearranging has to be done
-            result_idx_positions = range(lhs._num_indices)
-            result_index_names = lhs._index_names
         for i_l, i_r in zip(left_on_indices, right_on_indices):
             left_on_ind.push_back(i_l)
             right_on_ind.push_back(i_r)
-            columns_in_common[(i_l, i_r)] = None
     else:
         # cuDF's Python layer will create a new RangeIndex for this case
         lhs_view = c_lhs.data_view()
@@ -131,16 +96,10 @@ cpdef join(Table lhs,
     if left_index == right_index:
         for name in left_on:
             left_on_ind.push_back(left_join_cols.index(name))
-            if name in right_on:
-                if (left_on.index(name) == right_on.index(name)):
-                    columns_in_common[(
-                        left_join_cols.index(name),
-                        right_join_cols.index(name)
-                    )] = None
         for name in right_on:
             right_on_ind.push_back(right_join_cols.index(name))
-    c_columns_in_common = list(columns_in_common.keys())
-    cdef unique_ptr[table] c_result
+
+    cdef pair[unique_ptr[column], unique_ptr[column]] c_result
     if how == 'inner':
         with nogil:
             c_result = move(cpp_join.inner_join(
@@ -148,7 +107,6 @@ cpdef join(Table lhs,
                 rhs_view,
                 left_on_ind,
                 right_on_ind,
-                c_columns_in_common
             ))
     elif how == 'left':
         with nogil:
@@ -157,7 +115,6 @@ cpdef join(Table lhs,
                 rhs_view,
                 left_on_ind,
                 right_on_ind,
-                c_columns_in_common
             ))
     elif how == 'outer':
         with nogil:
@@ -166,57 +123,8 @@ cpdef join(Table lhs,
                 rhs_view,
                 left_on_ind,
                 right_on_ind,
-                c_columns_in_common
-            ))
-    elif how == 'leftsemi':
-        with nogil:
-            c_result = move(cpp_join.left_semi_join(
-                lhs_view,
-                rhs_view,
-                left_on_ind,
-                right_on_ind,
-                all_left_inds
             ))
-    elif how == 'leftanti':
-        with nogil:
-            c_result = move(cpp_join.left_anti_join(
-                lhs_view,
-                rhs_view,
-                left_on_ind,
-                right_on_ind,
-                all_left_inds
-            ))
-
-    all_cols_py = columns_from_ptr(move(c_result))
-    if left_index or right_index:
-        ind_cols = OrderedDict()
-        for name, pos in zip(
-            result_index_names[::-1], result_idx_positions[::-1]
-        ):
-            ind_cols[name] = all_cols_py.pop(pos)
-        index = OrderedDict()
-        for k, v in reversed(ind_cols.items()):
-            index[k] = v
-        index = Table(index)
-    else:
-        index = None
-    data_ordered_dict = OrderedDict(zip(result_col_names, all_cols_py))
-    return Table(data=data_ordered_dict, index=index)
-
-
-def compute_result_col_names(lhs, rhs, how):
-    """
-    Determine the names of the data columns in the result of
-    a libcudf join, based on the original left and right frames
-    as well as the type of join that was performed.
-    """
-    if how in {"left", "inner", "outer", "leftsemi", "leftanti"}:
-        a = lhs._data.keys()
-        if how not in {"leftsemi", "leftanti"}:
-            return list(chain(a, (k for k in rhs._data.keys()
-                        if k not in lhs._data.keys())))
-        return list(a)
-    else:
-        raise NotImplementedError(
-            f"{how} merge not supported yet"
-        )
+    return (
+        Column.from_unique_ptr(move(c_result.first)),
+        Column.from_unique_ptr(move(c_result.second))
+    )

From 044eac1c4f9c7df9e657c229c43689b3def3e8db Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 8 Feb 2021 14:03:35 -0500
Subject: [PATCH 020/138] Add  left_semi and left_anti join APIs that return
 gathermaps

---
 cpp/include/cudf/join.hpp          |  18 ++++
 cpp/src/join/semi_join.cu          | 167 ++++++++++++++++++-----------
 cpp/tests/join/semi_join_tests.cpp |   7 +-
 3 files changed, 130 insertions(+), 62 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 9de03031a37..0a42d28de2d 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -328,6 +328,15 @@ std::unique_ptr<cudf::table> left_semi_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/** TODO: document */
+std::unique_ptr<cudf::column> left_semi_join(
+  cudf::table_view const& left,
+  cudf::table_view const& right,
+  std::vector<cudf::size_type> const& left_on,
+  std::vector<cudf::size_type> const& right_on,
+  null_equality compare_nulls         = null_equality::EQUAL,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Performs a left anti join on the specified columns of two
  * tables (`left`, `right`)
@@ -384,6 +393,15 @@ std::unique_ptr<cudf::table> left_anti_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/** TODO: document */
+std::unique_ptr<cudf::column> left_anti_join(
+  cudf::table_view const& left,
+  cudf::table_view const& right,
+  std::vector<cudf::size_type> const& left_on,
+  std::vector<cudf::size_type> const& right_on,
+  null_equality compare_nulls         = null_equality::EQUAL,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Performs a cross join on two tables (`left`, `right`)
  *
diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index 9d046f9983c..59298c75f1e 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -26,11 +26,88 @@
 #include <cudf/utilities/error.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
 #include <rmm/device_vector.hpp>
 #include <rmm/exec_policy.hpp>
 
 namespace cudf {
 namespace detail {
+
+template <join_kind JoinKind>
+std::unique_ptr<cudf::column> left_semi_anti_join(
+  cudf::table_view const& left,
+  cudf::table_view const& right,
+  std::vector<cudf::size_type> const& left_on,
+  std::vector<cudf::size_type> const& right_on,
+  null_equality compare_nulls,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+{
+  CUDF_EXPECTS(0 != left.num_columns(), "Left table is empty");
+  CUDF_EXPECTS(0 != right.num_columns(), "Right table is empty");
+  CUDF_EXPECTS(left_on.size() == right_on.size(), "Mismatch in number of columns to be joined on");
+
+  auto const left_num_rows  = left.num_rows();
+  auto const right_num_rows = right.num_rows();
+
+  // Only care about existence, so we'll use an unordered map (other joins need a multimap)
+  using hash_table_type = concurrent_unordered_map<cudf::size_type, bool, row_hash, row_equality>;
+
+  // Create hash table containing all keys found in right table
+  auto right_rows_d            = table_device_view::create(right, stream);
+  size_t const hash_table_size = compute_hash_table_size(right_num_rows);
+  row_hash hash_build{*right_rows_d};
+  row_equality equality_build{*right_rows_d, *right_rows_d, compare_nulls == null_equality::EQUAL};
+
+  // Going to join it with left table
+  auto left_rows_d = table_device_view::create(left, stream);
+  row_hash hash_probe{*left_rows_d};
+  row_equality equality_probe{*left_rows_d, *right_rows_d, compare_nulls == null_equality::EQUAL};
+
+  auto hash_table_ptr = hash_table_type::create(hash_table_size,
+                                                stream,
+                                                std::numeric_limits<bool>::max(),
+                                                std::numeric_limits<cudf::size_type>::max(),
+                                                hash_build,
+                                                equality_build);
+  auto hash_table     = *hash_table_ptr;
+
+  thrust::for_each_n(rmm::exec_policy(stream),
+                     thrust::make_counting_iterator<size_type>(0),
+                     right_num_rows,
+                     [hash_table] __device__(size_type idx) mutable {
+                       hash_table.insert(thrust::make_pair(idx, true));
+                     });
+
+  //
+  // Now we have a hash table, we need to iterate over the rows of the left table
+  // and check to see if they are contained in the hash table
+  //
+
+  // For semi join we want contains to be true, for anti join we want contains to be false
+  bool join_type_boolean = (JoinKind == join_kind::LEFT_SEMI_JOIN);
+
+  rmm::device_uvector<size_type> gather_map(left_num_rows, stream);
+
+  // gather_map_end will be the end of valid data in gather_map
+  auto gather_map_end = thrust::copy_if(
+    rmm::exec_policy(stream),
+    thrust::make_counting_iterator<size_type>(0),
+    thrust::make_counting_iterator<size_type>(left_num_rows),
+    gather_map.begin(),
+    [hash_table, join_type_boolean, hash_probe, equality_probe] __device__(size_type idx) {
+      auto pos = hash_table.find(idx, hash_probe, equality_probe);
+      return (pos != hash_table.end()) == join_type_boolean;
+    });
+
+  auto join_size = std::distance(gather_map.begin(), gather_map_end);
+  return std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
+                                        join_size,
+                                        gather_map.release(),
+                                        rmm::device_buffer{},
+                                        0);
+}
+
 /**
  * @brief  Performs a left semi or anti join on the specified columns of two
  * tables (left, right)
@@ -80,20 +157,13 @@ std::unique_ptr<cudf::table> left_semi_anti_join(
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
-  CUDF_EXPECTS(0 != left.num_columns(), "Left table is empty");
-  CUDF_EXPECTS(0 != right.num_columns(), "Right table is empty");
-  CUDF_EXPECTS(left_on.size() == right_on.size(), "Mismatch in number of columns to be joined on");
-
   if (0 == return_columns.size()) { return empty_like(left.select(return_columns)); }
 
   if (is_trivial_join(left, right, left_on, right_on, JoinKind)) {
     return empty_like(left.select(return_columns));
   }
 
-  auto const left_num_rows  = left.num_rows();
-  auto const right_num_rows = right.num_rows();
-
-  if ((join_kind::LEFT_ANTI_JOIN == JoinKind) && (0 == right_num_rows)) {
+  if ((join_kind::LEFT_ANTI_JOIN == JoinKind) && (0 == right.num_rows())) {
     // Everything matches, just copy the proper columns from the left table
     return std::make_unique<table>(left.select(return_columns), stream, mr);
   }
@@ -108,65 +178,18 @@ std::unique_ptr<cudf::table> left_semi_anti_join(
   auto const left_selected  = matched.second.front();
   auto const right_selected = matched.second.back();
 
-  // Only care about existence, so we'll use an unordered map (other joins need a multimap)
-  using hash_table_type = concurrent_unordered_map<cudf::size_type, bool, row_hash, row_equality>;
-
-  // Create hash table containing all keys found in right table
-  auto right_rows_d            = table_device_view::create(right_selected, stream);
-  size_t const hash_table_size = compute_hash_table_size(right_num_rows);
-  row_hash hash_build{*right_rows_d};
-  row_equality equality_build{*right_rows_d, *right_rows_d, compare_nulls == null_equality::EQUAL};
+  auto gather_map = left_semi_anti_join<JoinKind>(
+    left_selected, right_selected, left_on, right_on, compare_nulls, stream);
 
-  // Going to join it with left table
-  auto left_rows_d = table_device_view::create(left_selected, stream);
-  row_hash hash_probe{*left_rows_d};
-  row_equality equality_probe{*left_rows_d, *right_rows_d, compare_nulls == null_equality::EQUAL};
-
-  auto hash_table_ptr = hash_table_type::create(hash_table_size,
-                                                stream,
-                                                std::numeric_limits<bool>::max(),
-                                                std::numeric_limits<cudf::size_type>::max(),
-                                                hash_build,
-                                                equality_build);
-  auto hash_table     = *hash_table_ptr;
-
-  thrust::for_each_n(rmm::exec_policy(stream),
-                     thrust::make_counting_iterator<size_type>(0),
-                     right_num_rows,
-                     [hash_table] __device__(size_type idx) mutable {
-                       hash_table.insert(thrust::make_pair(idx, true));
-                     });
-
-  //
-  // Now we have a hash table, we need to iterate over the rows of the left table
-  // and check to see if they are contained in the hash table
-  //
-
-  // For semi join we want contains to be true, for anti join we want contains to be false
-  bool join_type_boolean = (JoinKind == join_kind::LEFT_SEMI_JOIN);
-
-  rmm::device_vector<size_type> gather_map(left_num_rows);
-
-  // gather_map_end will be the end of valid data in gather_map
-  auto gather_map_end = thrust::copy_if(
-    rmm::exec_policy(stream),
-    thrust::make_counting_iterator<size_type>(0),
-    thrust::make_counting_iterator<size_type>(left_num_rows),
-    gather_map.begin(),
-    [hash_table, join_type_boolean, hash_probe, equality_probe] __device__(size_type idx) {
-      auto pos = hash_table.find(idx, hash_probe, equality_probe);
-      return (pos != hash_table.end()) == join_type_boolean;
-    });
-
-  // rebuild left table for call to gather
   auto const left_updated = scatter_columns(left_selected, left_on, left);
   return cudf::detail::gather(left_updated.select(return_columns),
-                              gather_map.begin(),
-                              gather_map_end,
+                              gather_map->view().template begin<cudf::size_type>(),
+                              gather_map->view().template end<cudf::size_type>(),
                               out_of_bounds_policy::DONT_CHECK,
                               stream,
                               mr);
 }
+
 }  // namespace detail
 
 std::unique_ptr<cudf::table> left_semi_join(cudf::table_view const& left,
@@ -182,6 +205,18 @@ std::unique_ptr<cudf::table> left_semi_join(cudf::table_view const& left,
     left, right, left_on, right_on, return_columns, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
+std::unique_ptr<cudf::column> left_semi_join(cudf::table_view const& left,
+                                             cudf::table_view const& right,
+                                             std::vector<cudf::size_type> const& left_on,
+                                             std::vector<cudf::size_type> const& right_on,
+                                             null_equality compare_nulls,
+                                             rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::left_semi_anti_join<detail::join_kind::LEFT_SEMI_JOIN>(
+    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
+}
+
 std::unique_ptr<cudf::table> left_anti_join(cudf::table_view const& left,
                                             cudf::table_view const& right,
                                             std::vector<cudf::size_type> const& left_on,
@@ -195,4 +230,16 @@ std::unique_ptr<cudf::table> left_anti_join(cudf::table_view const& left,
     left, right, left_on, right_on, return_columns, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
+std::unique_ptr<cudf::column> left_anti_join(cudf::table_view const& left,
+                                             cudf::table_view const& right,
+                                             std::vector<cudf::size_type> const& left_on,
+                                             std::vector<cudf::size_type> const& right_on,
+                                             null_equality compare_nulls,
+                                             rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::left_semi_anti_join<detail::join_kind::LEFT_ANTI_JOIN>(
+    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
+}
+
 }  // namespace cudf
diff --git a/cpp/tests/join/semi_join_tests.cpp b/cpp/tests/join/semi_join_tests.cpp
index 13c74616484..a665d07ee3c 100644
--- a/cpp/tests/join/semi_join_tests.cpp
+++ b/cpp/tests/join/semi_join_tests.cpp
@@ -20,6 +20,7 @@
 #include <cudf/join.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/types.hpp>
 
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
@@ -476,7 +477,8 @@ TEST_F(JoinTest, LeftSemiJoin_empty_result)
   cudf::table table_a(std::move(column_a));
   cudf::table table_b(std::move(column_b));
 
-  auto join_table = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {});
+  auto join_table =
+    cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, std::vector<cudf::size_type>{});
 
   EXPECT_EQ(join_table->num_columns(), 0);
   EXPECT_EQ(join_table->num_rows(), 0);
@@ -540,7 +542,8 @@ TEST_F(JoinTest, LeftAntiJoin_empty_result)
   cudf::table table_a(std::move(column_a));
   cudf::table table_b(std::move(column_b));
 
-  auto join_table = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {});
+  auto join_table =
+    cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, std::vector<cudf::size_type>{});
 
   EXPECT_EQ(join_table->num_columns(), 0);
   EXPECT_EQ(join_table->num_rows(), 0);

From 555d5ec5ad9ca04142e8b1c6a9448637f9d900e8 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 8 Feb 2021 16:08:19 -0500
Subject: [PATCH 021/138] Add Cython bindings

---
 python/cudf/cudf/_lib/cpp/join.pxd |  10 +-
 python/cudf/cudf/_lib/join.pyx     | 161 ++++++++++-------------------
 2 files changed, 62 insertions(+), 109 deletions(-)

diff --git a/python/cudf/cudf/_lib/cpp/join.pxd b/python/cudf/cudf/_lib/cpp/join.pxd
index b25062d529e..55180e2b74e 100644
--- a/python/cudf/cudf/_lib/cpp/join.pxd
+++ b/python/cudf/cudf/_lib/cpp/join.pxd
@@ -19,29 +19,31 @@ cdef extern from "cudf/join.hpp" namespace "cudf" nogil:
         const vector[int] left_on,
         const vector[int] right_on
     ) except +
+
     cdef pair[unique_ptr[column], unique_ptr[column]] left_join(
         const table_view left,
         const table_view right,
         const vector[int] left_on,
         const vector[int] right_on
     ) except +
+
     cdef pair[unique_ptr[column], unique_ptr[column]] full_join(
         const table_view left,
         const table_view right,
         const vector[int] left_on,
         const vector[int] right_on
     ) except +
-    cdef unique_ptr[table] left_semi_join(
+
+    cdef unique_ptr[column] left_semi_join(
         const table_view left,
         const table_view right,
         const vector[int] left_on,
         const vector[int] right_on,
-        const vector[int] return_columns
     ) except +
-    cdef unique_ptr[table] left_anti_join(
+
+    cdef unique_ptr[column] left_anti_join(
         const table_view left,
         const table_view right,
         const vector[int] left_on,
         const vector[int] right_on,
-        const vector[int] return_columns
     ) except +
diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 9291c42625f..94d82aa2638 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -18,113 +18,64 @@ from cudf._lib.cpp.table.table_view cimport table_view
 cimport cudf._lib.cpp.join as cpp_join
 
 
-cpdef join(Table lhs,
-           Table rhs,
-           object how,
-           object method,
-           object left_on=None,
-           object right_on=None,
-           bool left_index=False,
-           bool right_index=False
-           ):
-    """
-    Call libcudf++ join for full outer, inner and left joins.
-    """
-    cdef Table c_lhs = lhs
-    cdef Table c_rhs = rhs
-
-    # Views might or might not include index
-    cdef table_view lhs_view
-    cdef table_view rhs_view
-
-    # Will hold the join column indices into L and R tables
-    cdef vector[int] left_on_ind
-    cdef vector[int] right_on_ind
-
-    # If left/right index, will pass a full view
-    # must offset the data column indices by # of index columns
-    num_inds_left = len(left_on) + (lhs._num_indices * left_index)
-    num_inds_right = len(right_on) + (rhs._num_indices * right_index)
-    left_on_ind.reserve(num_inds_left)
-    right_on_ind.reserve(num_inds_right)
-
-    # Only used for semi or anti joins
-    # The result columns are only the left hand columns
-    cdef vector[int] all_left_inds = range(
-        lhs._num_columns + (lhs._num_indices * left_index)
-    )
-
-    if left_index or right_index:
-        # If either true, we need to process both indices as columns
-        lhs_view = c_lhs.view()
-        rhs_view = c_rhs.view()
-
-        left_join_cols = list(lhs._index_names) + list(lhs._data.keys())
-        right_join_cols = list(rhs._index_names) + list(rhs._data.keys())
-
-        if left_index and right_index:
-            # Index columns will be common, on the left, dropped from right
-            # Index name is from the left
-            # Both views, must take index column indices
-            left_on_indices = right_on_indices = range(lhs._num_indices)
-        elif left_index:
-            # Joins left index columns with right 'on' columns
-            left_on_indices = range(lhs._num_indices)
-            right_on_indices = [
-                right_join_cols.index(on_col) for on_col in right_on
-            ]
-        elif right_index:
-            # Joins right index columns with left 'on' columns
-            right_on_indices = range(rhs._num_indices)
-            left_on_indices = [
-                left_join_cols.index(on_col) for on_col in left_on
-            ]
-        for i_l, i_r in zip(left_on_indices, right_on_indices):
-            left_on_ind.push_back(i_l)
-            right_on_ind.push_back(i_r)
-    else:
-        # cuDF's Python layer will create a new RangeIndex for this case
-        lhs_view = c_lhs.data_view()
-        rhs_view = c_rhs.data_view()
-
-        left_join_cols = list(lhs._data.keys())
-        right_join_cols = list(rhs._data.keys())
-
-    # If both left/right_index, joining on indices plus additional cols
-    # If neither, joining on just cols, not indices
-    # In both cases, must match up additional column indices in lhs/rhs
-    if left_index == right_index:
-        for name in left_on:
-            left_on_ind.push_back(left_join_cols.index(name))
-        for name in right_on:
-            right_on_ind.push_back(right_join_cols.index(name))
-
+cpdef join(Table lhs, Table rhs, left_on, right_on, how=None):
+    # left, inner and outer join
+    cdef vector[int] c_left_on = left_on
+    cdef vector[int] c_right_on = right_on
     cdef pair[unique_ptr[column], unique_ptr[column]] c_result
-    if how == 'inner':
-        with nogil:
-            c_result = move(cpp_join.inner_join(
-                lhs_view,
-                rhs_view,
-                left_on_ind,
-                right_on_ind,
-            ))
-    elif how == 'left':
-        with nogil:
-            c_result = move(cpp_join.left_join(
-                lhs_view,
-                rhs_view,
-                left_on_ind,
-                right_on_ind,
-            ))
-    elif how == 'outer':
-        with nogil:
-            c_result = move(cpp_join.full_join(
-                lhs_view,
-                rhs_view,
-                left_on_ind,
-                right_on_ind,
-            ))
+    cdef table_view c_lhs = lhs.view()
+    cdef table_view c_rhs = rhs.view()
+
+    if how == "inner":
+        c_result = move(cpp_join.inner_join(
+            c_lhs,
+            c_rhs,
+            c_left_on,
+            c_right_on,
+        ))
+    elif how == "left":
+        c_result = move(cpp_join.left_join(
+            c_lhs,
+            c_rhs,
+            c_left_on,
+            c_right_on,
+        ))
+    elif how == "outer":
+        c_result = move(cpp_join.outer_join(
+            c_lhs,
+            c_rhs
+            c_left_on,
+            c_right_on
+        ))
+    else:
+        raise ValueError(f"Unkown join type {how}")
     return (
         Column.from_unique_ptr(move(c_result.first)),
         Column.from_unique_ptr(move(c_result.second))
     )
+
+
+cpdef join_semi_anti(Table lhs, Table rhs, left_on, right_on, how=None):
+    # left-semi and left-anti joins
+    cdef vector[int] c_left_on = left_on
+    cdef vector[int] c_right_on = right_on
+    cdef unique_ptr[column] c_result
+    cdef table_view c_lhs = lhs.view()
+    cdef table_view c_rhs = rhs.view()
+
+    if how == "semi":
+        c_result = move(cpp_join.left_semi_join(
+            c_lhs,
+            c_rhs,
+            c_left_on,
+            c_right_on
+        ))
+    elif how == "anti":
+        c_result = move(cpp_join.left_anti_join(
+            c_lhs,
+            c_rhs,
+            c_left_on,
+            c_right_on
+        ))
+    else:
+        raise ValueError(f"Invalid join type {how}")

From 56ae6162fbb25477b1edc6e0f68c9b3276df8f83 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 9 Feb 2021 09:45:41 -0500
Subject: [PATCH 022/138] full -> outer

---
 python/cudf/cudf/_lib/join.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 94d82aa2638..5d07604f3bb 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -41,9 +41,9 @@ cpdef join(Table lhs, Table rhs, left_on, right_on, how=None):
             c_right_on,
         ))
     elif how == "outer":
-        c_result = move(cpp_join.outer_join(
+        c_result = move(cpp_join.full_join(
             c_lhs,
-            c_rhs
+            c_rhs,
             c_left_on,
             c_right_on
         ))

From d447924fcbc24042459231e316b5786683788ad0 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 9 Feb 2021 16:38:21 -0500
Subject: [PATCH 023/138] Progress

---
 python/cudf/cudf/core/join/join.py | 347 ++++++++++++++---------------
 1 file changed, 166 insertions(+), 181 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index c6da3ee8dc4..257741e6f4c 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -1,34 +1,88 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
 import itertools
+from collections import namedtuple
 
 import pandas as pd
 
 import cudf
 from cudf import _lib as libcudf
-from cudf._lib.join import compute_result_col_names
 from cudf.core.join.casting_logic import (
     _input_to_libcudf_castrules_any,
     _libcudf_to_output_castrules,
 )
 
 
+class _MISSING_TYPE:
+    pass
+
+
+MISSING = _MISSING_TYPE()
+
+
+class JoinKey:
+    # A JoinKey represents one column of a Series
+    # or DataFrame - either an index column or a
+    # data column
+
+    # we need a different sentinel value than `None`
+    # because `None` is totally a valid index/column name
+    def __init__(self, obj, column=MISSING, index=MISSING):
+        self.obj = obj
+        self.column, self.index = column, index
+
+    def get_numeric_index(self):
+        # get the position of the column (including any index columns)
+        if self.index is MISSING:
+            return len(self.obj.index.names) + self.obj.columns.get_loc(
+                self.column
+            )
+        else:
+            return self.obj.index.names.index(self.index)
+
+    @property
+    def name(self):
+        # get the name of the column
+        if self.index is MISSING:
+            return self.column
+        else:
+            return self.index
+
+    @property
+    def value(self):
+        # get the column
+        if self.index is MISSING:
+            return self.obj._data[self.name]
+        else:
+            return self.obj._index._data[self.name]
+
+    def set_value(self, value):
+        # set the colum
+        if self.index is MISSING:
+            self.obj._data[self.name] = value
+        else:
+            self.obj._index._data[self.name] = value
+
+
+JoinKeys = namedtuple("JoinKeys", ["left", "right"])
+
+
 class Merge(object):
     def __init__(
         self,
         lhs,
         rhs,
-        on,
-        left_on,
-        right_on,
-        left_index,
-        right_index,
-        how,
-        sort,
-        lsuffix,
-        rsuffix,
-        method,
-        indicator,
-        suffixes,
+        on=None,
+        left_on=None,
+        right_on=None,
+        left_index=False,
+        right_index=False,
+        how="inner",
+        sort=False,
+        lsuffix="_x",
+        rsuffix="_y",
+        method=None,
+        indicator=None,
+        suffixes=None,
     ):
         """
         Manage the merging of two Frames.
@@ -72,32 +126,89 @@ def __init__(
             Left and right suffixes specified together, unpacked into lsuffix
             and rsuffix.
         """
-        self.lhs = lhs
-        self.rhs = rhs
-        self.left_index = left_index
-        self.right_index = right_index
-        self.method = method
-        self.sort = sort
-
-        # check that the merge is valid
-
-        self.validate_merge_cfg(
+        self.validate_merge_params(
             lhs,
             rhs,
-            on,
-            left_on,
-            right_on,
-            left_index,
-            right_index,
-            how,
-            lsuffix,
-            rsuffix,
-            suffixes,
+            on=on,
+            left_on=left_on,
+            right_on=right_on,
+            left_index=left_index,
+            right_index=right_index,
+            how=how,
+            lsuffix=lsuffix,
+            rsuffix=rsuffix,
+            suffixes=suffixes,
         )
+
+        # warning: self.lhs and self.rhs are mutated both before
+        # and after the join
+        self.lhs = lhs.copy(deep=False)
+        self.rhs = rhs.copy(deep=False)
+
+        self.on = on
+        self.left_on = left_on
+        self.right_on = right_on
+        self.left_index = left_index
+        self.right_index = right_index
         self.how = how
-        self.preprocess_merge_params(
-            on, left_on, right_on, lsuffix, rsuffix, suffixes
-        )
+        self.lsuffix = lsuffix
+        self.rsuffix = rsuffix
+        self.suffixes = suffixes
+
+        self.compute_join_keys()
+
+    def compute_join_keys(self):
+        def _coerce_to_tuple(obj):
+            if hasattr(obj, "__iter__") and not isinstance(obj, str):
+                return tuple(obj)
+            else:
+                return (obj,)
+
+        if (
+            self.left_index
+            or self.right_index
+            or self.left_on
+            or self.right_on
+        ):
+            if self.left_index:
+                left_keys = [
+                    JoinKey(obj=self.lhs, index=on)
+                    for on in self.lhs.index.names
+                ]
+            else:
+                # TODO: require left_on or left_index to be specified
+                left_keys = [
+                    JoinKey(obj=self.lhs, column=on)
+                    for on in _coerce_to_tuple(self.left_on)
+                ]
+            if self.right_index:
+                right_keys = [
+                    JoinKey(obj=self.rhs, index=on)
+                    for on in self.rhs.index.names
+                ]
+            else:
+                # TODO: require right_on or right_index to be specified
+                right_keys = [
+                    JoinKey(obj=self.rhs, column=on)
+                    for on in _coerce_to_tuple(self.right_on)
+                ]
+        else:
+            # Use `on` if provided. Otherwise,
+            # implicitly use identically named columns as the key columns:
+            on_names = (
+                _coerce_to_tuple(self.on)
+                if self.on is not None
+                else set(self.lhs._data.keys()) & set(self.rhs._data.keys())
+            )
+            left_keys = [JoinKey(obj=self.lhs, column=on) for on in on_names]
+            right_keys = [JoinKey(obj=self.rhs, column=on) for on in on_names]
+
+        if len(left_keys) != len(right_keys):
+            raise ValueError(
+                "Merge operands must have same number of join key columns"
+            )
+
+        self._keys = JoinKeys(left=left_keys, right=right_keys)
 
     def perform_merge(self):
         """
@@ -105,9 +216,8 @@ def perform_merge(self):
         necessary, cast the input key columns to compatible types.
         Potentially also cast the output back to categorical.
         """
-        output_dtypes = self.compute_output_dtypes()
-        self.typecast_input_to_libcudf()
-        libcudf_result = libcudf.join.join(
+        self.match_key_dtypes(_input_to_libcudf_castrules_any)
+        left_rows, right_rows = libcudf.join.join(
             self.lhs,
             self.rhs,
             self.how,
@@ -117,83 +227,24 @@ def perform_merge(self):
             left_index=self.left_index,
             right_index=self.right_index,
         )
-        result = self.out_class._from_table(libcudf_result)
-        result = self.typecast_libcudf_to_output(result, output_dtypes)
-        if isinstance(result, cudf.Index):
-            return result
-        else:
-            return result[
-                compute_result_col_names(self.lhs, self.rhs, self.how)
-            ]
-
-    def preprocess_merge_params(
-        self, on, left_on, right_on, lsuffix, rsuffix, suffixes
-    ):
-        """
-        Translate a valid configuration of user input parameters into
-        the subset of input configurations handled by the cython layer.
-        Apply suffixes to columns.
-        """
+        return self.construct_result(left_rows, right_rows)
 
-        self.out_class = cudf.DataFrame
-        if isinstance(self.lhs, cudf.MultiIndex) or isinstance(
-            self.rhs, cudf.MultiIndex
-        ):
-            self.out_class = cudf.MultiIndex
-        elif isinstance(self.lhs, cudf.Index):
-            self.out_class = self.lhs.__class__
+    def construct_result(self, left_rows, right_rows):
+        self.match_key_dtypes(_libcudf_to_output_castrules)
 
-        if on:
-            on = [on] if isinstance(on, str) else list(on)
-            left_on = right_on = on
+        # first construct the index:
+        if self.left_index and not self.right_index:
+            out_index = self.rhs.index.iloc[right_rows]
+        elif self.right_index and not self.left_index:
+            out_index = self.lhs.index.iloc[left_rows]
         else:
-            if left_on:
-                left_on = (
-                    [left_on] if isinstance(left_on, str) else list(left_on)
-                )
-            if right_on:
-                right_on = (
-                    [right_on] if isinstance(right_on, str) else list(right_on)
-                )
-
-        same_named_columns = set(self.lhs._data.keys()) & set(
-            self.rhs._data.keys()
-        )
-        if not (left_on or right_on) and not (
-            self.left_index and self.right_index
-        ):
-            left_on = right_on = list(same_named_columns)
-
-        no_suffix_cols = []
-        if left_on and right_on:
-            no_suffix_cols = [
-                left_name
-                for left_name, right_name in zip(left_on, right_on)
-                if left_name == right_name and left_name in same_named_columns
-            ]
-
-        if suffixes:
-            lsuffix, rsuffix = suffixes
-        for name in same_named_columns:
-            if name not in no_suffix_cols:
-                self.lhs.rename(
-                    {name: f"{name}{lsuffix}"}, inplace=True, axis=1
-                )
-                self.rhs.rename(
-                    {name: f"{name}{rsuffix}"}, inplace=True, axis=1
-                )
-                if left_on and name in left_on:
-                    left_on[left_on.index(name)] = f"{name}{lsuffix}"
-                if right_on and name in right_on:
-                    right_on[right_on.index(name)] = f"{name}{rsuffix}"
+            out_index = None
 
-        self.left_on = left_on if left_on is not None else []
-        self.right_on = right_on if right_on is not None else []
-        self.lsuffix = lsuffix
-        self.rsuffix = rsuffix
+        # now construct the data:
+        return out_index
 
     @staticmethod
-    def validate_merge_cfg(
+    def validate_merge_params(
         lhs,
         rhs,
         on,
@@ -227,50 +278,6 @@ def validate_merge_cfg(
         ):
             raise ValueError("Can not merge on unnamed Series")
 
-        # Keys need to be in their corresponding operands
-        if on:
-            if isinstance(on, str):
-                on_keys = [on]
-            elif isinstance(on, tuple):
-                on_keys = list(on)
-            else:
-                on_keys = on
-            for key in on_keys:
-                if not (key in lhs._data.keys() and key in rhs._data.keys()):
-                    raise KeyError(f"on key {on} not in both operands")
-        elif left_on and right_on:
-            left_on_keys = (
-                [left_on] if not isinstance(left_on, list) else left_on
-            )
-            right_on_keys = (
-                [right_on] if not isinstance(right_on, list) else right_on
-            )
-
-            for key in left_on_keys:
-                if key not in lhs._data.keys():
-                    raise KeyError(f'Key "{key}" not in left operand')
-            for key in right_on_keys:
-                if key not in rhs._data.keys():
-                    raise KeyError(f'Key "{key}" not in right operand')
-
-        # Require same total number of columns to join on in both operands
-        len_left_on = 0
-        len_right_on = 0
-        if left_on:
-            len_left_on += (
-                len(left_on) if pd.api.types.is_list_like(left_on) else 1
-            )
-        if right_on:
-            len_right_on += (
-                len(right_on) if pd.api.types.is_list_like(right_on) else 1
-            )
-        if not (len_left_on + left_index * lhs._num_indices) == (
-            len_right_on + right_index * rhs._num_indices
-        ):
-            raise ValueError(
-                "Merge operands must have same number of join key columns"
-            )
-
         # If nothing specified, must have common cols to use implicitly
         same_named_columns = set(lhs._data.keys()) & set(rhs._data.keys())
         if (
@@ -297,39 +304,17 @@ def validate_merge_cfg(
                         "lsuffix and rsuffix are not defined"
                     )
 
-    def typecast_input_to_libcudf(self):
+    def match_key_dtypes(self, match_func):
         """
         Check each pair of join keys in the left and right hand
         operands and apply casting rules to match their types
         before passing the result to libcudf.
         """
-        lhs_keys, rhs_keys, lhs_cols, rhs_cols = [], [], [], []
-        if self.left_index:
-            lhs_keys.append(self.lhs.index._data.keys())
-            lhs_cols.append(self.lhs.index)
-        if self.right_index:
-            rhs_keys.append(self.rhs.index._data.keys())
-            rhs_cols.append(self.rhs.index)
-        if self.left_on:
-            lhs_keys.append(self.left_on)
-            lhs_cols.append(self.lhs)
-        if self.right_on:
-            rhs_keys.append(self.right_on)
-            rhs_cols.append(self.rhs)
-
-        for l_key_grp, r_key_grp, l_col_grp, r_col_grp in zip(
-            lhs_keys, rhs_keys, lhs_cols, rhs_cols
-        ):
-            for l_key, r_key in zip(l_key_grp, r_key_grp):
-                to_dtype = _input_to_libcudf_castrules_any(
-                    l_col_grp._data[l_key], r_col_grp._data[r_key], self.how
-                )
-                l_col_grp._data[l_key] = l_col_grp._data[l_key].astype(
-                    to_dtype
-                )
-                r_col_grp._data[r_key] = r_col_grp._data[r_key].astype(
-                    to_dtype
-                )
+        for left_key, right_key in zip(self._keys.left, self._keys.right):
+            lcol, rcol = left_key.value, right_key.value
+            dtype = match_func(lcol, rcol, how=self.how)
+            left_key.set_value(lcol.astype(dtype))
+            right_key.set_value(rcol.astype(dtype))
 
     def compute_output_dtypes(self):
         """

From 484512eb17e51e75b3353f54edcfc5504a677f42 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 9 Feb 2021 18:13:02 -0500
Subject: [PATCH 024/138] More progress on py refactor

---
 python/cudf/cudf/core/join/join.py | 176 ++++++++++-------------------
 1 file changed, 60 insertions(+), 116 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 257741e6f4c..322913f93cf 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -1,8 +1,5 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
-import itertools
-from collections import namedtuple
-
-import pandas as pd
+from collections import OrderedDict, namedtuple
 
 import cudf
 from cudf import _lib as libcudf
@@ -39,6 +36,11 @@ def get_numeric_index(self):
         else:
             return self.obj.index.names.index(self.index)
 
+    @property
+    def is_index_level(self):
+        # True if this is an index column
+        return self.index is not MISSING
+
     @property
     def name(self):
         # get the name of the column
@@ -217,15 +219,18 @@ def perform_merge(self):
         Potentially also cast the output back to categorical.
         """
         self.match_key_dtypes(_input_to_libcudf_castrules_any)
+
+        left_key_indices = [key.get_numeric_index() for key in self._keys.left]
+        right_key_indices = [
+            key.get_numeric_index() for key in self._keys.right
+        ]
+        breakpoint()
         left_rows, right_rows = libcudf.join.join(
             self.lhs,
             self.rhs,
-            self.how,
-            self.method,
-            left_on=self.left_on,
-            right_on=self.right_on,
-            left_index=self.left_index,
-            right_index=self.right_index,
+            left_on=left_key_indices,
+            right_on=right_key_indices,
+            how=self.how,
         )
         return self.construct_result(left_rows, right_rows)
 
@@ -234,14 +239,53 @@ def construct_result(self, left_rows, right_rows):
 
         # first construct the index:
         if self.left_index and not self.right_index:
-            out_index = self.rhs.index.iloc[right_rows]
+            # TODO: only gather on index columns:
+            out_index = self.rhs.index._gather(right_rows)
         elif self.right_index and not self.left_index:
-            out_index = self.lhs.index.iloc[left_rows]
+            # TODO: only gather on index columns:
+            out_index = self.lhs.index._gather(left_rows)
         else:
             out_index = None
 
         # now construct the data:
-        return out_index
+        data = cudf.core.column_accessor.ColumnAccessor()
+        left_names, right_names = self.output_column_names()
+
+        for lcol in left_names:
+            data[left_names[lcol]] = self.lhs[lcol].iloc[left_rows]
+        for rcol in right_names:
+            data[right_names[rcol]] = self.rhs[rcol].iloc[right_rows]
+        return cudf.DataFrame._from_data(data, index=out_index)
+
+    def output_column_names(self):
+        # Return mappings of input column names to (possibly) suffixed
+        # result column names
+        left_names = OrderedDict(
+            zip(self.lhs._data.keys(), self.lhs._data.keys())
+        )
+        right_names = OrderedDict(
+            zip(self.rhs._data.keys(), self.rhs._data.keys())
+        )
+        common_names = set(left_names) & set(right_names)
+
+        if self.on:
+            key_columns_with_same_name = self.on
+        else:
+            key_columns_with_same_name = []
+            for lkey, rkey in zip(self._keys.left, self._keys.right):
+                if (lkey.is_index_level, rkey.is_index_level) == (
+                    False,
+                    False,
+                ):
+                    if lkey.name == rkey.name:
+                        key_columns_with_same_name.append(lkey.name)
+        for name in common_names:
+            if name not in key_columns_with_same_name:
+                left_names[name] = f"{name}{self.lsuffix}"
+                right_names[name] = f"{name}{self.rsuffix}"
+            else:
+                del right_names[name]
+        return left_names, right_names
 
     @staticmethod
     def validate_merge_params(
@@ -305,111 +349,11 @@ def validate_merge_params(
                     )
 
     def match_key_dtypes(self, match_func):
-        """
-        Check each pair of join keys in the left and right hand
-        operands and apply casting rules to match their types
-        before passing the result to libcudf.
-        """
+        # match the dtypes of the key columns in
+        # self.lhs and self.rhs according to the matching
+        # function `match_func`
         for left_key, right_key in zip(self._keys.left, self._keys.right):
             lcol, rcol = left_key.value, right_key.value
             dtype = match_func(lcol, rcol, how=self.how)
             left_key.set_value(lcol.astype(dtype))
             right_key.set_value(rcol.astype(dtype))
-
-    def compute_output_dtypes(self):
-        """
-        Determine what datatypes should be applied to the result
-        of a libcudf join, baesd on the original left and right
-        frames.
-        """
-
-        index_dtypes = {}
-        l_data_join_cols = {}
-        r_data_join_cols = {}
-
-        data_dtypes = {
-            name: col.dtype
-            for name, col in itertools.chain(
-                self.lhs._data.items(), self.rhs._data.items()
-            )
-        }
-
-        if self.left_index and self.right_index:
-            l_idx_join_cols = list(self.lhs.index._data.values())
-            r_idx_join_cols = list(self.rhs.index._data.values())
-        elif self.left_on and self.right_index:
-            # Keep the orignal dtypes in the LEFT index if possible
-            # should trigger a bunch of no-ops
-            l_idx_join_cols = list(self.lhs.index._data.values())
-            r_idx_join_cols = list(self.lhs.index._data.values())
-            for i, name in enumerate(self.left_on):
-                l_data_join_cols[name] = self.lhs._data[name]
-                r_data_join_cols[name] = list(self.rhs.index._data.values())[i]
-
-        elif self.left_index and self.right_on:
-            # see above
-            l_idx_join_cols = list(self.rhs.index._data.values())
-            r_idx_join_cols = list(self.rhs.index._data.values())
-            for i, name in enumerate(self.right_on):
-                l_data_join_cols[name] = list(self.lhs.index._data.values())[i]
-                r_data_join_cols[name] = self.rhs._data[name]
-
-        if self.left_on and self.right_on:
-            l_data_join_cols = self.lhs._data
-            r_data_join_cols = self.rhs._data
-
-        if self.left_index or self.right_index:
-            for i in range(len(self.lhs.index._data.items())):
-                index_dtypes[i] = _libcudf_to_output_castrules(
-                    l_idx_join_cols[i], r_idx_join_cols[i], self.how
-                )
-
-        for name in itertools.chain(self.left_on, self.right_on):
-            if name in self.left_on and name in self.right_on:
-                data_dtypes[name] = _libcudf_to_output_castrules(
-                    l_data_join_cols[name], r_data_join_cols[name], self.how
-                )
-        return (index_dtypes, data_dtypes)
-
-    def typecast_libcudf_to_output(self, output, output_dtypes):
-        """
-        Apply precomputed output index and data column data types
-        to the output of a libcudf join.
-        """
-
-        index_dtypes, data_dtypes = output_dtypes
-        if output._index and len(index_dtypes) > 0:
-            for index_dtype, index_col_lbl, index_col in zip(
-                index_dtypes.values(),
-                output._index._data.keys(),
-                output._index._data.values(),
-            ):
-                if index_dtype:
-                    output._index._data[
-                        index_col_lbl
-                    ] = self._build_output_col(index_col, index_dtype)
-            # reconstruct the Index object as the underlying data types
-            # have changed:
-            output._index = cudf.core.index.Index._from_table(output._index)
-
-        for data_col_lbl, data_col in output._data.items():
-            data_dtype = data_dtypes[data_col_lbl]
-            if data_dtype:
-                output._data[data_col_lbl] = self._build_output_col(
-                    data_col, data_dtype
-                )
-        return output
-
-    def _build_output_col(self, col, dtype):
-        if isinstance(
-            dtype, (cudf.core.dtypes.CategoricalDtype, pd.CategoricalDtype)
-        ):
-            outcol = cudf.core.column.build_categorical_column(
-                categories=dtype.categories,
-                codes=col.set_mask(None),
-                mask=col.base_mask,
-                ordered=dtype.ordered,
-            )
-        else:
-            outcol = col.astype(dtype)
-        return outcol

From 5227582c355aa684afb77ac7b8cb8e71feb0970c Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Feb 2021 09:04:40 -0500
Subject: [PATCH 025/138] Remove breakpoint

---
 python/cudf/cudf/core/join/join.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 322913f93cf..fa7970dc8fa 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -224,7 +224,6 @@ def perform_merge(self):
         right_key_indices = [
             key.get_numeric_index() for key in self._keys.right
         ]
-        breakpoint()
         left_rows, right_rows = libcudf.join.join(
             self.lhs,
             self.rhs,

From 9cd870eb794538ded5e2668e9a7e6228a4bdcd50 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Feb 2021 09:51:03 -0500
Subject: [PATCH 026/138] Fix neg index handling

---
 cpp/src/copying/gather.cu | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cpp/src/copying/gather.cu b/cpp/src/copying/gather.cu
index dc153e9395d..181752d18e8 100644
--- a/cpp/src/copying/gather.cu
+++ b/cpp/src/copying/gather.cu
@@ -43,9 +43,7 @@ std::unique_ptr<table> gather(table_view const& source_table,
 
   if (neg_indices == negative_index_policy::ALLOWED) {
     cudf::size_type n_rows = source_table.num_rows();
-    auto idx_converter     = [n_rows] __device__(size_type in) {
-      return ((in % n_rows) + n_rows) % n_rows;
-    };
+    auto idx_converter = [n_rows] __device__(size_type in) { return in < 0 ? in + n_rows : in; };
     return gather(source_table,
                   thrust::make_transform_iterator(map_begin, idx_converter),
                   thrust::make_transform_iterator(map_end, idx_converter),

From 8e4f193d5fd72ac44dd44bc01d868c0b9fb62008 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Feb 2021 09:52:01 -0500
Subject: [PATCH 027/138] Use nullify gather in join

---
 python/cudf/cudf/_lib/copying.pyx  | 12 +++++++++---
 python/cudf/cudf/core/frame.py     |  8 +++++---
 python/cudf/cudf/core/join/join.py | 12 ++++++++----
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index ad798a73ed2..ebd14510734 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -130,11 +130,16 @@ def copy_range(Column input_column,
                            input_begin, input_end, target_begin)
 
 
-def gather(Table source_table, Column gather_map, bool keep_index=True):
+def gather(
+    Table source_table,
+    Column gather_map,
+    bool keep_index=True,
+    bool nullify=False
+):
     if not pd.api.types.is_integer_dtype(gather_map.dtype):
         raise ValueError("Gather map is not integer dtype.")
 
-    if len(gather_map) > 0:
+    if len(gather_map) > 0 and not nullify:
         gm_min, gm_max = minmax(gather_map)
         if gm_min < -len(source_table) or gm_max >= len(source_table):
             raise IndexError(f"Gather map index with min {gm_min},"
@@ -150,7 +155,8 @@ def gather(Table source_table, Column gather_map, bool keep_index=True):
         source_table_view = source_table.data_view()
     cdef column_view gather_map_view = gather_map.view()
     cdef cpp_copying.out_of_bounds_policy policy = (
-        cpp_copying.out_of_bounds_policy.DONT_CHECK
+        cpp_copying.out_of_bounds_policy.NULLIFY if nullify
+        else cpp_copying.out_of_bounds_policy.DONT_CHECK
     )
 
     with nogil:
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 2a1aed814fe..c881720ab9a 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -27,7 +27,6 @@
     min_scalar_type,
 )
 
-
 T = TypeVar("T", bound="Frame")
 
 if TYPE_CHECKING:
@@ -587,12 +586,15 @@ def _get_columns_by_index(self, indices):
             data, columns=data.to_pandas_index(), index=self.index
         )
 
-    def _gather(self, gather_map, keep_index=True):
+    def _gather(self, gather_map, keep_index=True, nullify=False):
         if not pd.api.types.is_integer_dtype(gather_map.dtype):
             gather_map = gather_map.astype("int32")
         result = self.__class__._from_table(
             libcudf.copying.gather(
-                self, as_column(gather_map), keep_index=keep_index
+                self,
+                as_column(gather_map),
+                keep_index=keep_index,
+                nullify=nullify,
             )
         )
         result._copy_type_metadata(self)
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index fa7970dc8fa..917a7a85486 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -239,10 +239,10 @@ def construct_result(self, left_rows, right_rows):
         # first construct the index:
         if self.left_index and not self.right_index:
             # TODO: only gather on index columns:
-            out_index = self.rhs.index._gather(right_rows)
+            out_index = self.rhs.index._gather(right_rows, nullify=True)
         elif self.right_index and not self.left_index:
             # TODO: only gather on index columns:
-            out_index = self.lhs.index._gather(left_rows)
+            out_index = self.lhs.index._gather(left_rows, nullify=True)
         else:
             out_index = None
 
@@ -251,9 +251,13 @@ def construct_result(self, left_rows, right_rows):
         left_names, right_names = self.output_column_names()
 
         for lcol in left_names:
-            data[left_names[lcol]] = self.lhs[lcol].iloc[left_rows]
+            data[left_names[lcol]] = self.lhs[lcol]._gather(
+                left_rows, nullify=True
+            )
         for rcol in right_names:
-            data[right_names[rcol]] = self.rhs[rcol].iloc[right_rows]
+            data[right_names[rcol]] = self.rhs[rcol]._gather(
+                right_rows, nullify=True
+            )
         return cudf.DataFrame._from_data(data, index=out_index)
 
     def output_column_names(self):

From 29fe140b112b5dee3ffb4128cdd13e9bf688e810 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Feb 2021 11:14:15 -0500
Subject: [PATCH 028/138] Handle outer joins better

---
 python/cudf/cudf/core/column/column.py |  9 +++++++--
 python/cudf/cudf/core/join/join.py     | 27 ++++++++++++++++++--------
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index d615a7cfae4..0f99395d919 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -807,7 +807,12 @@ def quantile(
     def median(self, skipna: bool = None) -> ScalarLike:
         raise TypeError(f"cannot perform median with type {self.dtype}")
 
-    def take(self: T, indices: ColumnBase, keep_index: bool = True) -> T:
+    def take(
+        self: T,
+        indices: ColumnBase,
+        keep_index: bool = True,
+        nullify: bool = False,
+    ) -> T:
         """Return Column by taking values from the corresponding *indices*.
         """
         # Handle zero size
@@ -816,7 +821,7 @@ def take(self: T, indices: ColumnBase, keep_index: bool = True) -> T:
         try:
             return (
                 self.as_frame()
-                ._gather(indices, keep_index=keep_index)
+                ._gather(indices, keep_index=keep_index, nullify=nullify)
                 ._as_column()
             )
         except RuntimeError as e:
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 917a7a85486..2232d02acd9 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -237,12 +237,10 @@ def construct_result(self, left_rows, right_rows):
         self.match_key_dtypes(_libcudf_to_output_castrules)
 
         # first construct the index:
-        if self.left_index and not self.right_index:
-            # TODO: only gather on index columns:
-            out_index = self.rhs.index._gather(right_rows, nullify=True)
-        elif self.right_index and not self.left_index:
-            # TODO: only gather on index columns:
+        if self.left_index:
             out_index = self.lhs.index._gather(left_rows, nullify=True)
+        elif self.right_index:
+            out_index = self.rhs.index._gather(right_rows, nullify=True)
         else:
             out_index = None
 
@@ -258,7 +256,20 @@ def construct_result(self, left_rows, right_rows):
             data[right_names[rcol]] = self.rhs[rcol]._gather(
                 right_rows, nullify=True
             )
-        return cudf.DataFrame._from_data(data, index=out_index)
+
+        result = cudf.DataFrame._from_data(data, index=out_index)
+
+        # if outer join, key columns are combine:
+        for lkey, rkey in zip(*self._keys):
+            # get the key column as it appears in the result:
+            out_key = JoinKey(result, column=lkey.column, index=lkey.index)
+
+            # fill nulls in the key column with values from the RHS
+            out_key.set_value(
+                out_key.value.fillna(rkey.value.take(right_rows, nullify=True))
+            )
+
+        return result
 
     def output_column_names(self):
         # Return mappings of input column names to (possibly) suffixed
@@ -275,7 +286,7 @@ def output_column_names(self):
             key_columns_with_same_name = self.on
         else:
             key_columns_with_same_name = []
-            for lkey, rkey in zip(self._keys.left, self._keys.right):
+            for lkey, rkey in zip(*self._keys):
                 if (lkey.is_index_level, rkey.is_index_level) == (
                     False,
                     False,
@@ -355,7 +366,7 @@ def match_key_dtypes(self, match_func):
         # match the dtypes of the key columns in
         # self.lhs and self.rhs according to the matching
         # function `match_func`
-        for left_key, right_key in zip(self._keys.left, self._keys.right):
+        for left_key, right_key in zip(*self._keys):
             lcol, rcol = left_key.value, right_key.value
             dtype = match_func(lcol, rcol, how=self.how)
             left_key.set_value(lcol.astype(dtype))

From b63405517c875b84aca3e0058d733c38a0045773 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Feb 2021 11:41:43 -0500
Subject: [PATCH 029/138] Fix index construction

---
 python/cudf/cudf/core/join/join.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 2232d02acd9..adff223116d 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -236,11 +236,18 @@ def perform_merge(self):
     def construct_result(self, left_rows, right_rows):
         self.match_key_dtypes(_libcudf_to_output_castrules)
 
-        # first construct the index:
-        if self.left_index:
-            out_index = self.lhs.index._gather(left_rows, nullify=True)
-        elif self.right_index:
+        # first construct the index.
+        if self.left_index and self.right_index:
+            if self.how == "right":
+                out_index = self.rhs.index._gather(left_rows, nullify=True)
+            else:
+                out_index = self.lhs.index._gather(left_rows, nullify=True)
+        elif self.left_index:
+            # left_index and right_on
             out_index = self.rhs.index._gather(right_rows, nullify=True)
+        elif self.right_index:
+            # right_index and left_on
+            out_index = self.lhs.index._gather(left_rows, nullify=True)
         else:
             out_index = None
 

From cd53d6c2770f4693f38e7e2c9bd15f449ef3a703 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Feb 2021 12:01:39 -0500
Subject: [PATCH 030/138] Fix sorting behaviour

---
 python/cudf/cudf/core/frame.py     | 34 +---------------------------
 python/cudf/cudf/core/join/join.py | 36 +++++++++++++++++++++++++-----
 2 files changed, 32 insertions(+), 38 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index c881720ab9a..ccbf2cd10b6 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -3421,39 +3421,7 @@ def _merge(
             indicator,
             suffixes,
         )
-        to_return = mergeop.perform_merge()
-
-        # If sort=True, Pandas would sort on the key columns in the
-        # same order as given in 'on'. If the indices are used as
-        # keys, the index will be sorted. If one index is specified,
-        # the key column on the other side will be used to sort.
-        # If no index is specified, return a new RangeIndex
-        if sort:
-            to_sort = cudf.DataFrame()
-            if left_index and right_index:
-                by = list(to_return._index._data.columns)
-                if left_on and right_on:
-                    by.extend(to_return[mergeop.left_on]._data.columns)
-            elif left_index:
-                by = list(to_return[mergeop.right_on]._data.columns)
-            elif right_index:
-                by = list(to_return[mergeop.left_on]._data.columns)
-            else:
-                # left_on == right_on, or different names but same columns
-                # in both cases we can sort by either
-                by = [to_return._data[name] for name in mergeop.left_on]
-            for i, col in enumerate(by):
-                to_sort[i] = col
-            inds = to_sort.argsort()
-            if isinstance(to_return, cudf.Index):
-                to_return = to_return.take(inds)
-            else:
-                to_return = to_return.take(
-                    inds, keep_index=(left_index or right_index)
-                )
-            return to_return
-        else:
-            return to_return
+        return mergeop.perform_merge()
 
     def _is_sorted(self, ascending=None, null_position=None):
         """
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index adff223116d..fb61027bcbd 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -153,6 +153,7 @@ def __init__(
         self.left_index = left_index
         self.right_index = right_index
         self.how = how
+        self.sort = sort
         self.lsuffix = lsuffix
         self.rsuffix = rsuffix
         self.suffixes = suffixes
@@ -160,11 +161,6 @@ def __init__(
         self.compute_join_keys()
 
     def compute_join_keys(self):
-        def _coerce_to_tuple(obj):
-            if hasattr(obj, "__iter__") and not isinstance(obj, str):
-                return tuple(obj)
-            else:
-                return (obj,)
 
         if (
             self.left_index
@@ -276,6 +272,25 @@ def construct_result(self, left_rows, right_rows):
                 out_key.value.fillna(rkey.value.take(right_rows, nullify=True))
             )
 
+        return self.sort_result(result)
+
+    def sort_result(self, result):
+        # If sort=True, Pandas sorts on the key columns in the
+        # same order as given in 'on'. If the indices are used as
+        # keys, the index will be sorted. If one index is specified,
+        # the key columns on the other side will be used to sort.
+        if self.sort:
+            if self.on:
+                return result.sort_values(
+                    _coerce_to_list(self.on), ignore_index=True
+                )
+            elif self.left_index and self.right_index:
+                return result.sort_index()
+            elif self.left_index:
+                return result.sort_values(_coerce_to_list(self.right_on))
+            else:
+                # self.right_index and self.left_on
+                return result.sort_values(_coerce_to_list(self.left_on))
         return result
 
     def output_column_names(self):
@@ -378,3 +393,14 @@ def match_key_dtypes(self, match_func):
             dtype = match_func(lcol, rcol, how=self.how)
             left_key.set_value(lcol.astype(dtype))
             right_key.set_value(rcol.astype(dtype))
+
+
+def _coerce_to_tuple(obj):
+    if hasattr(obj, "__iter__") and not isinstance(obj, str):
+        return tuple(obj)
+    else:
+        return (obj,)
+
+
+def _coerce_to_list(obj):
+    return list(_coerce_to_tuple(obj))

From 75f1efdc51fd0ccf0fe9d07f5e1991377cd23fb0 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Feb 2021 16:13:11 -0500
Subject: [PATCH 031/138] Fix Index.join

---
 python/cudf/cudf/core/index.py      |  4 +++
 python/cudf/cudf/core/join/join.py  | 41 +++++++++++++++++++----------
 python/cudf/cudf/core/multiindex.py |  8 +++++-
 3 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index e3899a403f1..78d43d0275b 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1493,6 +1493,10 @@ def _from_table(cls, table):
         else:
             return as_index(table)
 
+    @classmethod
+    def _from_data(cls, data, index=None):
+        return cls._from_table(Frame(data=data))
+
     _accessors = set()  # type: Set[Any]
 
     @property
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index fb61027bcbd..1223a082800 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -16,8 +16,8 @@ class _MISSING_TYPE:
 MISSING = _MISSING_TYPE()
 
 
-class JoinKey:
-    # A JoinKey represents one column of a Series
+class ColumnView:
+    # A ColumnView represents one column of a Series
     # or DataFrame - either an index column or a
     # data column
 
@@ -30,9 +30,10 @@ def __init__(self, obj, column=MISSING, index=MISSING):
     def get_numeric_index(self):
         # get the position of the column (including any index columns)
         if self.index is MISSING:
-            return len(self.obj.index.names) + self.obj.columns.get_loc(
-                self.column
+            index_nlevels = (
+                self.obj.index.nlevels if self.obj._index is not None else 0
             )
+            return index_nlevels + tuple(self.obj._data).index(self.column)
         else:
             return self.obj.index.names.index(self.index)
 
@@ -158,6 +159,14 @@ def __init__(
         self.rsuffix = rsuffix
         self.suffixes = suffixes
 
+        self.out_class = cudf.DataFrame
+        if isinstance(self.lhs, cudf.MultiIndex) or isinstance(
+            self.rhs, cudf.MultiIndex
+        ):
+            self.out_class = cudf.MultiIndex
+        elif isinstance(self.lhs, cudf.Index):
+            self.out_class = self.lhs.__class__
+
         self.compute_join_keys()
 
     def compute_join_keys(self):
@@ -170,24 +179,24 @@ def compute_join_keys(self):
         ):
             if self.left_index:
                 left_keys = [
-                    JoinKey(obj=self.lhs, index=on)
+                    ColumnView(obj=self.lhs, index=on)
                     for on in self.lhs.index.names
                 ]
             else:
                 # TODO: require left_on or left_index to be specified
                 left_keys = [
-                    JoinKey(obj=self.lhs, column=on)
+                    ColumnView(obj=self.lhs, column=on)
                     for on in _coerce_to_tuple(self.left_on)
                 ]
             if self.right_index:
                 right_keys = [
-                    JoinKey(obj=self.rhs, index=on)
+                    ColumnView(obj=self.rhs, index=on)
                     for on in self.rhs.index.names
                 ]
             else:
                 # TODO: require right_on or right_index to be specified
                 right_keys = [
-                    JoinKey(obj=self.rhs, column=on)
+                    ColumnView(obj=self.rhs, column=on)
                     for on in _coerce_to_tuple(self.right_on)
                 ]
         else:
@@ -198,8 +207,12 @@ def compute_join_keys(self):
                 if self.on is not None
                 else set(self.lhs._data.keys()) & set(self.rhs._data.keys())
             )
-            left_keys = [JoinKey(obj=self.lhs, column=on) for on in on_names]
-            right_keys = [JoinKey(obj=self.rhs, column=on) for on in on_names]
+            left_keys = [
+                ColumnView(obj=self.lhs, column=on) for on in on_names
+            ]
+            right_keys = [
+                ColumnView(obj=self.rhs, column=on) for on in on_names
+            ]
 
         if len(left_keys) != len(right_keys):
             raise ValueError(
@@ -252,20 +265,20 @@ def construct_result(self, left_rows, right_rows):
         left_names, right_names = self.output_column_names()
 
         for lcol in left_names:
-            data[left_names[lcol]] = self.lhs[lcol]._gather(
+            data[left_names[lcol]] = self.lhs._data[lcol].take(
                 left_rows, nullify=True
             )
         for rcol in right_names:
-            data[right_names[rcol]] = self.rhs[rcol]._gather(
+            data[right_names[rcol]] = self.rhs._data[rcol].take(
                 right_rows, nullify=True
             )
 
-        result = cudf.DataFrame._from_data(data, index=out_index)
+        result = self.out_class._from_data(data, index=out_index)
 
         # if outer join, key columns are combine:
         for lkey, rkey in zip(*self._keys):
             # get the key column as it appears in the result:
-            out_key = JoinKey(result, column=lkey.column, index=lkey.index)
+            out_key = ColumnView(result, column=lkey.column, index=lkey.index)
 
             # fill nulls in the key column with values from the RHS
             out_key.set_value(
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 4e82a1f72b0..b72fa748cff 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1,4 +1,6 @@
 # Copyright (c) 2019-2020, NVIDIA CORPORATION.
+from __future__ import annotations
+
 import itertools
 import numbers
 import pickle
@@ -16,6 +18,7 @@
 from cudf import _lib as libcudf
 from cudf._typing import DataFrameOrSeries
 from cudf.core.column import column
+from cudf.core.column_accessor import ColumnAccessor
 from cudf.core.frame import Frame
 from cudf.core.index import Index, as_index
 
@@ -232,7 +235,6 @@ def rename(self, names, inplace=False):
         ValueError: Length of names must match number of levels in MultiIndex.
 
         """
-
         return self.set_names(names, level=None, inplace=inplace)
 
     def set_names(self, names, level=None, inplace=False):
@@ -276,6 +278,10 @@ def set_names(self, names, level=None, inplace=False):
 
         return self._set_names(names=names, inplace=inplace)
 
+    @classmethod
+    def _from_data(cls, data: ColumnAccessor, index=None) -> MultiIndex:
+        return cls.from_frame(cudf.DataFrame._from_data(data))
+
     @classmethod
     def _from_table(cls, table, names=None):
         df = cudf.DataFrame(table._data)

From 1f5d6ad0c3bd8ac938b366506275490f13498817 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Feb 2021 17:42:53 -0500
Subject: [PATCH 032/138] Progress on semi/anti joins

---
 cpp/src/join/semi_join.cu          |   5 +-
 python/cudf/cudf/_lib/join.pyx     |   7 +-
 python/cudf/cudf/core/join/join.py | 100 +++++++++++++++++++++++++----
 3 files changed, 95 insertions(+), 17 deletions(-)

diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index 59298c75f1e..db60e74f81b 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -17,6 +17,8 @@
 #include <hash/concurrent_unordered_map.cuh>
 #include <join/join_common_utils.hpp>
 
+#include <thrust/distance.h>
+
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/gather.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
@@ -100,7 +102,8 @@ std::unique_ptr<cudf::column> left_semi_anti_join(
       return (pos != hash_table.end()) == join_type_boolean;
     });
 
-  auto join_size = std::distance(gather_map.begin(), gather_map_end);
+  auto join_size = thrust::distance(gather_map.begin(), gather_map_end);
+  std::cout << join_size << std::endl;
   return std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
                                         join_size,
                                         gather_map.release(),
diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 5d07604f3bb..41b59e3d2e7 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -55,7 +55,7 @@ cpdef join(Table lhs, Table rhs, left_on, right_on, how=None):
     )
 
 
-cpdef join_semi_anti(Table lhs, Table rhs, left_on, right_on, how=None):
+cpdef semi_join(Table lhs, Table rhs, left_on, right_on, how=None):
     # left-semi and left-anti joins
     cdef vector[int] c_left_on = left_on
     cdef vector[int] c_right_on = right_on
@@ -63,14 +63,14 @@ cpdef join_semi_anti(Table lhs, Table rhs, left_on, right_on, how=None):
     cdef table_view c_lhs = lhs.view()
     cdef table_view c_rhs = rhs.view()
 
-    if how == "semi":
+    if how == "leftsemi":
         c_result = move(cpp_join.left_semi_join(
             c_lhs,
             c_rhs,
             c_left_on,
             c_right_on
         ))
-    elif how == "anti":
+    elif how == "leftanti":
         c_result = move(cpp_join.left_anti_join(
             c_lhs,
             c_rhs,
@@ -79,3 +79,4 @@ cpdef join_semi_anti(Table lhs, Table rhs, left_on, right_on, how=None):
         ))
     else:
         raise ValueError(f"Invalid join type {how}")
+    return Column.from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 1223a082800..094e6e1b46e 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -69,7 +69,59 @@ def set_value(self, value):
 JoinKeys = namedtuple("JoinKeys", ["left", "right"])
 
 
-class Merge(object):
+def Merge(
+    lhs,
+    rhs,
+    on=None,
+    left_on=None,
+    right_on=None,
+    left_index=False,
+    right_index=False,
+    how="inner",
+    sort=False,
+    lsuffix="_x",
+    rsuffix="_y",
+    method=None,
+    indicator=None,
+    suffixes=None,
+):
+    if how not in {"leftsemi", "leftanti"}:
+        return MergeBase(
+            lhs,
+            rhs,
+            on=on,
+            left_on=left_on,
+            right_on=right_on,
+            left_index=left_index,
+            right_index=right_index,
+            how=how,
+            sort=sort,
+            lsuffix=lsuffix,
+            rsuffix=rsuffix,
+            method=method,
+            indicator=indicator,
+            suffixes=suffixes,
+        )
+    else:
+        return MergeSemi(
+            lhs,
+            rhs,
+            on=on,
+            left_on=left_on,
+            right_on=right_on,
+            left_index=left_index,
+            right_index=right_index,
+            how=how,
+            sort=sort,
+            lsuffix=lsuffix,
+            rsuffix=rsuffix,
+            method=method,
+            indicator=indicator,
+            suffixes=suffixes,
+        )
+
+
+class MergeBase(object):
     def __init__(
         self,
         lhs,
@@ -222,11 +274,6 @@ def compute_join_keys(self):
         self._keys = JoinKeys(left=left_keys, right=right_keys)
 
     def perform_merge(self):
-        """
-        Call libcudf to perform a merge between the operands. If
-        necessary, cast the input key columns to compatible types.
-        Potentially also cast the output back to categorical.
-        """
         self.match_key_dtypes(_input_to_libcudf_castrules_any)
 
         left_key_indices = [key.get_numeric_index() for key in self._keys.left]
@@ -276,14 +323,19 @@ def construct_result(self, left_rows, right_rows):
         result = self.out_class._from_data(data, index=out_index)
 
         # if outer join, key columns are combine:
-        for lkey, rkey in zip(*self._keys):
-            # get the key column as it appears in the result:
-            out_key = ColumnView(result, column=lkey.column, index=lkey.index)
+        if self.how == "outer":
+            for lkey, rkey in zip(*self._keys):
+                # get the key column as it appears in the result:
+                out_key = ColumnView(
+                    result, column=lkey.column, index=lkey.index
+                )
 
-            # fill nulls in the key column with values from the RHS
-            out_key.set_value(
-                out_key.value.fillna(rkey.value.take(right_rows, nullify=True))
-            )
+                # fill nulls in the key column with values from the RHS
+                out_key.set_value(
+                    out_key.value.fillna(
+                        rkey.value.take(right_rows, nullify=True)
+                    )
+                )
 
         return self.sort_result(result)
 
@@ -408,6 +460,28 @@ def match_key_dtypes(self, match_func):
             right_key.set_value(rcol.astype(dtype))
 
 
+class MergeSemi(MergeBase):
+    def perform_merge(self):
+        self.match_key_dtypes(_input_to_libcudf_castrules_any)
+
+        left_key_indices = [key.get_numeric_index() for key in self._keys.left]
+        right_key_indices = [
+            key.get_numeric_index() for key in self._keys.right
+        ]
+        left_rows = libcudf.join.semi_join(
+            self.lhs,
+            self.rhs,
+            left_on=left_key_indices,
+            right_on=right_key_indices,
+            how=self.how,
+        )
+        return self.construct_result(left_rows, cudf.core.column.as_column([]))
+
+    def output_column_names(self):
+        left_names, _ = super().output_column_names()
+        return left_names, {}
+
+
 def _coerce_to_tuple(obj):
     if hasattr(obj, "__iter__") and not isinstance(obj, str):
         return tuple(obj)

From de305200c388fa4575d7145f9c35dd4f035dbbd2 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Feb 2021 18:57:28 -0500
Subject: [PATCH 033/138] Add simple join test

---
 cpp/tests/join/semi_join_tests.cpp | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/cpp/tests/join/semi_join_tests.cpp b/cpp/tests/join/semi_join_tests.cpp
index a665d07ee3c..30ac1b57e55 100644
--- a/cpp/tests/join/semi_join_tests.cpp
+++ b/cpp/tests/join/semi_join_tests.cpp
@@ -842,3 +842,22 @@ TEST_F(JoinDictionaryTest, LeftAntiJoinWithNulls)
   auto expected = cudf::left_anti_join(expect_a, expect_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view(result_decoded), *expected);
 }
+
+TEST_F(JoinTest, LeftSemiSimple)
+{
+  column_wrapper<int32_t> a_0{1, 9, 0};
+  column_wrapper<int32_t> a_1{1, 2, 3};
+  auto table_a = cudf::table_view({a_0, a_1});
+
+  column_wrapper<int32_t> b_0{0, 1};
+  column_wrapper<int32_t> b_1{1, 2};
+  auto table_b = cudf::table_view({b_0, b_1});
+
+  auto result      = cudf::left_anti_join(table_a, table_b, {0}, {0}, {0, 1});
+  auto result_view = result->view();
+
+  column_wrapper<int32_t> expect_0{9};
+  column_wrapper<int32_t> expect_1{2};
+  auto expect = cudf::table_view({expect_0, expect_1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result_view, expect);
+}

From 66a0de5e3da5195e90ef4c2712f06baf8a1d44eb Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 11 Feb 2021 12:07:32 -0500
Subject: [PATCH 034/138] Semi-join fix

---
 cpp/src/join/semi_join.cu | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index db60e74f81b..8d91ee38725 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -37,32 +37,29 @@ namespace detail {
 
 template <join_kind JoinKind>
 std::unique_ptr<cudf::column> left_semi_anti_join(
-  cudf::table_view const& left,
-  cudf::table_view const& right,
-  std::vector<cudf::size_type> const& left_on,
-  std::vector<cudf::size_type> const& right_on,
+  cudf::table_view const& left_keys,
+  cudf::table_view const& right_keys,
   null_equality compare_nulls,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
-  CUDF_EXPECTS(0 != left.num_columns(), "Left table is empty");
-  CUDF_EXPECTS(0 != right.num_columns(), "Right table is empty");
-  CUDF_EXPECTS(left_on.size() == right_on.size(), "Mismatch in number of columns to be joined on");
+  CUDF_EXPECTS(0 != left_keys.num_columns(), "Left table is empty");
+  CUDF_EXPECTS(0 != right_keys.num_columns(), "Right table is empty");
 
-  auto const left_num_rows  = left.num_rows();
-  auto const right_num_rows = right.num_rows();
+  auto const left_num_rows  = left_keys.num_rows();
+  auto const right_num_rows = right_keys.num_rows();
 
   // Only care about existence, so we'll use an unordered map (other joins need a multimap)
   using hash_table_type = concurrent_unordered_map<cudf::size_type, bool, row_hash, row_equality>;
 
   // Create hash table containing all keys found in right table
-  auto right_rows_d            = table_device_view::create(right, stream);
+  auto right_rows_d            = table_device_view::create(right_keys, stream);
   size_t const hash_table_size = compute_hash_table_size(right_num_rows);
   row_hash hash_build{*right_rows_d};
   row_equality equality_build{*right_rows_d, *right_rows_d, compare_nulls == null_equality::EQUAL};
 
   // Going to join it with left table
-  auto left_rows_d = table_device_view::create(left, stream);
+  auto left_rows_d = table_device_view::create(left_keys, stream);
   row_hash hash_probe{*left_rows_d};
   row_equality equality_probe{*left_rows_d, *right_rows_d, compare_nulls == null_equality::EQUAL};
 
@@ -103,7 +100,6 @@ std::unique_ptr<cudf::column> left_semi_anti_join(
     });
 
   auto join_size = thrust::distance(gather_map.begin(), gather_map_end);
-  std::cout << join_size << std::endl;
   return std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
                                         join_size,
                                         gather_map.release(),
@@ -160,6 +156,8 @@ std::unique_ptr<cudf::table> left_semi_anti_join(
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
+  CUDF_EXPECTS(left_on.size() == right_on.size(), "Mismatch in number of columns to be joined on");
+
   if (0 == return_columns.size()) { return empty_like(left.select(return_columns)); }
 
   if (is_trivial_join(left, right, left_on, right_on, JoinKind)) {
@@ -181,8 +179,8 @@ std::unique_ptr<cudf::table> left_semi_anti_join(
   auto const left_selected  = matched.second.front();
   auto const right_selected = matched.second.back();
 
-  auto gather_map = left_semi_anti_join<JoinKind>(
-    left_selected, right_selected, left_on, right_on, compare_nulls, stream);
+  auto gather_map =
+    left_semi_anti_join<JoinKind>(left_selected, right_selected, compare_nulls, stream);
 
   auto const left_updated = scatter_columns(left_selected, left_on, left);
   return cudf::detail::gather(left_updated.select(return_columns),
@@ -217,7 +215,7 @@ std::unique_ptr<cudf::column> left_semi_join(cudf::table_view const& left,
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join<detail::join_kind::LEFT_SEMI_JOIN>(
-    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
+    left.select(left_on), right.select(right_on), compare_nulls, rmm::cuda_stream_default, mr);
 }
 
 std::unique_ptr<cudf::table> left_anti_join(cudf::table_view const& left,
@@ -242,7 +240,7 @@ std::unique_ptr<cudf::column> left_anti_join(cudf::table_view const& left,
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join<detail::join_kind::LEFT_ANTI_JOIN>(
-    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
+    left.select(left_on), right.select(right_on), compare_nulls, rmm::cuda_stream_default, mr);
 }
 
 }  // namespace cudf

From ca72295905f1838fc441aa5b7605e51337314655 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 11 Feb 2021 12:16:55 -0500
Subject: [PATCH 035/138] Only combine key columns in outer join if they have
 the same name

---
 python/cudf/cudf/core/join/join.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 094e6e1b46e..155cfdb99f7 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -322,20 +322,21 @@ def construct_result(self, left_rows, right_rows):
 
         result = self.out_class._from_data(data, index=out_index)
 
-        # if outer join, key columns are combine:
+        # if outer join, key columns with the same name are combined:
         if self.how == "outer":
             for lkey, rkey in zip(*self._keys):
-                # get the key column as it appears in the result:
-                out_key = ColumnView(
-                    result, column=lkey.column, index=lkey.index
-                )
+                if lkey.name == rkey.name:
+                    # get the key column as it appears in the result:
+                    out_key = ColumnView(
+                        result, column=lkey.column, index=lkey.index
+                    )
 
-                # fill nulls in the key column with values from the RHS
-                out_key.set_value(
-                    out_key.value.fillna(
-                        rkey.value.take(right_rows, nullify=True)
+                    # fill nulls in the key column with values from the RHS
+                    out_key.set_value(
+                        out_key.value.fillna(
+                            rkey.value.take(right_rows, nullify=True)
+                        )
                     )
-                )
 
         return self.sort_result(result)
 

From ee2242dcfdebc9b386dd7ad4b4d3711a30ebfcbf Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 11 Feb 2021 12:53:53 -0500
Subject: [PATCH 036/138] Handle when both _on and _index are provided

---
 python/cudf/cudf/core/join/join.py | 46 ++++++++++++++++++------------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 155cfdb99f7..927bf6d9d4b 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -229,28 +229,38 @@ def compute_join_keys(self):
             or self.left_on
             or self.right_on
         ):
+            left_keys = []
+            right_keys = []
             if self.left_index:
-                left_keys = [
-                    ColumnView(obj=self.lhs, index=on)
-                    for on in self.lhs.index.names
-                ]
-            else:
+                left_keys.extend(
+                    [
+                        ColumnView(obj=self.lhs, index=on)
+                        for on in self.lhs.index.names
+                    ]
+                )
+            if self.left_on:
                 # TODO: require left_on or left_index to be specified
-                left_keys = [
-                    ColumnView(obj=self.lhs, column=on)
-                    for on in _coerce_to_tuple(self.left_on)
-                ]
+                left_keys.extend(
+                    [
+                        ColumnView(obj=self.lhs, column=on)
+                        for on in _coerce_to_tuple(self.left_on)
+                    ]
+                )
             if self.right_index:
-                right_keys = [
-                    ColumnView(obj=self.rhs, index=on)
-                    for on in self.rhs.index.names
-                ]
-            else:
+                right_keys.extend(
+                    [
+                        ColumnView(obj=self.rhs, index=on)
+                        for on in self.rhs.index.names
+                    ]
+                )
+            if self.right_on:
                 # TODO: require right_on or right_index to be specified
-                right_keys = [
-                    ColumnView(obj=self.rhs, column=on)
-                    for on in _coerce_to_tuple(self.right_on)
-                ]
+                right_keys.extend(
+                    [
+                        ColumnView(obj=self.rhs, column=on)
+                        for on in _coerce_to_tuple(self.right_on)
+                    ]
+                )
         else:
             # Use `on` if provided. Otherwise,
             # implicitly use identically named columns as the key columns:

From e53172582d9e63ca64de4283804f8bf838ab105b Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 11 Feb 2021 12:54:39 -0500
Subject: [PATCH 037/138] Fix sorting join result

---
 python/cudf/cudf/core/join/join.py | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 927bf6d9d4b..41830b7a80f 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -360,13 +360,27 @@ def sort_result(self, result):
                 return result.sort_values(
                     _coerce_to_list(self.on), ignore_index=True
                 )
-            elif self.left_index and self.right_index:
-                return result.sort_index()
-            elif self.left_index:
-                return result.sort_values(_coerce_to_list(self.right_on))
-            else:
-                # self.right_index and self.left_on
-                return result.sort_values(_coerce_to_list(self.left_on))
+            by = []
+            if self.left_index and self.right_index:
+                by.extend(result.index._data.columns)
+            if self.left_on:
+                by.extend(
+                    [
+                        result._data[col]
+                        for col in _coerce_to_list(self.left_on)
+                    ]
+                )
+            if self.right_on:
+                by.extend(
+                    [
+                        result._data[col]
+                        for col in _coerce_to_list(self.right_on)
+                    ]
+                )
+            if by:
+                to_sort = cudf.DataFrame._from_columns(by)
+                sort_order = to_sort.argsort()
+                result = result.take(sort_order)
         return result
 
     def output_column_names(self):

From 674095ce434bca80bc77409d5a22d4544653b0f4 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 12 Feb 2021 07:26:49 -0500
Subject: [PATCH 038/138] whitespace

---
 python/cudf/cudf/core/join/casting_logic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/join/casting_logic.py b/python/cudf/cudf/core/join/casting_logic.py
index eb85cecd14d..acd8efca8a9 100644
--- a/python/cudf/cudf/core/join/casting_logic.py
+++ b/python/cudf/cudf/core/join/casting_logic.py
@@ -186,7 +186,7 @@ def _libcudf_to_output_castrules(lcol, rcol, how):
     l_is_cat = isinstance(ltype, CategoricalDtype)
     r_is_cat = isinstance(rtype, CategoricalDtype)
 
-    # we  currently only need to do this for categorical variables
+    # we currently only need to do this for categorical variables
     if how == "inner":
         if l_is_cat and r_is_cat:
             merge_return_type = "category"

From cbd9dc371fcf60f5404e667ae1a8cebfd6513a37 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 12 Feb 2021 14:08:04 -0500
Subject: [PATCH 039/138] Make construct_join_output_df work with column views

---
 cpp/src/join/hash_join.cu | 50 +++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 16a453d7068..d478209c3d6 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -406,7 +406,7 @@ template <join_kind JoinKind>
 std::pair<std::unique_ptr<table>, std::unique_ptr<table>> construct_join_output_df(
   table_view const &probe,
   table_view const &build,
-  VectorPair &joined_indices,
+  std::pair<cudf::column_view, cudf::column_view> &joined_indices,
   std::vector<std::pair<size_type, size_type>> const &columns_in_common,
   cudf::hash_join::common_columns_output_side common_columns_output_side,
   rmm::cuda_stream_view stream,
@@ -433,26 +433,28 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> construct_join_output_
   // Construct the joined columns
   if (join_kind::FULL_JOIN == JoinKind) {
     if (not columns_in_common.empty()) {
-      auto common_from_build = detail::gather(build.select(build_common_col),
-                                              joined_indices.second.begin() + probe.num_rows(),
-                                              joined_indices.second.end(),
-                                              bounds_policy,
-                                              stream,
-                                              rmm::mr::get_current_device_resource());
-      auto common_from_probe = detail::gather(probe.select(probe_common_col),
-                                              joined_indices.first.begin(),
-                                              joined_indices.first.begin() + probe.num_rows(),
-                                              bounds_policy,
-                                              stream,
-                                              rmm::mr::get_current_device_resource());
-      common_table           = cudf::detail::concatenate(
+      auto common_from_build =
+        detail::gather(build.select(build_common_col),
+                       joined_indices.second.begin<size_type>() + probe.num_rows(),
+                       joined_indices.second.end<size_type>(),
+                       bounds_policy,
+                       stream,
+                       rmm::mr::get_current_device_resource());
+      auto common_from_probe =
+        detail::gather(probe.select(probe_common_col),
+                       joined_indices.first.begin<size_type>(),
+                       joined_indices.first.begin<size_type>() + probe.num_rows(),
+                       bounds_policy,
+                       stream,
+                       rmm::mr::get_current_device_resource());
+      common_table = cudf::detail::concatenate(
         {common_from_probe->view(), common_from_build->view()}, stream, mr);
     }
   } else {
     if (not columns_in_common.empty()) {
       common_table = detail::gather(probe.select(probe_common_col),
-                                    joined_indices.first.begin(),
-                                    joined_indices.first.end(),
+                                    joined_indices.first.begin<size_type>(),
+                                    joined_indices.first.end<size_type>(),
                                     bounds_policy,
                                     stream,
                                     mr);
@@ -461,15 +463,15 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> construct_join_output_
 
   // Construct the probe non common columns
   std::unique_ptr<table> probe_table = detail::gather(probe.select(probe_noncommon_col),
-                                                      joined_indices.first.begin(),
-                                                      joined_indices.first.end(),
+                                                      joined_indices.first.begin<size_type>(),
+                                                      joined_indices.first.end<size_type>(),
                                                       bounds_policy,
                                                       stream,
                                                       mr);
 
   std::unique_ptr<table> build_table = detail::gather(build.select(build_noncommon_col),
-                                                      joined_indices.second.begin(),
-                                                      joined_indices.second.end(),
+                                                      joined_indices.second.begin<size_type>(),
+                                                      joined_indices.second.end<size_type>(),
                                                       bounds_policy,
                                                       stream,
                                                       mr);
@@ -688,15 +690,17 @@ hash_join::hash_join_impl::compute_hash_join(
                            }),
                "Invalid values passed to columns_in_common");
 
-  auto joined_indices =
-    compute_hash_join_indices<JoinKind>(probe, probe_on, compare_nulls, stream, mr);
+  auto joined_indices = compute_hash_join<JoinKind>(probe, probe_on, compare_nulls, stream, mr);
 
   if (is_trivial_join(probe, _build, probe_on, _build_on, JoinKind)) {
     return get_empty_joined_table(probe, _build, columns_in_common, common_columns_output_side);
   }
 
+  auto joined_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
+    joined_indices.first->view(), joined_indices.second->view());
+
   return cudf::detail::construct_join_output_df<JoinKind>(
-    probe, _build, joined_indices, columns_in_common, common_columns_output_side, stream, mr);
+    probe, _build, joined_indices_view, columns_in_common, common_columns_output_side, stream, mr);
 }
 
 template <cudf::detail::join_kind JoinKind>

From 3f3c3cb60d5f02bf62855a00dcc2ff7674750ebe Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 12 Feb 2021 15:25:53 -0500
Subject: [PATCH 040/138] Get rid of hash_join::left_join

---
 cpp/include/cudf/join.hpp     |  30 ----
 cpp/src/join/hash_join.cu     | 108 +-------------
 cpp/src/join/hash_join.cuh    | 114 +++++++++++++--
 cpp/src/join/join.cu          |  34 +++--
 cpp/tests/join/join_tests.cpp | 260 +++++++++++++++++-----------------
 5 files changed, 260 insertions(+), 286 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 0a42d28de2d..72cd4066cf1 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -539,36 +539,6 @@ class hash_join {
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
-  /**
-   * @brief Performs a left join by probing in the internal hash table.
-   *
-   * More details please @see cudf::left_join().
-   *
-   * @param probe The probe table, from which the tuples are probed.
-   * @param probe_on The column indices from `probe` to join on.
-   * @param columns_in_common is a vector of pairs of column indices into
-   * `probe` and `build`, respectively, that are "in common". For "common"
-   * columns, only a single output column will be produced, which is gathered
-   * from `probe_on` columns. Else, for every column in `probe_on` and `build_on`,
-   * an output column will be produced. For each of these pairs (P, B), P
-   * should exist in `probe_on` and B should exist in `build_on`.
-   * @param compare_nulls Controls whether null join-key values should match or not.
-   * @param mr Device memory resource used to allocate the returned table and columns' device
-   * memory.
-   * @param stream CUDA stream used for device memory operations and kernel launches
-   *
-   * @return Result of joining `build` and `probe` tables on the columns
-   * specified by `build_on` and `probe_on`. The resulting table will be joined columns of
-   * `probe(including common columns)+build(excluding common columns)`.
-   */
-  std::unique_ptr<cudf::table> left_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
-    null_equality compare_nulls         = null_equality::EQUAL,
-    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
-
   /**
    * @brief Performs a full join on the specified columns of two
    * tables (`left`, `right`), and returns the row indices corresponding
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index d478209c3d6..4cc91368565 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -41,8 +41,8 @@ namespace detail {
  * @return vector A vector containing only the indices which are not present in
  * `common_column_indices`
  */
-auto non_common_column_indices(size_type num_columns,
-                               std::vector<size_type> const &common_column_indices)
+std::vector<size_type> non_common_column_indices(
+  size_type num_columns, std::vector<size_type> const &common_column_indices)
 {
   CUDF_EXPECTS(common_column_indices.size() <= static_cast<uint64_t>(num_columns),
                "Too many columns in common");
@@ -402,89 +402,6 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> combine_join_columns(
  * `common_columns_output_side` is `PROBE`, or (`probe(excluding common columns)`,
  * `build(including common columns)`) if `common_columns_output_side` is `BUILD`.
  */
-template <join_kind JoinKind>
-std::pair<std::unique_ptr<table>, std::unique_ptr<table>> construct_join_output_df(
-  table_view const &probe,
-  table_view const &build,
-  std::pair<cudf::column_view, cudf::column_view> &joined_indices,
-  std::vector<std::pair<size_type, size_type>> const &columns_in_common,
-  cudf::hash_join::common_columns_output_side common_columns_output_side,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource *mr)
-{
-  std::vector<size_type> probe_common_col;
-  probe_common_col.reserve(columns_in_common.size());
-  std::vector<size_type> build_common_col;
-  build_common_col.reserve(columns_in_common.size());
-  for (const auto &c : columns_in_common) {
-    probe_common_col.push_back(c.first);
-    build_common_col.push_back(c.second);
-  }
-  std::vector<size_type> probe_noncommon_col =
-    non_common_column_indices(probe.num_columns(), probe_common_col);
-  std::vector<size_type> build_noncommon_col =
-    non_common_column_indices(build.num_columns(), build_common_col);
-
-  out_of_bounds_policy const bounds_policy = JoinKind != join_kind::INNER_JOIN
-                                               ? out_of_bounds_policy::NULLIFY
-                                               : out_of_bounds_policy::DONT_CHECK;
-
-  std::unique_ptr<table> common_table = std::make_unique<table>();
-  // Construct the joined columns
-  if (join_kind::FULL_JOIN == JoinKind) {
-    if (not columns_in_common.empty()) {
-      auto common_from_build =
-        detail::gather(build.select(build_common_col),
-                       joined_indices.second.begin<size_type>() + probe.num_rows(),
-                       joined_indices.second.end<size_type>(),
-                       bounds_policy,
-                       stream,
-                       rmm::mr::get_current_device_resource());
-      auto common_from_probe =
-        detail::gather(probe.select(probe_common_col),
-                       joined_indices.first.begin<size_type>(),
-                       joined_indices.first.begin<size_type>() + probe.num_rows(),
-                       bounds_policy,
-                       stream,
-                       rmm::mr::get_current_device_resource());
-      common_table = cudf::detail::concatenate(
-        {common_from_probe->view(), common_from_build->view()}, stream, mr);
-    }
-  } else {
-    if (not columns_in_common.empty()) {
-      common_table = detail::gather(probe.select(probe_common_col),
-                                    joined_indices.first.begin<size_type>(),
-                                    joined_indices.first.end<size_type>(),
-                                    bounds_policy,
-                                    stream,
-                                    mr);
-    }
-  }
-
-  // Construct the probe non common columns
-  std::unique_ptr<table> probe_table = detail::gather(probe.select(probe_noncommon_col),
-                                                      joined_indices.first.begin<size_type>(),
-                                                      joined_indices.first.end<size_type>(),
-                                                      bounds_policy,
-                                                      stream,
-                                                      mr);
-
-  std::unique_ptr<table> build_table = detail::gather(build.select(build_noncommon_col),
-                                                      joined_indices.second.begin<size_type>(),
-                                                      joined_indices.second.end<size_type>(),
-                                                      bounds_policy,
-                                                      stream,
-                                                      mr);
-
-  return combine_join_columns(probe_table->release(),
-                              probe_noncommon_col,
-                              probe_common_col,
-                              build_table->release(),
-                              build_noncommon_col,
-                              build_common_col,
-                              common_table->release(),
-                              common_columns_output_side);
-}
 
 std::unique_ptr<cudf::table> combine_table_pair(std::unique_ptr<cudf::table> &&left,
                                                 std::unique_ptr<cudf::table> &&right)
@@ -559,27 +476,6 @@ hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
     probe, probe_on, compare_nulls, stream, mr);
 }
 
-std::unique_ptr<cudf::table> hash_join::hash_join_impl::left_join(
-  cudf::table_view const &probe,
-  std::vector<size_type> const &probe_on,
-  std::vector<std::pair<cudf::size_type, cudf::size_type>> const &columns_in_common,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource *mr) const
-{
-  CUDF_FUNC_RANGE();
-  auto probe_build_pair =
-    compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(probe,
-                                                          probe_on,
-                                                          columns_in_common,
-                                                          common_columns_output_side::PROBE,
-                                                          compare_nulls,
-                                                          stream,
-                                                          mr);
-  return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
-                                          std::move(probe_build_pair.second));
-}
-
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
 hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
                                      std::vector<size_type> const &probe_on,
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index c33029dea55..16a4edf7d5f 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -15,6 +15,9 @@
  */
 #pragma once
 
+#include <cudf/detail/concatenate.cuh>
+#include <cudf/detail/gather.cuh>
+#include <cudf/detail/gather.hpp>
 #include <join/join_common_utils.hpp>
 #include <join/join_kernels.cuh>
 
@@ -191,9 +194,112 @@ get_trivial_left_join_indices(table_view const& left, rmm::cuda_stream_view stre
   return std::make_pair(std::move(left_indices), std::move(right_indices));
 }
 
+std::pair<std::unique_ptr<table>, std::unique_ptr<table>> get_empty_joined_table(
+  table_view const& probe,
+  table_view const& build,
+  std::vector<std::pair<size_type, size_type>> const& columns_in_common,
+  cudf::hash_join::common_columns_output_side common_columns_output_side);
+
 std::unique_ptr<cudf::table> combine_table_pair(std::unique_ptr<cudf::table>&& left,
                                                 std::unique_ptr<cudf::table>&& right);
 
+std::pair<std::unique_ptr<table>, std::unique_ptr<table>> combine_join_columns(
+  std::vector<std::unique_ptr<column>>&& probe_noncommon_cols,
+  std::vector<size_type> const& probe_noncommon_col_indices,
+  std::vector<size_type> const& probe_common_col_indices,
+  std::vector<std::unique_ptr<column>>&& build_noncommon_cols,
+  std::vector<size_type> const& build_noncommon_col_indices,
+  std::vector<size_type> const& build_common_col_indices,
+  std::vector<std::unique_ptr<column>>&& common_cols,
+  cudf::hash_join::common_columns_output_side common_columns_output_side);
+
+std::vector<size_type> non_common_column_indices(
+  size_type num_columns, std::vector<size_type> const& common_column_indices);
+
+template <join_kind JoinKind>
+std::pair<std::unique_ptr<table>, std::unique_ptr<table>> construct_join_output_df(
+  table_view const& probe,
+  table_view const& build,
+  std::pair<cudf::column_view, cudf::column_view>& joined_indices,
+  std::vector<std::pair<size_type, size_type>> const& columns_in_common,
+  cudf::hash_join::common_columns_output_side common_columns_output_side,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr)
+{
+  std::vector<size_type> probe_common_col;
+  probe_common_col.reserve(columns_in_common.size());
+  std::vector<size_type> build_common_col;
+  build_common_col.reserve(columns_in_common.size());
+  for (const auto& c : columns_in_common) {
+    probe_common_col.push_back(c.first);
+    build_common_col.push_back(c.second);
+  }
+  std::vector<size_type> probe_noncommon_col =
+    non_common_column_indices(probe.num_columns(), probe_common_col);
+  std::vector<size_type> build_noncommon_col =
+    non_common_column_indices(build.num_columns(), build_common_col);
+
+  out_of_bounds_policy const bounds_policy = JoinKind != join_kind::INNER_JOIN
+                                               ? out_of_bounds_policy::NULLIFY
+                                               : out_of_bounds_policy::DONT_CHECK;
+
+  std::unique_ptr<table> common_table = std::make_unique<table>();
+  // Construct the joined columns
+  if (join_kind::FULL_JOIN == JoinKind) {
+    if (not columns_in_common.empty()) {
+      auto common_from_build =
+        detail::gather(build.select(build_common_col),
+                       joined_indices.second.begin<size_type>() + probe.num_rows(),
+                       joined_indices.second.end<size_type>(),
+                       bounds_policy,
+                       stream,
+                       rmm::mr::get_current_device_resource());
+      auto common_from_probe =
+        detail::gather(probe.select(probe_common_col),
+                       joined_indices.first.begin<size_type>(),
+                       joined_indices.first.begin<size_type>() + probe.num_rows(),
+                       bounds_policy,
+                       stream,
+                       rmm::mr::get_current_device_resource());
+      common_table = cudf::detail::concatenate(
+        {common_from_probe->view(), common_from_build->view()}, stream, mr);
+    }
+  } else {
+    if (not columns_in_common.empty()) {
+      common_table = detail::gather(probe.select(probe_common_col),
+                                    joined_indices.first.begin<size_type>(),
+                                    joined_indices.first.end<size_type>(),
+                                    bounds_policy,
+                                    stream,
+                                    mr);
+    }
+  }
+
+  // Construct the probe non common columns
+  std::unique_ptr<table> probe_table = detail::gather(probe.select(probe_noncommon_col),
+                                                      joined_indices.first.begin<size_type>(),
+                                                      joined_indices.first.end<size_type>(),
+                                                      bounds_policy,
+                                                      stream,
+                                                      mr);
+
+  std::unique_ptr<table> build_table = detail::gather(build.select(build_noncommon_col),
+                                                      joined_indices.second.begin<size_type>(),
+                                                      joined_indices.second.end<size_type>(),
+                                                      bounds_policy,
+                                                      stream,
+                                                      mr);
+
+  return combine_join_columns(probe_table->release(),
+                              probe_noncommon_col,
+                              probe_common_col,
+                              build_table->release(),
+                              build_noncommon_col,
+                              build_common_col,
+                              common_table->release(),
+                              common_columns_output_side);
+}
+
 }  // namespace detail
 
 struct hash_join::hash_join_impl {
@@ -254,14 +360,6 @@ struct hash_join::hash_join_impl {
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
-  std::unique_ptr<cudf::table> left_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
-
   std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
     cudf::table_view const& probe,
     std::vector<size_type> const& probe_on,
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 61a0a85ef50..ddac509d9ef 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -155,7 +155,28 @@ std::unique_ptr<table> left_join(
   table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
 
   cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
-  return hj_obj.left_join(left, left_on, columns_in_common, compare_nulls, stream, mr);
+  auto join_indices = hj_obj.left_join(left, left_on, compare_nulls, stream, mr);
+
+  if (is_trivial_join(left, right, left_on, right_on, cudf::detail::join_kind::LEFT_JOIN)) {
+    auto probe_build_pair = get_empty_joined_table(
+      left, right, columns_in_common, cudf::hash_join::common_columns_output_side::PROBE);
+    return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
+                                            std::move(probe_build_pair.second));
+  }
+
+  auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
+    join_indices.first->view(), join_indices.second->view());
+
+  auto probe_build_pair = construct_join_output_df<cudf::detail::join_kind::LEFT_JOIN>(
+    left,
+    right,
+    join_indices_view,
+    columns_in_common,
+    cudf::hash_join::common_columns_output_side::PROBE,
+    stream,
+    mr);
+
+  return combine_table_pair(std::move(probe_build_pair.first), std::move(probe_build_pair.second));
 }
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
@@ -250,17 +271,6 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_joi
   return impl->left_join(probe, probe_on, compare_nulls, stream, mr);
 }
 
-std::unique_ptr<cudf::table> hash_join::left_join(
-  cudf::table_view const& probe,
-  std::vector<size_type> const& probe_on,
-  std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr) const
-{
-  return impl->left_join(probe, probe_on, columns_in_common, compare_nulls, stream, mr);
-}
-
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_join::full_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,
diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index f0748e2ec29..9fbccbda795 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -1117,136 +1117,136 @@ TEST_F(JoinTest, InnerJoinCornerCase)
   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
 }
 
-TEST_F(JoinTest, HashJoinSequentialProbes)
-{
-  CVector cols1;
-  cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3}}.release());
-  cols1.emplace_back(strcol_wrapper{{"s1", "s0", "s1", "s2", "s1"}}.release());
-  cols1.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 1}}.release());
-
-  Table t1(std::move(cols1));
-
-  cudf::hash_join hash_join(t1, {0, 1}, cudf::null_equality::EQUAL);
-
-  {
-    CVector cols0;
-    cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
-    cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
-    cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-    Table t0(std::move(cols0));
-
-    auto result            = hash_join.full_join(t0, {0, 1}, {{0, 0}, {1, 1}});
-    auto result_sort_order = cudf::sorted_order(result->view());
-    auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-    CVector cols_gold;
-    cols_gold.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3, 3, 1, 2, 0}}.release());
-    cols_gold.emplace_back(
-      strcol_wrapper({"s1", "s0", "s1", "s2", "s1", "s0", "s1", "s2", "s4"}).release());
-    cols_gold.emplace_back(
-      column_wrapper<int32_t>{{-1, -1, -1, -1, 1, 0, 1, 2, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}}
-        .release());
-    cols_gold.emplace_back(
-      column_wrapper<int32_t>{{1, 0, 1, 2, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}}
-        .release());
-    Table gold(std::move(cols_gold));
-
-    auto gold_sort_order = cudf::sorted_order(gold.view());
-    auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-    CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-  }
-
-  {
-    CVector cols0;
-    cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
-    cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
-    cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-    Table t0(std::move(cols0));
-
-    auto result            = hash_join.left_join(t0, {0, 1}, {{0, 0}, {1, 1}});
-    auto result_sort_order = cudf::sorted_order(result->view());
-    auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-    CVector cols_gold;
-    cols_gold.emplace_back(column_wrapper<int32_t>{{3, 3, 1, 2, 0}, {1, 1, 1, 1, 1}}.release());
-    cols_gold.emplace_back(
-      strcol_wrapper({"s1", "s0", "s1", "s2", "s4"}, {1, 1, 1, 1, 1, 1}).release());
-    cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 4}, {1, 1, 1, 1, 1}}.release());
-    cols_gold.emplace_back(column_wrapper<int32_t>{{1, -1, -1, -1, -1}, {1, 0, 0, 0, 0}}.release());
-    Table gold(std::move(cols_gold));
-
-    auto gold_sort_order = cudf::sorted_order(gold.view());
-    auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-    CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-  }
-
-  {
-    CVector cols0;
-    cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-    cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-    cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
-    cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-    Table t0(std::move(cols0));
-
-    auto probe_build_pair = hash_join.inner_join(t0, {1, 2}, {{1, 0}, {2, 1}});
-    auto joined_cols      = probe_build_pair.first->release();
-    auto build_cols       = probe_build_pair.second->release();
-    joined_cols.insert(joined_cols.end(),
-                       std::make_move_iterator(build_cols.begin()),
-                       std::make_move_iterator(build_cols.end()));
-    auto result            = std::make_unique<cudf::table>(std::move(joined_cols));
-    auto result_sort_order = cudf::sorted_order(result->view());
-    auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-    CVector cols_gold;
-    cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-    cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-    cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
-    cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
-    cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
-    Table gold(std::move(cols_gold));
-
-    auto gold_sort_order = cudf::sorted_order(gold.view());
-    auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-    CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-  }
-
-  {
-    CVector cols0;
-    cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-    cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-    cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
-    cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-    Table t0(std::move(cols0));
-
-    auto probe_build_pair = hash_join.inner_join(
-      t0, {1, 2}, {{1, 0}, {2, 1}}, cudf::hash_join::common_columns_output_side::BUILD);
-    auto joined_cols = probe_build_pair.second->release();
-    auto probe_cols  = probe_build_pair.first->release();
-    joined_cols.insert(joined_cols.end(),
-                       std::make_move_iterator(probe_cols.begin()),
-                       std::make_move_iterator(probe_cols.end()));
-    auto result            = std::make_unique<cudf::table>(std::move(joined_cols));
-    auto result_sort_order = cudf::sorted_order(result->view());
-    auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-    CVector cols_gold;
-    cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-    cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
-    cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
-    cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-    cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
-    Table gold(std::move(cols_gold));
-
-    auto gold_sort_order = cudf::sorted_order(gold.view());
-    auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-    CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-  }
-}
+// TEST_F(JoinTest, HashJoinSequentialProbes)
+// {
+//   CVector cols1;
+//   cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3}}.release());
+//   cols1.emplace_back(strcol_wrapper{{"s1", "s0", "s1", "s2", "s1"}}.release());
+//   cols1.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 1}}.release());
+
+//   Table t1(std::move(cols1));
+
+//   cudf::hash_join hash_join(t1, {0, 1}, cudf::null_equality::EQUAL);
+
+//   {
+//     CVector cols0;
+//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
+//     cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
+//     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
+
+//     Table t0(std::move(cols0));
+
+//     auto result            = hash_join.full_join(t0, {0, 1}, {{0, 0}, {1, 1}});
+//     auto result_sort_order = cudf::sorted_order(result->view());
+//     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+//     CVector cols_gold;
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3, 3, 1, 2, 0}}.release());
+//     cols_gold.emplace_back(
+//       strcol_wrapper({"s1", "s0", "s1", "s2", "s1", "s0", "s1", "s2", "s4"}).release());
+//     cols_gold.emplace_back(
+//       column_wrapper<int32_t>{{-1, -1, -1, -1, 1, 0, 1, 2, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}}
+//         .release());
+//     cols_gold.emplace_back(
+//       column_wrapper<int32_t>{{1, 0, 1, 2, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}}
+//         .release());
+//     Table gold(std::move(cols_gold));
+
+//     auto gold_sort_order = cudf::sorted_order(gold.view());
+//     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+//     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+//   }
+
+//   {
+//     CVector cols0;
+//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
+//     cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
+//     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
+
+//     Table t0(std::move(cols0));
+
+//     auto result            = hash_join.left_join(t0, {0, 1}, {{0, 0}, {1, 1}});
+//     auto result_sort_order = cudf::sorted_order(result->view());
+//     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+//     CVector cols_gold;
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 3, 1, 2, 0}, {1, 1, 1, 1, 1}}.release());
+//     cols_gold.emplace_back(
+//       strcol_wrapper({"s1", "s0", "s1", "s2", "s4"}, {1, 1, 1, 1, 1, 1}).release());
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 4}, {1, 1, 1, 1, 1}}.release());
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{1, -1, -1, -1, -1}, {1, 0, 0, 0,
+//     0}}.release()); Table gold(std::move(cols_gold));
+
+//     auto gold_sort_order = cudf::sorted_order(gold.view());
+//     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+//     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+//   }
+
+//   {
+//     CVector cols0;
+//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+//     cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
+//     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
+
+//     Table t0(std::move(cols0));
+
+//     auto probe_build_pair = hash_join.inner_join(t0, {1, 2}, {{1, 0}, {2, 1}});
+//     auto joined_cols      = probe_build_pair.first->release();
+//     auto build_cols       = probe_build_pair.second->release();
+//     joined_cols.insert(joined_cols.end(),
+//                        std::make_move_iterator(build_cols.begin()),
+//                        std::make_move_iterator(build_cols.end()));
+//     auto result            = std::make_unique<cudf::table>(std::move(joined_cols));
+//     auto result_sort_order = cudf::sorted_order(result->view());
+//     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+//     CVector cols_gold;
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+//     cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
+//     Table gold(std::move(cols_gold));
+
+//     auto gold_sort_order = cudf::sorted_order(gold.view());
+//     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+//     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+//   }
+
+//   {
+//     CVector cols0;
+//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+//     cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
+//     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
+
+//     Table t0(std::move(cols0));
+
+//     auto probe_build_pair = hash_join.inner_join(
+//       t0, {1, 2}, {{1, 0}, {2, 1}}, cudf::hash_join::common_columns_output_side::BUILD);
+//     auto joined_cols = probe_build_pair.second->release();
+//     auto probe_cols  = probe_build_pair.first->release();
+//     joined_cols.insert(joined_cols.end(),
+//                        std::make_move_iterator(probe_cols.begin()),
+//                        std::make_move_iterator(probe_cols.end()));
+//     auto result            = std::make_unique<cudf::table>(std::move(joined_cols));
+//     auto result_sort_order = cudf::sorted_order(result->view());
+//     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+//     CVector cols_gold;
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+//     cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+//     cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
+//     Table gold(std::move(cols_gold));
+
+//     auto gold_sort_order = cudf::sorted_order(gold.view());
+//     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+//     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+//   }
+// }
 
 struct JoinDictionaryTest : public cudf::test::BaseFixture {
 };

From 01415fc11fac50895e7d1586c6ccc36e0503e2e1 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 12 Feb 2021 16:09:27 -0500
Subject: [PATCH 041/138] More join C++ cleanup

---
 cpp/include/cudf/join.hpp |  74 ---------------------------
 cpp/src/join/join.cu      | 104 ++++++++++++++++++++++++--------------
 2 files changed, 65 insertions(+), 113 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 72cd4066cf1..09064ee24da 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -483,50 +483,6 @@ class hash_join {
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
-  /**
-   * @brief Performs an inner join by probing in the internal hash table.
-   *
-   * Given that it is sometimes desired to choose the small table to be the `build` side for an
-   * inner join，a (`probe`, `build`) table pair, which contains the probe and build portions of the
-   * logical joined table respectively, is returned so that caller can freely rearrange them to
-   * restore the logical `left` `right` order. This introduces some extra logic about where "common"
-   * columns should go, i.e. the legacy `cudf::inner_join()` API always outputs "common" columns in
-   * the `left` portion and the corresponding columns in the `right` portion are omitted. To better
-   * align with the legacy `cudf::inner_join()` API, a `common_columns_output_side` parameter is
-   * introduced to specify whether "common" columns should go in `probe` or `build` portion.
-   *
-   * More details please @see cudf::inner_join().
-   *
-   * @param probe The probe table, from which the tuples are probed.
-   * @param probe_on The column indices from `probe` to join on.
-   * @param columns_in_common is a vector of pairs of column indices into
-   * `probe` and `build`, respectively, that are "in common". For "common"
-   * columns, only a single output column will be produced, which is gathered
-   * from `probe_on` columns or `build_on` columns if `probe_output_side` is LEFT or RIGHT.
-   * Else, for every column in `probe_on` and `build_on`,
-   * an output column will be produced. For each of these pairs (P, B), P
-   * should exist in `probe_on` and B should exist in `build_on`.
-   * @param common_columns_output_side @see `common_columns_output_side`.
-   * @param compare_nulls Controls whether null join-key values should match or not.
-   * @param mr Device memory resource used to allocate the returned table and columns' device
-   * memory.
-   * @param stream CUDA stream used for device memory operations and kernel launches
-   *
-   * @return Table pair of (`probe`, `build`) of joining both tables on the columns
-   * specified by `probe_on` and `build_on`. The resulting table pair will be joined columns of
-   * (`probe(including common columns)`, `build(excluding common columns)`) if
-   * `common_columns_output_side` is `PROBE`, or (`probe(excluding common columns)`,
-   * `build(including common columns)`) if `common_columns_output_side` is `BUILD`.
-   */
-  std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> inner_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
-    common_columns_output_side common_columns_output_side = common_columns_output_side::PROBE,
-    null_equality compare_nulls                           = null_equality::EQUAL,
-    rmm::cuda_stream_view stream                          = rmm::cuda_stream_default,
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
-
   /**
    * @brief Performs a left join on the specified columns of two
    * tables (`left`, `right`), and returns the row indices corresponding
@@ -551,36 +507,6 @@ class hash_join {
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
-  /**
-   * @brief Performs a full join by probing in the internal hash table.
-   *
-   * More details please @see cudf::full_join().
-   *
-   * @param probe The probe table, from which the tuples are probed.
-   * @param probe_on The column indices from `probe` to join on.
-   * @param columns_in_common is a vector of pairs of column indices into
-   * `probe` and `build`, respectively, that are "in common". For "common"
-   * columns, only a single output column will be produced, which is gathered
-   * from `probe_on` columns. Else, for every column in `probe_on` and `build_on`,
-   * an output column will be produced. For each of these pairs (P, B), P
-   * should exist in `probe_on` and B should exist in `build_on`.
-   * @param compare_nulls Controls whether null join-key values should match or not.
-   * @param mr Device memory resource used to allocate the returned table and columns' device
-   * memory.
-   * @param stream CUDA stream used for device memory operations and kernel launches
-   *
-   * @return Result of joining `build` and `probe` tables on the columns
-   * specified by `build_on` and `probe_on`. The resulting table will be joined columns of
-   * `probe(including common columns)+build(excluding common columns)`.
-   */
-  std::unique_ptr<cudf::table> full_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
-    null_equality compare_nulls         = null_equality::EQUAL,
-    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
-
  private:
   struct hash_join_impl;
   const std::unique_ptr<const hash_join_impl> impl;
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index ddac509d9ef..10743f9326a 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -84,28 +84,57 @@ std::unique_ptr<table> inner_join(
   // build the hash map from the smaller table.
   if (right.num_rows() > left.num_rows()) {
     cudf::hash_join hj_obj(left, left_on, compare_nulls, stream);
+    auto join_indices = hj_obj.inner_join(right, right_on, compare_nulls, stream, mr);
+
     auto actual_columns_in_common = columns_in_common;
     std::for_each(actual_columns_in_common.begin(), actual_columns_in_common.end(), [](auto& pair) {
       std::swap(pair.first, pair.second);
     });
-    auto probe_build_pair = hj_obj.inner_join(right,
-                                              right_on,
-                                              actual_columns_in_common,
-                                              cudf::hash_join::common_columns_output_side::BUILD,
-                                              compare_nulls,
-                                              stream,
-                                              mr);
+
+    if (is_trivial_join(left, right, left_on, right_on, cudf::detail::join_kind::INNER_JOIN)) {
+      auto probe_build_pair = get_empty_joined_table(
+        right, left, actual_columns_in_common, cudf::hash_join::common_columns_output_side::BUILD);
+      return cudf::detail::combine_table_pair(std::move(probe_build_pair.second),
+                                              std::move(probe_build_pair.first));
+    }
+
+    auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
+      join_indices.first->view(), join_indices.second->view());
+
+    auto probe_build_pair = construct_join_output_df<cudf::detail::join_kind::INNER_JOIN>(
+      right,
+      left,
+      join_indices_view,
+      actual_columns_in_common,
+      cudf::hash_join::common_columns_output_side::BUILD,
+      stream,
+      mr);
+
     return cudf::detail::combine_table_pair(std::move(probe_build_pair.second),
                                             std::move(probe_build_pair.first));
   } else {
     cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
-    auto probe_build_pair = hj_obj.inner_join(left,
-                                              left_on,
-                                              columns_in_common,
-                                              cudf::hash_join::common_columns_output_side::PROBE,
-                                              compare_nulls,
-                                              stream,
-                                              mr);
+    auto join_indices = hj_obj.inner_join(left, left_on, compare_nulls, stream, mr);
+
+    if (is_trivial_join(left, right, left_on, right_on, cudf::detail::join_kind::INNER_JOIN)) {
+      auto probe_build_pair = get_empty_joined_table(
+        left, right, columns_in_common, cudf::hash_join::common_columns_output_side::PROBE);
+      return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
+                                              std::move(probe_build_pair.second));
+    }
+
+    auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
+      join_indices.first->view(), join_indices.second->view());
+
+    auto probe_build_pair = construct_join_output_df<cudf::detail::join_kind::INNER_JOIN>(
+      left,
+      right,
+      join_indices_view,
+      columns_in_common,
+      cudf::hash_join::common_columns_output_side::PROBE,
+      stream,
+      mr);
+
     return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
                                             std::move(probe_build_pair.second));
   }
@@ -223,7 +252,28 @@ std::unique_ptr<table> full_join(
   table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
 
   cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
-  return hj_obj.full_join(left, left_on, columns_in_common, compare_nulls, stream, mr);
+  auto join_indices = hj_obj.full_join(left, left_on, compare_nulls, stream, mr);
+
+  if (is_trivial_join(left, right, left_on, right_on, cudf::detail::join_kind::FULL_JOIN)) {
+    auto probe_build_pair = get_empty_joined_table(
+      left, right, columns_in_common, cudf::hash_join::common_columns_output_side::PROBE);
+    return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
+                                            std::move(probe_build_pair.second));
+  }
+
+  auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
+    join_indices.first->view(), join_indices.second->view());
+
+  auto probe_build_pair = construct_join_output_df<cudf::detail::join_kind::FULL_JOIN>(
+    left,
+    right,
+    join_indices_view,
+    columns_in_common,
+    cudf::hash_join::common_columns_output_side::PROBE,
+    stream,
+    mr);
+
+  return combine_table_pair(std::move(probe_build_pair.first), std::move(probe_build_pair.second));
 }
 
 }  // namespace detail
@@ -248,19 +298,6 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_joi
   return impl->inner_join(probe, probe_on, compare_nulls, stream, mr);
 }
 
-std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> hash_join::inner_join(
-  cudf::table_view const& probe,
-  std::vector<size_type> const& probe_on,
-  std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
-  common_columns_output_side common_columns_output_side,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr) const
-{
-  return impl->inner_join(
-    probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
-}
-
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_join::left_join(
   cudf::table_view const& probe,
   std::vector<size_type> const& probe_on,
@@ -281,17 +318,6 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_joi
   return impl->full_join(probe, probe_on, compare_nulls, stream, mr);
 }
 
-std::unique_ptr<cudf::table> hash_join::full_join(
-  cudf::table_view const& probe,
-  std::vector<size_type> const& probe_on,
-  std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr) const
-{
-  return impl->full_join(probe, probe_on, columns_in_common, compare_nulls, stream, mr);
-}
-
 // external APIs
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(

From 618549255894ef4bc55d19918dd5293db1e8640e Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 17 Feb 2021 13:52:09 -0500
Subject: [PATCH 042/138] Even more cleaning

---
 cpp/include/cudf/join.hpp          |   49 +-
 cpp/src/join/hash_join.cu          |  261 +---
 cpp/src/join/hash_join.cuh         |  184 +--
 cpp/src/join/join.cu               |  318 ++--
 cpp/src/join/join_common_utils.hpp |    8 +-
 cpp/src/join/semi_join.cu          |   30 +-
 cpp/tests/join/join_tests.cpp      | 2159 ++++++++++++++--------------
 cpp/tests/join/semi_join_tests.cpp |  827 -----------
 8 files changed, 1260 insertions(+), 2576 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 09064ee24da..2707c60fa34 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -37,10 +37,8 @@ namespace cudf {
  * to the result.
  */ // TODO: explain this better
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
-  cudf::table_view const& left,
-  cudf::table_view const& right,
-  std::vector<cudf::size_type> const& left_on,
-  std::vector<cudf::size_type> const& right_on,
+  cudf::table_view const& left_keys,
+  cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -106,7 +104,6 @@ std::unique_ptr<cudf::table> inner_join(
   cudf::table_view const& right,
   std::vector<cudf::size_type> const& left_on,
   std::vector<cudf::size_type> const& right_on,
-  std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -116,10 +113,8 @@ std::unique_ptr<cudf::table> inner_join(
  * to the result.
  */ // TODO: explain this better
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
-  cudf::table_view const& left,
-  cudf::table_view const& right,
-  std::vector<cudf::size_type> const& left_on,
-  std::vector<cudf::size_type> const& right_on,
+  cudf::table_view const& left_keys,
+  cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -187,7 +182,6 @@ std::unique_ptr<cudf::table> left_join(
   cudf::table_view const& right,
   std::vector<cudf::size_type> const& left_on,
   std::vector<cudf::size_type> const& right_on,
-  std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -197,10 +191,8 @@ std::unique_ptr<cudf::table> left_join(
  * to the result.
  */ // TODO: explain this better
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
-  cudf::table_view const& left,
-  cudf::table_view const& right,
-  std::vector<cudf::size_type> const& left_on,
-  std::vector<cudf::size_type> const& right_on,
+  cudf::table_view const& left_keys,
+  cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -268,7 +260,6 @@ std::unique_ptr<cudf::table> full_join(
   cudf::table_view const& right,
   std::vector<cudf::size_type> const& left_on,
   std::vector<cudf::size_type> const& right_on,
-  std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -324,16 +315,13 @@ std::unique_ptr<cudf::table> left_semi_join(
   cudf::table_view const& right,
   std::vector<cudf::size_type> const& left_on,
   std::vector<cudf::size_type> const& right_on,
-  std::vector<cudf::size_type> const& return_columns,
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** TODO: document */
 std::unique_ptr<cudf::column> left_semi_join(
-  cudf::table_view const& left,
-  cudf::table_view const& right,
-  std::vector<cudf::size_type> const& left_on,
-  std::vector<cudf::size_type> const& right_on,
+  cudf::table_view const& left_keys,
+  cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -389,16 +377,13 @@ std::unique_ptr<cudf::table> left_anti_join(
   cudf::table_view const& right,
   std::vector<cudf::size_type> const& left_on,
   std::vector<cudf::size_type> const& right_on,
-  std::vector<cudf::size_type> const& return_columns,
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** TODO: document */
 std::unique_ptr<cudf::column> left_anti_join(
-  cudf::table_view const& left,
-  cudf::table_view const& right,
-  std::vector<cudf::size_type> const& left_on,
-  std::vector<cudf::size_type> const& right_on,
+  cudf::table_view const& left_keys,
+  cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -457,20 +442,9 @@ class hash_join {
    * @param stream CUDA stream used for device memory operations and kernel launches
    */
   hash_join(cudf::table_view const& build,
-            std::vector<size_type> const& build_on,
             null_equality compare_nulls,
             rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
-  /**
-   * @brief Controls where common columns will be output for a inner join.
-   */
-  enum class common_columns_output_side {
-    PROBE,  ///< Common columns is output in the probe portion of the table pair returned by
-            ///< `inner_join`.
-    BUILD   ///< Common columns is output in the build portion of the table pair returned by
-            ///< `inner_join`.
-  };
-
   /**
    * @brief Performs  an inner join on the specified columns of two
    * tables (`left`, `right`), and returns the row indices corresponding
@@ -478,7 +452,6 @@ class hash_join {
    */ // TODO: explain this better
   std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
     cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
     null_equality compare_nulls         = null_equality::EQUAL,
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
@@ -490,7 +463,6 @@ class hash_join {
    */ // TODO: explain this better
   std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
     cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
     null_equality compare_nulls         = null_equality::EQUAL,
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
@@ -502,7 +474,6 @@ class hash_join {
    */ // TODO: explain this better
   std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
     cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
     null_equality compare_nulls         = null_equality::EQUAL,
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 4cc91368565..d8fe8870001 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -29,65 +29,11 @@
 namespace cudf {
 namespace detail {
 
-/**
- * @brief Returns a vector with non-common indices which is set difference
- * between `[0, num_columns)` and index values in common_column_indices
- *
- * @param num_columns The number of columns, which represents column indices
- * from `[0, num_columns)` in a table
- * @param common_column_indices A vector of common indices which needs to be
- * excluded from `[0, num_columns)`
- *
- * @return vector A vector containing only the indices which are not present in
- * `common_column_indices`
- */
-std::vector<size_type> non_common_column_indices(
-  size_type num_columns, std::vector<size_type> const &common_column_indices)
-{
-  CUDF_EXPECTS(common_column_indices.size() <= static_cast<uint64_t>(num_columns),
-               "Too many columns in common");
-  std::vector<size_type> all_column_indices(num_columns);
-  std::iota(std::begin(all_column_indices), std::end(all_column_indices), 0);
-  std::vector<size_type> sorted_common_column_indices{common_column_indices};
-  std::sort(std::begin(sorted_common_column_indices), std::end(sorted_common_column_indices));
-  std::vector<size_type> non_common_column_indices(num_columns - common_column_indices.size());
-  std::set_difference(std::cbegin(all_column_indices),
-                      std::cend(all_column_indices),
-                      std::cbegin(sorted_common_column_indices),
-                      std::cend(sorted_common_column_indices),
-                      std::begin(non_common_column_indices));
-  return non_common_column_indices;
-}
-
 std::pair<std::unique_ptr<table>, std::unique_ptr<table>> get_empty_joined_table(
-  table_view const &probe,
-  table_view const &build,
-  std::vector<std::pair<size_type, size_type>> const &columns_in_common,
-  cudf::hash_join::common_columns_output_side common_columns_output_side)
+  table_view const &probe, table_view const &build)
 {
-  std::vector<size_type> columns_to_exclude(columns_in_common.size());
-  std::transform(columns_in_common.begin(),
-                 columns_in_common.end(),
-                 columns_to_exclude.begin(),
-                 [common_columns_output_side](auto &col) {
-                   return common_columns_output_side == hash_join::common_columns_output_side::PROBE
-                            ? col.second
-                            : col.first;
-                 });
-  std::vector<size_type> non_common_indices = non_common_column_indices(
-    common_columns_output_side == hash_join::common_columns_output_side::PROBE
-      ? build.num_columns()
-      : probe.num_columns(),
-    columns_to_exclude);
   std::unique_ptr<table> empty_probe = empty_like(probe);
   std::unique_ptr<table> empty_build = empty_like(build);
-  if (common_columns_output_side == hash_join::common_columns_output_side::PROBE) {
-    table_view empty_build_view = empty_build->select(non_common_indices);
-    empty_build                 = std::make_unique<table>(empty_build_view);
-  } else {
-    table_view empty_probe_view = empty_probe->select(non_common_indices);
-    empty_probe                 = std::make_unique<table>(empty_probe_view);
-  }
   return std::make_pair(std::move(empty_probe), std::move(empty_build));
 }
 
@@ -201,8 +147,6 @@ get_left_join_indices_complement(rmm::device_uvector<size_type> &right_indices,
  * @throw cudf::logic_error if the number of columns in `build` table is 0.
  * @throw cudf::logic_error if the number of rows in `build` table is 0.
  * @throw cudf::logic_error if insertion to the hash table fails.
- * @throw std::out_of_range if elements of `build_on` exceed the number of columns in the `build`
- * table.
  *
  * @param build Table of columns used to build join hash.
  * @param compare_nulls Controls whether null join-key values should match or not.
@@ -321,88 +265,6 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> probe_
   return std::make_pair(std::move(left_indices), std::move(right_indices));
 }
 
-/**
- * @brief  Combines the non common probe, common probe, non common build and common build
- * columns in the correct order according to `common_columns_output_side` to form the joined
- * (`probe`, `build`) table pair.
- *
- * @param probe_noncommon_cols Columns obtained by gathering non common probe columns.
- * @param probe_noncommon_col_indices Output locations of non common probe columns in the probe
- * portion.
- * @param probe_common_col_indices Output locations of common probe columns in the probe portion.
- * @param build_noncommon_cols Columns obtained by gathering non common build columns.
- * @param build_noncommon_col_indices Output locations of non common build columns in the build
- * portion.
- * @param build_common_col_indices Output locations of common build columns in the build portion.
- * @param common_cols Columns obtained by gathering common columns from `probe` and `build` tables
- * in the build portion.
- * @param common_columns_output_side @see cudf::hash_join::common_columns_output_side.
- *
- * @return Table pair of (`probe`, `build`).
- */
-std::pair<std::unique_ptr<table>, std::unique_ptr<table>> combine_join_columns(
-  std::vector<std::unique_ptr<column>> &&probe_noncommon_cols,
-  std::vector<size_type> const &probe_noncommon_col_indices,
-  std::vector<size_type> const &probe_common_col_indices,
-  std::vector<std::unique_ptr<column>> &&build_noncommon_cols,
-  std::vector<size_type> const &build_noncommon_col_indices,
-  std::vector<size_type> const &build_common_col_indices,
-  std::vector<std::unique_ptr<column>> &&common_cols,
-  cudf::hash_join::common_columns_output_side common_columns_output_side)
-{
-  if (common_columns_output_side == cudf::hash_join::common_columns_output_side::PROBE) {
-    std::vector<std::unique_ptr<column>> probe_cols(probe_noncommon_cols.size() +
-                                                    common_cols.size());
-    for (size_t i = 0; i < probe_noncommon_cols.size(); ++i) {
-      probe_cols.at(probe_noncommon_col_indices.at(i)) = std::move(probe_noncommon_cols.at(i));
-    }
-    for (size_t i = 0; i < common_cols.size(); ++i) {
-      probe_cols.at(probe_common_col_indices.at(i)) = std::move(common_cols.at(i));
-    }
-    return std::make_pair(std::make_unique<cudf::table>(std::move(probe_cols)),
-                          std::make_unique<cudf::table>(std::move(build_noncommon_cols)));
-  } else {
-    std::vector<std::unique_ptr<column>> build_cols(build_noncommon_cols.size() +
-                                                    common_cols.size());
-    for (size_t i = 0; i < build_noncommon_cols.size(); ++i) {
-      build_cols.at(build_noncommon_col_indices.at(i)) = std::move(build_noncommon_cols.at(i));
-    }
-    for (size_t i = 0; i < common_cols.size(); ++i) {
-      build_cols.at(build_common_col_indices.at(i)) = std::move(common_cols.at(i));
-    }
-    return std::make_pair(std::make_unique<cudf::table>(std::move(probe_noncommon_cols)),
-                          std::make_unique<cudf::table>(std::move(build_cols)));
-  }
-}
-
-/**
- * @brief  Gathers rows from `probe` and `build` table and returns a (`probe`, `build`) table pair,
- * which contains the probe and build portions of the logical joined table respectively.
- *
- * @tparam JoinKind The type of join to be performed
- *
- * @param probe Probe side table
- * @param build build side table
- * @param joined_indices Pair of vectors containing row indices from which
- * `probe` and `build` tables are gathered. If any row index is out of bounds,
- * the contribution in the output `table` will be NULL.
- * @param columns_in_common is a vector of pairs of column indices
- * from tables `probe` and `build` respectively, that are "in common".
- * For "common" columns, only a single output column will be produced.
- * For an inner or left join, the result will be gathered from the column in
- * `probe`. For a full join, the result will be gathered from both common
- * columns in `probe` and `build` and concatenated to form a single column.
- * @param common_columns_output_side @see cudf::hash_join::common_columns_output_side.
- *
- * @return Table pair of (`probe`, `build`) containing the rows from `probe` and
- * `build` specified by `joined_indices`.
- * Columns in `columns_in_common` will be included in either `probe` or `build` portion as
- * `common_columns_output_side` indicates. Final form would look like
- * (`probe(including common columns)`, `build(excluding common columns)`) if
- * `common_columns_output_side` is `PROBE`, or (`probe(excluding common columns)`,
- * `build(including common columns)`) if `common_columns_output_side` is `BUILD`.
- */
-
 std::unique_ptr<cudf::table> combine_table_pair(std::unique_ptr<cudf::table> &&left,
                                                 std::unique_ptr<cudf::table> &&right)
 {
@@ -419,100 +281,53 @@ std::unique_ptr<cudf::table> combine_table_pair(std::unique_ptr<cudf::table> &&l
 hash_join::hash_join_impl::~hash_join_impl() = default;
 
 hash_join::hash_join_impl::hash_join_impl(cudf::table_view const &build,
-                                          std::vector<size_type> const &build_on,
                                           null_equality compare_nulls,
                                           rmm::cuda_stream_view stream)
-  : _build(build),
-    _build_selected(build.select(build_on)),
-    _build_on(build_on),
-    _hash_table(nullptr)
+  : _build(build), _hash_table(nullptr)
 {
   CUDF_FUNC_RANGE();
   CUDF_EXPECTS(0 != _build.num_columns(), "Hash join build table is empty");
   CUDF_EXPECTS(_build.num_rows() < cudf::detail::MAX_JOIN_SIZE,
                "Build column size is too big for hash join");
 
-  if (_build_on.empty() || 0 == build.num_rows()) { return; }
+  if (0 == build.num_rows()) { return; }
 
-  _hash_table = build_join_hash_table(_build_selected, compare_nulls, stream);
+  _hash_table = build_join_hash_table(_build, compare_nulls, stream);
 }
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
 hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
-                                      std::vector<size_type> const &probe_on,
                                       null_equality compare_nulls,
                                       rmm::cuda_stream_view stream,
                                       rmm::mr::device_memory_resource *mr) const
 {
   CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
-    probe, probe_on, compare_nulls, stream, mr);
-}
-
-std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>>
-hash_join::hash_join_impl::inner_join(
-  cudf::table_view const &probe,
-  std::vector<size_type> const &probe_on,
-  std::vector<std::pair<cudf::size_type, cudf::size_type>> const &columns_in_common,
-  common_columns_output_side common_columns_output_side,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource *mr) const
-{
-  CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(
-    probe, probe_on, columns_in_common, common_columns_output_side, compare_nulls, stream, mr);
+  return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(probe, compare_nulls, stream, mr);
 }
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
 hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
-                                     std::vector<size_type> const &probe_on,
                                      null_equality compare_nulls,
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource *mr) const
 {
   CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(
-    probe, probe_on, compare_nulls, stream, mr);
+  return compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(probe, compare_nulls, stream, mr);
 }
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
 hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
-                                     std::vector<size_type> const &probe_on,
                                      null_equality compare_nulls,
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource *mr) const
 {
   CUDF_FUNC_RANGE();
-  return compute_hash_join<cudf::detail::join_kind::FULL_JOIN>(
-    probe, probe_on, compare_nulls, stream, mr);
-}
-
-std::unique_ptr<cudf::table> hash_join::hash_join_impl::full_join(
-  cudf::table_view const &probe,
-  std::vector<size_type> const &probe_on,
-  std::vector<std::pair<cudf::size_type, cudf::size_type>> const &columns_in_common,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource *mr) const
-{
-  CUDF_FUNC_RANGE();
-  auto probe_build_pair =
-    compute_hash_join<cudf::detail::join_kind::FULL_JOIN>(probe,
-                                                          probe_on,
-                                                          columns_in_common,
-                                                          common_columns_output_side::PROBE,
-                                                          compare_nulls,
-                                                          stream,
-                                                          mr);
-  return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
-                                          std::move(probe_build_pair.second));
+  return compute_hash_join<cudf::detail::join_kind::FULL_JOIN>(probe, compare_nulls, stream, mr);
 }
 
 template <cudf::detail::join_kind JoinKind>
 std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 hash_join::hash_join_impl::compute_hash_join_indices(cudf::table_view const &probe,
-                                                     std::vector<size_type> const &probe_on,
                                                      null_equality compare_nulls,
                                                      rmm::cuda_stream_view stream,
                                                      rmm::mr::device_memory_resource *mr) const
@@ -520,42 +335,39 @@ hash_join::hash_join_impl::compute_hash_join_indices(cudf::table_view const &pro
   CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty");
   CUDF_EXPECTS(probe.num_rows() < cudf::detail::MAX_JOIN_SIZE,
                "Probe column size is too big for hash join");
-  CUDF_EXPECTS(_build_on.size() == probe_on.size(),
+  CUDF_EXPECTS(_build.num_columns() == probe.num_columns(),
                "Mismatch in number of columns to be joined on");
 
-  if (is_trivial_join(probe, _build, probe_on, _build_on, JoinKind)) {
+  if (is_trivial_join(probe, _build, JoinKind)) {
     return std::make_pair(rmm::device_uvector<size_type>{0, stream},
                           rmm::device_uvector<size_type>{0, stream});
   }
 
-  auto probe_selected = probe.select(probe_on);
-  CUDF_EXPECTS(std::equal(std::cbegin(_build_selected),
-                          std::cend(_build_selected),
-                          std::cbegin(probe_selected),
-                          std::cend(probe_selected),
+  CUDF_EXPECTS(std::equal(std::cbegin(_build),
+                          std::cend(_build),
+                          std::cbegin(probe),
+                          std::cend(probe),
                           [](const auto &b, const auto &p) { return b.type() == p.type(); }),
                "Mismatch in joining column data types");
 
-  return probe_join_indices<JoinKind>(probe_selected, compare_nulls, stream);
+  return probe_join_indices<JoinKind>(probe, compare_nulls, stream);
 }
 
 template <cudf::detail::join_kind JoinKind>
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
 hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
-                                             std::vector<size_type> const &probe_on,
                                              null_equality compare_nulls,
                                              rmm::cuda_stream_view stream,
                                              rmm::mr::device_memory_resource *mr) const
 {
-  auto join_indices =
-    compute_hash_join_indices<JoinKind>(probe, probe_on, compare_nulls, stream, mr);
-  auto join_size = join_indices.first.size();
-  auto left_map  = std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
+  auto join_indices = compute_hash_join_indices<JoinKind>(probe, compare_nulls, stream, mr);
+  auto join_size    = join_indices.first.size();
+  auto left_map     = std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
                                                  join_size,
                                                  join_indices.first.release(),
                                                  rmm::device_buffer{},
                                                  0);
-  auto right_map = std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
+  auto right_map    = std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
                                                   join_size,
                                                   join_indices.second.release(),
                                                   rmm::device_buffer{},
@@ -564,41 +376,6 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
     std::move(left_map), std::move(right_map));
 }
 
-template <cudf::detail::join_kind JoinKind>
-std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>>
-hash_join::hash_join_impl::compute_hash_join(
-  cudf::table_view const &probe,
-  std::vector<size_type> const &probe_on,
-  std::vector<std::pair<cudf::size_type, cudf::size_type>> const &columns_in_common,
-  common_columns_output_side common_columns_output_side,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource *mr) const
-{
-  CUDF_EXPECTS(std::all_of(columns_in_common.begin(),
-                           columns_in_common.end(),
-                           [this, &probe_on](auto pair) {
-                             size_t p = std::find(probe_on.begin(), probe_on.end(), pair.first) -
-                                        probe_on.begin();
-                             size_t b = std::find(_build_on.begin(), _build_on.end(), pair.second) -
-                                        _build_on.begin();
-                             return (p != probe_on.size()) && (b != _build_on.size()) && (p == b);
-                           }),
-               "Invalid values passed to columns_in_common");
-
-  auto joined_indices = compute_hash_join<JoinKind>(probe, probe_on, compare_nulls, stream, mr);
-
-  if (is_trivial_join(probe, _build, probe_on, _build_on, JoinKind)) {
-    return get_empty_joined_table(probe, _build, columns_in_common, common_columns_output_side);
-  }
-
-  auto joined_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
-    joined_indices.first->view(), joined_indices.second->view());
-
-  return cudf::detail::construct_join_output_df<JoinKind>(
-    probe, _build, joined_indices_view, columns_in_common, common_columns_output_side, stream, mr);
-}
-
 template <cudf::detail::join_kind JoinKind>
 std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
@@ -612,7 +389,7 @@ hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
 
   CUDF_EXPECTS(_hash_table, "Hash table of hash join is null.");
 
-  auto build_table = cudf::table_device_view::create(_build_selected, stream);
+  auto build_table = cudf::table_device_view::create(_build, stream);
   auto probe_table = cudf::table_device_view::create(probe, stream);
 
   constexpr cudf::detail::join_kind ProbeJoinKind = (JoinKind == cudf::detail::join_kind::FULL_JOIN)
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index 16a4edf7d5f..d547d5190c4 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -195,111 +195,11 @@ get_trivial_left_join_indices(table_view const& left, rmm::cuda_stream_view stre
 }
 
 std::pair<std::unique_ptr<table>, std::unique_ptr<table>> get_empty_joined_table(
-  table_view const& probe,
-  table_view const& build,
-  std::vector<std::pair<size_type, size_type>> const& columns_in_common,
-  cudf::hash_join::common_columns_output_side common_columns_output_side);
+  table_view const& probe, table_view const& build);
 
 std::unique_ptr<cudf::table> combine_table_pair(std::unique_ptr<cudf::table>&& left,
                                                 std::unique_ptr<cudf::table>&& right);
 
-std::pair<std::unique_ptr<table>, std::unique_ptr<table>> combine_join_columns(
-  std::vector<std::unique_ptr<column>>&& probe_noncommon_cols,
-  std::vector<size_type> const& probe_noncommon_col_indices,
-  std::vector<size_type> const& probe_common_col_indices,
-  std::vector<std::unique_ptr<column>>&& build_noncommon_cols,
-  std::vector<size_type> const& build_noncommon_col_indices,
-  std::vector<size_type> const& build_common_col_indices,
-  std::vector<std::unique_ptr<column>>&& common_cols,
-  cudf::hash_join::common_columns_output_side common_columns_output_side);
-
-std::vector<size_type> non_common_column_indices(
-  size_type num_columns, std::vector<size_type> const& common_column_indices);
-
-template <join_kind JoinKind>
-std::pair<std::unique_ptr<table>, std::unique_ptr<table>> construct_join_output_df(
-  table_view const& probe,
-  table_view const& build,
-  std::pair<cudf::column_view, cudf::column_view>& joined_indices,
-  std::vector<std::pair<size_type, size_type>> const& columns_in_common,
-  cudf::hash_join::common_columns_output_side common_columns_output_side,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr)
-{
-  std::vector<size_type> probe_common_col;
-  probe_common_col.reserve(columns_in_common.size());
-  std::vector<size_type> build_common_col;
-  build_common_col.reserve(columns_in_common.size());
-  for (const auto& c : columns_in_common) {
-    probe_common_col.push_back(c.first);
-    build_common_col.push_back(c.second);
-  }
-  std::vector<size_type> probe_noncommon_col =
-    non_common_column_indices(probe.num_columns(), probe_common_col);
-  std::vector<size_type> build_noncommon_col =
-    non_common_column_indices(build.num_columns(), build_common_col);
-
-  out_of_bounds_policy const bounds_policy = JoinKind != join_kind::INNER_JOIN
-                                               ? out_of_bounds_policy::NULLIFY
-                                               : out_of_bounds_policy::DONT_CHECK;
-
-  std::unique_ptr<table> common_table = std::make_unique<table>();
-  // Construct the joined columns
-  if (join_kind::FULL_JOIN == JoinKind) {
-    if (not columns_in_common.empty()) {
-      auto common_from_build =
-        detail::gather(build.select(build_common_col),
-                       joined_indices.second.begin<size_type>() + probe.num_rows(),
-                       joined_indices.second.end<size_type>(),
-                       bounds_policy,
-                       stream,
-                       rmm::mr::get_current_device_resource());
-      auto common_from_probe =
-        detail::gather(probe.select(probe_common_col),
-                       joined_indices.first.begin<size_type>(),
-                       joined_indices.first.begin<size_type>() + probe.num_rows(),
-                       bounds_policy,
-                       stream,
-                       rmm::mr::get_current_device_resource());
-      common_table = cudf::detail::concatenate(
-        {common_from_probe->view(), common_from_build->view()}, stream, mr);
-    }
-  } else {
-    if (not columns_in_common.empty()) {
-      common_table = detail::gather(probe.select(probe_common_col),
-                                    joined_indices.first.begin<size_type>(),
-                                    joined_indices.first.end<size_type>(),
-                                    bounds_policy,
-                                    stream,
-                                    mr);
-    }
-  }
-
-  // Construct the probe non common columns
-  std::unique_ptr<table> probe_table = detail::gather(probe.select(probe_noncommon_col),
-                                                      joined_indices.first.begin<size_type>(),
-                                                      joined_indices.first.end<size_type>(),
-                                                      bounds_policy,
-                                                      stream,
-                                                      mr);
-
-  std::unique_ptr<table> build_table = detail::gather(build.select(build_noncommon_col),
-                                                      joined_indices.second.begin<size_type>(),
-                                                      joined_indices.second.end<size_type>(),
-                                                      bounds_policy,
-                                                      stream,
-                                                      mr);
-
-  return combine_join_columns(probe_table->release(),
-                              probe_noncommon_col,
-                              probe_common_col,
-                              build_table->release(),
-                              build_noncommon_col,
-                              build_common_col,
-                              common_table->release(),
-                              common_columns_output_side);
-}
-
 }  // namespace detail
 
 struct hash_join::hash_join_impl {
@@ -313,64 +213,37 @@ struct hash_join::hash_join_impl {
 
  private:
   cudf::table_view _build;
-  cudf::table_view _build_selected;
-  std::vector<size_type> _build_on;
   std::unique_ptr<cudf::detail::multimap_type, std::function<void(cudf::detail::multimap_type*)>>
     _hash_table;
 
  public:
   /**
-   * @brief Constructor that internally builds the hash table based on the given `build` table and
-   * column indices specified by `build_on` for subsequent probe calls.
+   * @brief Constructor that internally builds the hash table based on the given `build` table
    *
    * @throw cudf::logic_error if the number of columns in `build` table is 0.
    * @throw cudf::logic_error if the number of rows in `build` table exceeds MAX_JOIN_SIZE.
-   * @throw std::out_of_range if elements of `build_on` exceed the number of columns in the `build`
-   * table.
    *
    * @param build The build table, from which the hash table is built.
-   * @param build_on The column indices from `build` to join on.
    * @param compare_nulls Controls whether null join-key values should match or not.
    */
   hash_join_impl(cudf::table_view const& build,
-                 std::vector<size_type> const& build_on,
                  null_equality compare_nulls,
                  rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
   std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
     cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
-
-  std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> inner_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
-    common_columns_output_side common_columns_output_side,
     null_equality compare_nulls,
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
   std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
     cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
     null_equality compare_nulls,
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
   std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
     cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
-
-  std::unique_ptr<cudf::table> full_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
     null_equality compare_nulls,
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
@@ -379,7 +252,6 @@ struct hash_join::hash_join_impl {
   template <cudf::detail::join_kind JoinKind>
   std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
   compute_hash_join_indices(cudf::table_view const& probe,
-                            std::vector<size_type> const& probe_on,
                             null_equality compare_nulls,
                             rmm::cuda_stream_view stream,
                             rmm::mr::device_memory_resource* mr) const;
@@ -387,58 +259,6 @@ struct hash_join::hash_join_impl {
   template <cudf::detail::join_kind JoinKind>
   std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> compute_hash_join(
     cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
-
-  /**
-   * @brief Performs hash join by probing the columns provided in `probe` as per
-   * the joining indices given in `probe_on` and returns a (`probe`, `_build`) table pair, which
-   * contains the probe and build portions of the logical joined table respectively.
-   *
-   * @throw cudf::logic_error if `columns_in_common` contains a pair of indices
-   * (`P`, `B`) where `P` does not exist in `probe_on` or `B` does not exist in
-   * `_build_on`.
-   * @throw cudf::logic_error if `columns_in_common` contains a pair of indices
-   * (`P`, `B`) such that the location of `P` within `probe_on` is not equal to
-   * the location of `B` within `_build_on`.
-   * @throw cudf::logic_error if the number of elements in `probe_on` and
-   * `_build_on` are not equal.
-   * @throw cudf::logic_error if the number of columns in `probe` is 0.
-   * @throw cudf::logic_error if the number of rows in `probe` table exceeds MAX_JOIN_SIZE.
-   * @throw std::out_of_range if elements of `probe_on` exceed the number of columns in the `probe`
-   * table.
-   * @throw cudf::logic_error if types do not match between joining columns.
-   *
-   * @tparam JoinKind The type of join to be performed.
-   *
-   * @param probe The probe table.
-   * @param probe_on The column's indices from `probe` to join on.
-   * Column `i` from `probe_on` will be compared against column `i` of `_build_on`.
-   * @param columns_in_common is a vector of pairs of column indices into
-   * `probe` and `_build`, respectively, that are "in common". For "common"
-   * columns, only a single output column will be produced, which is gathered
-   * from `probe_on` columns. Else, for every column in `probe_on` and `_build_on`,
-   * an output column will be produced. For each of these pairs (P, B), P
-   * should exist in `probe_on` and B should exist in `_build_on`.
-   * @param common_columns_output_side @see cudf::hash_join::common_columns_output_side.
-   * @param compare_nulls Controls whether null join-key values should match or not.
-   * @param mr Device memory resource used to allocate the returned table's device memory.
-   * @param stream CUDA stream used for device memory operations and kernel launches.
-   *
-   * @return Table pair of (`probe`, `_build`) of joining both tables on the columns
-   * specified by `probe_on` and `_build_on`. The resulting table pair will be joined columns of
-   * (`probe(including common columns)`, `_build(excluding common columns)`) if
-   * `common_columns_output_side` is `PROBE`, or (`probe(excluding common columns)`,
-   * `_build(including common columns)`) if `common_columns_output_side` is `BUILD`.
-   */
-  template <cudf::detail::join_kind JoinKind>
-  std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>> compute_hash_join(
-    cudf::table_view const& probe,
-    std::vector<size_type> const& probe_on,
-    std::vector<std::pair<cudf::size_type, cudf::size_type>> const& columns_in_common,
-    common_columns_output_side common_columns_output_side,
     null_equality compare_nulls,
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 10743f9326a..9e61a924e03 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -29,8 +29,6 @@ namespace detail {
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
   table_view const& left_input,
   table_view const& right_input,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
   null_equality compare_nulls,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr)
@@ -38,35 +36,33 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_jo
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
   auto matched = cudf::dictionary::detail::match_dictionaries(
-    {left_input.select(left_on), right_input.select(right_on)},
+    {left_input, right_input},
     stream,
     rmm::mr::get_current_device_resource());  // temporary objects returned
 
   // now rebuild the table views with the updated ones
-  auto const left  = scatter_columns(matched.second.front(), left_on, left_input);
-  auto const right = scatter_columns(matched.second.back(), right_on, right_input);
+  auto const left  = matched.second.front();
+  auto const right = matched.second.back();
 
   // For `inner_join`, we can freely choose either the `left` or `right` table to use for
   // building/probing the hash map. Because building is typically more expensive than probing, we
   // build the hash map from the smaller table.
   if (right.num_rows() > left.num_rows()) {
-    cudf::hash_join hj_obj(left, left_on, compare_nulls, stream);
-    return hj_obj.inner_join(right, right_on, compare_nulls, stream, mr);
+    cudf::hash_join hj_obj(left, compare_nulls, stream);
+    return hj_obj.inner_join(right, compare_nulls, stream, mr);
   } else {
-    cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
-    return hj_obj.inner_join(left, left_on, compare_nulls, stream, mr);
+    cudf::hash_join hj_obj(right, compare_nulls, stream);
+    return hj_obj.inner_join(left, compare_nulls, stream, mr);
   }
 }
 
-std::unique_ptr<table> inner_join(
-  table_view const& left_input,
-  table_view const& right_input,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
-  std::vector<std::pair<size_type, size_type>> const& columns_in_common,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr)
+std::unique_ptr<table> inner_join(table_view const& left_input,
+                                  table_view const& right_input,
+                                  std::vector<size_type> const& left_on,
+                                  std::vector<size_type> const& right_on,
+                                  null_equality compare_nulls,
+                                  rmm::cuda_stream_view stream,
+                                  rmm::mr::device_memory_resource* mr)
 {
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
@@ -83,68 +79,51 @@ std::unique_ptr<table> inner_join(
   // building/probing the hash map. Because building is typically more expensive than probing, we
   // build the hash map from the smaller table.
   if (right.num_rows() > left.num_rows()) {
-    cudf::hash_join hj_obj(left, left_on, compare_nulls, stream);
-    auto join_indices = hj_obj.inner_join(right, right_on, compare_nulls, stream, mr);
-
-    auto actual_columns_in_common = columns_in_common;
-    std::for_each(actual_columns_in_common.begin(), actual_columns_in_common.end(), [](auto& pair) {
-      std::swap(pair.first, pair.second);
-    });
-
-    if (is_trivial_join(left, right, left_on, right_on, cudf::detail::join_kind::INNER_JOIN)) {
-      auto probe_build_pair = get_empty_joined_table(
-        right, left, actual_columns_in_common, cudf::hash_join::common_columns_output_side::BUILD);
-      return cudf::detail::combine_table_pair(std::move(probe_build_pair.second),
-                                              std::move(probe_build_pair.first));
-    }
-
+    cudf::hash_join hj_obj(left.select(left_on), compare_nulls, stream);
+    auto join_indices      = hj_obj.inner_join(right.select(right_on), compare_nulls, stream, mr);
     auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
       join_indices.first->view(), join_indices.second->view());
-
-    auto probe_build_pair = construct_join_output_df<cudf::detail::join_kind::INNER_JOIN>(
-      right,
-      left,
-      join_indices_view,
-      actual_columns_in_common,
-      cudf::hash_join::common_columns_output_side::BUILD,
-      stream,
-      mr);
-
-    return cudf::detail::combine_table_pair(std::move(probe_build_pair.second),
-                                            std::move(probe_build_pair.first));
+    std::unique_ptr<table> left_result =
+      detail::gather(left,
+                     join_indices_view.second.template begin<cudf::size_type>(),
+                     join_indices_view.second.template end<cudf::size_type>(),
+                     out_of_bounds_policy::DONT_CHECK,
+                     stream,
+                     mr);
+    std::unique_ptr<table> right_result =
+      detail::gather(right,
+                     join_indices_view.first.template begin<cudf::size_type>(),
+                     join_indices_view.first.template end<cudf::size_type>(),
+                     out_of_bounds_policy::DONT_CHECK,
+                     stream,
+                     mr);
+    return combine_table_pair(std::move(left_result), std::move(right_result));
   } else {
-    cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
-    auto join_indices = hj_obj.inner_join(left, left_on, compare_nulls, stream, mr);
-
-    if (is_trivial_join(left, right, left_on, right_on, cudf::detail::join_kind::INNER_JOIN)) {
-      auto probe_build_pair = get_empty_joined_table(
-        left, right, columns_in_common, cudf::hash_join::common_columns_output_side::PROBE);
-      return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
-                                              std::move(probe_build_pair.second));
-    }
-
+    cudf::hash_join hj_obj(right.select(right_on), compare_nulls, stream);
+    auto join_indices      = hj_obj.inner_join(left.select(left_on), compare_nulls, stream, mr);
     auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
       join_indices.first->view(), join_indices.second->view());
-
-    auto probe_build_pair = construct_join_output_df<cudf::detail::join_kind::INNER_JOIN>(
-      left,
-      right,
-      join_indices_view,
-      columns_in_common,
-      cudf::hash_join::common_columns_output_side::PROBE,
-      stream,
-      mr);
-
-    return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
-                                            std::move(probe_build_pair.second));
+    std::unique_ptr<table> left_result =
+      detail::gather(left,
+                     join_indices_view.first.template begin<cudf::size_type>(),
+                     join_indices_view.first.template end<cudf::size_type>(),
+                     out_of_bounds_policy::DONT_CHECK,
+                     stream,
+                     mr);
+    std::unique_ptr<table> right_result =
+      detail::gather(right,
+                     join_indices_view.second.template begin<cudf::size_type>(),
+                     join_indices_view.second.template end<cudf::size_type>(),
+                     out_of_bounds_policy::DONT_CHECK,
+                     stream,
+                     mr);
+    return combine_table_pair(std::move(left_result), std::move(right_result));
   }
 }
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
   table_view const& left_input,
   table_view const& right_input,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
   null_equality compare_nulls,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr)
@@ -152,26 +131,24 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_joi
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
   auto matched = cudf::dictionary::detail::match_dictionaries(
-    {left_input.select(left_on), right_input.select(right_on)},  // these should match
+    {left_input, right_input},  // these should match
     stream,
     rmm::mr::get_current_device_resource());  // temporary objects returned
   // now rebuild the table views with the updated ones
-  table_view const left  = scatter_columns(matched.second.front(), left_on, left_input);
-  table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
+  table_view const left  = matched.second.front();
+  table_view const right = matched.second.back();
 
-  cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
-  return hj_obj.left_join(left, left_on, compare_nulls, stream, mr);
+  cudf::hash_join hj_obj(right, compare_nulls, stream);
+  return hj_obj.left_join(left, compare_nulls, stream, mr);
 }
 
-std::unique_ptr<table> left_join(
-  table_view const& left_input,
-  table_view const& right_input,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
-  std::vector<std::pair<size_type, size_type>> const& columns_in_common,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr)
+std::unique_ptr<table> left_join(table_view const& left_input,
+                                 table_view const& right_input,
+                                 std::vector<size_type> const& left_on,
+                                 std::vector<size_type> const& right_on,
+                                 null_equality compare_nulls,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
@@ -183,36 +160,38 @@ std::unique_ptr<table> left_join(
   table_view const left  = scatter_columns(matched.second.front(), left_on, left_input);
   table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
 
-  cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
-  auto join_indices = hj_obj.left_join(left, left_on, compare_nulls, stream, mr);
+  cudf::hash_join hj_obj(right.select(right_on), compare_nulls, stream);
+  auto join_indices = hj_obj.left_join(left.select(left_on), compare_nulls, stream, mr);
 
-  if (is_trivial_join(left, right, left_on, right_on, cudf::detail::join_kind::LEFT_JOIN)) {
-    auto probe_build_pair = get_empty_joined_table(
-      left, right, columns_in_common, cudf::hash_join::common_columns_output_side::PROBE);
+  if ((left_on.empty() || right_on.empty()) ||
+      is_trivial_join(left, right, cudf::detail::join_kind::LEFT_JOIN)) {
+    auto probe_build_pair = get_empty_joined_table(left, right);
     return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
                                             std::move(probe_build_pair.second));
   }
 
   auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
     join_indices.first->view(), join_indices.second->view());
-
-  auto probe_build_pair = construct_join_output_df<cudf::detail::join_kind::LEFT_JOIN>(
-    left,
-    right,
-    join_indices_view,
-    columns_in_common,
-    cudf::hash_join::common_columns_output_side::PROBE,
-    stream,
-    mr);
-
-  return combine_table_pair(std::move(probe_build_pair.first), std::move(probe_build_pair.second));
+  std::unique_ptr<table> left_result =
+    detail::gather(left,
+                   join_indices_view.first.template begin<cudf::size_type>(),
+                   join_indices_view.first.template end<cudf::size_type>(),
+                   out_of_bounds_policy::NULLIFY,
+                   stream,
+                   mr);
+  std::unique_ptr<table> right_result =
+    detail::gather(right,
+                   join_indices_view.second.template begin<cudf::size_type>(),
+                   join_indices_view.second.template end<cudf::size_type>(),
+                   out_of_bounds_policy::NULLIFY,
+                   stream,
+                   mr);
+  return combine_table_pair(std::move(left_result), std::move(right_result));
 }
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
   table_view const& left_input,
   table_view const& right_input,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
   null_equality compare_nulls,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr)
@@ -220,26 +199,24 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_joi
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
   auto matched = cudf::dictionary::detail::match_dictionaries(
-    {left_input.select(left_on), right_input.select(right_on)},  // these should match
+    {left_input, right_input},  // these should match
     stream,
     rmm::mr::get_current_device_resource());  // temporary objects returned
   // now rebuild the table views with the updated ones
-  table_view const left  = scatter_columns(matched.second.front(), left_on, left_input);
-  table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
+  table_view const left  = matched.second.front();
+  table_view const right = matched.second.back();
 
-  cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
-  return hj_obj.full_join(left, left_on, compare_nulls, stream, mr);
+  cudf::hash_join hj_obj(right, compare_nulls, stream);
+  return hj_obj.full_join(left, compare_nulls, stream, mr);
 }
 
-std::unique_ptr<table> full_join(
-  table_view const& left_input,
-  table_view const& right_input,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
-  std::vector<std::pair<size_type, size_type>> const& columns_in_common,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr)
+std::unique_ptr<table> full_join(table_view const& left_input,
+                                 table_view const& right_input,
+                                 std::vector<size_type> const& left_on,
+                                 std::vector<size_type> const& right_on,
+                                 null_equality compare_nulls,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
@@ -251,29 +228,33 @@ std::unique_ptr<table> full_join(
   table_view const left  = scatter_columns(matched.second.front(), left_on, left_input);
   table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
 
-  cudf::hash_join hj_obj(right, right_on, compare_nulls, stream);
-  auto join_indices = hj_obj.full_join(left, left_on, compare_nulls, stream, mr);
+  cudf::hash_join hj_obj(right.select(right_on), compare_nulls, stream);
+  auto join_indices = hj_obj.full_join(left.select(left_on), compare_nulls, stream, mr);
 
-  if (is_trivial_join(left, right, left_on, right_on, cudf::detail::join_kind::FULL_JOIN)) {
-    auto probe_build_pair = get_empty_joined_table(
-      left, right, columns_in_common, cudf::hash_join::common_columns_output_side::PROBE);
+  if ((left_on.empty() || right_on.empty()) ||
+      is_trivial_join(left, right, cudf::detail::join_kind::FULL_JOIN)) {
+    auto probe_build_pair = get_empty_joined_table(left, right);
     return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
                                             std::move(probe_build_pair.second));
   }
 
   auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
     join_indices.first->view(), join_indices.second->view());
-
-  auto probe_build_pair = construct_join_output_df<cudf::detail::join_kind::FULL_JOIN>(
-    left,
-    right,
-    join_indices_view,
-    columns_in_common,
-    cudf::hash_join::common_columns_output_side::PROBE,
-    stream,
-    mr);
-
-  return combine_table_pair(std::move(probe_build_pair.first), std::move(probe_build_pair.second));
+  std::unique_ptr<table> left_result =
+    detail::gather(left,
+                   join_indices_view.first.template begin<cudf::size_type>(),
+                   join_indices_view.first.template end<cudf::size_type>(),
+                   out_of_bounds_policy::NULLIFY,
+                   stream,
+                   mr);
+  std::unique_ptr<table> right_result =
+    detail::gather(right,
+                   join_indices_view.second.template begin<cudf::size_type>(),
+                   join_indices_view.second.template end<cudf::size_type>(),
+                   out_of_bounds_policy::NULLIFY,
+                   stream,
+                   mr);
+  return combine_table_pair(std::move(left_result), std::move(right_result));
 }
 
 }  // namespace detail
@@ -281,41 +262,37 @@ std::unique_ptr<table> full_join(
 hash_join::~hash_join() = default;
 
 hash_join::hash_join(cudf::table_view const& build,
-                     std::vector<size_type> const& build_on,
                      null_equality compare_nulls,
                      rmm::cuda_stream_view stream)
-  : impl{std::make_unique<const hash_join::hash_join_impl>(build, build_on, compare_nulls, stream)}
+  : impl{std::make_unique<const hash_join::hash_join_impl>(build, compare_nulls, stream)}
 {
 }
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_join::inner_join(
   cudf::table_view const& probe,
-  std::vector<size_type> const& probe_on,
   null_equality compare_nulls,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr) const
 {
-  return impl->inner_join(probe, probe_on, compare_nulls, stream, mr);
+  return impl->inner_join(probe, compare_nulls, stream, mr);
 }
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_join::left_join(
   cudf::table_view const& probe,
-  std::vector<size_type> const& probe_on,
   null_equality compare_nulls,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr) const
 {
-  return impl->left_join(probe, probe_on, compare_nulls, stream, mr);
+  return impl->left_join(probe, compare_nulls, stream, mr);
 }
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_join::full_join(
   cudf::table_view const& probe,
-  std::vector<size_type> const& probe_on,
   null_equality compare_nulls,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr) const
 {
-  return impl->full_join(probe, probe_on, compare_nulls, stream, mr);
+  return impl->full_join(probe, compare_nulls, stream, mr);
 }
 
 // external APIs
@@ -323,82 +300,67 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_joi
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
   table_view const& left,
   table_view const& right,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
   null_equality compare_nulls,
   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::inner_join(
-    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
+  return detail::inner_join(left, right, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-std::unique_ptr<table> inner_join(
-  table_view const& left,
-  table_view const& right,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
-  std::vector<std::pair<size_type, size_type>> const& columns_in_common,
-  null_equality compare_nulls,
-  rmm::mr::device_memory_resource* mr)
+std::unique_ptr<table> inner_join(table_view const& left,
+                                  table_view const& right,
+                                  std::vector<size_type> const& left_on,
+                                  std::vector<size_type> const& right_on,
+                                  null_equality compare_nulls,
+                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::inner_join(
-    left, right, left_on, right_on, columns_in_common, compare_nulls, rmm::cuda_stream_default, mr);
+    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
   table_view const& left,
   table_view const& right,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
   null_equality compare_nulls,
   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::left_join(
-    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
+  return detail::left_join(left, right, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-std::unique_ptr<table> left_join(
-  table_view const& left,
-  table_view const& right,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
-  std::vector<std::pair<size_type, size_type>> const& columns_in_common,
-  null_equality compare_nulls,
-  rmm::mr::device_memory_resource* mr)
+std::unique_ptr<table> left_join(table_view const& left,
+                                 table_view const& right,
+                                 std::vector<size_type> const& left_on,
+                                 std::vector<size_type> const& right_on,
+                                 null_equality compare_nulls,
+                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::left_join(
-    left, right, left_on, right_on, columns_in_common, compare_nulls, rmm::cuda_stream_default, mr);
+    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
   table_view const& left,
   table_view const& right,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
   null_equality compare_nulls,
   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::full_join(
-    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
+  return detail::full_join(left, right, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-std::unique_ptr<table> full_join(
-  table_view const& left,
-  table_view const& right,
-  std::vector<size_type> const& left_on,
-  std::vector<size_type> const& right_on,
-  std::vector<std::pair<size_type, size_type>> const& columns_in_common,
-  null_equality compare_nulls,
-  rmm::mr::device_memory_resource* mr)
+std::unique_ptr<table> full_join(table_view const& left,
+                                 table_view const& right,
+                                 std::vector<size_type> const& left_on,
+                                 std::vector<size_type> const& right_on,
+                                 null_equality compare_nulls,
+                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::full_join(
-    left, right, left_on, right_on, columns_in_common, compare_nulls, rmm::cuda_stream_default, mr);
+    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp
index 917bcb9bdd5..1fcfffb96bb 100644
--- a/cpp/src/join/join_common_utils.hpp
+++ b/cpp/src/join/join_common_utils.hpp
@@ -51,14 +51,10 @@ using row_equality = cudf::row_equality_comparator<true>;
 
 enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN };
 
-inline bool is_trivial_join(table_view const& left,
-                            table_view const& right,
-                            std::vector<size_type> const& left_on,
-                            std::vector<size_type> const& right_on,
-                            join_kind join_type)
+inline bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type)
 {
   // If there is nothing to join, then send empty table with all columns
-  if (left_on.empty() || right_on.empty()) { return true; }
+  if ((0 == left.num_columns()) || (0 == right.num_columns())) { return true; }
 
   // If left join and the left table is empty, return immediately
   if ((join_kind::LEFT_JOIN == join_type) && (0 == left.num_rows())) { return true; }
diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index 8d91ee38725..919311f41bf 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -133,8 +133,6 @@ std::unique_ptr<cudf::column> left_semi_anti_join(
  *                             The column from `right` indicated by `right_on[i]`
  *                             will be compared against the column from `left`
  *                             indicated by `left_on[i]`.
- * @param[in] return_columns   A vector of column indices from `left` to
- *                             include in the returned table.
  * @param[in] compare_nulls    Controls whether null join-key values should match or not.
  * @param[in] mr               Device memory resource to used to allocate the returned table's
  *                             device memory
@@ -142,8 +140,7 @@ std::unique_ptr<cudf::column> left_semi_anti_join(
  * @tparam    join_kind        Indicates whether to do LEFT_SEMI_JOIN or LEFT_ANTI_JOIN
  *
  * @returns                    Result of joining `left` and `right` tables on the columns
- *                             specified by `left_on` and `right_on`. The resulting table
- *                             will contain `return_columns` from `left` that match in right.
+ *                             specified by `left_on` and `right_on`.
  */
 template <join_kind JoinKind>
 std::unique_ptr<cudf::table> left_semi_anti_join(
@@ -151,22 +148,19 @@ std::unique_ptr<cudf::table> left_semi_anti_join(
   cudf::table_view const& right,
   std::vector<cudf::size_type> const& left_on,
   std::vector<cudf::size_type> const& right_on,
-  std::vector<cudf::size_type> const& return_columns,
   null_equality compare_nulls,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(left_on.size() == right_on.size(), "Mismatch in number of columns to be joined on");
 
-  if (0 == return_columns.size()) { return empty_like(left.select(return_columns)); }
-
-  if (is_trivial_join(left, right, left_on, right_on, JoinKind)) {
-    return empty_like(left.select(return_columns));
+  if ((left_on.empty() || right_on.empty()) || is_trivial_join(left, right, JoinKind)) {
+    return empty_like(left);
   }
 
   if ((join_kind::LEFT_ANTI_JOIN == JoinKind) && (0 == right.num_rows())) {
     // Everything matches, just copy the proper columns from the left table
-    return std::make_unique<table>(left.select(return_columns), stream, mr);
+    return std::make_unique<table>(left, stream, mr);
   }
 
   // Make sure any dictionary columns have matched key sets.
@@ -183,7 +177,7 @@ std::unique_ptr<cudf::table> left_semi_anti_join(
     left_semi_anti_join<JoinKind>(left_selected, right_selected, compare_nulls, stream);
 
   auto const left_updated = scatter_columns(left_selected, left_on, left);
-  return cudf::detail::gather(left_updated.select(return_columns),
+  return cudf::detail::gather(left_updated,
                               gather_map->view().template begin<cudf::size_type>(),
                               gather_map->view().template end<cudf::size_type>(),
                               out_of_bounds_policy::DONT_CHECK,
@@ -197,50 +191,44 @@ std::unique_ptr<cudf::table> left_semi_join(cudf::table_view const& left,
                                             cudf::table_view const& right,
                                             std::vector<cudf::size_type> const& left_on,
                                             std::vector<cudf::size_type> const& right_on,
-                                            std::vector<cudf::size_type> const& return_columns,
                                             null_equality compare_nulls,
                                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join<detail::join_kind::LEFT_SEMI_JOIN>(
-    left, right, left_on, right_on, return_columns, compare_nulls, rmm::cuda_stream_default, mr);
+    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
 std::unique_ptr<cudf::column> left_semi_join(cudf::table_view const& left,
                                              cudf::table_view const& right,
-                                             std::vector<cudf::size_type> const& left_on,
-                                             std::vector<cudf::size_type> const& right_on,
                                              null_equality compare_nulls,
                                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join<detail::join_kind::LEFT_SEMI_JOIN>(
-    left.select(left_on), right.select(right_on), compare_nulls, rmm::cuda_stream_default, mr);
+    left, right, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
 std::unique_ptr<cudf::table> left_anti_join(cudf::table_view const& left,
                                             cudf::table_view const& right,
                                             std::vector<cudf::size_type> const& left_on,
                                             std::vector<cudf::size_type> const& right_on,
-                                            std::vector<cudf::size_type> const& return_columns,
                                             null_equality compare_nulls,
                                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join<detail::join_kind::LEFT_ANTI_JOIN>(
-    left, right, left_on, right_on, return_columns, compare_nulls, rmm::cuda_stream_default, mr);
+    left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
 std::unique_ptr<cudf::column> left_anti_join(cudf::table_view const& left,
                                              cudf::table_view const& right,
-                                             std::vector<cudf::size_type> const& left_on,
-                                             std::vector<cudf::size_type> const& right_on,
                                              null_equality compare_nulls,
                                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join<detail::join_kind::LEFT_ANTI_JOIN>(
-    left.select(left_on), right.select(right_on), compare_nulls, rmm::cuda_stream_default, mr);
+    left, right, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index 9fbccbda795..ec15e5b03c7 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -62,59 +62,11 @@ TEST_F(JoinTest, EmptySentinelRepro)
   cudf::table_view left({left_first_col, left_second_col, left_third_col});
   cudf::table_view right({right_first_col, right_second_col, right_third_col});
 
-  auto result = cudf::inner_join(left, right, {0, 1, 2}, {0, 1, 2}, {{0, 0}, {1, 1}, {2, 2}});
+  auto result = cudf::inner_join(left, right, {0, 1, 2}, {0, 1, 2});
 
   EXPECT_EQ(result->num_rows(), 1);
 }
 
-TEST_F(JoinTest, InvalidCommonColumnIndices)
-{
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
-  column_wrapper<int32_t> col0_1{{0, 1, 2, 4, 1}};
-
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}};
-
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
-
-  EXPECT_THROW(cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 1}, {1, 0}}), cudf::logic_error);
-}
-
-TEST_F(JoinTest, FullJoinNoCommon)
-{
-  column_wrapper<int32_t> col0_0{{0, 1}};
-  column_wrapper<int32_t> col1_0{{0, 2}};
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols1.push_back(col1_0.release());
-
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
-
-  column_wrapper<int32_t> exp_col0_0{{0, 1, -1}, {1, 1, 0}};
-  column_wrapper<int32_t> exp_col0_1{{0, -1, 2}, {1, 0, 1}};
-  CVector exp_cols;
-  exp_cols.push_back(exp_col0_0.release());
-  exp_cols.push_back(exp_col0_1.release());
-  Table gold(std::move(exp_cols));
-
-  auto result =
-    cudf::full_join(t0, t1, {0}, {0}, std::vector<std::pair<cudf::size_type, cudf::size_type>>{});
-  auto result_sort_order = cudf::sorted_order(result->view());
-  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-  auto gold_sort_order = cudf::sorted_order(gold.view());
-  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-}
-
 TEST_F(JoinTest, LeftJoinNoNullsWithNoCommon)
 {
   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
@@ -136,8 +88,7 @@ TEST_F(JoinTest, LeftJoinNoNullsWithNoCommon)
   Table t0(std::move(cols0));
   Table t1(std::move(cols1));
 
-  auto result =
-    cudf::left_join(t0, t1, {0}, {0}, std::vector<std::pair<cudf::size_type, cudf::size_type>>{});
+  auto result            = cudf::left_join(t0, t1, {0}, {0});
   auto result_sort_order = cudf::sorted_order(result->view());
   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
@@ -183,19 +134,27 @@ TEST_F(JoinTest, FullJoinNoNulls)
   Table t0(std::move(cols0));
   Table t1(std::move(cols1));
 
-  auto result            = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+  auto result            = cudf::full_join(t0, t1, {0, 1}, {0, 1});
   auto result_sort_order = cudf::sorted_order(result->view());
   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
-  column_wrapper<int32_t> col_gold_0{{2, 2, 0, 4, 3, 3, 1, 2, 0}};
-  strcol_wrapper col_gold_1({"s1", "s0", "s1", "s2", "s1", "s0", "s1", "s2", "s4"});
-  column_wrapper<int32_t> col_gold_2{{-1, -1, -1, -1, 1, 0, 1, 2, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}};
-  column_wrapper<int32_t> col_gold_3{{1, 0, 1, 2, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}};
+  column_wrapper<int32_t> col_gold_0{{3, 1, 2, 0, 3, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}};
+  strcol_wrapper col_gold_1({"s0", "s1", "s2", "s4", "s1", "", "", "", ""},
+                            {1, 1, 1, 1, 1, 0, 0, 0, 0});
+  column_wrapper<int32_t> col_gold_2{{0, 1, 2, 4, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}};
+  column_wrapper<int32_t> col_gold_3{{-1, -1, -1, -1, 3, 2, 2, 0, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}};
+  strcol_wrapper col_gold_4({"", "", "", "", "s1", "s1", "s0", "s1", "s2"},
+                            {0, 0, 0, 0, 1, 1, 1, 1, 1});
+  column_wrapper<int32_t> col_gold_5{{-1, -1, -1, -1, 1, 1, 0, 1, 2}, {0, 0, 0, 0, 1, 1, 1, 1, 1}};
+
   CVector cols_gold;
   cols_gold.push_back(col_gold_0.release());
   cols_gold.push_back(col_gold_1.release());
   cols_gold.push_back(col_gold_2.release());
   cols_gold.push_back(col_gold_3.release());
+  cols_gold.push_back(col_gold_4.release());
+  cols_gold.push_back(col_gold_5.release());
+
   Table gold(std::move(cols_gold));
 
   auto gold_sort_order = cudf::sorted_order(gold.view());
@@ -224,19 +183,27 @@ TEST_F(JoinTest, FullJoinWithNulls)
   Table t0(std::move(cols0));
   Table t1(std::move(cols1));
 
-  auto result            = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+  auto result            = cudf::full_join(t0, t1, {0, 1}, {0, 1});
   auto result_sort_order = cudf::sorted_order(result->view());
   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
-  column_wrapper<int32_t> col_gold_0{{2, 2, 0, -1, 3, 3, 1, 2, 0}, {1, 1, 1, 0, 1, 1, 1, 1, 1}};
-  strcol_wrapper col_gold_1({"s1", "s0", "s1", "s2", "s1", "s0", "s1", "s2", "s4"});
-  column_wrapper<int32_t> col_gold_2{{-1, -1, -1, -1, 1, 0, 1, 2, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}};
-  column_wrapper<int32_t> col_gold_3{{1, 0, 1, 2, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}};
+  column_wrapper<int32_t> col_gold_0{{3, 1, 2, 0, 3, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}};
+  strcol_wrapper col_gold_1({"s0", "s1", "s2", "s4", "s1", "", "", "", ""},
+                            {1, 1, 1, 1, 1, 0, 0, 0, 0});
+  column_wrapper<int32_t> col_gold_2{{0, 1, 2, 4, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}};
+  column_wrapper<int32_t> col_gold_3{{-1, -1, -1, -1, 3, 2, 2, 0, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 0}};
+  strcol_wrapper col_gold_4({"", "", "", "", "s1", "s1", "s0", "s1", "s2"},
+                            {0, 0, 0, 0, 1, 1, 1, 1, 1});
+  column_wrapper<int32_t> col_gold_5{{-1, -1, -1, -1, 1, 1, 0, 1, 2}, {0, 0, 0, 0, 1, 1, 1, 1, 1}};
+
   CVector cols_gold;
   cols_gold.push_back(col_gold_0.release());
   cols_gold.push_back(col_gold_1.release());
   cols_gold.push_back(col_gold_2.release());
   cols_gold.push_back(col_gold_3.release());
+  cols_gold.push_back(col_gold_4.release());
+  cols_gold.push_back(col_gold_5.release());
+
   Table gold(std::move(cols_gold));
 
   auto gold_sort_order = cudf::sorted_order(gold.view());
@@ -268,7 +235,7 @@ TEST_F(JoinTest, FullJoinOnNulls)
   Table t0(std::move(cols0));
   Table t1(std::move(cols1));
 
-  auto result            = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+  auto result            = cudf::full_join(t0, t1, {0, 1}, {0, 1});
   auto result_sort_order = cudf::sorted_order(result->view());
   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
@@ -279,20 +246,26 @@ TEST_F(JoinTest, FullJoinOnNulls)
   cudf::test::print(sorted_result->get_column(2).view(), std::cout, ",\t\t");
   cudf::test::print(sorted_result->get_column(3).view(), std::cout, ",\t\t");
 #endif
- 
-  column_wrapper<int32_t> col_gold_0{{   2,    5,    3,    -1},
-                                     {   1,    1,    1,     0}};
-  strcol_wrapper          col_gold_1({ "s1", "s0", "s0",  "s1"});
-  column_wrapper<int32_t> col_gold_2{{  -1,   -1,    0,     1}, 
-                                     {   0,    0,    1,     1}};
-  column_wrapper<int32_t> col_gold_3{{   1,    4,    2,     8}, 
-                                     {   1,    1,    1,     1}};
+
+  column_wrapper<int32_t> col_gold_0{{   3,   -1,   -1,    -1},
+                                     {   1,    0,    0,     0}};
+  strcol_wrapper          col_gold_1{{ "s0", "s1",  "",    ""},
+                                     {   1,    1,    0,     0}};
+  column_wrapper<int32_t> col_gold_2{{   0,    1,   -1,    -1},
+                                     {   1,    1,    0,     0}};
+  column_wrapper<int32_t> col_gold_3{{   3,   -1,    2,     5},
+                                     {   1,    0,    1,     1}};
+  strcol_wrapper          col_gold_4{{ "s0", "s1", "s1",  "s0"}};
+  column_wrapper<int32_t> col_gold_5{{   2,    8,    1,     4}};
 
   CVector cols_gold;
   cols_gold.push_back(col_gold_0.release());
   cols_gold.push_back(col_gold_1.release());
   cols_gold.push_back(col_gold_2.release());
   cols_gold.push_back(col_gold_3.release());
+  cols_gold.push_back(col_gold_4.release());
+  cols_gold.push_back(col_gold_5.release());
+  
   Table gold(std::move(cols_gold));
 
   auto gold_sort_order = cudf::sorted_order(gold.view());
@@ -306,22 +279,27 @@ TEST_F(JoinTest, FullJoinOnNulls)
   cudf::test::print(sorted_gold->get_column(3).view(), std::cout, ",\t\t");
 #endif
 
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
 
   // Repeat test with compare_nulls_equal=false,
   // as per SQL standard.
 
-  result            = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}, cudf::null_equality::UNEQUAL);
+  result            = cudf::full_join(t0, t1, {0, 1}, {0, 1}, cudf::null_equality::UNEQUAL);
   result_sort_order = cudf::sorted_order(result->view());
   sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
-  col_gold_0 =               {{   2,    5,    3,    -1,   -1},
-                              {   1,    1,    1,     0,    0}};
-  col_gold_1 = strcol_wrapper({ "s1", "s0", "s0",  "s1", "s1"});
-  col_gold_2 =               {{  -1,   -1,    0,    -1,    1}, 
-                              {   0,    0,    1,     0,    1}};
-  col_gold_3 =               {{   1,    4,    2,     8,   -1}, 
-                              {   1,    1,    1,     1,    0}};
+  col_gold_0 =               {{   3,   -1,   -1,    -1,   -1},
+                              {   1,    0,    0,     0,    0}};
+  col_gold_1 = strcol_wrapper{{ "s0", "s1",   "",    "",   ""},
+                              {   1,    1,    0,     0,    0}};
+  col_gold_2 =               {{   0,    1,   -1,    -1,   -1},
+                              {   1,    1,    0,     0,    0}};
+  col_gold_3 =               {{   3,   -1,    2,     5,   -1},
+                              {   1,    0,    1,     1,    0}};
+  col_gold_4 = strcol_wrapper{{ "s0",  "",  "s1",  "s0",  "s1"},
+                              {   1,    0,    1,     1,    1}};
+  col_gold_5 =               {{   2,   -1,    1,     4,    8},
+                              {   1,    0,    1,     1,    1}};
 
   // clang-format on
 
@@ -330,23 +308,26 @@ TEST_F(JoinTest, FullJoinOnNulls)
   cols_gold_nulls_unequal.push_back(col_gold_1.release());
   cols_gold_nulls_unequal.push_back(col_gold_2.release());
   cols_gold_nulls_unequal.push_back(col_gold_3.release());
+  cols_gold_nulls_unequal.push_back(col_gold_4.release());
+  cols_gold_nulls_unequal.push_back(col_gold_5.release());
+
   Table gold_nulls_unequal{std::move(cols_gold_nulls_unequal)};
 
   gold_sort_order = cudf::sorted_order(gold_nulls_unequal.view());
   sorted_gold     = cudf::gather(gold_nulls_unequal.view(), *gold_sort_order);
 
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
 }
 
 TEST_F(JoinTest, LeftJoinNoNulls)
 {
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
+  column_wrapper<int32_t> col0_0({3, 1, 2, 0, 3});
   strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
-  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+  column_wrapper<int32_t> col0_2({0, 1, 2, 4, 1});
 
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  column_wrapper<int32_t> col1_0({2, 2, 0, 4, 3});
   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+  column_wrapper<int32_t> col1_2({1, 0, 1, 2, 1});
 
   CVector cols0, cols1;
   cols0.push_back(col0_0.release());
@@ -359,34 +340,38 @@ TEST_F(JoinTest, LeftJoinNoNulls)
   Table t0(std::move(cols0));
   Table t1(std::move(cols1));
 
-  auto result            = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+  auto result            = cudf::left_join(t0, t1, {0, 1}, {0, 1});
   auto result_sort_order = cudf::sorted_order(result->view());
   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
-  column_wrapper<int32_t> col_gold_0{{3, 3, 1, 2, 0}, {1, 1, 1, 1, 1}};
-  strcol_wrapper col_gold_1({"s1", "s0", "s1", "s2", "s4"}, {1, 1, 1, 1, 1, 1});
-  column_wrapper<int32_t> col_gold_2{{1, 0, 1, 2, 4}, {1, 1, 1, 1, 1}};
-  column_wrapper<int32_t> col_gold_3{{1, -1, -1, -1, -1}, {1, 0, 0, 0, 0}};
+  column_wrapper<int32_t> col_gold_0({3, 1, 2, 0, 3});
+  strcol_wrapper col_gold_1({"s0", "s1", "s2", "s4", "s1"});
+  column_wrapper<int32_t> col_gold_2({0, 1, 2, 4, 1});
+  column_wrapper<int32_t> col_gold_3{{-1, -1, -1, -1, 3}, {0, 0, 0, 0, 1}};
+  strcol_wrapper col_gold_4{{"", "", "", "", "s1"}, {0, 0, 0, 0, 1}};
+  column_wrapper<int32_t> col_gold_5{{-1, -1, -1, -1, 1}, {0, 0, 0, 0, 1}};
   CVector cols_gold;
   cols_gold.push_back(col_gold_0.release());
   cols_gold.push_back(col_gold_1.release());
   cols_gold.push_back(col_gold_2.release());
   cols_gold.push_back(col_gold_3.release());
+  cols_gold.push_back(col_gold_4.release());
+  cols_gold.push_back(col_gold_5.release());
   Table gold(std::move(cols_gold));
 
   auto gold_sort_order = cudf::sorted_order(gold.view());
   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
 }
 
 TEST_F(JoinTest, LeftJoinWithNulls)
 {
   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-  strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
+  strcol_wrapper col0_1({"s1", "s1", "", "s4", "s0"}, {1, 1, 0, 1, 1});
   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
 
   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"}, );
   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
   CVector cols0, cols1;
@@ -400,19 +385,24 @@ TEST_F(JoinTest, LeftJoinWithNulls)
   Table t0(std::move(cols0));
   Table t1(std::move(cols1));
 
-  auto result            = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+  auto result            = cudf::left_join(t0, t1, {0, 1}, {0, 1});
   auto result_sort_order = cudf::sorted_order(result->view());
   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
-  column_wrapper<int32_t> col_gold_0{{3, 2, 1, 2, 0}, {1, 1, 1, 1, 1}};
-  strcol_wrapper col_gold_1({"s1", "s0", "s1", "", "s4"}, {1, 1, 1, 0, 1});
-  column_wrapper<int32_t> col_gold_2{{0, 1, 1, 2, 4}, {1, 1, 1, 1, 1}};
-  column_wrapper<int32_t> col_gold_3{{1, -1, -1, -1, -1}, {1, 0, 0, 0, 0}};
+  column_wrapper<int32_t> col_gold_0{{3, 1, 2, 0, 2}, {1, 1, 1, 1, 1}};
+  strcol_wrapper col_gold_1({"s1", "s1", "", "s4", "s0"}, {1, 1, 0, 1, 1});
+  column_wrapper<int32_t> col_gold_2{{0, 1, 2, 4, 1}, {1, 1, 1, 1, 1}};
+  column_wrapper<int32_t> col_gold_3{{3, -1, -1, -1, -1}, {1, 0, 0, 0, 0}};
+  strcol_wrapper col_gold_4{{"s1", "", "", "", ""}, {1, 0, 0, 0, 0}};
+  column_wrapper<int32_t> col_gold_5{{1, 1, -1, 1, 1}, {1, 0, 1, 1, 1}};
+
   CVector cols_gold;
   cols_gold.push_back(col_gold_0.release());
   cols_gold.push_back(col_gold_1.release());
   cols_gold.push_back(col_gold_2.release());
   cols_gold.push_back(col_gold_3.release());
+  cols_gold.push_back(col_gold_4.release());
+  cols_gold.push_back(col_gold_5.release());
   Table gold(std::move(cols_gold));
 
   auto gold_sort_order = cudf::sorted_order(gold.view());
@@ -420,1118 +410,1125 @@ TEST_F(JoinTest, LeftJoinWithNulls)
   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
 }
 
-TEST_F(JoinTest, LeftJoinOnNulls)
-{
-  // clang-format off
-  column_wrapper<int32_t> col0_0{{  3,    1,    2},
-                                 {  1,    0,    1}};
-  strcol_wrapper          col0_1({"s0", "s1", "s2" });
-  column_wrapper<int32_t> col0_2{{  0,    1,    2 }};
+// TEST_F(JoinTest, LeftJoinOnNulls)
+// {
+//   // clang-format off
+//   column_wrapper<int32_t> col0_0{{  3,    1,    2},
+//                                  {  1,    0,    1}};
+//   strcol_wrapper          col0_1({"s0", "s1", "s2" });
+n//   column_wrapper<int32_t> col0_2{{  0,    1,    2 }};
+
+//   column_wrapper<int32_t> col1_0{{  2,    5,    3,    7 },
+//                                  {  1,    1,    1,    0 }};
+//   strcol_wrapper          col1_1({"s1", "s0", "s0", "s1" });
+//   column_wrapper<int32_t> col1_2{{  1,    4,    2,    8 }};
+
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols0.push_back(col0_2.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
+//   cols1.push_back(col1_2.release());
+
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  column_wrapper<int32_t> col1_0{{  2,    5,    3,    7 },
-                                 {  1,    1,    1,    0 }};
-  strcol_wrapper          col1_1({"s1", "s0", "s0", "s1" });
-  column_wrapper<int32_t> col1_2{{  1,    4,    2,    8 }};
+//   auto result            = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   auto result_sort_order = cudf::sorted_order(result->view());
+//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+// #if 0
+//   std::cout << "Actual Results:\n";
+//   cudf::test::print(sorted_result->get_column(0).view(), std::cout, ",\t\t");
+//   cudf::test::print(sorted_result->get_column(1).view(), std::cout, ",\t\t");
+//   cudf::test::print(sorted_result->get_column(2).view(), std::cout, ",\t\t");
+//   cudf::test::print(sorted_result->get_column(3).view(), std::cout, ",\t\t");
+// #endif
+
+//   column_wrapper<int32_t> col_gold_0{{   3,    -1,    2},
+//                                      {   1,     0,    1}};
+//   strcol_wrapper          col_gold_1({ "s0",  "s1", "s2"},
+//                                      {   1,     1,    1});
+//   column_wrapper<int32_t> col_gold_2{{   0,     1,    2},
+//                                      {   1,     1,    1}};
+//   column_wrapper<int32_t> col_gold_3{{   2,     8,   -1},
+//                                      {   1,     1,    0}};
+
+//   CVector cols_gold;
+//   cols_gold.push_back(col_gold_0.release());
+//   cols_gold.push_back(col_gold_1.release());
+//   cols_gold.push_back(col_gold_2.release());
+//   cols_gold.push_back(col_gold_3.release());
+//   Table gold(std::move(cols_gold));
+
+//   auto gold_sort_order = cudf::sorted_order(gold.view());
+//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+
+// #if 0
+//   std::cout << "Expected Results:\n";
+//   cudf::test::print(sorted_gold->get_column(0).view(), std::cout, ",\t\t");
+//   cudf::test::print(sorted_gold->get_column(1).view(), std::cout, ",\t\t");
+//   cudf::test::print(sorted_gold->get_column(2).view(), std::cout, ",\t\t");
+//   cudf::test::print(sorted_gold->get_column(3).view(), std::cout, ",\t\t");
+// #endif
+
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+
+//   // Repeat test with compare_nulls_equal=false,
+//   // as per SQL standard.
+
+//   result            = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}},
+//   cudf::null_equality::UNEQUAL); result_sort_order = cudf::sorted_order(result->view());
+//   sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+//   col_gold_0 =               {{   3,    -1,    2},
+//                               {   1,     0,    1}};
+//   col_gold_1 = strcol_wrapper({ "s0",  "s1", "s2"},
+//                               {   1,     1,    1});
+//   col_gold_2 =               {{   0,     1,    2},
+//                               {   1,     1,    1}};
+//   col_gold_3 =               {{   2,    -1,   -1},
+//                               {   1,     0,    0}};
+
+//   // clang-format on
+//   CVector cols_gold_nulls_unequal;
+//   cols_gold_nulls_unequal.push_back(col_gold_0.release());
+//   cols_gold_nulls_unequal.push_back(col_gold_1.release());
+//   cols_gold_nulls_unequal.push_back(col_gold_2.release());
+//   cols_gold_nulls_unequal.push_back(col_gold_3.release());
+//   Table gold_nulls_unequal{std::move(cols_gold_nulls_unequal)};
+
+//   gold_sort_order = cudf::sorted_order(gold_nulls_unequal.view());
+//   sorted_gold     = cudf::gather(gold_nulls_unequal.view(), *gold_sort_order);
+
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+// }
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols0.push_back(col0_2.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-  cols1.push_back(col1_2.release());
+// TEST_F(JoinTest, InnerJoinSizeOverflow)
+// {
+//   auto zero = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32));
+//   zero->set_valid(true);
+//   static_cast<cudf::scalar_type_t<int32_t> *>(zero.get())->set_value(0);
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+//   // Should cause size overflow, raise exception
+//   int32_t left  = 4;
+//   int32_t right = 1073741825;
 
-  auto result            = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  auto result_sort_order = cudf::sorted_order(result->view());
-  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+//   auto col0_0 = cudf::make_column_from_scalar(*zero, left);
+//   auto col1_0 = cudf::make_column_from_scalar(*zero, right);
 
-#if 0
-  std::cout << "Actual Results:\n";
-  cudf::test::print(sorted_result->get_column(0).view(), std::cout, ",\t\t");
-  cudf::test::print(sorted_result->get_column(1).view(), std::cout, ",\t\t");
-  cudf::test::print(sorted_result->get_column(2).view(), std::cout, ",\t\t");
-  cudf::test::print(sorted_result->get_column(3).view(), std::cout, ",\t\t");
-#endif
- 
-  column_wrapper<int32_t> col_gold_0{{   3,    -1,    2},
-                                     {   1,     0,    1}};
-  strcol_wrapper          col_gold_1({ "s0",  "s1", "s2"},
-                                     {   1,     1,    1});
-  column_wrapper<int32_t> col_gold_2{{   0,     1,    2}, 
-                                     {   1,     1,    1}};
-  column_wrapper<int32_t> col_gold_3{{   2,     8,   -1}, 
-                                     {   1,     1,    0}};
+//   CVector cols0, cols1;
+//   cols0.push_back(std::move(col0_0));
+//   cols1.push_back(std::move(col1_0));
 
-  CVector cols_gold;
-  cols_gold.push_back(col_gold_0.release());
-  cols_gold.push_back(col_gold_1.release());
-  cols_gold.push_back(col_gold_2.release());
-  cols_gold.push_back(col_gold_3.release());
-  Table gold(std::move(cols_gold));
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  auto gold_sort_order = cudf::sorted_order(gold.view());
-  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+//   EXPECT_THROW(cudf::inner_join(t0, t1, {0}, {0}, {{0, 0}}), cudf::logic_error);
+// }
 
-#if 0
-  std::cout << "Expected Results:\n";
-  cudf::test::print(sorted_gold->get_column(0).view(), std::cout, ",\t\t");
-  cudf::test::print(sorted_gold->get_column(1).view(), std::cout, ",\t\t");
-  cudf::test::print(sorted_gold->get_column(2).view(), std::cout, ",\t\t");
-  cudf::test::print(sorted_gold->get_column(3).view(), std::cout, ",\t\t");
-#endif
+// TEST_F(JoinTest, InnerJoinNoNulls)
+// {
+//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+//   strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"});
+//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols0.push_back(col0_2.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
+//   cols1.push_back(col1_2.release());
+
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+//   auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   auto result_sort_order = cudf::sorted_order(result->view());
+//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+//   column_wrapper<int32_t> col_gold_0{{3, 2, 2}};
+//   strcol_wrapper col_gold_1({"s1", "s0", "s0"});
+//   column_wrapper<int32_t> col_gold_2{{0, 2, 1}};
+//   column_wrapper<int32_t> col_gold_3{{1, 0, 0}};
+//   CVector cols_gold;
+//   cols_gold.push_back(col_gold_0.release());
+//   cols_gold.push_back(col_gold_1.release());
+//   cols_gold.push_back(col_gold_2.release());
+//   cols_gold.push_back(col_gold_3.release());
+//   Table gold(std::move(cols_gold));
+
+//   auto gold_sort_order = cudf::sorted_order(gold.view());
+//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+// }
 
-  // Repeat test with compare_nulls_equal=false,
-  // as per SQL standard.
+// TEST_F(JoinTest, InnerJoinNonAlignedCommon)
+// {
+//   CVector cols0, cols1;
+//   cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+//   cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+//   cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
+//   cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
+//   cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3}}.release());
+//   cols1.emplace_back(strcol_wrapper({"s1", "s0", "s1", "s2", "s1"}).release());
+//   cols1.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 1}}.release());
 
-  result            = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}, cudf::null_equality::UNEQUAL);
-  result_sort_order = cudf::sorted_order(result->view());
-  sorted_result     = cudf::gather(result->view(), *result_sort_order);
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  col_gold_0 =               {{   3,    -1,    2},
-                              {   1,     0,    1}};
-  col_gold_1 = strcol_wrapper({ "s0",  "s1", "s2"},
-                              {   1,     1,    1});
-  col_gold_2 =               {{   0,     1,    2}, 
-                              {   1,     1,    1}};
-  col_gold_3 =               {{   2,    -1,   -1}, 
-                              {   1,     0,    0}};
+//   auto result            = cudf::inner_join(t0, t1, {1, 2}, {0, 1}, {{1, 0}, {2, 1}});
+//   auto result_sort_order = cudf::sorted_order(result->view());
+//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+//   CVector cols_gold;
+//   cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+//   cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+//   cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
+//   cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
+//   cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
+//   Table gold(std::move(cols_gold));
+
+//   auto gold_sort_order = cudf::sorted_order(gold.view());
+//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+// }
 
-  // clang-format on
-  CVector cols_gold_nulls_unequal;
-  cols_gold_nulls_unequal.push_back(col_gold_0.release());
-  cols_gold_nulls_unequal.push_back(col_gold_1.release());
-  cols_gold_nulls_unequal.push_back(col_gold_2.release());
-  cols_gold_nulls_unequal.push_back(col_gold_3.release());
-  Table gold_nulls_unequal{std::move(cols_gold_nulls_unequal)};
+// TEST_F(JoinTest, InnerJoinNonAlignedCommonSwap)
+// {
+//   CVector cols0, cols1;
+//   cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+//   cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+//   cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
+//   cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
+//   cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3, 5}}.release());
+//   cols1.emplace_back(strcol_wrapper({"s1", "s0", "s1", "s2", "s1", "s0"}).release());
+//   cols1.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 1, 0}}.release());
+
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  gold_sort_order = cudf::sorted_order(gold_nulls_unequal.view());
-  sorted_gold     = cudf::gather(gold_nulls_unequal.view(), *gold_sort_order);
+//   auto result            = cudf::inner_join(t0, t1, {1, 2}, {0, 1}, {{1, 0}, {2, 1}});
+//   auto result_sort_order = cudf::sorted_order(result->view());
+//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+//   CVector cols_gold;
+//   cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+//   cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+//   cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
+//   cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
+//   cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
+//   Table gold(std::move(cols_gold));
+
+//   auto gold_sort_order = cudf::sorted_order(gold.view());
+//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+// }
 
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-}
+// TEST_F(JoinTest, InnerJoinWithNulls)
+// {
+//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+//   strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
+//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols0.push_back(col0_2.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
+//   cols1.push_back(col1_2.release());
+
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-TEST_F(JoinTest, InnerJoinSizeOverflow)
-{
-  auto zero = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32));
-  zero->set_valid(true);
-  static_cast<cudf::scalar_type_t<int32_t> *>(zero.get())->set_value(0);
+//   auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   auto result_sort_order = cudf::sorted_order(result->view());
+//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+//   column_wrapper<int32_t> col_gold_0{{3, 2}};
+//   strcol_wrapper col_gold_1({"s1", "s0"}, {1, 1});
+//   column_wrapper<int32_t> col_gold_2{{0, 1}};
+//   column_wrapper<int32_t> col_gold_3{{1, -1}, {1, 0}};
+//   CVector cols_gold;
+//   cols_gold.push_back(col_gold_0.release());
+//   cols_gold.push_back(col_gold_1.release());
+//   cols_gold.push_back(col_gold_2.release());
+//   cols_gold.push_back(col_gold_3.release());
+//   Table gold(std::move(cols_gold));
+
+//   auto gold_sort_order = cudf::sorted_order(gold.view());
+//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+// }
 
-  // Should cause size overflow, raise exception
-  int32_t left  = 4;
-  int32_t right = 1073741825;
+// // Test to check join behaviour when join keys are null.
+// TEST_F(JoinTest, InnerJoinOnNulls)
+// {
+//   // clang-format off
+//   column_wrapper<int32_t> col0_0{{  3,    1,    2,    0,    2}};
+//   strcol_wrapper          col0_1({"s1", "s1", "s8", "s4", "s0"},
+//                                  {  1,    1,    0,    1,    1});
+//   column_wrapper<int32_t> col0_2{{  0,    1,    2,    4,    1}};
+
+//   column_wrapper<int32_t> col1_0{{  2,    2,    0,    4,    3}};
+//   strcol_wrapper          col1_1({"s1", "s0", "s1", "s2", "s1"},
+//                                  {  1,    0,    1,    1,    1});
+//   column_wrapper<int32_t> col1_2{{  1,    0,    1,    2,    1}};
+
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols0.push_back(col0_2.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
+//   cols1.push_back(col1_2.release());
+
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  auto col0_0 = cudf::make_column_from_scalar(*zero, left);
-  auto col1_0 = cudf::make_column_from_scalar(*zero, right);
+//   auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   auto result_sort_order = cudf::sorted_order(result->view());
+//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+//   column_wrapper<int32_t> col_gold_0 {{  3,    2}};
+//   strcol_wrapper          col_gold_1 ({"s1", "s0"},
+//                                       {  1,    0});
+//   column_wrapper<int32_t> col_gold_2{{   0,    2}};
+//   column_wrapper<int32_t> col_gold_3{{   1,    0}};
+//   CVector cols_gold;
+//   cols_gold.push_back(col_gold_0.release());
+//   cols_gold.push_back(col_gold_1.release());
+//   cols_gold.push_back(col_gold_2.release());
+//   cols_gold.push_back(col_gold_3.release());
+//   Table gold(std::move(cols_gold));
+
+//   auto gold_sort_order = cudf::sorted_order(gold.view());
+//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+
+//   // Repeat test with compare_nulls_equal=false,
+//   // as per SQL standard.
+
+//   result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}},
+//   cudf::null_equality::UNEQUAL); result_sort_order = cudf::sorted_order(result->view());
+//   sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+//   col_gold_0 =               {{  3}};
+//   col_gold_1 = strcol_wrapper({"s1"},
+//                               {  1});
+//   col_gold_2 =               {{  0}};
+//   col_gold_3 =               {{  1}};
+
+//   // clang-format on
+
+//   CVector cols_gold_sql;
+//   cols_gold_sql.push_back(col_gold_0.release());
+//   cols_gold_sql.push_back(col_gold_1.release());
+//   cols_gold_sql.push_back(col_gold_2.release());
+//   cols_gold_sql.push_back(col_gold_3.release());
+//   Table gold_sql(std::move(cols_gold_sql));
+
+//   gold_sort_order = cudf::sorted_order(gold_sql.view());
+//   sorted_gold     = cudf::gather(gold_sql.view(), *gold_sort_order);
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+// }
 
-  CVector cols0, cols1;
-  cols0.push_back(std::move(col0_0));
-  cols1.push_back(std::move(col1_0));
+// // Empty Left Table
+// TEST_F(JoinTest, EmptyLeftTableInnerJoin)
+// {
+//   column_wrapper<int32_t> col0_0;
+//   column_wrapper<int32_t> col0_1;
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-  EXPECT_THROW(cudf::inner_join(t0, t1, {0}, {0}, {{0, 0}}), cudf::logic_error);
-}
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-TEST_F(JoinTest, InnerJoinNoNulls)
-{
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-  strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"});
-  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+//   Table empty0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+//   auto result = cudf::inner_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty0, *result);
+// }
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols0.push_back(col0_2.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-  cols1.push_back(col1_2.release());
+// TEST_F(JoinTest, EmptyLeftTableLeftJoin)
+// {
+//   column_wrapper<int32_t> col0_0;
+//   column_wrapper<int32_t> col0_1;
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-  auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  auto result_sort_order = cudf::sorted_order(result->view());
-  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-  column_wrapper<int32_t> col_gold_0{{3, 2, 2}};
-  strcol_wrapper col_gold_1({"s1", "s0", "s0"});
-  column_wrapper<int32_t> col_gold_2{{0, 2, 1}};
-  column_wrapper<int32_t> col_gold_3{{1, 0, 0}};
-  CVector cols_gold;
-  cols_gold.push_back(col_gold_0.release());
-  cols_gold.push_back(col_gold_1.release());
-  cols_gold.push_back(col_gold_2.release());
-  cols_gold.push_back(col_gold_3.release());
-  Table gold(std::move(cols_gold));
+//   Table empty0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  auto gold_sort_order = cudf::sorted_order(gold.view());
-  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-}
+//   auto result = cudf::left_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty0, *result);
+// }
 
-TEST_F(JoinTest, InnerJoinNonAlignedCommon)
-{
-  CVector cols0, cols1;
-  cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-  cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-  cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
-  cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-  cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3}}.release());
-  cols1.emplace_back(strcol_wrapper({"s1", "s0", "s1", "s2", "s1"}).release());
-  cols1.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 1}}.release());
+// TEST_F(JoinTest, EmptyLeftTableLeftJoinNonAlignedCommon)
+// {
+//   column_wrapper<int32_t> col0_0;
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-  auto result            = cudf::inner_join(t0, t1, {1, 2}, {0, 1}, {{1, 0}, {2, 1}});
-  auto result_sort_order = cudf::sorted_order(result->view());
-  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+//   CVector cols0, cols1;
+//   cols0.emplace_back(col0_0.release());
+//   cols1.emplace_back(col1_0.release());
+//   cols1.emplace_back(col1_1.release());
 
-  CVector cols_gold;
-  cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-  cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-  cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
-  cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
-  cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
-  Table gold(std::move(cols_gold));
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  auto gold_sort_order = cudf::sorted_order(gold.view());
-  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-}
+//   column_wrapper<int32_t> col_gold_0;
+//   column_wrapper<int32_t> col_gold_1;
 
-TEST_F(JoinTest, InnerJoinNonAlignedCommonSwap)
-{
-  CVector cols0, cols1;
-  cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-  cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-  cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
-  cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-  cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3, 5}}.release());
-  cols1.emplace_back(strcol_wrapper({"s1", "s0", "s1", "s2", "s1", "s0"}).release());
-  cols1.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 1, 0}}.release());
+//   CVector cols_gold;
+//   cols_gold.emplace_back(col_gold_0.release());
+//   cols_gold.emplace_back(col_gold_1.release());
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+//   Table gold(std::move(cols_gold));
 
-  auto result            = cudf::inner_join(t0, t1, {1, 2}, {0, 1}, {{1, 0}, {2, 1}});
-  auto result_sort_order = cudf::sorted_order(result->view());
-  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+//   auto result = cudf::left_join(t0, t1, {0}, {1}, {{0, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
+// }
 
-  CVector cols_gold;
-  cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-  cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-  cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
-  cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
-  cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
-  Table gold(std::move(cols_gold));
+// TEST_F(JoinTest, EmptyLeftTableFullJoin)
+// {
+//   column_wrapper<int32_t> col0_0;
+//   column_wrapper<int32_t> col0_1;
 
-  auto gold_sort_order = cudf::sorted_order(gold.view());
-  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-}
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-TEST_F(JoinTest, InnerJoinWithNulls)
-{
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-  strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
-  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+//   Table empty0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols0.push_back(col0_2.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-  cols1.push_back(col1_2.release());
+//   auto result = cudf::full_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(t1, *result);
+// }
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+// // Empty Right Table
+// TEST_F(JoinTest, EmptyRightTableInnerJoin)
+// {
+//   column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}};
+//   column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-  auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  auto result_sort_order = cudf::sorted_order(result->view());
-  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+//   column_wrapper<int32_t> col1_0;
+//   column_wrapper<int32_t> col1_1;
 
-  column_wrapper<int32_t> col_gold_0{{3, 2}};
-  strcol_wrapper col_gold_1({"s1", "s0"}, {1, 1});
-  column_wrapper<int32_t> col_gold_2{{0, 1}};
-  column_wrapper<int32_t> col_gold_3{{1, -1}, {1, 0}};
-  CVector cols_gold;
-  cols_gold.push_back(col_gold_0.release());
-  cols_gold.push_back(col_gold_1.release());
-  cols_gold.push_back(col_gold_2.release());
-  cols_gold.push_back(col_gold_3.release());
-  Table gold(std::move(cols_gold));
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-  auto gold_sort_order = cudf::sorted_order(gold.view());
-  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-}
+//   Table t0(std::move(cols0));
+//   Table empty1(std::move(cols1));
 
-// Test to check join behaviour when join keys are null.
-TEST_F(JoinTest, InnerJoinOnNulls)
-{
-  // clang-format off
-  column_wrapper<int32_t> col0_0{{  3,    1,    2,    0,    2}};
-  strcol_wrapper          col0_1({"s1", "s1", "s8", "s4", "s0"}, 
-                                 {  1,    1,    0,    1,    1});
-  column_wrapper<int32_t> col0_2{{  0,    1,    2,    4,    1}};
+//   auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
+// }
 
-  column_wrapper<int32_t> col1_0{{  2,    2,    0,    4,    3}};
-  strcol_wrapper          col1_1({"s1", "s0", "s1", "s2", "s1"}, 
-                                 {  1,    0,    1,    1,    1});
-  column_wrapper<int32_t> col1_2{{  1,    0,    1,    2,    1}};
+// TEST_F(JoinTest, EmptyRightTableInnerJoinNonAlignedCommon)
+// {
+//   column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}};
+//   column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols0.push_back(col0_2.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-  cols1.push_back(col1_2.release());
+//   column_wrapper<int32_t> col1_0;
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+//   CVector cols0, cols1;
+//   cols0.emplace_back(col0_0.release());
+//   cols0.emplace_back(col0_1.release());
+//   cols1.emplace_back(col1_0.release());
 
-  auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  auto result_sort_order = cudf::sorted_order(result->view());
-  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  column_wrapper<int32_t> col_gold_0 {{  3,    2}};
-  strcol_wrapper          col_gold_1 ({"s1", "s0"}, 
-                                      {  1,    0});
-  column_wrapper<int32_t> col_gold_2{{   0,    2}};
-  column_wrapper<int32_t> col_gold_3{{   1,    0}};
-  CVector cols_gold;
-  cols_gold.push_back(col_gold_0.release());
-  cols_gold.push_back(col_gold_1.release());
-  cols_gold.push_back(col_gold_2.release());
-  cols_gold.push_back(col_gold_3.release());
-  Table gold(std::move(cols_gold));
+//   column_wrapper<int32_t> col_gold_0;
+//   column_wrapper<int32_t> col_gold_1;
 
-  auto gold_sort_order = cudf::sorted_order(gold.view());
-  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-  
-  // Repeat test with compare_nulls_equal=false,
-  // as per SQL standard.
-
-  result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}}, cudf::null_equality::UNEQUAL);
-  result_sort_order = cudf::sorted_order(result->view());
-  sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-  col_gold_0 =               {{  3}};
-  col_gold_1 = strcol_wrapper({"s1"}, 
-                              {  1});
-  col_gold_2 =               {{  0}};
-  col_gold_3 =               {{  1}};
-
-  // clang-format on
-
-  CVector cols_gold_sql;
-  cols_gold_sql.push_back(col_gold_0.release());
-  cols_gold_sql.push_back(col_gold_1.release());
-  cols_gold_sql.push_back(col_gold_2.release());
-  cols_gold_sql.push_back(col_gold_3.release());
-  Table gold_sql(std::move(cols_gold_sql));
-
-  gold_sort_order = cudf::sorted_order(gold_sql.view());
-  sorted_gold     = cudf::gather(gold_sql.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-}
-
-// Empty Left Table
-TEST_F(JoinTest, EmptyLeftTableInnerJoin)
-{
-  column_wrapper<int32_t> col0_0;
-  column_wrapper<int32_t> col0_1;
-
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-
-  Table empty0(std::move(cols0));
-  Table t1(std::move(cols1));
-
-  auto result = cudf::inner_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(empty0, *result);
-}
+//   CVector cols_gold;
+//   cols_gold.emplace_back(col_gold_0.release());
+//   cols_gold.emplace_back(col_gold_1.release());
 
-TEST_F(JoinTest, EmptyLeftTableLeftJoin)
-{
-  column_wrapper<int32_t> col0_0;
-  column_wrapper<int32_t> col0_1;
-
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-
-  Table empty0(std::move(cols0));
-  Table t1(std::move(cols1));
-
-  auto result = cudf::left_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(empty0, *result);
-}
-
-TEST_F(JoinTest, EmptyLeftTableLeftJoinNonAlignedCommon)
-{
-  column_wrapper<int32_t> col0_0;
-
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+//   Table gold(std::move(cols_gold));
 
-  CVector cols0, cols1;
-  cols0.emplace_back(col0_0.release());
-  cols1.emplace_back(col1_0.release());
-  cols1.emplace_back(col1_1.release());
-
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
-
-  column_wrapper<int32_t> col_gold_0;
-  column_wrapper<int32_t> col_gold_1;
-
-  CVector cols_gold;
-  cols_gold.emplace_back(col_gold_0.release());
-  cols_gold.emplace_back(col_gold_1.release());
-
-  Table gold(std::move(cols_gold));
-
-  auto result = cudf::left_join(t0, t1, {0}, {1}, {{0, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
-}
-
-TEST_F(JoinTest, EmptyLeftTableFullJoin)
-{
-  column_wrapper<int32_t> col0_0;
-  column_wrapper<int32_t> col0_1;
-
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-
-  Table empty0(std::move(cols0));
-  Table t1(std::move(cols1));
-
-  auto result = cudf::full_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(t1, *result);
-}
-
-// Empty Right Table
-TEST_F(JoinTest, EmptyRightTableInnerJoin)
-{
-  column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}};
-  column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-  column_wrapper<int32_t> col1_0;
-  column_wrapper<int32_t> col1_1;
-
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-
-  Table t0(std::move(cols0));
-  Table empty1(std::move(cols1));
-
-  auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
-}
-
-TEST_F(JoinTest, EmptyRightTableInnerJoinNonAlignedCommon)
-{
-  column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}};
-  column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-  column_wrapper<int32_t> col1_0;
-
-  CVector cols0, cols1;
-  cols0.emplace_back(col0_0.release());
-  cols0.emplace_back(col0_1.release());
-  cols1.emplace_back(col1_0.release());
-
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
-
-  column_wrapper<int32_t> col_gold_0;
-  column_wrapper<int32_t> col_gold_1;
-
-  CVector cols_gold;
-  cols_gold.emplace_back(col_gold_0.release());
-  cols_gold.emplace_back(col_gold_1.release());
-
-  Table gold(std::move(cols_gold));
-
-  auto result = cudf::inner_join(t0, t1, {1}, {0}, {{1, 0}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
-}
-
-TEST_F(JoinTest, EmptyRightTableLeftJoin)
-{
-  column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}, {1, 1, 1, 1, 1}};
-  column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-  column_wrapper<int32_t> col1_0;
-  column_wrapper<int32_t> col1_1;
-
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-
-  Table t0(std::move(cols0));
-  Table empty1(std::move(cols1));
-
-  auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(t0, *result);
-}
-
-TEST_F(JoinTest, EmptyRightTableFullJoin)
-{
-  column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}};
-  column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+//   auto result = cudf::inner_join(t0, t1, {1}, {0}, {{1, 0}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
+// }
 
-  column_wrapper<int32_t> col1_0;
-  column_wrapper<int32_t> col1_1;
+// TEST_F(JoinTest, EmptyRightTableLeftJoin)
+// {
+//   column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}, {1, 1, 1, 1, 1}};
+//   column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
+//   column_wrapper<int32_t> col1_0;
+//   column_wrapper<int32_t> col1_1;
 
-  Table t0(std::move(cols0));
-  Table empty1(std::move(cols1));
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-  auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(t0, *result);
-}
+//   Table t0(std::move(cols0));
+//   Table empty1(std::move(cols1));
 
-// Both tables empty
-TEST_F(JoinTest, BothEmptyInnerJoin)
-{
-  column_wrapper<int32_t> col0_0;
-  column_wrapper<int32_t> col0_1;
+//   auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(t0, *result);
+// }
 
-  column_wrapper<int32_t> col1_0;
-  column_wrapper<int32_t> col1_1;
+// TEST_F(JoinTest, EmptyRightTableFullJoin)
+// {
+//   column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}};
+//   column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
+//   column_wrapper<int32_t> col1_0;
+//   column_wrapper<int32_t> col1_1;
 
-  Table t0(std::move(cols0));
-  Table empty1(std::move(cols1));
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-  auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
-}
+//   Table t0(std::move(cols0));
+//   Table empty1(std::move(cols1));
 
-TEST_F(JoinTest, BothEmptyLeftJoin)
-{
-  column_wrapper<int32_t> col0_0;
-  column_wrapper<int32_t> col0_1;
+//   auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(t0, *result);
+// }
 
-  column_wrapper<int32_t> col1_0;
-  column_wrapper<int32_t> col1_1;
+// // Both tables empty
+// TEST_F(JoinTest, BothEmptyInnerJoin)
+// {
+//   column_wrapper<int32_t> col0_0;
+//   column_wrapper<int32_t> col0_1;
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
+//   column_wrapper<int32_t> col1_0;
+//   column_wrapper<int32_t> col1_1;
 
-  Table t0(std::move(cols0));
-  Table empty1(std::move(cols1));
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-  auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
-}
+//   Table t0(std::move(cols0));
+//   Table empty1(std::move(cols1));
 
-TEST_F(JoinTest, BothEmptyFullJoin)
-{
-  column_wrapper<int32_t> col0_0;
-  column_wrapper<int32_t> col0_1;
+//   auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
+// }
 
-  column_wrapper<int32_t> col1_0;
-  column_wrapper<int32_t> col1_1;
+// TEST_F(JoinTest, BothEmptyLeftJoin)
+// {
+//   column_wrapper<int32_t> col0_0;
+//   column_wrapper<int32_t> col0_1;
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
+//   column_wrapper<int32_t> col1_0;
+//   column_wrapper<int32_t> col1_1;
 
-  Table t0(std::move(cols0));
-  Table empty1(std::move(cols1));
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-  auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
-}
+//   Table t0(std::move(cols0));
+//   Table empty1(std::move(cols1));
 
-// EqualValues X Inner,Left,Full
+//   auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
+// }
 
-TEST_F(JoinTest, EqualValuesInnerJoin)
-{
-  column_wrapper<int32_t> col0_0{{0, 0}};
-  strcol_wrapper col0_1({"s0", "s0"});
+// TEST_F(JoinTest, BothEmptyFullJoin)
+// {
+//   column_wrapper<int32_t> col0_0;
+//   column_wrapper<int32_t> col0_1;
 
-  column_wrapper<int32_t> col1_0{{0, 0}};
-  strcol_wrapper col1_1({"s0", "s0"});
+//   column_wrapper<int32_t> col1_0;
+//   column_wrapper<int32_t> col1_1;
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+//   Table t0(std::move(cols0));
+//   Table empty1(std::move(cols1));
 
-  auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
+// }
 
-  column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}};
-  strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"});
-  CVector cols_gold;
-  cols_gold.push_back(col_gold_0.release());
-  cols_gold.push_back(col_gold_1.release());
-  Table gold(std::move(cols_gold));
+// // EqualValues X Inner,Left,Full
 
-  CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
-}
+// TEST_F(JoinTest, EqualValuesInnerJoin)
+// {
+//   column_wrapper<int32_t> col0_0{{0, 0}};
+//   strcol_wrapper col0_1({"s0", "s0"});
 
-TEST_F(JoinTest, EqualValuesLeftJoin)
-{
-  column_wrapper<int32_t> col0_0{{0, 0}};
-  strcol_wrapper col0_1({"s0", "s0"});
+//   column_wrapper<int32_t> col1_0{{0, 0}};
+//   strcol_wrapper col1_1({"s0", "s0"});
 
-  column_wrapper<int32_t> col1_0{{0, 0}};
-  strcol_wrapper col1_1({"s0", "s0"});
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+//   auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
 
-  auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}};
+//   strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"});
+//   CVector cols_gold;
+//   cols_gold.push_back(col_gold_0.release());
+//   cols_gold.push_back(col_gold_1.release());
+//   Table gold(std::move(cols_gold));
 
-  column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}, {1, 1, 1, 1}};
-  strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"}, {1, 1, 1, 1});
-  CVector cols_gold;
-  cols_gold.push_back(col_gold_0.release());
-  cols_gold.push_back(col_gold_1.release());
-  Table gold(std::move(cols_gold));
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
+// }
 
-  CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
-}
+// TEST_F(JoinTest, EqualValuesLeftJoin)
+// {
+//   column_wrapper<int32_t> col0_0{{0, 0}};
+//   strcol_wrapper col0_1({"s0", "s0"});
 
-TEST_F(JoinTest, EqualValuesFullJoin)
-{
-  column_wrapper<int32_t> col0_0{{0, 0}};
-  strcol_wrapper col0_1({"s0", "s0"});
+//   column_wrapper<int32_t> col1_0{{0, 0}};
+//   strcol_wrapper col1_1({"s0", "s0"});
 
-  column_wrapper<int32_t> col1_0{{0, 0}};
-  strcol_wrapper col1_1({"s0", "s0"});
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+//   auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
 
-  auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}, {1, 1, 1, 1}};
+//   strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"}, {1, 1, 1, 1});
+//   CVector cols_gold;
+//   cols_gold.push_back(col_gold_0.release());
+//   cols_gold.push_back(col_gold_1.release());
+//   Table gold(std::move(cols_gold));
 
-  column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}};
-  strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"});
-  CVector cols_gold;
-  cols_gold.push_back(col_gold_0.release());
-  cols_gold.push_back(col_gold_1.release());
-  Table gold(std::move(cols_gold));
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
+// }
 
-  CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
-}
+// TEST_F(JoinTest, EqualValuesFullJoin)
+// {
+//   column_wrapper<int32_t> col0_0{{0, 0}};
+//   strcol_wrapper col0_1({"s0", "s0"});
 
-TEST_F(JoinTest, InnerJoinCornerCase)
-{
-  column_wrapper<int64_t> col0_0{{4, 1, 3, 2, 2, 2, 2}};
-  column_wrapper<int64_t> col1_0{{2}};
+//   column_wrapper<int32_t> col1_0{{0, 0}};
+//   strcol_wrapper col1_1({"s0", "s0"});
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols1.push_back(col1_0.release());
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  auto result            = cudf::inner_join(t0, t1, {0}, {0}, {{0, 0}});
-  auto result_sort_order = cudf::sorted_order(result->view());
-  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+//   auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
 
-  column_wrapper<int64_t> col_gold_0{{2, 2, 2, 2}};
-  CVector cols_gold;
-  cols_gold.push_back(col_gold_0.release());
-  Table gold(std::move(cols_gold));
+//   column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}};
+//   strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"});
+//   CVector cols_gold;
+//   cols_gold.push_back(col_gold_0.release());
+//   cols_gold.push_back(col_gold_1.release());
+//   Table gold(std::move(cols_gold));
 
-  auto gold_sort_order = cudf::sorted_order(gold.view());
-  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-}
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
+// }
 
-// TEST_F(JoinTest, HashJoinSequentialProbes)
+// TEST_F(JoinTest, InnerJoinCornerCase)
 // {
-//   CVector cols1;
-//   cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3}}.release());
-//   cols1.emplace_back(strcol_wrapper{{"s1", "s0", "s1", "s2", "s1"}}.release());
-//   cols1.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 1}}.release());
+//   column_wrapper<int64_t> col0_0{{4, 1, 3, 2, 2, 2, 2}};
+//   column_wrapper<int64_t> col1_0{{2}};
 
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols1.push_back(col1_0.release());
+
+//   Table t0(std::move(cols0));
 //   Table t1(std::move(cols1));
 
-//   cudf::hash_join hash_join(t1, {0, 1}, cudf::null_equality::EQUAL);
+//   auto result            = cudf::inner_join(t0, t1, {0}, {0}, {{0, 0}});
+//   auto result_sort_order = cudf::sorted_order(result->view());
+//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
-//   {
-//     CVector cols0;
-//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
-//     cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
-//     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-//     Table t0(std::move(cols0));
-
-//     auto result            = hash_join.full_join(t0, {0, 1}, {{0, 0}, {1, 1}});
-//     auto result_sort_order = cudf::sorted_order(result->view());
-//     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-//     CVector cols_gold;
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3, 3, 1, 2, 0}}.release());
-//     cols_gold.emplace_back(
-//       strcol_wrapper({"s1", "s0", "s1", "s2", "s1", "s0", "s1", "s2", "s4"}).release());
-//     cols_gold.emplace_back(
-//       column_wrapper<int32_t>{{-1, -1, -1, -1, 1, 0, 1, 2, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}}
-//         .release());
-//     cols_gold.emplace_back(
-//       column_wrapper<int32_t>{{1, 0, 1, 2, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}}
-//         .release());
-//     Table gold(std::move(cols_gold));
-
-//     auto gold_sort_order = cudf::sorted_order(gold.view());
-//     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-//     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-//   }
+//   column_wrapper<int64_t> col_gold_0{{2, 2, 2, 2}};
+//   CVector cols_gold;
+//   cols_gold.push_back(col_gold_0.release());
+//   Table gold(std::move(cols_gold));
 
-//   {
-//     CVector cols0;
-//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
-//     cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
-//     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-//     Table t0(std::move(cols0));
-
-//     auto result            = hash_join.left_join(t0, {0, 1}, {{0, 0}, {1, 1}});
-//     auto result_sort_order = cudf::sorted_order(result->view());
-//     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-//     CVector cols_gold;
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 3, 1, 2, 0}, {1, 1, 1, 1, 1}}.release());
-//     cols_gold.emplace_back(
-//       strcol_wrapper({"s1", "s0", "s1", "s2", "s4"}, {1, 1, 1, 1, 1, 1}).release());
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 4}, {1, 1, 1, 1, 1}}.release());
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{1, -1, -1, -1, -1}, {1, 0, 0, 0,
-//     0}}.release()); Table gold(std::move(cols_gold));
-
-//     auto gold_sort_order = cudf::sorted_order(gold.view());
-//     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-//     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-//   }
+//   auto gold_sort_order = cudf::sorted_order(gold.view());
+//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+// }
 
+// // TEST_F(JoinTest, HashJoinSequentialProbes)
+// // {
+// //   CVector cols1;
+// //   cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3}}.release());
+// //   cols1.emplace_back(strcol_wrapper{{"s1", "s0", "s1", "s2", "s1"}}.release());
+// //   cols1.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 1}}.release());
+
+// //   Table t1(std::move(cols1));
+
+// //   cudf::hash_join hash_join(t1, {0, 1}, cudf::null_equality::EQUAL);
+
+// //   {
+// //     CVector cols0;
+// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
+// //     cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
+// //     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
+
+// //     Table t0(std::move(cols0));
+
+// //     auto result            = hash_join.full_join(t0, {0, 1}, {{0, 0}, {1, 1}});
+// //     auto result_sort_order = cudf::sorted_order(result->view());
+// //     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+// //     CVector cols_gold;
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3, 3, 1, 2, 0}}.release());
+// //     cols_gold.emplace_back(
+// //       strcol_wrapper({"s1", "s0", "s1", "s2", "s1", "s0", "s1", "s2", "s4"}).release());
+// //     cols_gold.emplace_back(
+// //       column_wrapper<int32_t>{{-1, -1, -1, -1, 1, 0, 1, 2, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}}
+// //         .release());
+// //     cols_gold.emplace_back(
+// //       column_wrapper<int32_t>{{1, 0, 1, 2, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}}
+// //         .release());
+// //     Table gold(std::move(cols_gold));
+
+// //     auto gold_sort_order = cudf::sorted_order(gold.view());
+// //     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+// //     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+// //   }
+
+// //   {
+// //     CVector cols0;
+// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
+// //     cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
+// //     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
+
+// //     Table t0(std::move(cols0));
+
+// //     auto result            = hash_join.left_join(t0, {0, 1}, {{0, 0}, {1, 1}});
+// //     auto result_sort_order = cudf::sorted_order(result->view());
+// //     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+// //     CVector cols_gold;
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 3, 1, 2, 0}, {1, 1, 1, 1,
+// 1}}.release());
+// //     cols_gold.emplace_back(
+// //       strcol_wrapper({"s1", "s0", "s1", "s2", "s4"}, {1, 1, 1, 1, 1, 1}).release());
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 4}, {1, 1, 1, 1,
+// 1}}.release());
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{1, -1, -1, -1, -1}, {1, 0, 0, 0,
+// //     0}}.release()); Table gold(std::move(cols_gold));
+
+// //     auto gold_sort_order = cudf::sorted_order(gold.view());
+// //     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+// //     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+// //   }
+
+// //   {
+// //     CVector cols0;
+// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+// //     cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
+// //     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
+
+// //     Table t0(std::move(cols0));
+
+// //     auto probe_build_pair = hash_join.inner_join(t0, {1, 2}, {{1, 0}, {2, 1}});
+// //     auto joined_cols      = probe_build_pair.first->release();
+// //     auto build_cols       = probe_build_pair.second->release();
+// //     joined_cols.insert(joined_cols.end(),
+// //                        std::make_move_iterator(build_cols.begin()),
+// //                        std::make_move_iterator(build_cols.end()));
+// //     auto result            = std::make_unique<cudf::table>(std::move(joined_cols));
+// //     auto result_sort_order = cudf::sorted_order(result->view());
+// //     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+// //     CVector cols_gold;
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+// //     cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
+// //     Table gold(std::move(cols_gold));
+
+// //     auto gold_sort_order = cudf::sorted_order(gold.view());
+// //     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+// //     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+// //   }
+
+// //   {
+// //     CVector cols0;
+// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+// //     cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
+// //     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
+
+// //     Table t0(std::move(cols0));
+
+// //     auto probe_build_pair = hash_join.inner_join(
+// //       t0, {1, 2}, {{1, 0}, {2, 1}}, cudf::hash_join::common_columns_output_side::BUILD);
+// //     auto joined_cols = probe_build_pair.second->release();
+// //     auto probe_cols  = probe_build_pair.first->release();
+// //     joined_cols.insert(joined_cols.end(),
+// //                        std::make_move_iterator(probe_cols.begin()),
+// //                        std::make_move_iterator(probe_cols.end()));
+// //     auto result            = std::make_unique<cudf::table>(std::move(joined_cols));
+// //     auto result_sort_order = cudf::sorted_order(result->view());
+// //     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+// //     CVector cols_gold;
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+// //     cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
+// //     cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
+// //     Table gold(std::move(cols_gold));
+
+// //     auto gold_sort_order = cudf::sorted_order(gold.view());
+// //     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+// //     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+// //   }
+// // }
+
+// struct JoinDictionaryTest : public cudf::test::BaseFixture {
+// };
+
+// TEST_F(JoinDictionaryTest, LeftJoinNoNulls)
+// {
+//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
+//   strcol_wrapper col0_1_w({"s0", "s1", "s2", "s4", "s1"});
+//   auto col0_1 = cudf::dictionary::encode(col0_1_w);
+//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   strcol_wrapper col1_1_w{{"s1", "s0", "s1", "s2", "s1"}};
+//   auto col1_1 = cudf::dictionary::encode(col1_1_w);
+//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+//   auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
+//   auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
+//   auto g0 = cudf::table_view({col0_0, col0_1_w, col0_2});
+//   auto g1 = cudf::table_view({col1_0, col1_1_w, col1_2});
 //   {
-//     CVector cols0;
-//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-//     cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
-//     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-//     Table t0(std::move(cols0));
-
-//     auto probe_build_pair = hash_join.inner_join(t0, {1, 2}, {{1, 0}, {2, 1}});
-//     auto joined_cols      = probe_build_pair.first->release();
-//     auto build_cols       = probe_build_pair.second->release();
-//     joined_cols.insert(joined_cols.end(),
-//                        std::make_move_iterator(build_cols.begin()),
-//                        std::make_move_iterator(build_cols.end()));
-//     auto result            = std::make_unique<cudf::table>(std::move(joined_cols));
-//     auto result_sort_order = cudf::sorted_order(result->view());
-//     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-//     CVector cols_gold;
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-//     cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
-//     Table gold(std::move(cols_gold));
-
-//     auto gold_sort_order = cudf::sorted_order(gold.view());
-//     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-//     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+//     auto result =
+//       cudf::left_join(t0, t1, {0}, {0}, std::vector<std::pair<cudf::size_type,
+//       cudf::size_type>>{});
+//     auto result_view = result->view();
+//     auto decoded1    = cudf::dictionary::decode(result_view.column(1));
+//     auto decoded4    = cudf::dictionary::decode(result_view.column(4));
+//     std::vector<cudf::column_view> result_decoded({result_view.column(0),
+//                                                    decoded1->view(),
+//                                                    result_view.column(2),
+//                                                    result_view.column(3),
+//                                                    decoded4->view(),
+//                                                    result_view.column(5)});
+
+//     auto gold =
+//       cudf::left_join(g0, g1, {0}, {0}, std::vector<std::pair<cudf::size_type,
+//       cudf::size_type>>{});
+//     CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
 //   }
-
 //   {
-//     CVector cols0;
-//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-//     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-//     cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
-//     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-//     Table t0(std::move(cols0));
-
-//     auto probe_build_pair = hash_join.inner_join(
-//       t0, {1, 2}, {{1, 0}, {2, 1}}, cudf::hash_join::common_columns_output_side::BUILD);
-//     auto joined_cols = probe_build_pair.second->release();
-//     auto probe_cols  = probe_build_pair.first->release();
-//     joined_cols.insert(joined_cols.end(),
-//                        std::make_move_iterator(probe_cols.begin()),
-//                        std::make_move_iterator(probe_cols.end()));
-//     auto result            = std::make_unique<cudf::table>(std::move(joined_cols));
-//     auto result_sort_order = cudf::sorted_order(result->view());
-//     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-//     CVector cols_gold;
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-//     cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-//     cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
-//     Table gold(std::move(cols_gold));
-
-//     auto gold_sort_order = cudf::sorted_order(gold.view());
-//     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-//     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+//     auto result      = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//     auto result_view = result->view();
+//     auto decoded1    = cudf::dictionary::decode(result_view.column(1));
+//     std::vector<cudf::column_view> result_decoded(
+//       {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)});
+
+//     auto gold = cudf::left_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//     CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
 //   }
 // }
 
-struct JoinDictionaryTest : public cudf::test::BaseFixture {
-};
-
-TEST_F(JoinDictionaryTest, LeftJoinNoNulls)
-{
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
-  strcol_wrapper col0_1_w({"s0", "s1", "s2", "s4", "s1"});
-  auto col0_1 = cudf::dictionary::encode(col0_1_w);
-  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  strcol_wrapper col1_1_w{{"s1", "s0", "s1", "s2", "s1"}};
-  auto col1_1 = cudf::dictionary::encode(col1_1_w);
-  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-  auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
-  auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
-  auto g0 = cudf::table_view({col0_0, col0_1_w, col0_2});
-  auto g1 = cudf::table_view({col1_0, col1_1_w, col1_2});
-  {
-    auto result =
-      cudf::left_join(t0, t1, {0}, {0}, std::vector<std::pair<cudf::size_type, cudf::size_type>>{});
-    auto result_view = result->view();
-    auto decoded1    = cudf::dictionary::decode(result_view.column(1));
-    auto decoded4    = cudf::dictionary::decode(result_view.column(4));
-    std::vector<cudf::column_view> result_decoded({result_view.column(0),
-                                                   decoded1->view(),
-                                                   result_view.column(2),
-                                                   result_view.column(3),
-                                                   decoded4->view(),
-                                                   result_view.column(5)});
-
-    auto gold =
-      cudf::left_join(g0, g1, {0}, {0}, std::vector<std::pair<cudf::size_type, cudf::size_type>>{});
-    CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
-  }
-  {
-    auto result      = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-    auto result_view = result->view();
-    auto decoded1    = cudf::dictionary::decode(result_view.column(1));
-    std::vector<cudf::column_view> result_decoded(
-      {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)});
-
-    auto gold = cudf::left_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-    CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
-  }
-}
-
-TEST_F(JoinDictionaryTest, LeftJoinWithNulls)
-{
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-  strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
-  column_wrapper<int32_t> col0_2_w{{0, 1, 2, 4, 1}};
-  auto col0_2 = cudf::dictionary::encode(col0_2_w);
-
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-  column_wrapper<int32_t> col1_2_w{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-  auto col1_2 = cudf::dictionary::encode(col1_2_w);
-
-  auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()});
-  auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()});
-
-  auto result      = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  auto result_view = result->view();
-  auto decoded2    = cudf::dictionary::decode(result_view.column(2));
-  auto decoded3    = cudf::dictionary::decode(result_view.column(3));
-  std::vector<cudf::column_view> result_decoded(
-    {result_view.column(0), result_view.column(1), decoded2->view(), decoded3->view()});
-
-  auto g0   = cudf::table_view({col0_0, col0_1, col0_2_w});
-  auto g1   = cudf::table_view({col1_0, col1_1, col1_2_w});
-  auto gold = cudf::left_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
-}
-
-TEST_F(JoinDictionaryTest, InnerJoinNoNulls)
-{
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-  strcol_wrapper col0_1_w({"s1", "s1", "s0", "s4", "s0"});
-  auto col0_1 = cudf::dictionary::encode(col0_1_w);
-  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  strcol_wrapper col1_1_w({"s1", "s0", "s1", "s2", "s1"});
-  auto col1_1 = cudf::dictionary::encode(col1_1_w);
-  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-  auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
-  auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
-
-  auto result      = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  auto result_view = result->view();
-  auto decoded1    = cudf::dictionary::decode(result_view.column(1));
-  std::vector<cudf::column_view> result_decoded(
-    {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)});
-
-  auto g0   = cudf::table_view({col0_0, col0_1_w, col0_2});
-  auto g1   = cudf::table_view({col1_0, col1_1_w, col1_2});
-  auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
-}
-
-TEST_F(JoinDictionaryTest, InnerJoinWithNulls)
-{
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-  strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
-  column_wrapper<int32_t> col0_2_w{{0, 1, 2, 4, 1}};
-  auto col0_2 = cudf::dictionary::encode(col0_2_w);
-
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-  column_wrapper<int32_t> col1_2_w{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-  auto col1_2 = cudf::dictionary::encode(col1_2_w);
-
-  auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()});
-  auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()});
-
-  auto result      = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  auto result_view = result->view();
-  auto decoded2    = cudf::dictionary::decode(result_view.column(2));
-  auto decoded3    = cudf::dictionary::decode(result_view.column(3));
-  std::vector<cudf::column_view> result_decoded(
-    {result_view.column(0), result_view.column(1), decoded2->view(), decoded3->view()});
-
-  auto g0   = cudf::table_view({col0_0, col0_1, col0_2_w});
-  auto g1   = cudf::table_view({col1_0, col1_1, col1_2_w});
-  auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
-}
-
-TEST_F(JoinDictionaryTest, FullJoinNoNulls)
-{
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
-  strcol_wrapper col0_1_w({"s0", "s1", "s2", "s4", "s1"});
-  auto col0_1 = cudf::dictionary::encode(col0_1_w);
-  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  strcol_wrapper col1_1_w{{"s1", "s0", "s1", "s2", "s1"}};
-  auto col1_1 = cudf::dictionary::encode(col1_1_w);
-  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-  auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
-  auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
-
-  auto result      = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  auto result_view = result->view();
-  auto decoded1    = cudf::dictionary::decode(result_view.column(1));
-  std::vector<cudf::column_view> result_decoded(
-    {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)});
-
-  auto g0   = cudf::table_view({col0_0, col0_1_w, col0_2});
-  auto g1   = cudf::table_view({col1_0, col1_1_w, col1_2});
-  auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
-}
-
-TEST_F(JoinDictionaryTest, FullJoinWithNulls)
-{
-  column_wrapper<int32_t> col0_0_w{{3, 1, 2, 0, 3}};
-  auto col0_0 = cudf::dictionary::encode(col0_0_w);
-  strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
-  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-  column_wrapper<int32_t> col1_0_w{{2, 2, 0, 4, 3}, {1, 1, 1, 0, 1}};
-  auto col1_0 = cudf::dictionary::encode(col1_0_w);
-  strcol_wrapper col1_1{{"s1", "s0", "s1", "s2", "s1"}};
-  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-  auto t0 = cudf::table_view({col0_0->view(), col0_1, col0_2});
-  auto t1 = cudf::table_view({col1_0->view(), col1_1, col1_2});
-
-  auto result      = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  auto result_view = result->view();
-  auto decoded0    = cudf::dictionary::decode(result_view.column(0));
-  std::vector<cudf::column_view> result_decoded(
-    {decoded0->view(), result_view.column(1), result_view.column(2), result_view.column(3)});
-
-  auto g0   = cudf::table_view({col0_0_w, col0_1, col0_2});
-  auto g1   = cudf::table_view({col1_0_w, col1_1, col1_2});
-  auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
-}
-
-TEST_F(JoinTest, InnerJoinGathermap)
-{
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-  strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"});
-  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols0.push_back(col0_2.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-  cols1.push_back(col1_2.release());
-
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
-
-  auto result          = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
-  auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
-  auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
-  auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}), *lmap_sort_order);
-  auto rmap_sorted     = cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
-
-  column_wrapper<int32_t> lmap_gold{{0, 2, 4}};
-  column_wrapper<int32_t> rmap_gold{{1, 1, 4}};
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
-}
+// TEST_F(JoinDictionaryTest, LeftJoinWithNulls)
+// {
+//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+//   strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
+//   column_wrapper<int32_t> col0_2_w{{0, 1, 2, 4, 1}};
+//   auto col0_2 = cudf::dictionary::encode(col0_2_w);
+
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+//   column_wrapper<int32_t> col1_2_w{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+//   auto col1_2 = cudf::dictionary::encode(col1_2_w);
+
+//   auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()});
+//   auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()});
+
+//   auto result      = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   auto result_view = result->view();
+//   auto decoded2    = cudf::dictionary::decode(result_view.column(2));
+//   auto decoded3    = cudf::dictionary::decode(result_view.column(3));
+//   std::vector<cudf::column_view> result_decoded(
+//     {result_view.column(0), result_view.column(1), decoded2->view(), decoded3->view()});
+
+//   auto g0   = cudf::table_view({col0_0, col0_1, col0_2_w});
+//   auto g1   = cudf::table_view({col1_0, col1_1, col1_2_w});
+//   auto gold = cudf::left_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
+// }
 
-TEST_F(JoinTest, LeftJoinGathermap)
-{
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
-  strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
-  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+// TEST_F(JoinDictionaryTest, InnerJoinNoNulls)
+// {
+//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+//   strcol_wrapper col0_1_w({"s1", "s1", "s0", "s4", "s0"});
+//   auto col0_1 = cudf::dictionary::encode(col0_1_w);
+//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   strcol_wrapper col1_1_w({"s1", "s0", "s1", "s2", "s1"});
+//   auto col1_1 = cudf::dictionary::encode(col1_1_w);
+//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+//   auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
+//   auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
+
+//   auto result      = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   auto result_view = result->view();
+//   auto decoded1    = cudf::dictionary::decode(result_view.column(1));
+//   std::vector<cudf::column_view> result_decoded(
+//     {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)});
+
+//   auto g0   = cudf::table_view({col0_0, col0_1_w, col0_2});
+//   auto g1   = cudf::table_view({col1_0, col1_1_w, col1_2});
+//   auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
+// }
 
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+// TEST_F(JoinDictionaryTest, InnerJoinWithNulls)
+// {
+//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+//   strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
+//   column_wrapper<int32_t> col0_2_w{{0, 1, 2, 4, 1}};
+//   auto col0_2 = cudf::dictionary::encode(col0_2_w);
+
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+//   column_wrapper<int32_t> col1_2_w{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+//   auto col1_2 = cudf::dictionary::encode(col1_2_w);
+
+//   auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()});
+//   auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()});
+
+//   auto result      = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   auto result_view = result->view();
+//   auto decoded2    = cudf::dictionary::decode(result_view.column(2));
+//   auto decoded3    = cudf::dictionary::decode(result_view.column(3));
+//   std::vector<cudf::column_view> result_decoded(
+//     {result_view.column(0), result_view.column(1), decoded2->view(), decoded3->view()});
+
+//   auto g0   = cudf::table_view({col0_0, col0_1, col0_2_w});
+//   auto g1   = cudf::table_view({col1_0, col1_1, col1_2_w});
+//   auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
+// }
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols0.push_back(col0_2.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-  cols1.push_back(col1_2.release());
+// TEST_F(JoinDictionaryTest, FullJoinNoNulls)
+// {
+//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
+//   strcol_wrapper col0_1_w({"s0", "s1", "s2", "s4", "s1"});
+//   auto col0_1 = cudf::dictionary::encode(col0_1_w);
+//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   strcol_wrapper col1_1_w{{"s1", "s0", "s1", "s2", "s1"}};
+//   auto col1_1 = cudf::dictionary::encode(col1_1_w);
+//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+//   auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
+//   auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
+
+//   auto result      = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   auto result_view = result->view();
+//   auto decoded1    = cudf::dictionary::decode(result_view.column(1));
+//   std::vector<cudf::column_view> result_decoded(
+//     {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)});
+
+//   auto g0   = cudf::table_view({col0_0, col0_1_w, col0_2});
+//   auto g1   = cudf::table_view({col1_0, col1_1_w, col1_2});
+//   auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
+// }
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+// TEST_F(JoinDictionaryTest, FullJoinWithNulls)
+// {
+//   column_wrapper<int32_t> col0_0_w{{3, 1, 2, 0, 3}};
+//   auto col0_0 = cudf::dictionary::encode(col0_0_w);
+//   strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
+//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+//   column_wrapper<int32_t> col1_0_w{{2, 2, 0, 4, 3}, {1, 1, 1, 0, 1}};
+//   auto col1_0 = cudf::dictionary::encode(col1_0_w);
+//   strcol_wrapper col1_1{{"s1", "s0", "s1", "s2", "s1"}};
+//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+//   auto t0 = cudf::table_view({col0_0->view(), col0_1, col0_2});
+//   auto t1 = cudf::table_view({col1_0->view(), col1_1, col1_2});
+
+//   auto result      = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   auto result_view = result->view();
+//   auto decoded0    = cudf::dictionary::decode(result_view.column(0));
+//   std::vector<cudf::column_view> result_decoded(
+//     {decoded0->view(), result_view.column(1), result_view.column(2), result_view.column(3)});
+
+//   auto g0   = cudf::table_view({col0_0_w, col0_1, col0_2});
+//   auto g1   = cudf::table_view({col1_0_w, col1_1, col1_2});
+//   auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
+//   CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
+// }
 
-  auto result          = cudf::left_join(t0, t1, {0, 1}, {0, 1});
-  auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
-  auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
-  auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}), *lmap_sort_order);
-  auto rmap_sorted     = cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
+// TEST_F(JoinTest, InnerJoinGathermap)
+// {
+//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+//   strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"});
+//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols0.push_back(col0_2.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
+//   cols1.push_back(col1_2.release());
+
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  column_wrapper<int32_t> lmap_gold{{0, 1, 2, 3, 4}};
-  column_wrapper<int32_t> rmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 4}};
+//   auto result          = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
+//   auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
+//   auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
+//   auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}),
+//   *lmap_sort_order); auto rmap_sorted     =
+//   cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
+
+//   column_wrapper<int32_t> lmap_gold{{0, 2, 4}};
+//   column_wrapper<int32_t> rmap_gold{{1, 1, 4}};
+//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
+//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
+// }
 
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
-}
+// TEST_F(JoinTest, LeftJoinGathermap)
+// {
+//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
+//   strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
+//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols0.push_back(col0_2.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
+//   cols1.push_back(col1_2.release());
+
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-TEST_F(JoinTest, FullJoinGatherMap)
-{
-  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
-  strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
-  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+//   auto result          = cudf::left_join(t0, t1, {0, 1}, {0, 1});
+//   auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
+//   auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
+//   auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}),
+//   *lmap_sort_order); auto rmap_sorted     =
+//   cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
 
-  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}, {1, 1, 1, 0, 1}};
-  strcol_wrapper col1_1{{"s1", "s0", "s1", "s2", "s1"}};
-  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+//   column_wrapper<int32_t> lmap_gold{{0, 1, 2, 3, 4}};
+//   column_wrapper<int32_t> rmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 4}};
 
-  CVector cols0, cols1;
-  cols0.push_back(col0_0.release());
-  cols0.push_back(col0_1.release());
-  cols0.push_back(col0_2.release());
-  cols1.push_back(col1_0.release());
-  cols1.push_back(col1_1.release());
-  cols1.push_back(col1_2.release());
+//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
+//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
+// }
 
-  Table t0(std::move(cols0));
-  Table t1(std::move(cols1));
+// TEST_F(JoinTest, FullJoinGatherMap)
+// {
+//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
+//   strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
+//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}, {1, 1, 1, 0, 1}};
+//   strcol_wrapper col1_1{{"s1", "s0", "s1", "s2", "s1"}};
+//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+//   CVector cols0, cols1;
+//   cols0.push_back(col0_0.release());
+//   cols0.push_back(col0_1.release());
+//   cols0.push_back(col0_2.release());
+//   cols1.push_back(col1_0.release());
+//   cols1.push_back(col1_1.release());
+//   cols1.push_back(col1_2.release());
+
+//   Table t0(std::move(cols0));
+//   Table t1(std::move(cols1));
 
-  auto result          = cudf::full_join(t0, t1, {0, 1}, {0, 1});
-  auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
-  auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
-  auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}), *lmap_sort_order);
-  auto rmap_sorted     = cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
+//   auto result          = cudf::full_join(t0, t1, {0, 1}, {0, 1});
+//   auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
+//   auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
+//   auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}),
+//   *lmap_sort_order); auto rmap_sorted     =
+//   cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
 
-  column_wrapper<int32_t> lmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3, 4}};
-  column_wrapper<int32_t> rmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3, 4}};
+//   column_wrapper<int32_t> lmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3, 4}};
+//   column_wrapper<int32_t> rmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3, 4}};
 
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
-}
+//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
+//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
+// }
 
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/join/semi_join_tests.cpp b/cpp/tests/join/semi_join_tests.cpp
index 30ac1b57e55..8de9610b07d 100644
--- a/cpp/tests/join/semi_join_tests.cpp
+++ b/cpp/tests/join/semi_join_tests.cpp
@@ -34,830 +34,3 @@ using column_wrapper = cudf::test::fixed_width_column_wrapper<T>;
 
 struct JoinTest : public cudf::test::BaseFixture {
 };
-
-TEST_F(JoinTest, LeftSemiJoin)
-{
-  std::vector<const char*> a_strings{
-    "quick", "accénted", "turtlé", "composéd", "result", "", "words"};
-  std::vector<const char*> b_strings{"quick", "words", "result"};
-  std::vector<const char*> e_strings{"quick", "composéd", "result", ""};
-
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-
-  cudf::test::strings_column_wrapper a_3(
-    a_strings.begin(),
-    a_strings.end(),
-    thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> b_0{10, 20, 20};
-  column_wrapper<float> b_1{5.0, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62};
-
-  cudf::test::strings_column_wrapper b_3(
-    b_strings.begin(),
-    b_strings.end(),
-    thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> expect_0{10, 20, 20, 20};
-  column_wrapper<float> expect_1{5.0, .7, .7, .7};
-  column_wrapper<int8_t> expect_2{90, 61, 62, 63};
-
-  cudf::test::strings_column_wrapper expect_3(
-    e_strings.begin(),
-    e_strings.end(),
-    thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  std::vector<std::unique_ptr<cudf::column>> column_a;
-  column_a.push_back(a_0.release());
-  column_a.push_back(a_1.release());
-  column_a.push_back(a_2.release());
-  column_a.push_back(a_3.release());
-
-  std::vector<std::unique_ptr<cudf::column>> column_b;
-  column_b.push_back(b_0.release());
-  column_b.push_back(b_1.release());
-  column_b.push_back(b_2.release());
-  column_b.push_back(b_3.release());
-
-  cudf::table table_a(std::move(column_a));
-  cudf::table table_b(std::move(column_b));
-
-  auto join_table = cudf::left_semi_join(table_a, table_b, {0, 1}, {0, 1}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3);
-}
-
-TEST_F(JoinTest, LeftSemiJoin_with_a_string_key)
-{
-  std::vector<const char*> a_strings{
-    "quick", "accénted", "turtlé", "composéd", "result", "", "words"};
-  std::vector<const char*> b_strings{"quick", "words", "result"};
-  std::vector<const char*> e_strings{"quick", "result"};
-
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-
-  cudf::test::strings_column_wrapper a_3(
-    a_strings.begin(),
-    a_strings.end(),
-    thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> b_0{10, 20, 20};
-  column_wrapper<float> b_1{5.0, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62};
-
-  cudf::test::strings_column_wrapper b_3(
-    b_strings.begin(),
-    b_strings.end(),
-    thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> expect_0{10, 20};
-  column_wrapper<float> expect_1{5.0, .7};
-  column_wrapper<int8_t> expect_2{90, 62};
-
-  cudf::test::strings_column_wrapper expect_3(
-    e_strings.begin(),
-    e_strings.end(),
-    thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  std::vector<std::unique_ptr<cudf::column>> column_a;
-  column_a.push_back(a_0.release());
-  column_a.push_back(a_1.release());
-  column_a.push_back(a_2.release());
-  column_a.push_back(a_3.release());
-
-  std::vector<std::unique_ptr<cudf::column>> column_b;
-  column_b.push_back(b_0.release());
-  column_b.push_back(b_1.release());
-  column_b.push_back(b_2.release());
-  column_b.push_back(b_3.release());
-
-  cudf::table table_a(std::move(column_a));
-  cudf::table table_b(std::move(column_b));
-
-  auto join_table = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3);
-}
-
-TEST_F(JoinTest, LeftSemiJoin_with_null)
-{
-  std::vector<const char*> a_strings{
-    "quick", "accénted", "turtlé", "composéd", "result", "", "words"};
-  std::vector<const char*> b_strings{"quick", "words", "result", nullptr};
-  std::vector<const char*> e_strings{"quick", "result"};
-
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-
-  cudf::test::strings_column_wrapper a_3(
-    a_strings.begin(),
-    a_strings.end(),
-    thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> b_0{10, 20, 20, 50};
-  column_wrapper<float> b_1{5.0, .7, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62, 41};
-
-  cudf::test::strings_column_wrapper b_3(
-    b_strings.begin(),
-    b_strings.end(),
-    thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> expect_0{10, 20};
-  column_wrapper<float> expect_1{5.0, .7};
-  column_wrapper<int8_t> expect_2{90, 62};
-
-  cudf::test::strings_column_wrapper expect_3(
-    e_strings.begin(),
-    e_strings.end(),
-    thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  std::vector<std::unique_ptr<cudf::column>> column_a;
-  column_a.push_back(a_0.release());
-  column_a.push_back(a_1.release());
-  column_a.push_back(a_2.release());
-  column_a.push_back(a_3.release());
-
-  std::vector<std::unique_ptr<cudf::column>> column_b;
-  column_b.push_back(b_0.release());
-  column_b.push_back(b_1.release());
-  column_b.push_back(b_2.release());
-  column_b.push_back(b_3.release());
-
-  cudf::table table_a(std::move(column_a));
-  cudf::table table_b(std::move(column_b));
-
-  auto join_table = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3);
-}
-
-TEST_F(JoinTest, LeftAntiJoin)
-{
-  std::vector<const char*> a_strings{
-    "quick", "accénted", "turtlé", "composéd", "result", "", "words"};
-  std::vector<const char*> b_strings{"quick", "words", "result"};
-  std::vector<const char*> e_strings{"accénted", "turtlé", "words"};
-
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-
-  cudf::test::strings_column_wrapper a_3(
-    a_strings.begin(),
-    a_strings.end(),
-    thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> b_0{10, 20, 20};
-  column_wrapper<float> b_1{5.0, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62};
-
-  cudf::test::strings_column_wrapper b_3(
-    b_strings.begin(),
-    b_strings.end(),
-    thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> expect_0{20, 20, 50};
-  column_wrapper<float> expect_1{.5, .5, .7};
-  column_wrapper<int8_t> expect_2{77, 78, 41};
-
-  cudf::test::strings_column_wrapper expect_3(
-    e_strings.begin(),
-    e_strings.end(),
-    thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  std::vector<std::unique_ptr<cudf::column>> column_a;
-  column_a.push_back(a_0.release());
-  column_a.push_back(a_1.release());
-  column_a.push_back(a_2.release());
-  column_a.push_back(a_3.release());
-
-  std::vector<std::unique_ptr<cudf::column>> column_b;
-  column_b.push_back(b_0.release());
-  column_b.push_back(b_1.release());
-  column_b.push_back(b_2.release());
-  column_b.push_back(b_3.release());
-
-  cudf::table table_a(std::move(column_a));
-  cudf::table table_b(std::move(column_b));
-
-  auto join_table = cudf::left_anti_join(table_a, table_b, {0, 1}, {0, 1}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3);
-}
-
-TEST_F(JoinTest, LeftAntiJoin_with_a_string_key)
-{
-  std::vector<const char*> a_strings{
-    "quick", "accénted", "turtlé", "composéd", "result", "", "words"};
-  std::vector<const char*> b_strings{"quick", "words", "result"};
-  std::vector<const char*> e_strings{"accénted", "turtlé", "composéd", "", "words"};
-
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-
-  cudf::test::strings_column_wrapper a_3(
-    a_strings.begin(),
-    a_strings.end(),
-    thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> b_0{10, 20, 20};
-  column_wrapper<float> b_1{5.0, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62};
-
-  cudf::test::strings_column_wrapper b_3(
-    b_strings.begin(),
-    b_strings.end(),
-    thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> expect_0{20, 20, 20, 20, 50};
-  column_wrapper<float> expect_1{.5, .5, .7, .7, .7};
-  column_wrapper<int8_t> expect_2{77, 78, 61, 63, 41};
-
-  cudf::test::strings_column_wrapper expect_3(
-    e_strings.begin(),
-    e_strings.end(),
-    thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  std::vector<std::unique_ptr<cudf::column>> column_a;
-  column_a.push_back(a_0.release());
-  column_a.push_back(a_1.release());
-  column_a.push_back(a_2.release());
-  column_a.push_back(a_3.release());
-
-  std::vector<std::unique_ptr<cudf::column>> column_b;
-  column_b.push_back(b_0.release());
-  column_b.push_back(b_1.release());
-  column_b.push_back(b_2.release());
-  column_b.push_back(b_3.release());
-
-  cudf::table table_a(std::move(column_a));
-  cudf::table table_b(std::move(column_b));
-
-  auto join_table = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3);
-}
-
-TEST_F(JoinTest, LeftAntiJoin_with_null)
-{
-  std::vector<const char*> a_strings{
-    "quick", "accénted", "turtlé", "composéd", "result", "", "words"};
-  std::vector<const char*> b_strings{"quick", "words", "result", nullptr};
-  std::vector<const char*> e_strings{"accénted", "turtlé", "composéd", "", "words"};
-
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-
-  cudf::test::strings_column_wrapper a_3(
-    a_strings.begin(),
-    a_strings.end(),
-    thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> b_0{10, 20, 20, 50};
-  column_wrapper<float> b_1{5.0, .7, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62, 41};
-
-  cudf::test::strings_column_wrapper b_3(
-    b_strings.begin(),
-    b_strings.end(),
-    thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> expect_0{20, 20, 20, 20, 50};
-  column_wrapper<float> expect_1{.5, .5, .7, .7, .7};
-  column_wrapper<int8_t> expect_2{77, 78, 61, 63, 41};
-
-  cudf::test::strings_column_wrapper expect_3(
-    e_strings.begin(),
-    e_strings.end(),
-    thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  std::vector<std::unique_ptr<cudf::column>> column_a;
-  column_a.push_back(a_0.release());
-  column_a.push_back(a_1.release());
-  column_a.push_back(a_2.release());
-  column_a.push_back(a_3.release());
-
-  std::vector<std::unique_ptr<cudf::column>> column_b;
-  column_b.push_back(b_0.release());
-  column_b.push_back(b_1.release());
-  column_b.push_back(b_2.release());
-  column_b.push_back(b_3.release());
-
-  cudf::table table_a(std::move(column_a));
-  cudf::table table_b(std::move(column_b));
-
-  auto join_table = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3);
-}
-
-TEST_F(JoinTest, LeftSemiAntiJoin_exceptions)
-{
-  std::vector<const char*> b_strings{"quick", "words", "result", nullptr};
-
-  column_wrapper<int32_t> b_0{10, 20, 20, 50};
-  column_wrapper<float> b_1{5.0, .7, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62, 41};
-
-  cudf::test::strings_column_wrapper b_3(
-    b_strings.begin(),
-    b_strings.end(),
-    thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  std::vector<std::unique_ptr<cudf::column>> column_a;
-
-  std::vector<std::unique_ptr<cudf::column>> column_b;
-  column_b.push_back(b_0.release());
-  column_b.push_back(b_1.release());
-  column_b.push_back(b_2.release());
-  column_b.push_back(b_3.release());
-
-  cudf::table table_a(std::move(column_a));
-  cudf::table table_b(std::move(column_b));
-
-  //
-  //  table_a has no columns, table_b has columns
-  //  Let's check different permutations of passing table
-  //  with no columns to verify that exceptions are thrown
-  //
-  EXPECT_THROW(cudf::left_semi_join(table_a, table_b, {}, {}, {}), cudf::logic_error);
-
-  EXPECT_THROW(cudf::left_anti_join(table_a, table_b, {}, {}, {}), cudf::logic_error);
-
-  EXPECT_THROW(cudf::left_semi_join(table_b, table_a, {}, {}, {}), cudf::logic_error);
-
-  EXPECT_THROW(cudf::left_anti_join(table_b, table_a, {}, {}, {}), cudf::logic_error);
-
-  //
-  //  table_b has columns, so we'll pass the column checks, but
-  //  these should fail the exception check that the number of
-  //  join columns must be the same for each table
-  //
-  EXPECT_THROW(cudf::left_semi_join(table_b, table_b, {0}, {}, {}), cudf::logic_error);
-
-  EXPECT_THROW(cudf::left_anti_join(table_b, table_b, {0}, {}, {}), cudf::logic_error);
-
-  EXPECT_THROW(cudf::left_semi_join(table_b, table_b, {}, {0}, {}), cudf::logic_error);
-
-  EXPECT_THROW(cudf::left_anti_join(table_b, table_b, {}, {0}, {}), cudf::logic_error);
-}
-
-TEST_F(JoinTest, LeftSemiJoin_empty_result)
-{
-  std::vector<const char*> a_strings{
-    "quick", "accénted", "turtlé", "composéd", "result", "", "words"};
-  std::vector<const char*> b_strings{"quick", "words", "result", nullptr};
-  std::vector<const char*> e_strings{};
-
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-
-  cudf::test::strings_column_wrapper a_3(
-    a_strings.begin(),
-    a_strings.end(),
-    thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> b_0{10, 20, 20, 50};
-  column_wrapper<float> b_1{5.0, .7, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62, 41};
-
-  cudf::test::strings_column_wrapper b_3(
-    b_strings.begin(),
-    b_strings.end(),
-    thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> expect_0{};
-  column_wrapper<float> expect_1{};
-  column_wrapper<int8_t> expect_2{};
-
-  cudf::test::strings_column_wrapper expect_3(
-    e_strings.begin(),
-    e_strings.end(),
-    thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  std::vector<std::unique_ptr<cudf::column>> column_a;
-  column_a.push_back(a_0.release());
-  column_a.push_back(a_1.release());
-  column_a.push_back(a_2.release());
-  column_a.push_back(a_3.release());
-
-  std::vector<std::unique_ptr<cudf::column>> column_b;
-  column_b.push_back(b_0.release());
-  column_b.push_back(b_1.release());
-  column_b.push_back(b_2.release());
-  column_b.push_back(b_3.release());
-
-  cudf::table table_a(std::move(column_a));
-  cudf::table table_b(std::move(column_b));
-
-  auto join_table =
-    cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, std::vector<cudf::size_type>{});
-
-  EXPECT_EQ(join_table->num_columns(), 0);
-  EXPECT_EQ(join_table->num_rows(), 0);
-
-  auto join_table2 = cudf::left_semi_join(table_a, table_b, {}, {}, {0, 1, 3});
-
-  EXPECT_EQ(join_table2->num_columns(), 3);
-  EXPECT_EQ(join_table2->num_rows(), 0);
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table2->get_column(2), expect_3);
-}
-
-TEST_F(JoinTest, LeftAntiJoin_empty_result)
-{
-  std::vector<const char*> a_strings{
-    "quick", "accénted", "turtlé", "composéd", "result", "", "words"};
-  std::vector<const char*> b_strings{"quick", "words", "result", nullptr};
-  std::vector<const char*> e_strings{};
-
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-
-  cudf::test::strings_column_wrapper a_3(
-    a_strings.begin(),
-    a_strings.end(),
-    thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> b_0{10, 20, 20, 50};
-  column_wrapper<float> b_1{5.0, .7, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62, 41};
-
-  cudf::test::strings_column_wrapper b_3(
-    b_strings.begin(),
-    b_strings.end(),
-    thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> expect_0{};
-  column_wrapper<float> expect_1{};
-  column_wrapper<int8_t> expect_2{};
-
-  cudf::test::strings_column_wrapper expect_3(
-    e_strings.begin(),
-    e_strings.end(),
-    thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  std::vector<std::unique_ptr<cudf::column>> column_a;
-  column_a.push_back(a_0.release());
-  column_a.push_back(a_1.release());
-  column_a.push_back(a_2.release());
-  column_a.push_back(a_3.release());
-
-  std::vector<std::unique_ptr<cudf::column>> column_b;
-  column_b.push_back(b_0.release());
-  column_b.push_back(b_1.release());
-  column_b.push_back(b_2.release());
-  column_b.push_back(b_3.release());
-
-  cudf::table table_a(std::move(column_a));
-  cudf::table table_b(std::move(column_b));
-
-  auto join_table =
-    cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, std::vector<cudf::size_type>{});
-
-  EXPECT_EQ(join_table->num_columns(), 0);
-  EXPECT_EQ(join_table->num_rows(), 0);
-
-  auto join_table2 = cudf::left_anti_join(table_a, table_b, {}, {}, {0, 1, 3});
-
-  EXPECT_EQ(join_table2->num_columns(), 3);
-  EXPECT_EQ(join_table2->num_rows(), 0);
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table2->get_column(2), expect_3);
-}
-
-TEST_F(JoinTest, LeftSemiAntiJoin_empty_table)
-{
-  std::vector<const char*> a_strings{};
-  std::vector<const char*> b_strings{"quick", "words", "result", nullptr};
-  std::vector<const char*> e_strings{};
-
-  column_wrapper<int32_t> a_0{};
-  column_wrapper<float> a_1{};
-  column_wrapper<int8_t> a_2{};
-
-  cudf::test::strings_column_wrapper a_3(
-    a_strings.begin(),
-    a_strings.end(),
-    thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> b_0{10, 20, 20, 50};
-  column_wrapper<float> b_1{5.0, .7, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62, 41};
-
-  cudf::test::strings_column_wrapper b_3(
-    b_strings.begin(),
-    b_strings.end(),
-    thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> expect_0{};
-  column_wrapper<float> expect_1{};
-  column_wrapper<int8_t> expect_2{};
-
-  cudf::test::strings_column_wrapper expect_3(
-    e_strings.begin(),
-    e_strings.end(),
-    thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  std::vector<std::unique_ptr<cudf::column>> column_a;
-  column_a.push_back(a_0.release());
-  column_a.push_back(a_1.release());
-  column_a.push_back(a_2.release());
-  column_a.push_back(a_3.release());
-
-  std::vector<std::unique_ptr<cudf::column>> column_b;
-  column_b.push_back(b_0.release());
-  column_b.push_back(b_1.release());
-  column_b.push_back(b_2.release());
-  column_b.push_back(b_3.release());
-
-  cudf::table table_a(std::move(column_a));
-  cudf::table table_b(std::move(column_b));
-
-  auto join_table = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table->get_column(3), expect_3);
-
-  auto join_table2 = cudf::left_semi_join(table_b, table_a, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table2->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table2->get_column(3), expect_3);
-
-  auto join_table3 = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table3->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table3->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table3->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table3->get_column(3), expect_3);
-
-  auto join_table4 = cudf::left_anti_join(table_a, table_a, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table4->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table4->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table4->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table4->get_column(3), expect_3);
-
-  auto join_table5 = cudf::left_anti_join(table_a, table_a, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table5->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table5->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table5->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(join_table5->get_column(3), expect_3);
-}
-
-TEST_F(JoinTest, LeftAntiJoin_empty_right_table)
-{
-  std::vector<const char*> a_strings{"quick", "words", "result", nullptr};
-  std::vector<const char*> b_strings{};
-  std::vector<const char*> e_strings{"quick", "words", "result", nullptr};
-
-  column_wrapper<int32_t> a_0{10, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 75, 62, 41};
-
-  cudf::test::strings_column_wrapper a_3(
-    a_strings.begin(),
-    a_strings.end(),
-    thrust::make_transform_iterator(a_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> b_0{};
-  column_wrapper<float> b_1{};
-  column_wrapper<int8_t> b_2{};
-
-  cudf::test::strings_column_wrapper b_3(
-    b_strings.begin(),
-    b_strings.end(),
-    thrust::make_transform_iterator(b_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  column_wrapper<int32_t> expect_0{10, 20, 20, 50};
-  column_wrapper<float> expect_1{5.0, .7, .7, .7};
-  column_wrapper<int8_t> expect_2{90, 75, 62, 41};
-
-  cudf::test::strings_column_wrapper expect_3(
-    e_strings.begin(),
-    e_strings.end(),
-    thrust::make_transform_iterator(e_strings.begin(), [](auto str) { return str != nullptr; }));
-
-  std::vector<std::unique_ptr<cudf::column>> column_a;
-  column_a.push_back(a_0.release());
-  column_a.push_back(a_1.release());
-  column_a.push_back(a_2.release());
-  column_a.push_back(a_3.release());
-
-  std::vector<std::unique_ptr<cudf::column>> column_b;
-  column_b.push_back(b_0.release());
-  column_b.push_back(b_1.release());
-  column_b.push_back(b_2.release());
-  column_b.push_back(b_3.release());
-
-  cudf::table table_a(std::move(column_a));
-  cudf::table table_b(std::move(column_b));
-
-  auto join_table = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(0), expect_0);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(1), expect_1);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(2), expect_2);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(join_table->get_column(3), expect_3);
-}
-
-struct JoinDictionaryTest : public cudf::test::BaseFixture {
-};
-
-TEST_F(JoinDictionaryTest, LeftSemiJoin)
-{
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-  cudf::test::strings_column_wrapper a_3_w(
-    {"quick", "accénted", "turtlé", "composéd", "result", "", "words"});
-  auto a_3 = cudf::dictionary::encode(a_3_w);
-
-  column_wrapper<int32_t> b_0{10, 20, 20};
-  column_wrapper<float> b_1{5.0, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62};
-  cudf::test::strings_column_wrapper b_3_w({"quick", "words", "result"});
-  auto b_3 = cudf::dictionary::encode(b_3_w);
-
-  auto table_a  = cudf::table_view({a_0, a_1, a_2, a_3->view()});
-  auto table_b  = cudf::table_view({b_0, b_1, b_2, b_3->view()});
-  auto expect_a = cudf::table_view({a_0, a_1, a_2, a_3_w});
-  auto expect_b = cudf::table_view({b_0, b_1, b_2, b_3_w});
-  {
-    auto result      = cudf::left_semi_join(table_a, table_b, {0, 1}, {0, 1}, {0, 1, 2, 3});
-    auto result_view = result->view();
-    auto decoded3    = cudf::dictionary::decode(result_view.column(3));
-    std::vector<cudf::column_view> result_decoded(
-      {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()});
-
-    auto expected = cudf::left_semi_join(expect_a, expect_b, {0, 1}, {0, 1}, {0, 1, 2, 3});
-    CUDF_TEST_EXPECT_TABLES_EQUAL(cudf::table_view(result_decoded), *expected);
-  }
-  {
-    auto result      = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-    auto result_view = result->view();
-    auto decoded3    = cudf::dictionary::decode(result_view.column(3));
-    std::vector<cudf::column_view> result_decoded(
-      {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()});
-
-    auto expected = cudf::left_semi_join(expect_a, expect_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view(result_decoded), *expected);
-  }
-}
-
-TEST_F(JoinDictionaryTest, LeftSemiJoinWithNulls)
-{
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-  cudf::test::strings_column_wrapper a_3_w(
-    {"quick", "accénted", "turtlé", "composéd", "result", "", "words"});
-  auto a_3 = cudf::dictionary::encode(a_3_w);
-
-  column_wrapper<int32_t> b_0{10, 20, 20, 50};
-  column_wrapper<float> b_1{5.0, .7, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62, 41};
-  cudf::test::strings_column_wrapper b_3_w({"quick", "words", "result", ""}, {1, 1, 1, 0});
-  auto b_3 = cudf::dictionary::encode(b_3_w);
-
-  auto table_a = cudf::table_view({a_0, a_1, a_2, a_3->view()});
-  auto table_b = cudf::table_view({b_0, b_1, b_2, b_3->view()});
-
-  auto result      = cudf::left_semi_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-  auto result_view = result->view();
-  auto decoded3    = cudf::dictionary::decode(result_view.column(3));
-  std::vector<cudf::column_view> result_decoded(
-    {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()});
-
-  auto expect_a = cudf::table_view({a_0, a_1, a_2, a_3_w});
-  auto expect_b = cudf::table_view({b_0, b_1, b_2, b_3_w});
-  auto expected = cudf::left_semi_join(expect_a, expect_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view(result_decoded), *expected);
-}
-
-TEST_F(JoinDictionaryTest, LeftAntiJoin)
-{
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-  cudf::test::strings_column_wrapper a_3_w(
-    {"quick", "accénted", "turtlé", "composéd", "result", "", "words"});
-  auto a_3 = cudf::dictionary::encode(a_3_w);
-
-  column_wrapper<int32_t> b_0{10, 20, 20};
-  column_wrapper<float> b_1{5.0, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62};
-  cudf::test::strings_column_wrapper b_3_w({"quick", "words", "result"});
-  auto b_3 = cudf::dictionary::encode(b_3_w);
-
-  auto table_a  = cudf::table_view({a_0, a_1, a_2, a_3->view()});
-  auto table_b  = cudf::table_view({b_0, b_1, b_2, b_3->view()});
-  auto expect_a = cudf::table_view({a_0, a_1, a_2, a_3_w});
-  auto expect_b = cudf::table_view({b_0, b_1, b_2, b_3_w});
-  {
-    auto result      = cudf::left_anti_join(table_a, table_b, {0, 1}, {0, 1}, {0, 1, 2, 3});
-    auto result_view = result->view();
-    auto decoded3    = cudf::dictionary::decode(result_view.column(3));
-    std::vector<cudf::column_view> result_decoded(
-      {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()});
-
-    auto expected = cudf::left_anti_join(expect_a, expect_b, {0, 1}, {0, 1}, {0, 1, 2, 3});
-    CUDF_TEST_EXPECT_TABLES_EQUAL(cudf::table_view(result_decoded), *expected);
-  }
-  {
-    auto result      = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-    auto result_view = result->view();
-    auto decoded3    = cudf::dictionary::decode(result_view.column(3));
-    std::vector<cudf::column_view> result_decoded(
-      {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()});
-
-    auto expected = cudf::left_anti_join(expect_a, expect_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view(result_decoded), *expected);
-  }
-}
-
-TEST_F(JoinDictionaryTest, LeftAntiJoinWithNulls)
-{
-  column_wrapper<int32_t> a_0{10, 20, 20, 20, 20, 20, 50};
-  column_wrapper<float> a_1{5.0, .5, .5, .7, .7, .7, .7};
-  column_wrapper<int8_t> a_2{90, 77, 78, 61, 62, 63, 41};
-  cudf::test::strings_column_wrapper a_3_w(
-    {"quick", "accénted", "turtlé", "composéd", "result", "", "words"});
-  auto a_3 = cudf::dictionary::encode(a_3_w);
-
-  column_wrapper<int32_t> b_0{10, 20, 20, 50};
-  column_wrapper<float> b_1{5.0, .7, .7, .7};
-  column_wrapper<int8_t> b_2{90, 75, 62, 41};
-  cudf::test::strings_column_wrapper b_3_w({"quick", "words", "result", ""}, {1, 1, 1, 0});
-  auto b_3 = cudf::dictionary::encode(b_3_w);
-
-  auto table_a = cudf::table_view({a_0, a_1, a_2, a_3->view()});
-  auto table_b = cudf::table_view({b_0, b_1, b_2, b_3->view()});
-
-  auto result      = cudf::left_anti_join(table_a, table_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-  auto result_view = result->view();
-  auto decoded3    = cudf::dictionary::decode(result_view.column(3));
-  std::vector<cudf::column_view> result_decoded(
-    {result_view.column(0), result_view.column(1), result_view.column(2), decoded3->view()});
-
-  auto expect_a = cudf::table_view({a_0, a_1, a_2, a_3_w});
-  auto expect_b = cudf::table_view({b_0, b_1, b_2, b_3_w});
-  auto expected = cudf::left_anti_join(expect_a, expect_b, {0, 1, 3}, {0, 1, 3}, {0, 1, 2, 3});
-  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(cudf::table_view(result_decoded), *expected);
-}
-
-TEST_F(JoinTest, LeftSemiSimple)
-{
-  column_wrapper<int32_t> a_0{1, 9, 0};
-  column_wrapper<int32_t> a_1{1, 2, 3};
-  auto table_a = cudf::table_view({a_0, a_1});
-
-  column_wrapper<int32_t> b_0{0, 1};
-  column_wrapper<int32_t> b_1{1, 2};
-  auto table_b = cudf::table_view({b_0, b_1});
-
-  auto result      = cudf::left_anti_join(table_a, table_b, {0}, {0}, {0, 1});
-  auto result_view = result->view();
-
-  column_wrapper<int32_t> expect_0{9};
-  column_wrapper<int32_t> expect_1{2};
-  auto expect = cudf::table_view({expect_0, expect_1});
-  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result_view, expect);
-}

From d736d1c9298e97fd38086c879d4e60e5473a6365 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 18 Feb 2021 08:30:23 -0500
Subject: [PATCH 043/138] More join tests

---
 cpp/tests/join/join_tests.cpp | 179 ++++++++++++++++++----------------
 1 file changed, 95 insertions(+), 84 deletions(-)

diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index ec15e5b03c7..b8af44d2083 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -371,7 +371,7 @@ TEST_F(JoinTest, LeftJoinWithNulls)
   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
 
   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"}, );
+  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
   CVector cols0, cols1;
@@ -392,9 +392,9 @@ TEST_F(JoinTest, LeftJoinWithNulls)
   column_wrapper<int32_t> col_gold_0{{3, 1, 2, 0, 2}, {1, 1, 1, 1, 1}};
   strcol_wrapper col_gold_1({"s1", "s1", "", "s4", "s0"}, {1, 1, 0, 1, 1});
   column_wrapper<int32_t> col_gold_2{{0, 1, 2, 4, 1}, {1, 1, 1, 1, 1}};
-  column_wrapper<int32_t> col_gold_3{{3, -1, -1, -1, -1}, {1, 0, 0, 0, 0}};
-  strcol_wrapper col_gold_4{{"s1", "", "", "", ""}, {1, 0, 0, 0, 0}};
-  column_wrapper<int32_t> col_gold_5{{1, 1, -1, 1, 1}, {1, 0, 1, 1, 1}};
+  column_wrapper<int32_t> col_gold_3{{3, -1, -1, -1, 2}, {1, 0, 0, 0, 1}};
+  strcol_wrapper col_gold_4{{"s1", "", "", "", "s0"}, {1, 0, 0, 0, 1}};
+  column_wrapper<int32_t> col_gold_5{{1, -1, -1, -1, -1}, {1, 0, 0, 0, 0}};
 
   CVector cols_gold;
   cols_gold.push_back(col_gold_0.release());
@@ -407,103 +407,114 @@ TEST_F(JoinTest, LeftJoinWithNulls)
 
   auto gold_sort_order = cudf::sorted_order(gold.view());
   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
 }
 
-// TEST_F(JoinTest, LeftJoinOnNulls)
-// {
-//   // clang-format off
-//   column_wrapper<int32_t> col0_0{{  3,    1,    2},
-//                                  {  1,    0,    1}};
-//   strcol_wrapper          col0_1({"s0", "s1", "s2" });
-n//   column_wrapper<int32_t> col0_2{{  0,    1,    2 }};
+TEST_F(JoinTest, LeftJoinOnNulls)
+{
+  // clang-format off
+  column_wrapper<int32_t> col0_0{{  3,    1,    2},
+                                 {  1,    0,    1}};
+  strcol_wrapper          col0_1({"s0", "s1", "s2" });
+  column_wrapper<int32_t> col0_2{{  0,    1,    2 }};
 
-//   column_wrapper<int32_t> col1_0{{  2,    5,    3,    7 },
-//                                  {  1,    1,    1,    0 }};
-//   strcol_wrapper          col1_1({"s1", "s0", "s0", "s1" });
-//   column_wrapper<int32_t> col1_2{{  1,    4,    2,    8 }};
+  column_wrapper<int32_t> col1_0{{  2,    5,    3,    7 },
+                                 {  1,    1,    1,    0 }};
+  strcol_wrapper          col1_1({"s1", "s0", "s0", "s1" });
+  column_wrapper<int32_t> col1_2{{  1,    4,    2,    8 }};
 
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols0.push_back(col0_2.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-//   cols1.push_back(col1_2.release());
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols0.push_back(col0_2.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+  cols1.push_back(col1_2.release());
 
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
 
-//   auto result            = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   auto result_sort_order = cudf::sorted_order(result->view());
-//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+  auto result            = cudf::left_join(t0, t1, {0, 1}, {0, 1});
+  auto result_sort_order = cudf::sorted_order(result->view());
+  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
-// #if 0
-//   std::cout << "Actual Results:\n";
-//   cudf::test::print(sorted_result->get_column(0).view(), std::cout, ",\t\t");
-//   cudf::test::print(sorted_result->get_column(1).view(), std::cout, ",\t\t");
-//   cudf::test::print(sorted_result->get_column(2).view(), std::cout, ",\t\t");
-//   cudf::test::print(sorted_result->get_column(3).view(), std::cout, ",\t\t");
-// #endif
-
-//   column_wrapper<int32_t> col_gold_0{{   3,    -1,    2},
-//                                      {   1,     0,    1}};
-//   strcol_wrapper          col_gold_1({ "s0",  "s1", "s2"},
-//                                      {   1,     1,    1});
-//   column_wrapper<int32_t> col_gold_2{{   0,     1,    2},
-//                                      {   1,     1,    1}};
-//   column_wrapper<int32_t> col_gold_3{{   2,     8,   -1},
-//                                      {   1,     1,    0}};
+#if 0
+  std::cout << "Actual Results:\n";
+  cudf::test::print(sorted_result->get_column(0).view(), std::cout, ",\t\t");
+  cudf::test::print(sorted_result->get_column(1).view(), std::cout, ",\t\t");
+  cudf::test::print(sorted_result->get_column(2).view(), std::cout, ",\t\t");
+  cudf::test::print(sorted_result->get_column(3).view(), std::cout, ",\t\t");
+#endif
 
-//   CVector cols_gold;
-//   cols_gold.push_back(col_gold_0.release());
-//   cols_gold.push_back(col_gold_1.release());
-//   cols_gold.push_back(col_gold_2.release());
-//   cols_gold.push_back(col_gold_3.release());
-//   Table gold(std::move(cols_gold));
+  column_wrapper<int32_t> col_gold_0{{   3,    -1,    2},
+                                     {   1,     0,    1}};
+  strcol_wrapper          col_gold_1({ "s0",  "s1", "s2"},
+                                     {   1,     1,    1});
+  column_wrapper<int32_t> col_gold_2{{   0,     1,    2},
+                                     {   1,     1,    1}};
+  column_wrapper<int32_t> col_gold_3{{   3,    -1,   -1},
+                                     {   1,     0,    0}};
+  strcol_wrapper          col_gold_4({ "s0",  "s1",  ""},
+                                     {   1,     1,    0});
+  column_wrapper<int32_t> col_gold_5{{   2,     8,   -1},
+                                     {   1,     1,    0}};
+  
+  CVector cols_gold;
+  cols_gold.push_back(col_gold_0.release());
+  cols_gold.push_back(col_gold_1.release());
+  cols_gold.push_back(col_gold_2.release());
+  cols_gold.push_back(col_gold_3.release());
+  cols_gold.push_back(col_gold_4.release());
+  cols_gold.push_back(col_gold_5.release());
+  Table gold(std::move(cols_gold));
 
-//   auto gold_sort_order = cudf::sorted_order(gold.view());
-//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+  auto gold_sort_order = cudf::sorted_order(gold.view());
+  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
 
-// #if 0
-//   std::cout << "Expected Results:\n";
-//   cudf::test::print(sorted_gold->get_column(0).view(), std::cout, ",\t\t");
-//   cudf::test::print(sorted_gold->get_column(1).view(), std::cout, ",\t\t");
-//   cudf::test::print(sorted_gold->get_column(2).view(), std::cout, ",\t\t");
-//   cudf::test::print(sorted_gold->get_column(3).view(), std::cout, ",\t\t");
-// #endif
+#if 0
+  std::cout << "Expected Results:\n";
+  cudf::test::print(sorted_gold->get_column(0).view(), std::cout, ",\t\t");
+  cudf::test::print(sorted_gold->get_column(1).view(), std::cout, ",\t\t");
+  cudf::test::print(sorted_gold->get_column(2).view(), std::cout, ",\t\t");
+  cudf::test::print(sorted_gold->get_column(3).view(), std::cout, ",\t\t");
+#endif
 
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
 
-//   // Repeat test with compare_nulls_equal=false,
-//   // as per SQL standard.
+  // Repeat test with compare_nulls_equal=false,
+  // as per SQL standard.
 
-//   result            = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}},
-//   cudf::null_equality::UNEQUAL); result_sort_order = cudf::sorted_order(result->view());
-//   sorted_result     = cudf::gather(result->view(), *result_sort_order);
+  result            = cudf::left_join(t0, t1, {0, 1}, {0, 1}, cudf::null_equality::UNEQUAL);
+  result_sort_order = cudf::sorted_order(result->view());
+  sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
-//   col_gold_0 =               {{   3,    -1,    2},
-//                               {   1,     0,    1}};
-//   col_gold_1 = strcol_wrapper({ "s0",  "s1", "s2"},
-//                               {   1,     1,    1});
-//   col_gold_2 =               {{   0,     1,    2},
-//                               {   1,     1,    1}};
-//   col_gold_3 =               {{   2,    -1,   -1},
-//                               {   1,     0,    0}};
+  
+  col_gold_0 = {{   3,    -1,    2},
+                {   1,     0,    1}};
+  col_gold_1 = {{ "s0",  "s1", "s2"},
+                {   1,     1,    1}};
+  col_gold_2 = {{   0,     1,    2},
+                {   1,     1,    1}};
+  col_gold_3 = {{   3,    -1,   -1},
+                {   1,     0,    0}};
+  col_gold_4 = {{ "s0",   "",   ""},
+                {   1,     0,    0}};
+  col_gold_5 = {{   2,    -1,   -1},
+                {   1,     0,    0}};
 
-//   // clang-format on
-//   CVector cols_gold_nulls_unequal;
-//   cols_gold_nulls_unequal.push_back(col_gold_0.release());
-//   cols_gold_nulls_unequal.push_back(col_gold_1.release());
-//   cols_gold_nulls_unequal.push_back(col_gold_2.release());
-//   cols_gold_nulls_unequal.push_back(col_gold_3.release());
-//   Table gold_nulls_unequal{std::move(cols_gold_nulls_unequal)};
+  // clang-format on
+  CVector cols_gold_nulls_unequal;
+  cols_gold_nulls_unequal.push_back(col_gold_0.release());
+  cols_gold_nulls_unequal.push_back(col_gold_1.release());
+  cols_gold_nulls_unequal.push_back(col_gold_2.release());
+  cols_gold_nulls_unequal.push_back(col_gold_3.release());
+  Table gold_nulls_unequal{std::move(cols_gold_nulls_unequal)};
 
-//   gold_sort_order = cudf::sorted_order(gold_nulls_unequal.view());
-//   sorted_gold     = cudf::gather(gold_nulls_unequal.view(), *gold_sort_order);
+  gold_sort_order = cudf::sorted_order(gold_nulls_unequal.view());
+  sorted_gold     = cudf::gather(gold_nulls_unequal.view(), *gold_sort_order);
 
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-// }
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
+}
 
 // TEST_F(JoinTest, InnerJoinSizeOverflow)
 // {

From b58591dce203c752d62441e3d75a3fa42ddc6e00 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 18 Feb 2021 12:24:15 -0500
Subject: [PATCH 044/138] Fix all join tests

---
 cpp/tests/join/join_tests.cpp | 1774 +++++++++++++++------------------
 1 file changed, 786 insertions(+), 988 deletions(-)

diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index b8af44d2083..fbde179d33d 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -110,7 +110,7 @@ TEST_F(JoinTest, LeftJoinNoNullsWithNoCommon)
   auto gold_sort_order = cudf::sorted_order(gold.view());
   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
 
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
 }
 
 TEST_F(JoinTest, FullJoinNoNulls)
@@ -159,7 +159,7 @@ TEST_F(JoinTest, FullJoinNoNulls)
 
   auto gold_sort_order = cudf::sorted_order(gold.view());
   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
 }
 
 TEST_F(JoinTest, FullJoinWithNulls)
@@ -208,7 +208,7 @@ TEST_F(JoinTest, FullJoinWithNulls)
 
   auto gold_sort_order = cudf::sorted_order(gold.view());
   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-  CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
 }
 
 TEST_F(JoinTest, FullJoinOnNulls)
@@ -516,1030 +516,828 @@ TEST_F(JoinTest, LeftJoinOnNulls)
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
 }
 
-// TEST_F(JoinTest, InnerJoinSizeOverflow)
-// {
-//   auto zero = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32));
-//   zero->set_valid(true);
-//   static_cast<cudf::scalar_type_t<int32_t> *>(zero.get())->set_value(0);
-
-//   // Should cause size overflow, raise exception
-//   int32_t left  = 4;
-//   int32_t right = 1073741825;
-
-//   auto col0_0 = cudf::make_column_from_scalar(*zero, left);
-//   auto col1_0 = cudf::make_column_from_scalar(*zero, right);
-
-//   CVector cols0, cols1;
-//   cols0.push_back(std::move(col0_0));
-//   cols1.push_back(std::move(col1_0));
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   EXPECT_THROW(cudf::inner_join(t0, t1, {0}, {0}, {{0, 0}}), cudf::logic_error);
-// }
-
-// TEST_F(JoinTest, InnerJoinNoNulls)
-// {
-//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-//   strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"});
-//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols0.push_back(col0_2.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-//   cols1.push_back(col1_2.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   auto result_sort_order = cudf::sorted_order(result->view());
-//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-//   column_wrapper<int32_t> col_gold_0{{3, 2, 2}};
-//   strcol_wrapper col_gold_1({"s1", "s0", "s0"});
-//   column_wrapper<int32_t> col_gold_2{{0, 2, 1}};
-//   column_wrapper<int32_t> col_gold_3{{1, 0, 0}};
-//   CVector cols_gold;
-//   cols_gold.push_back(col_gold_0.release());
-//   cols_gold.push_back(col_gold_1.release());
-//   cols_gold.push_back(col_gold_2.release());
-//   cols_gold.push_back(col_gold_3.release());
-//   Table gold(std::move(cols_gold));
-
-//   auto gold_sort_order = cudf::sorted_order(gold.view());
-//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-// }
-
-// TEST_F(JoinTest, InnerJoinNonAlignedCommon)
-// {
-//   CVector cols0, cols1;
-//   cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-//   cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-//   cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
-//   cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-//   cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3}}.release());
-//   cols1.emplace_back(strcol_wrapper({"s1", "s0", "s1", "s2", "s1"}).release());
-//   cols1.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 1}}.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result            = cudf::inner_join(t0, t1, {1, 2}, {0, 1}, {{1, 0}, {2, 1}});
-//   auto result_sort_order = cudf::sorted_order(result->view());
-//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-//   CVector cols_gold;
-//   cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-//   cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-//   cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
-//   cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
-//   cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
-//   Table gold(std::move(cols_gold));
-
-//   auto gold_sort_order = cudf::sorted_order(gold.view());
-//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-// }
-
-// TEST_F(JoinTest, InnerJoinNonAlignedCommonSwap)
-// {
-//   CVector cols0, cols1;
-//   cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-//   cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-//   cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
-//   cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-//   cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3, 5}}.release());
-//   cols1.emplace_back(strcol_wrapper({"s1", "s0", "s1", "s2", "s1", "s0"}).release());
-//   cols1.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 1, 0}}.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result            = cudf::inner_join(t0, t1, {1, 2}, {0, 1}, {{1, 0}, {2, 1}});
-//   auto result_sort_order = cudf::sorted_order(result->view());
-//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-//   CVector cols_gold;
-//   cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-//   cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-//   cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
-//   cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
-//   cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
-//   Table gold(std::move(cols_gold));
-
-//   auto gold_sort_order = cudf::sorted_order(gold.view());
-//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-// }
-
-// TEST_F(JoinTest, InnerJoinWithNulls)
-// {
-//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-//   strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
-//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols0.push_back(col0_2.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-//   cols1.push_back(col1_2.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   auto result_sort_order = cudf::sorted_order(result->view());
-//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-//   column_wrapper<int32_t> col_gold_0{{3, 2}};
-//   strcol_wrapper col_gold_1({"s1", "s0"}, {1, 1});
-//   column_wrapper<int32_t> col_gold_2{{0, 1}};
-//   column_wrapper<int32_t> col_gold_3{{1, -1}, {1, 0}};
-//   CVector cols_gold;
-//   cols_gold.push_back(col_gold_0.release());
-//   cols_gold.push_back(col_gold_1.release());
-//   cols_gold.push_back(col_gold_2.release());
-//   cols_gold.push_back(col_gold_3.release());
-//   Table gold(std::move(cols_gold));
-
-//   auto gold_sort_order = cudf::sorted_order(gold.view());
-//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-// }
+TEST_F(JoinTest, InnerJoinSizeOverflow)
+{
+  auto zero = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32));
+  zero->set_valid(true);
+  static_cast<cudf::scalar_type_t<int32_t> *>(zero.get())->set_value(0);
+
+  // Should cause size overflow, raise exception
+  int32_t left  = 4;
+  int32_t right = 1073741825;
+
+  auto col0_0 = cudf::make_column_from_scalar(*zero, left);
+  auto col1_0 = cudf::make_column_from_scalar(*zero, right);
+
+  CVector cols0, cols1;
+  cols0.push_back(std::move(col0_0));
+  cols1.push_back(std::move(col1_0));
+
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
+
+  EXPECT_THROW(cudf::inner_join(t0, t1, {0}, {0}), cudf::logic_error);
+}
+
+TEST_F(JoinTest, InnerJoinNoNulls)
+{
+  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+  strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"});
+  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols0.push_back(col0_2.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+  cols1.push_back(col1_2.release());
+
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
+
+  auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
+  auto result_sort_order = cudf::sorted_order(result->view());
+  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+  column_wrapper<int32_t> col_gold_0{{3, 2, 2}};
+  strcol_wrapper col_gold_1({"s1", "s0", "s0"});
+  column_wrapper<int32_t> col_gold_2{{0, 2, 1}};
+  column_wrapper<int32_t> col_gold_3{{3, 2, 2}};
+  strcol_wrapper col_gold_4({"s1", "s0", "s0"});
+  column_wrapper<int32_t> col_gold_5{{1, 0, 0}};
+  CVector cols_gold;
+  cols_gold.push_back(col_gold_0.release());
+  cols_gold.push_back(col_gold_1.release());
+  cols_gold.push_back(col_gold_2.release());
+  cols_gold.push_back(col_gold_3.release());
+  cols_gold.push_back(col_gold_4.release());
+  cols_gold.push_back(col_gold_5.release());
+  Table gold(std::move(cols_gold));
+
+  auto gold_sort_order = cudf::sorted_order(gold.view());
+  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
+}
+
+TEST_F(JoinTest, InnerJoinWithNulls)
+{
+  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+  strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
+  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols0.push_back(col0_2.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+  cols1.push_back(col1_2.release());
+
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
+
+  auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
+  auto result_sort_order = cudf::sorted_order(result->view());
+  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+  column_wrapper<int32_t> col_gold_0{{3, 2}};
+  strcol_wrapper col_gold_1({"s1", "s0"}, {1, 1});
+  column_wrapper<int32_t> col_gold_2{{0, 1}};
+  column_wrapper<int32_t> col_gold_3{{3, 2}};
+  strcol_wrapper col_gold_4({"s1", "s0"}, {1, 1});
+  column_wrapper<int32_t> col_gold_5{{1, -1}, {1, 0}};
+  CVector cols_gold;
+  cols_gold.push_back(col_gold_0.release());
+  cols_gold.push_back(col_gold_1.release());
+  cols_gold.push_back(col_gold_2.release());
+  cols_gold.push_back(col_gold_3.release());
+  cols_gold.push_back(col_gold_4.release());
+  cols_gold.push_back(col_gold_5.release());
+  Table gold(std::move(cols_gold));
+
+  auto gold_sort_order = cudf::sorted_order(gold.view());
+  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
+}
 
 // // Test to check join behaviour when join keys are null.
-// TEST_F(JoinTest, InnerJoinOnNulls)
-// {
-//   // clang-format off
-//   column_wrapper<int32_t> col0_0{{  3,    1,    2,    0,    2}};
-//   strcol_wrapper          col0_1({"s1", "s1", "s8", "s4", "s0"},
-//                                  {  1,    1,    0,    1,    1});
-//   column_wrapper<int32_t> col0_2{{  0,    1,    2,    4,    1}};
-
-//   column_wrapper<int32_t> col1_0{{  2,    2,    0,    4,    3}};
-//   strcol_wrapper          col1_1({"s1", "s0", "s1", "s2", "s1"},
-//                                  {  1,    0,    1,    1,    1});
-//   column_wrapper<int32_t> col1_2{{  1,    0,    1,    2,    1}};
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols0.push_back(col0_2.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-//   cols1.push_back(col1_2.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   auto result_sort_order = cudf::sorted_order(result->view());
-//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-//   column_wrapper<int32_t> col_gold_0 {{  3,    2}};
-//   strcol_wrapper          col_gold_1 ({"s1", "s0"},
-//                                       {  1,    0});
-//   column_wrapper<int32_t> col_gold_2{{   0,    2}};
-//   column_wrapper<int32_t> col_gold_3{{   1,    0}};
-//   CVector cols_gold;
-//   cols_gold.push_back(col_gold_0.release());
-//   cols_gold.push_back(col_gold_1.release());
-//   cols_gold.push_back(col_gold_2.release());
-//   cols_gold.push_back(col_gold_3.release());
-//   Table gold(std::move(cols_gold));
-
-//   auto gold_sort_order = cudf::sorted_order(gold.view());
-//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-
-//   // Repeat test with compare_nulls_equal=false,
-//   // as per SQL standard.
-
-//   result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}},
-//   cudf::null_equality::UNEQUAL); result_sort_order = cudf::sorted_order(result->view());
-//   sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-//   col_gold_0 =               {{  3}};
-//   col_gold_1 = strcol_wrapper({"s1"},
-//                               {  1});
-//   col_gold_2 =               {{  0}};
-//   col_gold_3 =               {{  1}};
-
-//   // clang-format on
-
-//   CVector cols_gold_sql;
-//   cols_gold_sql.push_back(col_gold_0.release());
-//   cols_gold_sql.push_back(col_gold_1.release());
-//   cols_gold_sql.push_back(col_gold_2.release());
-//   cols_gold_sql.push_back(col_gold_3.release());
-//   Table gold_sql(std::move(cols_gold_sql));
-
-//   gold_sort_order = cudf::sorted_order(gold_sql.view());
-//   sorted_gold     = cudf::gather(gold_sql.view(), *gold_sort_order);
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-// }
-
-// // Empty Left Table
-// TEST_F(JoinTest, EmptyLeftTableInnerJoin)
-// {
-//   column_wrapper<int32_t> col0_0;
-//   column_wrapper<int32_t> col0_1;
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-
-//   Table empty0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result = cudf::inner_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty0, *result);
-// }
-
-// TEST_F(JoinTest, EmptyLeftTableLeftJoin)
-// {
-//   column_wrapper<int32_t> col0_0;
-//   column_wrapper<int32_t> col0_1;
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-
-//   Table empty0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result = cudf::left_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty0, *result);
-// }
-
-// TEST_F(JoinTest, EmptyLeftTableLeftJoinNonAlignedCommon)
-// {
-//   column_wrapper<int32_t> col0_0;
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-//   CVector cols0, cols1;
-//   cols0.emplace_back(col0_0.release());
-//   cols1.emplace_back(col1_0.release());
-//   cols1.emplace_back(col1_1.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   column_wrapper<int32_t> col_gold_0;
-//   column_wrapper<int32_t> col_gold_1;
-
-//   CVector cols_gold;
-//   cols_gold.emplace_back(col_gold_0.release());
-//   cols_gold.emplace_back(col_gold_1.release());
-
-//   Table gold(std::move(cols_gold));
-
-//   auto result = cudf::left_join(t0, t1, {0}, {1}, {{0, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
-// }
+TEST_F(JoinTest, InnerJoinOnNulls)
+{
+  // clang-format off
+  column_wrapper<int32_t> col0_0{{  3,    1,    2,    0,    2}};
+  strcol_wrapper          col0_1({"s1", "s1", "s8", "s4", "s0"},
+                                 {  1,    1,    0,    1,    1});
+  column_wrapper<int32_t> col0_2{{  0,    1,    2,    4,    1}};
 
-// TEST_F(JoinTest, EmptyLeftTableFullJoin)
-// {
-//   column_wrapper<int32_t> col0_0;
-//   column_wrapper<int32_t> col0_1;
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
+  column_wrapper<int32_t> col1_0{{  2,    2,    0,    4,    3}};
+  strcol_wrapper          col1_1({"s1", "s0", "s1", "s2", "s1"},
+                                 {  1,    0,    1,    1,    1});
+  column_wrapper<int32_t> col1_2{{  1,    0,    1,    2,    1}};
 
-//   Table empty0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result = cudf::full_join(empty0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(t1, *result);
-// }
-
-// // Empty Right Table
-// TEST_F(JoinTest, EmptyRightTableInnerJoin)
-// {
-//   column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}};
-//   column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-//   column_wrapper<int32_t> col1_0;
-//   column_wrapper<int32_t> col1_1;
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols0.push_back(col0_2.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+  cols1.push_back(col1_2.release());
 
-//   Table t0(std::move(cols0));
-//   Table empty1(std::move(cols1));
-
-//   auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
-// }
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
 
-// TEST_F(JoinTest, EmptyRightTableInnerJoinNonAlignedCommon)
-// {
-//   column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}};
-//   column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-//   column_wrapper<int32_t> col1_0;
+  auto result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
+  auto result_sort_order = cudf::sorted_order(result->view());
+  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
-//   CVector cols0, cols1;
-//   cols0.emplace_back(col0_0.release());
-//   cols0.emplace_back(col0_1.release());
-//   cols1.emplace_back(col1_0.release());
+  column_wrapper<int32_t> col_gold_0 {{  3,    2}};
+  strcol_wrapper          col_gold_1 ({"s1", "s0"},
+                                      {  1,    0});
+  column_wrapper<int32_t> col_gold_2{{   0,    2}};
+  column_wrapper<int32_t> col_gold_3 {{  3,    2}};
+  strcol_wrapper          col_gold_4 ({"s1", "s0"},
+                                      {  1,    0});
+  column_wrapper<int32_t> col_gold_5{{   1,    0}};
+  CVector cols_gold;
+  cols_gold.push_back(col_gold_0.release());
+  cols_gold.push_back(col_gold_1.release());
+  cols_gold.push_back(col_gold_2.release());
+  cols_gold.push_back(col_gold_3.release());
+  cols_gold.push_back(col_gold_4.release());
+  cols_gold.push_back(col_gold_5.release());
+  
+  Table gold(std::move(cols_gold));
 
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
+  auto gold_sort_order = cudf::sorted_order(gold.view());
+  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
 
-//   column_wrapper<int32_t> col_gold_0;
-//   column_wrapper<int32_t> col_gold_1;
+  // Repeat test with compare_nulls_equal=false,
+  // as per SQL standard.
 
-//   CVector cols_gold;
-//   cols_gold.emplace_back(col_gold_0.release());
-//   cols_gold.emplace_back(col_gold_1.release());
+  result            = cudf::inner_join(t0, t1, {0, 1}, {0, 1},  cudf::null_equality::UNEQUAL);
+  result_sort_order = cudf::sorted_order(result->view());
+  sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
-//   Table gold(std::move(cols_gold));
+  col_gold_0 =               {{  3}};
+  col_gold_1 = strcol_wrapper({"s1"},
+                              {  1});
+  col_gold_2 =               {{  0}};
+  col_gold_3 =               {{  3}};
+  col_gold_4 = strcol_wrapper({"s1"},
+                              {  1});
+  col_gold_5 =               {{  1}};
 
-//   auto result = cudf::inner_join(t0, t1, {1}, {0}, {{1, 0}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
-// }
+  // clang-format on
 
-// TEST_F(JoinTest, EmptyRightTableLeftJoin)
-// {
-//   column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}, {1, 1, 1, 1, 1}};
-//   column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+  CVector cols_gold_sql;
+  cols_gold_sql.push_back(col_gold_0.release());
+  cols_gold_sql.push_back(col_gold_1.release());
+  cols_gold_sql.push_back(col_gold_2.release());
+  cols_gold_sql.push_back(col_gold_3.release());
+  cols_gold_sql.push_back(col_gold_4.release());
+  cols_gold_sql.push_back(col_gold_5.release());
+  Table gold_sql(std::move(cols_gold_sql));
+
+  gold_sort_order = cudf::sorted_order(gold_sql.view());
+  sorted_gold     = cudf::gather(gold_sql.view(), *gold_sort_order);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
+}
 
-//   column_wrapper<int32_t> col1_0;
-//   column_wrapper<int32_t> col1_1;
+// Empty Left Table
+TEST_F(JoinTest, EmptyLeftTableInnerJoin)
+{
+  column_wrapper<int32_t> col0_0;
+  column_wrapper<int32_t> col0_1;
 
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-//   Table t0(std::move(cols0));
-//   Table empty1(std::move(cols1));
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
 
-//   auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(t0, *result);
-// }
+  Table empty0(std::move(cols0));
+  Table t1(std::move(cols1));
 
-// TEST_F(JoinTest, EmptyRightTableFullJoin)
-// {
-//   column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}};
-//   column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-
-//   column_wrapper<int32_t> col1_0;
-//   column_wrapper<int32_t> col1_1;
+  auto result = cudf::inner_join(empty0, t1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty0, *result);
+}
 
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-
-//   Table t0(std::move(cols0));
-//   Table empty1(std::move(cols1));
-
-//   auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(t0, *result);
-// }
+TEST_F(JoinTest, EmptyLeftTableLeftJoin)
+{
+  column_wrapper<int32_t> col0_0;
+  column_wrapper<int32_t> col0_1;
 
-// // Both tables empty
-// TEST_F(JoinTest, BothEmptyInnerJoin)
-// {
-//   column_wrapper<int32_t> col0_0;
-//   column_wrapper<int32_t> col0_1;
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-//   column_wrapper<int32_t> col1_0;
-//   column_wrapper<int32_t> col1_1;
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
 
-//   Table t0(std::move(cols0));
-//   Table empty1(std::move(cols1));
+  Table empty0(std::move(cols0));
+  Table t1(std::move(cols1));
 
-//   auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
-// }
+  auto result = cudf::left_join(empty0, t1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty0, *result);
+}
 
-// TEST_F(JoinTest, BothEmptyLeftJoin)
-// {
-//   column_wrapper<int32_t> col0_0;
-//   column_wrapper<int32_t> col0_1;
+TEST_F(JoinTest, EmptyLeftTableFullJoin)
+{
+  column_wrapper<int32_t> col0_0;
+  column_wrapper<int32_t> col0_1;
 
-//   column_wrapper<int32_t> col1_0;
-//   column_wrapper<int32_t> col1_1;
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  column_wrapper<int32_t> col1_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
 
-//   Table t0(std::move(cols0));
-//   Table empty1(std::move(cols1));
+  Table lhs(std::move(cols0));
+  Table rhs(std::move(cols1));
 
-//   auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
-// }
+  auto result            = cudf::full_join(lhs, rhs, {0, 1}, {0, 1});
+  auto result_sort_order = cudf::sorted_order(result->view());
+  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
 
-// TEST_F(JoinTest, BothEmptyFullJoin)
-// {
-//   column_wrapper<int32_t> col0_0;
-//   column_wrapper<int32_t> col0_1;
+  column_wrapper<int32_t> col_gold_0{{-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}};
+  column_wrapper<int32_t> col_gold_1{{-1, -1, -1, -1, -1}, {0, 0, 0, 0, 0}};
+  column_wrapper<int32_t> col_gold_2{{2, 2, 0, 4, 3}};
+  column_wrapper<int32_t> col_gold_3{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
 
-//   column_wrapper<int32_t> col1_0;
-//   column_wrapper<int32_t> col1_1;
+  CVector cols_gold;
+  cols_gold.push_back(col_gold_0.release());
+  cols_gold.push_back(col_gold_1.release());
+  cols_gold.push_back(col_gold_2.release());
+  cols_gold.push_back(col_gold_3.release());
+  Table gold(std::move(cols_gold));
 
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-
-//   Table t0(std::move(cols0));
-//   Table empty1(std::move(cols1));
-
-//   auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(empty1, *result);
-// }
+  auto gold_sort_order = cudf::sorted_order(gold.view());
+  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
+}
+
+// Empty Right Table
+TEST_F(JoinTest, EmptyRightTableInnerJoin)
+{
+  column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}};
+  column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+
+  column_wrapper<int32_t> col1_0;
+  column_wrapper<int32_t> col1_1;
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+
+  Table t0(std::move(cols0));
+  Table empty1(std::move(cols1));
+
+  auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result);
+}
+
+TEST_F(JoinTest, EmptyRightTableLeftJoin)
+{
+  column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}, {1, 1, 1, 1, 1}};
+  column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+
+  column_wrapper<int32_t> col1_0;
+  column_wrapper<int32_t> col1_1;
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+
+  Table t0(std::move(cols0));
+  Table empty1(std::move(cols1));
+
+  auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result);
+}
+
+TEST_F(JoinTest, EmptyRightTableFullJoin)
+{
+  column_wrapper<int32_t> col0_0{{2, 2, 0, 4, 3}};
+  column_wrapper<int32_t> col0_1{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+
+  column_wrapper<int32_t> col1_0;
+  column_wrapper<int32_t> col1_1;
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+
+  Table t0(std::move(cols0));
+  Table empty1(std::move(cols1));
+
+  auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t0, *result);
+}
+
+// Both tables empty
+TEST_F(JoinTest, BothEmptyInnerJoin)
+{
+  column_wrapper<int32_t> col0_0;
+  column_wrapper<int32_t> col0_1;
+
+  column_wrapper<int32_t> col1_0;
+  column_wrapper<int32_t> col1_1;
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+
+  Table t0(std::move(cols0));
+  Table empty1(std::move(cols1));
+
+  auto result = cudf::inner_join(t0, empty1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result);
+}
+
+TEST_F(JoinTest, BothEmptyLeftJoin)
+{
+  column_wrapper<int32_t> col0_0;
+  column_wrapper<int32_t> col0_1;
+
+  column_wrapper<int32_t> col1_0;
+  column_wrapper<int32_t> col1_1;
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+
+  Table t0(std::move(cols0));
+  Table empty1(std::move(cols1));
+
+  auto result = cudf::left_join(t0, empty1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result);
+}
+
+TEST_F(JoinTest, BothEmptyFullJoin)
+{
+  column_wrapper<int32_t> col0_0;
+  column_wrapper<int32_t> col0_1;
+
+  column_wrapper<int32_t> col1_0;
+  column_wrapper<int32_t> col1_1;
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+
+  Table t0(std::move(cols0));
+  Table empty1(std::move(cols1));
+
+  auto result = cudf::full_join(t0, empty1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty1, *result);
+}
 
 // // EqualValues X Inner,Left,Full
 
-// TEST_F(JoinTest, EqualValuesInnerJoin)
-// {
-//   column_wrapper<int32_t> col0_0{{0, 0}};
-//   strcol_wrapper col0_1({"s0", "s0"});
+TEST_F(JoinTest, EqualValuesInnerJoin)
+{
+  column_wrapper<int32_t> col0_0{{0, 0}};
+  strcol_wrapper col0_1({"s0", "s0"});
+
+  column_wrapper<int32_t> col1_0{{0, 0}};
+  strcol_wrapper col1_1({"s0", "s0"});
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
+
+  auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
+
+  column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}};
+  strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"});
+  column_wrapper<int32_t> col_gold_2{{0, 0, 0, 0}};
+  strcol_wrapper col_gold_3({"s0", "s0", "s0", "s0"});
+
+  CVector cols_gold;
+  cols_gold.push_back(col_gold_0.release());
+  cols_gold.push_back(col_gold_1.release());
+  cols_gold.push_back(col_gold_2.release());
+  cols_gold.push_back(col_gold_3.release());
+
+  Table gold(std::move(cols_gold));
+
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(gold, *result);
+}
+
+TEST_F(JoinTest, EqualValuesLeftJoin)
+{
+  column_wrapper<int32_t> col0_0{{0, 0}};
+  strcol_wrapper col0_1({"s0", "s0"});
+
+  column_wrapper<int32_t> col1_0{{0, 0}};
+  strcol_wrapper col1_1({"s0", "s0"});
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
+
+  auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1});
+
+  column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}, {1, 1, 1, 1}};
+  strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"}, {1, 1, 1, 1});
+  column_wrapper<int32_t> col_gold_2{{0, 0, 0, 0}, {1, 1, 1, 1}};
+  strcol_wrapper col_gold_3({"s0", "s0", "s0", "s0"}, {1, 1, 1, 1});
+
+  CVector cols_gold;
+  cols_gold.push_back(col_gold_0.release());
+  cols_gold.push_back(col_gold_1.release());
+  cols_gold.push_back(col_gold_2.release());
+  cols_gold.push_back(col_gold_3.release());
+  Table gold(std::move(cols_gold));
+
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(gold, *result);
+}
+
+TEST_F(JoinTest, EqualValuesFullJoin)
+{
+  column_wrapper<int32_t> col0_0{{0, 0}};
+  strcol_wrapper col0_1({"s0", "s0"});
+
+  column_wrapper<int32_t> col1_0{{0, 0}};
+  strcol_wrapper col1_1({"s0", "s0"});
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols0.push_back(col0_1.release());
+  cols1.push_back(col1_0.release());
+  cols1.push_back(col1_1.release());
+
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
+
+  auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1});
+
+  column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}};
+  strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"});
+  column_wrapper<int32_t> col_gold_2{{0, 0, 0, 0}};
+  strcol_wrapper col_gold_3({"s0", "s0", "s0", "s0"});
+
+  CVector cols_gold;
+  cols_gold.push_back(col_gold_0.release());
+  cols_gold.push_back(col_gold_1.release());
+  cols_gold.push_back(col_gold_2.release());
+  cols_gold.push_back(col_gold_3.release());
+  Table gold(std::move(cols_gold));
+
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(gold, *result);
+}
+
+TEST_F(JoinTest, InnerJoinCornerCase)
+{
+  column_wrapper<int64_t> col0_0{{4, 1, 3, 2, 2, 2, 2}};
+  column_wrapper<int64_t> col1_0{{2}};
+
+  CVector cols0, cols1;
+  cols0.push_back(col0_0.release());
+  cols1.push_back(col1_0.release());
+
+  Table t0(std::move(cols0));
+  Table t1(std::move(cols1));
+
+  auto result            = cudf::inner_join(t0, t1, {0}, {0});
+  auto result_sort_order = cudf::sorted_order(result->view());
+  auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
+
+  column_wrapper<int64_t> col_gold_0{{2, 2, 2, 2}};
+  column_wrapper<int64_t> col_gold_1{{2, 2, 2, 2}};
+  CVector cols_gold;
+  cols_gold.push_back(col_gold_0.release());
+  cols_gold.push_back(col_gold_1.release());
+  Table gold(std::move(cols_gold));
+
+  auto gold_sort_order = cudf::sorted_order(gold.view());
+  auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
+}
+
+TEST_F(JoinTest, HashJoinSequentialProbes)
+{
+  CVector cols1;
+  cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3}}.release());
+  cols1.emplace_back(strcol_wrapper{{"s1", "s0", "s1", "s2", "s1"}}.release());
+
+  Table t1(std::move(cols1));
+
+  cudf::hash_join hash_join(t1, cudf::null_equality::EQUAL);
+
+  {
+    CVector cols0;
+    cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
+    cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
+
+    Table t0(std::move(cols0));
+
+    auto result            = hash_join.full_join(t0);
+    auto result_table      = cudf::table_view({result.first->view(), result.second->view()});
+    auto result_sort_order = cudf::sorted_order(result_table);
+    auto sorted_result     = cudf::gather(result_table, *result_sort_order);
+
+    column_wrapper<int32_t> col_gold_0{{NoneValue, NoneValue, NoneValue, NoneValue, 4, 0, 1, 2, 3}};
+    column_wrapper<int32_t> col_gold_1{{0, 1, 2, 3, 4, NoneValue, NoneValue, NoneValue, NoneValue}};
+
+    CVector cols_gold;
+    cols_gold.push_back(col_gold_0.release());
+    cols_gold.push_back(col_gold_1.release());
+
+    Table gold(std::move(cols_gold));
+    auto gold_sort_order = cudf::sorted_order(gold.view());
+    auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
+  }
 
-//   column_wrapper<int32_t> col1_0{{0, 0}};
-//   strcol_wrapper col1_1({"s0", "s0"});
+  {
+    CVector cols0;
+    cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
+    cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
 
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
+    Table t0(std::move(cols0));
 
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-
-//   column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}};
-//   strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"});
-//   CVector cols_gold;
-//   cols_gold.push_back(col_gold_0.release());
-//   cols_gold.push_back(col_gold_1.release());
-//   Table gold(std::move(cols_gold));
+    auto result            = hash_join.left_join(t0);
+    auto result_table      = cudf::table_view({result.first->view(), result.second->view()});
+    auto result_sort_order = cudf::sorted_order(result_table);
+    auto sorted_result     = cudf::gather(result_table, *result_sort_order);
 
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
-// }
-
-// TEST_F(JoinTest, EqualValuesLeftJoin)
-// {
-//   column_wrapper<int32_t> col0_0{{0, 0}};
-//   strcol_wrapper col0_1({"s0", "s0"});
+    column_wrapper<int32_t> col_gold_0{{0, 1, 2, 3, 4}};
+    column_wrapper<int32_t> col_gold_1{{NoneValue, NoneValue, NoneValue, NoneValue, 4}};
 
-//   column_wrapper<int32_t> col1_0{{0, 0}};
-//   strcol_wrapper col1_1({"s0", "s0"});
+    CVector cols_gold;
+    cols_gold.push_back(col_gold_0.release());
+    cols_gold.push_back(col_gold_1.release());
+
+    Table gold(std::move(cols_gold));
+    auto gold_sort_order = cudf::sorted_order(gold.view());
+    auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
+  }
+
+  {
+    CVector cols0;
+    cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
+    cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
+
+    Table t0(std::move(cols0));
+
+    auto result            = hash_join.inner_join(t0);
+    auto result_table      = cudf::table_view({result.first->view(), result.second->view()});
+    auto result_sort_order = cudf::sorted_order(result_table);
+    auto sorted_result     = cudf::gather(result_table, *result_sort_order);
+
+    column_wrapper<int32_t> col_gold_0{{2, 4, 0}};
+    column_wrapper<int32_t> col_gold_1{{1, 1, 4}};
+
+    CVector cols_gold;
+    cols_gold.push_back(col_gold_0.release());
+    cols_gold.push_back(col_gold_1.release());
+
+    Table gold(std::move(cols_gold));
+    auto gold_sort_order = cudf::sorted_order(gold.view());
+    auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
+
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
+  }
+}
+
+struct JoinDictionaryTest : public cudf::test::BaseFixture {
+};
 
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-
-//   column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}, {1, 1, 1, 1}};
-//   strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"}, {1, 1, 1, 1});
-//   CVector cols_gold;
-//   cols_gold.push_back(col_gold_0.release());
-//   cols_gold.push_back(col_gold_1.release());
-//   Table gold(std::move(cols_gold));
-
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
-// }
-
-// TEST_F(JoinTest, EqualValuesFullJoin)
-// {
-//   column_wrapper<int32_t> col0_0{{0, 0}};
-//   strcol_wrapper col0_1({"s0", "s0"});
-
-//   column_wrapper<int32_t> col1_0{{0, 0}};
-//   strcol_wrapper col1_1({"s0", "s0"});
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-
-//   column_wrapper<int32_t> col_gold_0{{0, 0, 0, 0}};
-//   strcol_wrapper col_gold_1({"s0", "s0", "s0", "s0"});
-//   CVector cols_gold;
-//   cols_gold.push_back(col_gold_0.release());
-//   cols_gold.push_back(col_gold_1.release());
-//   Table gold(std::move(cols_gold));
-
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(gold, *result);
-// }
-
-// TEST_F(JoinTest, InnerJoinCornerCase)
-// {
-//   column_wrapper<int64_t> col0_0{{4, 1, 3, 2, 2, 2, 2}};
-//   column_wrapper<int64_t> col1_0{{2}};
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols1.push_back(col1_0.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result            = cudf::inner_join(t0, t1, {0}, {0}, {{0, 0}});
-//   auto result_sort_order = cudf::sorted_order(result->view());
-//   auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-//   column_wrapper<int64_t> col_gold_0{{2, 2, 2, 2}};
-//   CVector cols_gold;
-//   cols_gold.push_back(col_gold_0.release());
-//   Table gold(std::move(cols_gold));
-
-//   auto gold_sort_order = cudf::sorted_order(gold.view());
-//   auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-// }
-
-// // TEST_F(JoinTest, HashJoinSequentialProbes)
-// // {
-// //   CVector cols1;
-// //   cols1.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3}}.release());
-// //   cols1.emplace_back(strcol_wrapper{{"s1", "s0", "s1", "s2", "s1"}}.release());
-// //   cols1.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 1}}.release());
-
-// //   Table t1(std::move(cols1));
-
-// //   cudf::hash_join hash_join(t1, {0, 1}, cudf::null_equality::EQUAL);
-
-// //   {
-// //     CVector cols0;
-// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
-// //     cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
-// //     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-// //     Table t0(std::move(cols0));
-
-// //     auto result            = hash_join.full_join(t0, {0, 1}, {{0, 0}, {1, 1}});
-// //     auto result_sort_order = cudf::sorted_order(result->view());
-// //     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-// //     CVector cols_gold;
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{2, 2, 0, 4, 3, 3, 1, 2, 0}}.release());
-// //     cols_gold.emplace_back(
-// //       strcol_wrapper({"s1", "s0", "s1", "s2", "s1", "s0", "s1", "s2", "s4"}).release());
-// //     cols_gold.emplace_back(
-// //       column_wrapper<int32_t>{{-1, -1, -1, -1, 1, 0, 1, 2, 4}, {0, 0, 0, 0, 1, 1, 1, 1, 1}}
-// //         .release());
-// //     cols_gold.emplace_back(
-// //       column_wrapper<int32_t>{{1, 0, 1, 2, 1, -1, -1, -1, -1}, {1, 1, 1, 1, 1, 0, 0, 0, 0}}
-// //         .release());
-// //     Table gold(std::move(cols_gold));
-
-// //     auto gold_sort_order = cudf::sorted_order(gold.view());
-// //     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-// //     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-// //   }
-
-// //   {
-// //     CVector cols0;
-// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 3}}.release());
-// //     cols0.emplace_back(strcol_wrapper({"s0", "s1", "s2", "s4", "s1"}).release());
-// //     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-// //     Table t0(std::move(cols0));
-
-// //     auto result            = hash_join.left_join(t0, {0, 1}, {{0, 0}, {1, 1}});
-// //     auto result_sort_order = cudf::sorted_order(result->view());
-// //     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-// //     CVector cols_gold;
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 3, 1, 2, 0}, {1, 1, 1, 1,
-// 1}}.release());
-// //     cols_gold.emplace_back(
-// //       strcol_wrapper({"s1", "s0", "s1", "s2", "s4"}, {1, 1, 1, 1, 1, 1}).release());
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 1, 2, 4}, {1, 1, 1, 1,
-// 1}}.release());
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{1, -1, -1, -1, -1}, {1, 0, 0, 0,
-// //     0}}.release()); Table gold(std::move(cols_gold));
-
-// //     auto gold_sort_order = cudf::sorted_order(gold.view());
-// //     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-// //     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-// //   }
-
-// //   {
-// //     CVector cols0;
-// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-// //     cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
-// //     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-// //     Table t0(std::move(cols0));
-
-// //     auto probe_build_pair = hash_join.inner_join(t0, {1, 2}, {{1, 0}, {2, 1}});
-// //     auto joined_cols      = probe_build_pair.first->release();
-// //     auto build_cols       = probe_build_pair.second->release();
-// //     joined_cols.insert(joined_cols.end(),
-// //                        std::make_move_iterator(build_cols.begin()),
-// //                        std::make_move_iterator(build_cols.end()));
-// //     auto result            = std::make_unique<cudf::table>(std::move(joined_cols));
-// //     auto result_sort_order = cudf::sorted_order(result->view());
-// //     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-// //     CVector cols_gold;
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-// //     cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
-// //     Table gold(std::move(cols_gold));
-
-// //     auto gold_sort_order = cudf::sorted_order(gold.view());
-// //     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-// //     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-// //   }
-
-// //   {
-// //     CVector cols0;
-// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-// //     cols0.emplace_back(column_wrapper<int32_t>{{3, 1, 2, 0, 2}}.release());
-// //     cols0.emplace_back(strcol_wrapper({"s1", "s1", "s0", "s4", "s0"}).release());
-// //     cols0.emplace_back(column_wrapper<int32_t>{{0, 1, 2, 4, 1}}.release());
-
-// //     Table t0(std::move(cols0));
-
-// //     auto probe_build_pair = hash_join.inner_join(
-// //       t0, {1, 2}, {{1, 0}, {2, 1}}, cudf::hash_join::common_columns_output_side::BUILD);
-// //     auto joined_cols = probe_build_pair.second->release();
-// //     auto probe_cols  = probe_build_pair.first->release();
-// //     joined_cols.insert(joined_cols.end(),
-// //                        std::make_move_iterator(probe_cols.begin()),
-// //                        std::make_move_iterator(probe_cols.end()));
-// //     auto result            = std::make_unique<cudf::table>(std::move(joined_cols));
-// //     auto result_sort_order = cudf::sorted_order(result->view());
-// //     auto sorted_result     = cudf::gather(result->view(), *result_sort_order);
-
-// //     CVector cols_gold;
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-// //     cols_gold.emplace_back(strcol_wrapper({"s1", "s0", "s0"}).release());
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{1, 0, 0}}.release());
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{3, 2, 2}}.release());
-// //     cols_gold.emplace_back(column_wrapper<int32_t>{{0, 2, 1}}.release());
-// //     Table gold(std::move(cols_gold));
-
-// //     auto gold_sort_order = cudf::sorted_order(gold.view());
-// //     auto sorted_gold     = cudf::gather(gold.view(), *gold_sort_order);
-// //     CUDF_TEST_EXPECT_TABLES_EQUAL(*sorted_gold, *sorted_result);
-// //   }
-// // }
-
-// struct JoinDictionaryTest : public cudf::test::BaseFixture {
-// };
-
-// TEST_F(JoinDictionaryTest, LeftJoinNoNulls)
-// {
-//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
-//   strcol_wrapper col0_1_w({"s0", "s1", "s2", "s4", "s1"});
-//   auto col0_1 = cudf::dictionary::encode(col0_1_w);
-//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   strcol_wrapper col1_1_w{{"s1", "s0", "s1", "s2", "s1"}};
-//   auto col1_1 = cudf::dictionary::encode(col1_1_w);
-//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-//   auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
-//   auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
-//   auto g0 = cudf::table_view({col0_0, col0_1_w, col0_2});
-//   auto g1 = cudf::table_view({col1_0, col1_1_w, col1_2});
-//   {
-//     auto result =
-//       cudf::left_join(t0, t1, {0}, {0}, std::vector<std::pair<cudf::size_type,
-//       cudf::size_type>>{});
-//     auto result_view = result->view();
-//     auto decoded1    = cudf::dictionary::decode(result_view.column(1));
-//     auto decoded4    = cudf::dictionary::decode(result_view.column(4));
-//     std::vector<cudf::column_view> result_decoded({result_view.column(0),
-//                                                    decoded1->view(),
-//                                                    result_view.column(2),
-//                                                    result_view.column(3),
-//                                                    decoded4->view(),
-//                                                    result_view.column(5)});
-
-//     auto gold =
-//       cudf::left_join(g0, g1, {0}, {0}, std::vector<std::pair<cudf::size_type,
-//       cudf::size_type>>{});
-//     CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
-//   }
-//   {
-//     auto result      = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//     auto result_view = result->view();
-//     auto decoded1    = cudf::dictionary::decode(result_view.column(1));
-//     std::vector<cudf::column_view> result_decoded(
-//       {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)});
-
-//     auto gold = cudf::left_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//     CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
-//   }
-// }
-
-// TEST_F(JoinDictionaryTest, LeftJoinWithNulls)
-// {
-//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-//   strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
-//   column_wrapper<int32_t> col0_2_w{{0, 1, 2, 4, 1}};
-//   auto col0_2 = cudf::dictionary::encode(col0_2_w);
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-//   column_wrapper<int32_t> col1_2_w{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-//   auto col1_2 = cudf::dictionary::encode(col1_2_w);
-
-//   auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()});
-//   auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()});
-
-//   auto result      = cudf::left_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   auto result_view = result->view();
-//   auto decoded2    = cudf::dictionary::decode(result_view.column(2));
-//   auto decoded3    = cudf::dictionary::decode(result_view.column(3));
-//   std::vector<cudf::column_view> result_decoded(
-//     {result_view.column(0), result_view.column(1), decoded2->view(), decoded3->view()});
-
-//   auto g0   = cudf::table_view({col0_0, col0_1, col0_2_w});
-//   auto g1   = cudf::table_view({col1_0, col1_1, col1_2_w});
-//   auto gold = cudf::left_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
-// }
-
-// TEST_F(JoinDictionaryTest, InnerJoinNoNulls)
-// {
-//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-//   strcol_wrapper col0_1_w({"s1", "s1", "s0", "s4", "s0"});
-//   auto col0_1 = cudf::dictionary::encode(col0_1_w);
-//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   strcol_wrapper col1_1_w({"s1", "s0", "s1", "s2", "s1"});
-//   auto col1_1 = cudf::dictionary::encode(col1_1_w);
-//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-//   auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
-//   auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
-
-//   auto result      = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   auto result_view = result->view();
-//   auto decoded1    = cudf::dictionary::decode(result_view.column(1));
-//   std::vector<cudf::column_view> result_decoded(
-//     {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)});
-
-//   auto g0   = cudf::table_view({col0_0, col0_1_w, col0_2});
-//   auto g1   = cudf::table_view({col1_0, col1_1_w, col1_2});
-//   auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
-// }
-
-// TEST_F(JoinDictionaryTest, InnerJoinWithNulls)
-// {
-//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-//   strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
-//   column_wrapper<int32_t> col0_2_w{{0, 1, 2, 4, 1}};
-//   auto col0_2 = cudf::dictionary::encode(col0_2_w);
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-//   column_wrapper<int32_t> col1_2_w{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
-//   auto col1_2 = cudf::dictionary::encode(col1_2_w);
-
-//   auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()});
-//   auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()});
-
-//   auto result      = cudf::inner_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   auto result_view = result->view();
-//   auto decoded2    = cudf::dictionary::decode(result_view.column(2));
-//   auto decoded3    = cudf::dictionary::decode(result_view.column(3));
-//   std::vector<cudf::column_view> result_decoded(
-//     {result_view.column(0), result_view.column(1), decoded2->view(), decoded3->view()});
-
-//   auto g0   = cudf::table_view({col0_0, col0_1, col0_2_w});
-//   auto g1   = cudf::table_view({col1_0, col1_1, col1_2_w});
-//   auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
-// }
-
-// TEST_F(JoinDictionaryTest, FullJoinNoNulls)
-// {
-//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
-//   strcol_wrapper col0_1_w({"s0", "s1", "s2", "s4", "s1"});
-//   auto col0_1 = cudf::dictionary::encode(col0_1_w);
-//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   strcol_wrapper col1_1_w{{"s1", "s0", "s1", "s2", "s1"}};
-//   auto col1_1 = cudf::dictionary::encode(col1_1_w);
-//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-//   auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
-//   auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
-
-//   auto result      = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   auto result_view = result->view();
-//   auto decoded1    = cudf::dictionary::decode(result_view.column(1));
-//   std::vector<cudf::column_view> result_decoded(
-//     {result_view.column(0), decoded1->view(), result_view.column(2), result_view.column(3)});
-
-//   auto g0   = cudf::table_view({col0_0, col0_1_w, col0_2});
-//   auto g1   = cudf::table_view({col1_0, col1_1_w, col1_2});
-//   auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
-// }
-
-// TEST_F(JoinDictionaryTest, FullJoinWithNulls)
-// {
-//   column_wrapper<int32_t> col0_0_w{{3, 1, 2, 0, 3}};
-//   auto col0_0 = cudf::dictionary::encode(col0_0_w);
-//   strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
-//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-//   column_wrapper<int32_t> col1_0_w{{2, 2, 0, 4, 3}, {1, 1, 1, 0, 1}};
-//   auto col1_0 = cudf::dictionary::encode(col1_0_w);
-//   strcol_wrapper col1_1{{"s1", "s0", "s1", "s2", "s1"}};
-//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-//   auto t0 = cudf::table_view({col0_0->view(), col0_1, col0_2});
-//   auto t1 = cudf::table_view({col1_0->view(), col1_1, col1_2});
-
-//   auto result      = cudf::full_join(t0, t1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   auto result_view = result->view();
-//   auto decoded0    = cudf::dictionary::decode(result_view.column(0));
-//   std::vector<cudf::column_view> result_decoded(
-//     {decoded0->view(), result_view.column(1), result_view.column(2), result_view.column(3)});
-
-//   auto g0   = cudf::table_view({col0_0_w, col0_1, col0_2});
-//   auto g1   = cudf::table_view({col1_0_w, col1_1, col1_2});
-//   auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1}, {{0, 0}, {1, 1}});
-//   CUDF_TEST_EXPECT_TABLES_EQUAL(*gold, cudf::table_view(result_decoded));
-// }
-
-// TEST_F(JoinTest, InnerJoinGathermap)
-// {
-//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
-//   strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"});
-//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols0.push_back(col0_2.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-//   cols1.push_back(col1_2.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result          = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
-//   auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
-//   auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
-//   auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}),
-//   *lmap_sort_order); auto rmap_sorted     =
-//   cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
-
-//   column_wrapper<int32_t> lmap_gold{{0, 2, 4}};
-//   column_wrapper<int32_t> rmap_gold{{1, 1, 4}};
-//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
-//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
-// }
-
-// TEST_F(JoinTest, LeftJoinGathermap)
-// {
-//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
-//   strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
-//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
-//   strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
-//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols0.push_back(col0_2.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-//   cols1.push_back(col1_2.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result          = cudf::left_join(t0, t1, {0, 1}, {0, 1});
-//   auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
-//   auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
-//   auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}),
-//   *lmap_sort_order); auto rmap_sorted     =
-//   cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
-
-//   column_wrapper<int32_t> lmap_gold{{0, 1, 2, 3, 4}};
-//   column_wrapper<int32_t> rmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 4}};
-
-//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
-//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
-// }
-
-// TEST_F(JoinTest, FullJoinGatherMap)
-// {
-//   column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
-//   strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
-//   column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
-
-//   column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}, {1, 1, 1, 0, 1}};
-//   strcol_wrapper col1_1{{"s1", "s0", "s1", "s2", "s1"}};
-//   column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
-
-//   CVector cols0, cols1;
-//   cols0.push_back(col0_0.release());
-//   cols0.push_back(col0_1.release());
-//   cols0.push_back(col0_2.release());
-//   cols1.push_back(col1_0.release());
-//   cols1.push_back(col1_1.release());
-//   cols1.push_back(col1_2.release());
-
-//   Table t0(std::move(cols0));
-//   Table t1(std::move(cols1));
-
-//   auto result          = cudf::full_join(t0, t1, {0, 1}, {0, 1});
-//   auto lmap_sort_order = cudf::sorted_order(cudf::table_view({result.first->view()}));
-//   auto rmap_sort_order = cudf::sorted_order(cudf::table_view({result.second->view()}));
-//   auto lmap_sorted     = cudf::gather(cudf::table_view({result.first->view()}),
-//   *lmap_sort_order); auto rmap_sorted     =
-//   cudf::gather(cudf::table_view({result.second->view()}), *rmap_sort_order);
-
-//   column_wrapper<int32_t> lmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3, 4}};
-//   column_wrapper<int32_t> rmap_gold{{NoneValue, NoneValue, NoneValue, NoneValue, 0, 1, 2, 3, 4}};
-
-//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(lmap_sorted->view().column(0), lmap_gold);
-//   CUDF_TEST_EXPECT_COLUMNS_EQUAL(rmap_sorted->view().column(0), rmap_gold);
-// }
+TEST_F(JoinDictionaryTest, LeftJoinNoNulls)
+{
+  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
+  strcol_wrapper col0_1_w({"s0", "s1", "s2", "s4", "s1"});
+  auto col0_1 = cudf::dictionary::encode(col0_1_w);
+  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  strcol_wrapper col1_1_w{{"s1", "s0", "s1", "s2", "s1"}};
+  auto col1_1 = cudf::dictionary::encode(col1_1_w);
+  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+  auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
+  auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
+  auto g0 = cudf::table_view({col0_0, col0_1_w, col0_2});
+  auto g1 = cudf::table_view({col1_0, col1_1_w, col1_2});
+  {
+    auto result      = cudf::left_join(t0, t1, {0}, {0});
+    auto result_view = result->view();
+    auto decoded1    = cudf::dictionary::decode(result_view.column(1));
+    auto decoded4    = cudf::dictionary::decode(result_view.column(4));
+    std::vector<cudf::column_view> result_decoded({result_view.column(0),
+                                                   decoded1->view(),
+                                                   result_view.column(2),
+                                                   result_view.column(3),
+                                                   decoded4->view(),
+                                                   result_view.column(5)});
+
+    auto gold = cudf::left_join(g0, g1, {0}, {0});
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
+  }
+}
+
+TEST_F(JoinDictionaryTest, LeftJoinWithNulls)
+{
+  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+  strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
+  column_wrapper<int32_t> col0_2_w{{0, 1, 2, 4, 1}};
+  auto col0_2 = cudf::dictionary::encode(col0_2_w);
+
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+  column_wrapper<int32_t> col1_2_w{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+  auto col1_2 = cudf::dictionary::encode(col1_2_w);
+
+  auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()});
+  auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()});
+
+  auto result      = cudf::left_join(t0, t1, {0, 1}, {0, 1});
+  auto result_view = result->view();
+  auto decoded2    = cudf::dictionary::decode(result_view.column(2));
+  auto decoded5    = cudf::dictionary::decode(result_view.column(5));
+  std::vector<cudf::column_view> result_decoded({result_view.column(0),
+                                                 result_view.column(1),
+                                                 decoded2->view(),
+                                                 result_view.column(3),
+                                                 result_view.column(4),
+                                                 decoded5->view()});
+
+  auto g0   = cudf::table_view({col0_0, col0_1, col0_2_w});
+  auto g1   = cudf::table_view({col1_0, col1_1, col1_2_w});
+  auto gold = cudf::left_join(g0, g1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
+}
+
+TEST_F(JoinDictionaryTest, InnerJoinNoNulls)
+{
+  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+  strcol_wrapper col0_1_w({"s1", "s1", "s0", "s4", "s0"});
+  auto col0_1 = cudf::dictionary::encode(col0_1_w);
+  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  strcol_wrapper col1_1_w({"s1", "s0", "s1", "s2", "s1"});
+  auto col1_1 = cudf::dictionary::encode(col1_1_w);
+  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+  auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
+  auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
+
+  auto result      = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
+  auto result_view = result->view();
+  auto decoded1    = cudf::dictionary::decode(result_view.column(1));
+  auto decoded4    = cudf::dictionary::decode(result_view.column(4));
+  std::vector<cudf::column_view> result_decoded({result_view.column(0),
+                                                 decoded1->view(),
+                                                 result_view.column(2),
+                                                 result_view.column(3),
+                                                 decoded4->view(),
+                                                 result_view.column(5)});
+
+  auto g0   = cudf::table_view({col0_0, col0_1_w, col0_2});
+  auto g1   = cudf::table_view({col1_0, col1_1_w, col1_2});
+  auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
+}
+
+TEST_F(JoinDictionaryTest, InnerJoinWithNulls)
+{
+  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 2}};
+  strcol_wrapper col0_1({"s1", "s1", "s0", "s4", "s0"}, {1, 1, 0, 1, 1});
+  column_wrapper<int32_t> col0_2_w{{0, 1, 2, 4, 1}};
+  auto col0_2 = cudf::dictionary::encode(col0_2_w);
+
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  strcol_wrapper col1_1({"s1", "s0", "s1", "s2", "s1"});
+  column_wrapper<int32_t> col1_2_w{{1, 0, 1, 2, 1}, {1, 0, 1, 1, 1}};
+  auto col1_2 = cudf::dictionary::encode(col1_2_w);
+
+  auto t0 = cudf::table_view({col0_0, col0_1, col0_2->view()});
+  auto t1 = cudf::table_view({col1_0, col1_1, col1_2->view()});
+
+  auto result      = cudf::inner_join(t0, t1, {0, 1}, {0, 1});
+  auto result_view = result->view();
+  auto decoded2    = cudf::dictionary::decode(result_view.column(2));
+  auto decoded5    = cudf::dictionary::decode(result_view.column(5));
+  std::vector<cudf::column_view> result_decoded({result_view.column(0),
+                                                 result_view.column(1),
+                                                 decoded2->view(),
+                                                 result_view.column(3),
+                                                 result_view.column(4),
+                                                 decoded5->view()});
+
+  auto g0   = cudf::table_view({col0_0, col0_1, col0_2_w});
+  auto g1   = cudf::table_view({col1_0, col1_1, col1_2_w});
+  auto gold = cudf::inner_join(g0, g1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
+}
+
+TEST_F(JoinDictionaryTest, FullJoinNoNulls)
+{
+  column_wrapper<int32_t> col0_0{{3, 1, 2, 0, 3}};
+  strcol_wrapper col0_1_w({"s0", "s1", "s2", "s4", "s1"});
+  auto col0_1 = cudf::dictionary::encode(col0_1_w);
+  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+  column_wrapper<int32_t> col1_0{{2, 2, 0, 4, 3}};
+  strcol_wrapper col1_1_w{{"s1", "s0", "s1", "s2", "s1"}};
+  auto col1_1 = cudf::dictionary::encode(col1_1_w);
+  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+  auto t0 = cudf::table_view({col0_0, col0_1->view(), col0_2});
+  auto t1 = cudf::table_view({col1_0, col1_1->view(), col1_2});
+
+  auto result      = cudf::full_join(t0, t1, {0, 1}, {0, 1});
+  auto result_view = result->view();
+  auto decoded1    = cudf::dictionary::decode(result_view.column(1));
+  auto decoded4    = cudf::dictionary::decode(result_view.column(4));
+  std::vector<cudf::column_view> result_decoded({result_view.column(0),
+                                                 decoded1->view(),
+                                                 result_view.column(2),
+                                                 result_view.column(3),
+                                                 decoded4->view(),
+                                                 result_view.column(5)});
+
+  auto g0   = cudf::table_view({col0_0, col0_1_w, col0_2});
+  auto g1   = cudf::table_view({col1_0, col1_1_w, col1_2});
+  auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
+}
+
+TEST_F(JoinDictionaryTest, FullJoinWithNulls)
+{
+  column_wrapper<int32_t> col0_0_w{{3, 1, 2, 0, 3}};
+  auto col0_0 = cudf::dictionary::encode(col0_0_w);
+  strcol_wrapper col0_1({"s0", "s1", "s2", "s4", "s1"});
+  column_wrapper<int32_t> col0_2{{0, 1, 2, 4, 1}};
+
+  column_wrapper<int32_t> col1_0_w{{2, 2, 0, 4, 3}, {1, 1, 1, 0, 1}};
+  auto col1_0 = cudf::dictionary::encode(col1_0_w);
+  strcol_wrapper col1_1{{"s1", "s0", "s1", "s2", "s1"}};
+  column_wrapper<int32_t> col1_2{{1, 0, 1, 2, 1}};
+
+  auto t0 = cudf::table_view({col0_0->view(), col0_1, col0_2});
+  auto t1 = cudf::table_view({col1_0->view(), col1_1, col1_2});
+
+  auto result      = cudf::full_join(t0, t1, {0, 1}, {0, 1});
+  auto result_view = result->view();
+  auto decoded0    = cudf::dictionary::decode(result_view.column(0));
+  auto decoded3    = cudf::dictionary::decode(result_view.column(3));
+  std::vector<cudf::column_view> result_decoded({decoded0->view(),
+                                                 result_view.column(1),
+                                                 result_view.column(2),
+                                                 decoded3->view(),
+                                                 result_view.column(4),
+                                                 result_view.column(5)});
+
+  auto g0   = cudf::table_view({col0_0_w, col0_1, col0_2});
+  auto g1   = cudf::table_view({col1_0_w, col1_1, col1_2});
+  auto gold = cudf::full_join(g0, g1, {0, 1}, {0, 1});
+  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*gold, cudf::table_view(result_decoded));
+}
 
 CUDF_TEST_PROGRAM_MAIN()

From be560bbc3f58d50e3463b7fefae65e5e42b455c6 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 18 Feb 2021 13:31:35 -0500
Subject: [PATCH 045/138] Python regressions

---
 cpp/benchmarks/join/join_benchmark.cu         |  8 +--
 python/cudf/cudf/_lib/cpp/join.pxd            | 30 ++++--------
 .../cudf/cudf/_lib/cpp/table/table_view.pxd   |  1 +
 python/cudf/cudf/_lib/join.pyx                | 49 +++++++++----------
 4 files changed, 36 insertions(+), 52 deletions(-)

diff --git a/cpp/benchmarks/join/join_benchmark.cu b/cpp/benchmarks/join/join_benchmark.cu
index bd013afc451..fa6afdd908c 100644
--- a/cpp/benchmarks/join/join_benchmark.cu
+++ b/cpp/benchmarks/join/join_benchmark.cu
@@ -105,12 +105,8 @@ static void BM_join(benchmark::State &state)
   for (auto _ : state) {
     cuda_event_timer raii(state, true, 0);
 
-    auto result = cudf::inner_join(probe_table,
-                                   build_table,
-                                   columns_to_join,
-                                   columns_to_join,
-                                   {{0, 0}},
-                                   cudf::null_equality::UNEQUAL);
+    auto result = cudf::inner_join(
+      probe_table, build_table, columns_to_join, columns_to_join, cudf::null_equality::UNEQUAL);
   }
 }
 
diff --git a/python/cudf/cudf/_lib/cpp/join.pxd b/python/cudf/cudf/_lib/cpp/join.pxd
index 55180e2b74e..6ebde3934c3 100644
--- a/python/cudf/cudf/_lib/cpp/join.pxd
+++ b/python/cudf/cudf/_lib/cpp/join.pxd
@@ -14,36 +14,26 @@ from cudf._lib.cpp.table.table_view cimport table_view
 
 cdef extern from "cudf/join.hpp" namespace "cudf" nogil:
     cdef pair[unique_ptr[column], unique_ptr[column]] inner_join(
-        const table_view left,
-        const table_view right,
-        const vector[int] left_on,
-        const vector[int] right_on
+        const table_view left_keys,
+        const table_view right_keys,
     ) except +
 
     cdef pair[unique_ptr[column], unique_ptr[column]] left_join(
-        const table_view left,
-        const table_view right,
-        const vector[int] left_on,
-        const vector[int] right_on
+        const table_view left_keys,
+        const table_view right_keys,
     ) except +
 
     cdef pair[unique_ptr[column], unique_ptr[column]] full_join(
-        const table_view left,
-        const table_view right,
-        const vector[int] left_on,
-        const vector[int] right_on
+        const table_view left_keys,
+        const table_view right_keys,
     ) except +
 
     cdef unique_ptr[column] left_semi_join(
-        const table_view left,
-        const table_view right,
-        const vector[int] left_on,
-        const vector[int] right_on,
+        const table_view left_keys,
+        const table_view right_keys,
     ) except +
 
     cdef unique_ptr[column] left_anti_join(
-        const table_view left,
-        const table_view right,
-        const vector[int] left_on,
-        const vector[int] right_on,
+        const table_view left_keys,
+        const table_view right_keys,
     ) except +
diff --git a/python/cudf/cudf/_lib/cpp/table/table_view.pxd b/python/cudf/cudf/_lib/cpp/table/table_view.pxd
index 2f386d337cd..7bbfa69836c 100644
--- a/python/cudf/cudf/_lib/cpp/table/table_view.pxd
+++ b/python/cudf/cudf/_lib/cpp/table/table_view.pxd
@@ -15,6 +15,7 @@ cdef extern from "cudf/table/table_view.hpp" namespace "cudf" nogil:
         column_view column(size_type column_index) except +
         size_type num_columns() except +
         size_type num_rows() except +
+        table_view select(vector[size_type] column_indices) except +
 
     cdef cppclass mutable_table_view:
         mutable_table_view() except +
diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 41b59e3d2e7..f31e75f94a8 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -13,6 +13,7 @@ from cudf._lib.column cimport Column
 from cudf._lib.table cimport Table, columns_from_ptr
 
 from cudf._lib.cpp.column.column cimport column
+from cudf._lib.cpp.types cimport size_type
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 cimport cudf._lib.cpp.join as cpp_join
@@ -20,32 +21,32 @@ cimport cudf._lib.cpp.join as cpp_join
 
 cpdef join(Table lhs, Table rhs, left_on, right_on, how=None):
     # left, inner and outer join
-    cdef vector[int] c_left_on = left_on
-    cdef vector[int] c_right_on = right_on
+    cdef vector[size_type] c_left_on = left_on
+    cdef vector[size_type] c_right_on = right_on
     cdef pair[unique_ptr[column], unique_ptr[column]] c_result
-    cdef table_view c_lhs = lhs.view()
-    cdef table_view c_rhs = rhs.view()
+    cdef table_view c_lhs = lhs.view().select(c_left_on)
+    cdef table_view c_rhs = rhs.view().select(c_right_on)
 
     if how == "inner":
-        c_result = move(cpp_join.inner_join(
-            c_lhs,
-            c_rhs,
-            c_left_on,
-            c_right_on,
-        ))
+        if c_lhs.num_rows() < c_rhs.num_rows():
+            c_result = move(cpp_join.inner_join(
+                c_rhs,
+                c_lhs
+            ))
+        else:
+            c_result = move(cpp_join.inner_join(
+                c_lhs,
+                c_rhs
+            ))
     elif how == "left":
         c_result = move(cpp_join.left_join(
             c_lhs,
-            c_rhs,
-            c_left_on,
-            c_right_on,
+            c_rhs
         ))
     elif how == "outer":
         c_result = move(cpp_join.full_join(
             c_lhs,
-            c_rhs,
-            c_left_on,
-            c_right_on
+            c_rhs
         ))
     else:
         raise ValueError(f"Unkown join type {how}")
@@ -57,25 +58,21 @@ cpdef join(Table lhs, Table rhs, left_on, right_on, how=None):
 
 cpdef semi_join(Table lhs, Table rhs, left_on, right_on, how=None):
     # left-semi and left-anti joins
-    cdef vector[int] c_left_on = left_on
-    cdef vector[int] c_right_on = right_on
+    cdef vector[size_type] c_left_on = left_on
+    cdef vector[size_type] c_right_on = right_on
     cdef unique_ptr[column] c_result
-    cdef table_view c_lhs = lhs.view()
-    cdef table_view c_rhs = rhs.view()
+    cdef table_view c_lhs = lhs.view().select(c_left_on)
+    cdef table_view c_rhs = rhs.view().select(c_right_on)
 
     if how == "leftsemi":
         c_result = move(cpp_join.left_semi_join(
             c_lhs,
-            c_rhs,
-            c_left_on,
-            c_right_on
+            c_rhs
         ))
     elif how == "leftanti":
         c_result = move(cpp_join.left_anti_join(
             c_lhs,
-            c_rhs,
-            c_left_on,
-            c_right_on
+            c_rhs
         ))
     else:
         raise ValueError(f"Invalid join type {how}")

From efb60d6c01d7113152a188ab6d22de050b9cc175 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 18 Feb 2021 14:32:08 -0500
Subject: [PATCH 046/138] Revert

---
 python/cudf/cudf/_lib/join.pyx | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index f31e75f94a8..4cf07dd7e99 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -28,16 +28,10 @@ cpdef join(Table lhs, Table rhs, left_on, right_on, how=None):
     cdef table_view c_rhs = rhs.view().select(c_right_on)
 
     if how == "inner":
-        if c_lhs.num_rows() < c_rhs.num_rows():
-            c_result = move(cpp_join.inner_join(
-                c_rhs,
-                c_lhs
-            ))
-        else:
-            c_result = move(cpp_join.inner_join(
-                c_lhs,
-                c_rhs
-            ))
+        c_result = move(cpp_join.inner_join(
+            c_lhs,
+            c_rhs
+        ))
     elif how == "left":
         c_result = move(cpp_join.left_join(
             c_lhs,

From fe6d0b8e4d89a87b48c482a5441e9b0b16f0447d Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 18 Feb 2021 14:32:58 -0500
Subject: [PATCH 047/138] Invalid -> Unkown

---
 python/cudf/cudf/_lib/join.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 4cf07dd7e99..20c14f00957 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -43,7 +43,7 @@ cpdef join(Table lhs, Table rhs, left_on, right_on, how=None):
             c_rhs
         ))
     else:
-        raise ValueError(f"Unkown join type {how}")
+        raise ValueError(f"Invalid join type {how}")
     return (
         Column.from_unique_ptr(move(c_result.first)),
         Column.from_unique_ptr(move(c_result.second))

From 547027c39e7ab9992ae60b7859f7a8e563504815 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 18 Feb 2021 15:28:08 -0500
Subject: [PATCH 048/138] Don't mutate lhs/rhs

---
 python/cudf/cudf/core/join/join.py | 149 +++++++++++++----------------
 1 file changed, 66 insertions(+), 83 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 41830b7a80f..72d6d8588c3 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -9,61 +9,37 @@
 )
 
 
-class _MISSING_TYPE:
-    pass
-
-
-MISSING = _MISSING_TYPE()
-
-
 class ColumnView:
-    # A ColumnView represents one column of a Series
-    # or DataFrame - either an index column or a
-    # data column
-
-    # we need a different sentinel value than `None`
-    # because `None` is totally a valid index/column name
-    def __init__(self, obj, column=MISSING, index=MISSING):
-        self.obj = obj
+    def __init__(self, name, column=False, index=False):
+        self.name = name
         self.column, self.index = column, index
 
-    def get_numeric_index(self):
+    def get_numeric_index(self, obj):
         # get the position of the column (including any index columns)
-        if self.index is MISSING:
-            index_nlevels = (
-                self.obj.index.nlevels if self.obj._index is not None else 0
-            )
-            return index_nlevels + tuple(self.obj._data).index(self.column)
+        if self.column:
+            index_nlevels = obj.index.nlevels if obj._index is not None else 0
+            return index_nlevels + tuple(obj._data).index(self.name)
         else:
-            return self.obj.index.names.index(self.index)
+            return obj.index.names.index(self.name)
 
     @property
     def is_index_level(self):
         # True if this is an index column
-        return self.index is not MISSING
+        return self.index
 
-    @property
-    def name(self):
-        # get the name of the column
-        if self.index is MISSING:
-            return self.column
-        else:
-            return self.index
-
-    @property
-    def value(self):
+    def value(self, obj):
         # get the column
-        if self.index is MISSING:
-            return self.obj._data[self.name]
+        if self.column:
+            return obj._data[self.name]
         else:
-            return self.obj._index._data[self.name]
+            return obj._index._data[self.name]
 
-    def set_value(self, value):
+    def set_value(self, obj, value):
         # set the colum
-        if self.index is MISSING:
-            self.obj._data[self.name] = value
+        if self.column:
+            obj._data[self.name] = value
         else:
-            self.obj._index._data[self.name] = value
+            obj._index._data[self.name] = value
 
 
 JoinKeys = namedtuple("JoinKeys", ["left", "right"])
@@ -234,7 +210,7 @@ def compute_join_keys(self):
             if self.left_index:
                 left_keys.extend(
                     [
-                        ColumnView(obj=self.lhs, index=on)
+                        ColumnView(name=on, index=True)
                         for on in self.lhs.index.names
                     ]
                 )
@@ -242,14 +218,14 @@ def compute_join_keys(self):
                 # TODO: require left_on or left_index to be specified
                 left_keys.extend(
                     [
-                        ColumnView(obj=self.lhs, column=on)
+                        ColumnView(name=on, column=True)
                         for on in _coerce_to_tuple(self.left_on)
                     ]
                 )
             if self.right_index:
                 right_keys.extend(
                     [
-                        ColumnView(obj=self.rhs, index=on)
+                        ColumnView(name=on, index=True)
                         for on in self.rhs.index.names
                     ]
                 )
@@ -257,7 +233,7 @@ def compute_join_keys(self):
                 # TODO: require right_on or right_index to be specified
                 right_keys.extend(
                     [
-                        ColumnView(obj=self.rhs, column=on)
+                        ColumnView(name=on, column=True)
                         for on in _coerce_to_tuple(self.right_on)
                     ]
                 )
@@ -269,12 +245,8 @@ def compute_join_keys(self):
                 if self.on is not None
                 else set(self.lhs._data.keys()) & set(self.rhs._data.keys())
             )
-            left_keys = [
-                ColumnView(obj=self.lhs, column=on) for on in on_names
-            ]
-            right_keys = [
-                ColumnView(obj=self.rhs, column=on) for on in on_names
-            ]
+            left_keys = [ColumnView(name=on, column=True) for on in on_names]
+            right_keys = [ColumnView(name=on, column=True) for on in on_names]
 
         if len(left_keys) != len(right_keys):
             raise ValueError(
@@ -284,36 +256,42 @@ def compute_join_keys(self):
         self._keys = JoinKeys(left=left_keys, right=right_keys)
 
     def perform_merge(self):
-        self.match_key_dtypes(_input_to_libcudf_castrules_any)
+        lhs, rhs = self.match_key_dtypes(
+            self.lhs, self.rhs, _input_to_libcudf_castrules_any
+        )
 
-        left_key_indices = [key.get_numeric_index() for key in self._keys.left]
+        left_key_indices = [
+            key.get_numeric_index(lhs) for key in self._keys.left
+        ]
         right_key_indices = [
-            key.get_numeric_index() for key in self._keys.right
+            key.get_numeric_index(rhs) for key in self._keys.right
         ]
         left_rows, right_rows = libcudf.join.join(
-            self.lhs,
-            self.rhs,
+            lhs,
+            rhs,
             left_on=left_key_indices,
             right_on=right_key_indices,
             how=self.how,
         )
-        return self.construct_result(left_rows, right_rows)
+        return self.construct_result(lhs, rhs, left_rows, right_rows)
 
-    def construct_result(self, left_rows, right_rows):
-        self.match_key_dtypes(_libcudf_to_output_castrules)
+    def construct_result(self, lhs, rhs, left_rows, right_rows):
+        lhs, rhs = self.match_key_dtypes(
+            lhs, rhs, _libcudf_to_output_castrules
+        )
 
         # first construct the index.
         if self.left_index and self.right_index:
             if self.how == "right":
-                out_index = self.rhs.index._gather(left_rows, nullify=True)
+                out_index = rhs.index._gather(left_rows, nullify=True)
             else:
-                out_index = self.lhs.index._gather(left_rows, nullify=True)
+                out_index = lhs.index._gather(left_rows, nullify=True)
         elif self.left_index:
             # left_index and right_on
-            out_index = self.rhs.index._gather(right_rows, nullify=True)
+            out_index = rhs.index._gather(right_rows, nullify=True)
         elif self.right_index:
             # right_index and left_on
-            out_index = self.lhs.index._gather(left_rows, nullify=True)
+            out_index = lhs.index._gather(left_rows, nullify=True)
         else:
             out_index = None
 
@@ -322,11 +300,11 @@ def construct_result(self, left_rows, right_rows):
         left_names, right_names = self.output_column_names()
 
         for lcol in left_names:
-            data[left_names[lcol]] = self.lhs._data[lcol].take(
+            data[left_names[lcol]] = lhs._data[lcol].take(
                 left_rows, nullify=True
             )
         for rcol in right_names:
-            data[right_names[rcol]] = self.rhs._data[rcol].take(
+            data[right_names[rcol]] = rhs._data[rcol].take(
                 right_rows, nullify=True
             )
 
@@ -336,16 +314,12 @@ def construct_result(self, left_rows, right_rows):
         if self.how == "outer":
             for lkey, rkey in zip(*self._keys):
                 if lkey.name == rkey.name:
-                    # get the key column as it appears in the result:
-                    out_key = ColumnView(
-                        result, column=lkey.column, index=lkey.index
-                    )
-
                     # fill nulls in the key column with values from the RHS
-                    out_key.set_value(
-                        out_key.value.fillna(
-                            rkey.value.take(right_rows, nullify=True)
-                        )
+                    lkey.set_value(
+                        result,
+                        lkey.value(result).fillna(
+                            rkey.value(rhs).take(right_rows, nullify=True)
+                        ),
                     )
 
         return self.sort_result(result)
@@ -474,33 +448,42 @@ def validate_merge_params(
                         "lsuffix and rsuffix are not defined"
                     )
 
-    def match_key_dtypes(self, match_func):
+    def match_key_dtypes(self, lhs, rhs, match_func):
+        out_lhs = lhs.copy(deep=False)
+        out_rhs = rhs.copy(deep=False)
         # match the dtypes of the key columns in
         # self.lhs and self.rhs according to the matching
         # function `match_func`
         for left_key, right_key in zip(*self._keys):
-            lcol, rcol = left_key.value, right_key.value
+            lcol, rcol = left_key.value(lhs), right_key.value(rhs)
             dtype = match_func(lcol, rcol, how=self.how)
-            left_key.set_value(lcol.astype(dtype))
-            right_key.set_value(rcol.astype(dtype))
+            left_key.set_value(out_lhs, lcol.astype(dtype))
+            right_key.set_value(out_rhs, rcol.astype(dtype))
+        return out_lhs, out_rhs
 
 
 class MergeSemi(MergeBase):
     def perform_merge(self):
-        self.match_key_dtypes(_input_to_libcudf_castrules_any)
+        lhs, rhs = self.match_key_dtypes(
+            self.lhs, self.rhs, _input_to_libcudf_castrules_any
+        )
 
-        left_key_indices = [key.get_numeric_index() for key in self._keys.left]
+        left_key_indices = [
+            key.get_numeric_index(lhs) for key in self._keys.left
+        ]
         right_key_indices = [
-            key.get_numeric_index() for key in self._keys.right
+            key.get_numeric_index(rhs) for key in self._keys.right
         ]
         left_rows = libcudf.join.semi_join(
-            self.lhs,
-            self.rhs,
+            lhs,
+            rhs,
             left_on=left_key_indices,
             right_on=right_key_indices,
             how=self.how,
         )
-        return self.construct_result(left_rows, cudf.core.column.as_column([]))
+        return self.construct_result(
+            lhs, rhs, left_rows, cudf.core.column.as_column([])
+        )
 
     def output_column_names(self):
         left_names, _ = super().output_column_names()

From 5f93d23f24467e286612c0b4dae0244d35636607 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 19 Feb 2021 10:19:42 -0500
Subject: [PATCH 049/138] Fix join tests

---
 python/cudf/cudf/core/join/casting_logic.py |  2 ++
 python/cudf/cudf/core/join/join.py          | 16 ++++++++++------
 python/cudf/cudf/tests/test_joining.py      |  6 +++---
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/join/casting_logic.py b/python/cudf/cudf/core/join/casting_logic.py
index acd8efca8a9..7638288f75e 100644
--- a/python/cudf/cudf/core/join/casting_logic.py
+++ b/python/cudf/cudf/core/join/casting_logic.py
@@ -183,6 +183,8 @@ def _libcudf_to_output_castrules(lcol, rcol, how):
     if pd.api.types.is_dtype_equal(ltype, rtype):
         return ltype
 
+    merge_return_type = _input_to_libcudf_castrules_any(lcol, rcol, how)
+
     l_is_cat = isinstance(ltype, CategoricalDtype)
     r_is_cat = isinstance(rtype, CategoricalDtype)
 
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 72d6d8588c3..414d8d0dff7 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -277,7 +277,7 @@ def perform_merge(self):
 
     def construct_result(self, lhs, rhs, left_rows, right_rows):
         lhs, rhs = self.match_key_dtypes(
-            lhs, rhs, _libcudf_to_output_castrules
+            self.lhs, self.rhs, _libcudf_to_output_castrules
         )
 
         # first construct the index.
@@ -331,9 +331,12 @@ def sort_result(self, result):
         # the key columns on the other side will be used to sort.
         if self.sort:
             if self.on:
-                return result.sort_values(
-                    _coerce_to_list(self.on), ignore_index=True
-                )
+                if isinstance(result, cudf.Index):
+                    return result.sort_values()
+                else:
+                    return result.sort_values(
+                        _coerce_to_list(self.on), ignore_index=True
+                    )
             by = []
             if self.left_index and self.right_index:
                 by.extend(result.index._data.columns)
@@ -457,8 +460,9 @@ def match_key_dtypes(self, lhs, rhs, match_func):
         for left_key, right_key in zip(*self._keys):
             lcol, rcol = left_key.value(lhs), right_key.value(rhs)
             dtype = match_func(lcol, rcol, how=self.how)
-            left_key.set_value(out_lhs, lcol.astype(dtype))
-            right_key.set_value(out_rhs, rcol.astype(dtype))
+            if dtype:
+                left_key.set_value(out_lhs, lcol.astype(dtype))
+                right_key.set_value(out_rhs, rcol.astype(dtype))
         return out_lhs, out_rhs
 
 
diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index d99897584ec..fbb12f897a3 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -265,7 +265,7 @@ def test_dataframe_join_mismatch_cats(how):
     expect.data_col_right = expect.data_col_right.astype(np.int64)
     expect.data_col_left = expect.data_col_left.astype(np.int64)
 
-    assert_eq(expect, got)
+    assert_eq(expect, got, check_categorical=False)
 
 
 @pytest.mark.parametrize("on", ["key1", ["key1", "key2"], None])
@@ -1224,7 +1224,7 @@ def test_categorical_typecast_inner():
     expect_dtype = CategoricalDtype(categories=[1, 2, 3], ordered=False)
     expect_data = cudf.Series([1, 2, 3], dtype=expect_dtype, name="key")
 
-    assert_eq(expect_data, result["key"])
+    assert_eq(expect_data, result["key"], check_categorical=False)
 
     # Equal categories, unequal ordering -> error
     left = make_categorical_dataframe([1, 2, 3], ordered=False)
@@ -1242,7 +1242,7 @@ def test_categorical_typecast_inner():
 
     expect_dtype = cudf.CategoricalDtype(categories=[2, 3], ordered=False)
     expect_data = cudf.Series([2, 3], dtype=expect_dtype, name="key")
-    assert_eq(expect_data, result["key"])
+    assert_eq(expect_data, result["key"], check_categorical=False)
 
     # One is ordered -> error
     left = make_categorical_dataframe([1, 2, 3], ordered=False)

From b7bf82172ef87ab1eca0e0d66ea38fe483db5bd2 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 19 Feb 2021 11:59:17 -0500
Subject: [PATCH 050/138] Fix semi/anti join trivial cases

---
 cpp/src/join/join.cu      |  4 +++-
 cpp/src/join/semi_join.cu | 17 +++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 9e61a924e03..15b1f216928 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -49,7 +49,9 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_jo
   // build the hash map from the smaller table.
   if (right.num_rows() > left.num_rows()) {
     cudf::hash_join hj_obj(left, compare_nulls, stream);
-    return hj_obj.inner_join(right, compare_nulls, stream, mr);
+    auto result = hj_obj.inner_join(right, compare_nulls, stream, mr);
+    return std::make_pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>(
+      std::move(result.second), std::move(result.first));
   } else {
     cudf::hash_join hj_obj(right, compare_nulls, stream);
     return hj_obj.inner_join(left, compare_nulls, stream, mr);
diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index 919311f41bf..e5966dd01e4 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -22,8 +22,10 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/gather.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/sequence.hpp>
 #include <cudf/dictionary/detail/update_keys.hpp>
 #include <cudf/join.hpp>
+#include <cudf/scalar/scalar_factories.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/utilities/error.hpp>
 
@@ -46,6 +48,21 @@ std::unique_ptr<cudf::column> left_semi_anti_join(
   CUDF_EXPECTS(0 != left_keys.num_columns(), "Left table is empty");
   CUDF_EXPECTS(0 != right_keys.num_columns(), "Right table is empty");
 
+  if (is_trivial_join(left_keys, right_keys, JoinKind)) {
+    return std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
+                                          0,
+                                          rmm::device_buffer{},
+                                          rmm::device_buffer{},
+                                          0);
+  }
+  if ((join_kind::LEFT_ANTI_JOIN == JoinKind) && (0 == right_keys.num_rows())) {
+    using ScalarType = cudf::scalar_type_t<cudf::size_type>;
+    auto zero = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32), stream.value());
+    zero->set_valid(true, stream);
+    static_cast<ScalarType*>(zero.get())->set_value(0, stream);
+    return cudf::detail::sequence(left_keys.num_rows(), *zero, stream);
+  }
+
   auto const left_num_rows  = left_keys.num_rows();
   auto const right_num_rows = right_keys.num_rows();
 

From 50a2fb2aab777e7a9b87d4d278ae560b6a95ef5d Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 19 Feb 2021 16:39:56 -0500
Subject: [PATCH 051/138] When testing join results, use a helper that sorts
 values

---
 python/cudf/cudf/tests/test_joining.py | 161 ++++++++++++-------------
 python/cudf/cudf/tests/test_string.py  |  40 +++---
 2 files changed, 96 insertions(+), 105 deletions(-)

diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index fbb12f897a3..367e903d02e 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -69,6 +69,35 @@ def pd_odd_joins(left, right, join_type):
         return left[left.index.isin(right.index)][left.columns]
 
 
+def assert_join_results_equal(expect, got, how, **kwargs):
+    if how == "right":
+        got = got[expect.columns]
+
+    if isinstance(expect, (pd.Series, cudf.Series)):
+        return assert_eq(
+            expect.sort_values().reset_index(drop=True),
+            got.sort_values().reset_index(drop=True),
+            **kwargs,
+        )
+    elif isinstance(expect, (pd.DataFrame, cudf.DataFrame)):
+        if not len(
+            expect.columns
+        ):  # can't sort_values() on a df without columns
+            return assert_eq(expect, got, **kwargs)
+
+        return assert_eq(
+            expect.sort_values(expect.columns.to_list()).reset_index(
+                drop=True
+            ),
+            got.sort_values(got.columns.to_list()).reset_index(drop=True),
+            **kwargs,
+        )
+    elif isinstance(expect, (pd.Index, cudf.Index)):
+        return assert_eq(expect.sort_values(), got.sort_values(), **kwargs)
+    else:
+        raise ValueError(f"Not a join result: {type(expect).__name__}")
+
+
 @pytest.mark.parametrize("aa,bb,how,method", make_params())
 def test_dataframe_join_how(aa, bb, how, method):
     df = DataFrame()
@@ -113,12 +142,7 @@ def work_gdf(df):
             # TODO: What is the less hacky way?
             expect.index.name = "bob"
             got.index.name = "mary"
-            assert_eq(
-                got.sort_values(got.columns.to_list()).reset_index(drop=True),
-                expect.sort_values(expect.columns.to_list()).reset_index(
-                    drop=True
-                ),
-            )
+            assert_join_results_equal(expect, got, how=how)
         # if(how=='right'):
         #     _sorted_check_series(expect['a'], expect['b'],
         #                          got['a'], got['b'])
@@ -188,10 +212,7 @@ def test_dataframe_join_cats():
     expect = lhs.to_pandas().join(rhs.to_pandas())
 
     # Note: pandas make an object Index after joining
-    assert_eq(
-        got.sort_values(by="b").sort_index().reset_index(drop=True),
-        expect.reset_index(drop=True),
-    )
+    assert_join_results_equal(expect, got, how="inner")
 
     # Just do some rough checking here.
     assert list(got.columns) == ["b", "c"]
@@ -265,7 +286,7 @@ def test_dataframe_join_mismatch_cats(how):
     expect.data_col_right = expect.data_col_right.astype(np.int64)
     expect.data_col_left = expect.data_col_left.astype(np.int64)
 
-    assert_eq(expect, got, check_categorical=False)
+    assert_join_results_equal(expect, got, how=how, check_categorical=False)
 
 
 @pytest.mark.parametrize("on", ["key1", ["key1", "key2"], None])
@@ -324,7 +345,7 @@ def test_dataframe_merge_on(on):
         list(pddf_joined.columns)
     ).reset_index(drop=True)
 
-    assert_eq(cdf_result, pdf_result, check_like=True)
+    assert_join_results_equal(cdf_result, pdf_result, how="left")
 
     merge_func_result_cdf = (
         join_result_cudf.to_pandas()
@@ -332,7 +353,7 @@ def test_dataframe_merge_on(on):
         .reset_index(drop=True)
     )
 
-    assert_eq(merge_func_result_cdf, cdf_result, check_like=True)
+    assert_join_results_equal(merge_func_result_cdf, cdf_result, how="left")
 
 
 def test_dataframe_merge_on_unknown_column():
@@ -384,7 +405,7 @@ def test_dataframe_empty_merge():
     expect = DataFrame({"a": [], "b": [], "c": []})
     got = gdf1.merge(gdf2, how="left", on=["a"])
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="left")
 
 
 def test_dataframe_merge_order():
@@ -409,7 +430,7 @@ def test_dataframe_merge_order():
     df2["a"] = [7, 8]
 
     df = df1.merge(df2, how="left", on=["id", "a"])
-    assert_eq(gdf, df)
+    assert_join_results_equal(df, gdf, how="left")
 
 
 @pytest.mark.parametrize(
@@ -554,7 +575,7 @@ def test_merge_left_index_zero():
     pd_merge = left.merge(right, left_on="x", right_on="y")
     gd_merge = gleft.merge(gright, left_on="x", right_on="y")
 
-    assert_eq(pd_merge, gd_merge)
+    assert_join_results_equal(pd_merge, gd_merge, how="left")
 
 
 @pytest.mark.parametrize(
@@ -575,7 +596,7 @@ def test_merge_left_right_index_left_right_on_zero_kwargs(kwargs):
     gright = DataFrame.from_pandas(right)
     pd_merge = left.merge(right, **kwargs)
     gd_merge = gleft.merge(gright, **kwargs)
-    assert_eq(pd_merge, gd_merge)
+    assert_join_results_equal(pd_merge, gd_merge, how="left")
 
 
 @pytest.mark.parametrize(
@@ -596,7 +617,7 @@ def test_merge_left_right_index_left_right_on_kwargs(kwargs):
     gright = DataFrame.from_pandas(right)
     pd_merge = left.merge(right, **kwargs)
     gd_merge = gleft.merge(gright, **kwargs)
-    assert_eq(pd_merge, gd_merge)
+    assert_join_results_equal(pd_merge, gd_merge, how="left")
 
 
 def test_indicator():
@@ -612,9 +633,10 @@ def test_indicator():
 def test_merge_suffixes():
     pdf = cudf.DataFrame({"x": [1, 2, 1]})
     gdf = cudf.DataFrame({"x": [1, 2, 1]})
-    assert_eq(
+    assert_join_results_equal(
         gdf.merge(gdf, suffixes=("left", "right")),
         pdf.merge(pdf, suffixes=("left", "right")),
+        how="left",
     )
 
     assert_exceptions_equal(
@@ -632,11 +654,14 @@ def test_merge_left_on_right_on():
     gleft = cudf.from_pandas(left)
     gright = cudf.from_pandas(right)
 
-    assert_eq(left.merge(right, on="xx"), gleft.merge(gright, on="xx"))
+    assert_join_results_equal(
+        left.merge(right, on="xx"), gleft.merge(gright, on="xx"), how="left"
+    )
 
-    assert_eq(
+    assert_join_results_equal(
         left.merge(right, left_on="xx", right_on="xx"),
         gleft.merge(gright, left_on="xx", right_on="xx"),
+        how="left",
     )
 
 
@@ -712,7 +737,9 @@ def test_merge_sort(ons, hows):
     pd_merge = left.merge(right, **kwargs)
     # require the join keys themselves to be sorted correctly
     # the non-key columns will NOT match pandas ordering
-    assert_eq(pd_merge[kwargs["on"]], gd_merge[kwargs["on"]])
+    assert_join_results_equal(
+        pd_merge[kwargs["on"]], gd_merge[kwargs["on"]], how="left"
+    )
     pd_merge = pd_merge.drop(kwargs["on"], axis=1)
     gd_merge = gd_merge.drop(kwargs["on"], axis=1)
     if not pd_merge.empty:
@@ -724,7 +751,7 @@ def test_merge_sort(ons, hows):
             drop=True
         )
 
-    assert_eq(pd_merge, gd_merge)
+    assert_join_results_equal(pd_merge, gd_merge, how="left")
 
 
 @pytest.mark.parametrize(
@@ -785,7 +812,7 @@ def test_join_datetimes_index(dtype):
 
     assert gdf["d"].dtype == np.dtype(dtype)
 
-    assert_eq(pdf, gdf)
+    assert_join_results_equal(pdf, gdf, how="inner")
 
 
 def test_join_with_different_names():
@@ -795,7 +822,7 @@ def test_join_with_different_names():
     gright = DataFrame.from_pandas(right)
     pd_merge = left.merge(right, how="outer", left_on=["a"], right_on=["b"])
     gd_merge = gleft.merge(gright, how="outer", left_on=["a"], right_on=["b"])
-    assert_eq(pd_merge, gd_merge.sort_values(by=["a"]).reset_index(drop=True))
+    assert_join_results_equal(pd_merge, gd_merge, how="outer")
 
 
 def test_join_same_name_different_order():
@@ -805,9 +832,7 @@ def test_join_same_name_different_order():
     gright = DataFrame.from_pandas(right)
     pd_merge = left.merge(right, left_on=["a", "b"], right_on=["b", "a"])
     gd_merge = gleft.merge(gright, left_on=["a", "b"], right_on=["b", "a"])
-    assert_eq(
-        pd_merge, gd_merge.sort_values(by=["a_x"]).reset_index(drop=True)
-    )
+    assert_join_results_equal(pd_merge, gd_merge, how="left")
 
 
 def test_join_empty_table_dtype():
@@ -878,10 +903,7 @@ def test_join_multi(how, column_a, column_b, column_c):
     gdf_result = gdf_result[columns]
     pdf_result = pdf_result[columns]
 
-    assert_eq(
-        gdf_result.reset_index(drop=True).fillna(-1),
-        pdf_result.sort_index().reset_index(drop=True).fillna(-1),
-    )
+    assert_join_results_equal(pdf_result, gdf_result, how="inner")
 
 
 @pytest.mark.parametrize(
@@ -971,7 +993,7 @@ def test_merge_multi(kwargs):
     expect.index = range(len(expect))
     got.index = range(len(got))
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="left")
 
 
 @pytest.mark.parametrize("dtype_l", INTEGER_TYPES)
@@ -1001,7 +1023,7 @@ def test_typecast_on_join_int_to_int(dtype_l, dtype_r):
 
     got = gdf_l.merge(gdf_r, on="join_col", how="inner")
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="inner")
 
 
 @pytest.mark.parametrize("dtype_l", ["float32", "float64"])
@@ -1036,7 +1058,7 @@ def test_typecast_on_join_float_to_float(dtype_l, dtype_r):
 
     got = gdf_l.merge(gdf_r, on="join_col", how="inner")
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="inner")
 
 
 @pytest.mark.parametrize("dtype_l", NUMERIC_TYPES)
@@ -1072,7 +1094,7 @@ def test_typecast_on_join_mixed_int_float(dtype_l, dtype_r):
 
     got = gdf_l.merge(gdf_r, on="join_col", how="inner")
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="inner")
 
 
 def test_typecast_on_join_no_float_round():
@@ -1096,7 +1118,7 @@ def test_typecast_on_join_no_float_round():
 
     got = gdf_l.merge(gdf_r, on="join_col", how="left")
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="left")
 
 
 @pytest.mark.parametrize(
@@ -1169,7 +1191,7 @@ def test_typecast_on_join_dt_to_dt(dtype_l, dtype_r):
 
     got = gdf_l.merge(gdf_r, on="join_col", how="inner")
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="inner")
 
 
 @pytest.mark.parametrize("dtype_l", ["category", "str", "int32", "float32"])
@@ -1204,7 +1226,7 @@ def test_typecast_on_join_categorical(dtype_l, dtype_r):
     )
 
     got = gdf_l.merge(gdf_r, on="join_col", how="inner")
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="inner")
 
 
 def make_categorical_dataframe(categories, ordered=False):
@@ -1431,20 +1453,10 @@ def test_index_join(lhs, rhs, how, level):
     g_lhs = l_df.set_index(lhs).index
     g_rhs = r_df.set_index(rhs).index
 
-    expected = (
-        p_lhs.join(p_rhs, level=level, how=how)
-        .to_frame(index=False)
-        .sort_values(by=lhs)
-        .reset_index(drop=True)
-    )
-    got = (
-        g_lhs.join(g_rhs, level=level, how=how)
-        .to_frame(index=False)
-        .sort_values(by=lhs)
-        .reset_index(drop=True)
-    )
+    expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False)
+    got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False)
 
-    assert_eq(expected, got)
+    assert_join_results_equal(expected, got, how=how)
 
 
 def test_index_join_corner_cases():
@@ -1465,20 +1477,10 @@ def test_index_join_corner_cases():
     p_rhs = r_pdf.set_index(rhs).index
     g_lhs = l_df.set_index(lhs).index
     g_rhs = r_df.set_index(rhs).index
-    expected = (
-        p_lhs.join(p_rhs, level=level, how=how)
-        .to_frame(index=False)
-        .sort_values(by=lhs)
-        .reset_index(drop=True)
-    )
-    got = (
-        g_lhs.join(g_rhs, level=level, how=how)
-        .to_frame(index=False)
-        .sort_values(by=lhs)
-        .reset_index(drop=True)
-    )
+    expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False)
+    got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False)
 
-    assert_eq(expected, got)
+    assert_join_results_equal(expected, got, how=how)
 
     # sort is supported only in case of two non-MultiIndex join
     # Join when column name doesn't match with level
@@ -1494,7 +1496,7 @@ def test_index_join_corner_cases():
     expected = p_lhs.join(p_rhs, how=how, sort=True)
     got = g_lhs.join(g_rhs, how=how, sort=True)
 
-    assert_eq(expected, got)
+    assert_join_results_equal(expected, got, how=how)
 
     # Pandas Index.join on categorical column returns generic column
     # but cudf will be returning a categorical column itself.
@@ -1508,22 +1510,12 @@ def test_index_join_corner_cases():
     p_rhs = r_pdf.set_index(rhs).index
     g_lhs = l_df.set_index(lhs).index
     g_rhs = r_df.set_index(rhs).index
-    expected = (
-        p_lhs.join(p_rhs, level=level, how=how)
-        .to_frame(index=False)
-        .sort_values(by=lhs)
-        .reset_index(drop=True)
-    )
-    got = (
-        g_lhs.join(g_rhs, level=level, how=how)
-        .to_frame(index=False)
-        .sort_values(by=lhs)
-        .reset_index(drop=True)
-    )
+    expected = p_lhs.join(p_rhs, level=level, how=how).to_frame(index=False)
+    got = g_lhs.join(g_rhs, level=level, how=how).to_frame(index=False)
 
     got["a"] = got["a"].astype(expected["a"].dtype)
 
-    assert_eq(expected, got)
+    assert_join_results_equal(expected, got, how=how)
 
 
 def test_index_join_exception_cases():
@@ -1575,7 +1567,7 @@ def test_typecast_on_join_indexes():
 
     got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y")
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="inner")
 
 
 def test_typecast_on_join_multiindices():
@@ -1626,7 +1618,7 @@ def test_typecast_on_join_multiindices():
     expect = expect.set_index(["join_col_0", "join_col_1", "join_col_2"])
     got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y")
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="inner")
 
 
 def test_typecast_on_join_indexes_matching_categorical():
@@ -1653,7 +1645,7 @@ def test_typecast_on_join_indexes_matching_categorical():
     expect = expect.set_index("join_col")
     got = gdf_l.join(gdf_r, how="inner", lsuffix="_x", rsuffix="_y")
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="inner")
 
 
 @pytest.mark.parametrize(
@@ -1705,9 +1697,10 @@ def test_series_dataframe_mixed_merging(lhs, rhs, how, kwargs):
     expect = check_lhs.merge(check_rhs, how=how, **kwargs)
     got = lhs.merge(rhs, how=how, **kwargs)
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how=how)
 
 
+@pytest.mark.xfail(reason="Cannot sort values of list dtype")
 @pytest.mark.parametrize(
     "how", ["left", "inner", "right", "leftanti", "leftsemi"]
 )
@@ -1732,4 +1725,4 @@ def test_merge_with_lists(how):
     expect = pd_left.merge(pd_right, on="a")
     got = gd_left.merge(gd_right, on="a")
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how=how)
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index 080420c8f75..5e9d75c7568 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -14,6 +14,7 @@
 from cudf.core import DataFrame, Series
 from cudf.core.column.string import StringColumn
 from cudf.core.index import StringIndex, as_index
+from cudf.tests.test_joining import assert_join_results_equal
 from cudf.tests.utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
@@ -861,16 +862,12 @@ def test_string_split(data, pat, n, expand):
 
 
 @pytest.mark.parametrize(
-    "str_data,str_data_raise",
-    [
-        ([], 0),
-        (["a", "b", "c", "d", "e"], 0),
-        ([None, None, None, None, None], 1),
-    ],
+    "str_data",
+    [[], ["a", "b", "c", "d", "e"], [None, None, None, None, None]],
 )
 @pytest.mark.parametrize("num_keys", [1, 2, 3])
 @pytest.mark.parametrize("how", ["left", "right", "inner", "outer"])
-def test_string_join_key(str_data, str_data_raise, num_keys, how):
+def test_string_join_key(str_data, num_keys, how):
     other_data = [1, 2, 3, 4, 5][: len(str_data)]
 
     pdf = pd.DataFrame()
@@ -884,19 +881,17 @@ def test_string_join_key(str_data, str_data_raise, num_keys, how):
     pdf2 = pdf.copy()
     gdf2 = gdf.copy()
 
-    expectation = raise_builder(
-        [0 if how == "right" else str_data_raise], (AssertionError)
-    )
+    expect = pdf.merge(pdf2, on=list(range(num_keys)), how=how)
+    got = gdf.merge(gdf2, on=list(range(num_keys)), how=how)
 
-    with expectation:
-        expect = pdf.merge(pdf2, on=list(range(num_keys)), how=how)
-        got = gdf.merge(gdf2, on=list(range(num_keys)), how=how)
+    if len(expect) == 0 and len(got) == 0:
+        expect = expect.reset_index(drop=True)
+        got = got[expect.columns]  # reorder columns
 
-        if len(expect) == 0 and len(got) == 0:
-            expect = expect.reset_index(drop=True)
-            got = got[expect.columns]
+    if how == "right":
+        got = got[expect.columns]  # reorder columns
 
-        assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how=how)
 
 
 @pytest.mark.parametrize(
@@ -940,7 +935,7 @@ def test_string_join_key_nulls(str_data_nulls):
 
     expect["vals_y"] = expect["vals_y"].fillna(-1).astype("int64")
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="left")
 
 
 @pytest.mark.parametrize(
@@ -969,7 +964,10 @@ def test_string_join_non_key(str_data, num_cols, how):
         expect = expect.reset_index(drop=True)
         got = got[expect.columns]
 
-    assert_eq(expect, got)
+    if how == "right":
+        got = got[expect.columns]  # reorder columns
+
+    assert_join_results_equal(expect, got, how=how)
 
 
 @pytest.mark.parametrize(
@@ -1010,7 +1008,7 @@ def test_string_join_non_key_nulls(str_data_nulls):
         expect = expect.reset_index(drop=True)
         got = got[expect.columns]
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="left")
 
 
 def test_string_join_values_nulls():
@@ -1050,7 +1048,7 @@ def test_string_join_values_nulls():
     expect = expect.sort_values(by=["a", "b", "c"]).reset_index(drop=True)
     got = got.sort_values(by=["a", "b", "c"]).reset_index(drop=True)
 
-    assert_eq(expect, got)
+    assert_join_results_equal(expect, got, how="left")
 
 
 @pytest.mark.parametrize(

From ff0ae791595c8840f6c3857a7c876657c298a71e Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 19 Feb 2021 18:59:36 -0500
Subject: [PATCH 052/138] Totally broken commit

---
 python/cudf/cudf/core/frame.py              |  39 ++--
 python/cudf/cudf/core/join/__init__.py      |   2 +-
 python/cudf/cudf/core/join/_join_helpers.py | 225 ++++++++++++++++++++
 python/cudf/cudf/core/join/casting_logic.py | 209 ------------------
 python/cudf/cudf/core/join/join.py          | 178 +++++-----------
 5 files changed, 300 insertions(+), 353 deletions(-)
 create mode 100644 python/cudf/cudf/core/join/_join_helpers.py
 delete mode 100644 python/cudf/cudf/core/join/casting_logic.py

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index ccbf2cd10b6..1dfb65ace38 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -3399,29 +3399,24 @@ def _merge(
                 indicator=indicator,
                 suffixes=suffixes,
             )
-
-        lhs = self
-        rhs = right
-
-        from cudf.core.join import Merge
-
-        mergeop = Merge(
-            lhs,
-            rhs,
-            on,
-            left_on,
-            right_on,
-            left_index,
-            right_index,
-            how,
-            sort,
-            lsuffix,
-            rsuffix,
-            method,
-            indicator,
-            suffixes,
+        from cudf.core.join.join import merge
+
+        return merge(
+            self,
+            right,
+            on=on,
+            left_on=left_on,
+            right_on=right_on,
+            left_index=left_index,
+            right_index=right_index,
+            how=how,
+            sort=sort,
+            lsuffix=lsuffix,
+            rsuffix=rsuffix,
+            method=method,
+            indicator=indicator,
+            suffixes=suffixes,
         )
-        return mergeop.perform_merge()
 
     def _is_sorted(self, ascending=None, null_position=None):
         """
diff --git a/python/cudf/cudf/core/join/__init__.py b/python/cudf/cudf/core/join/__init__.py
index 6d126c8af4d..0463b8f9df1 100644
--- a/python/cudf/cudf/core/join/__init__.py
+++ b/python/cudf/cudf/core/join/__init__.py
@@ -1,3 +1,3 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
 
-from cudf.core.join.join import Merge
+from cudf.core.join.join import merge
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
new file mode 100644
index 00000000000..68c3e33ac25
--- /dev/null
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -0,0 +1,225 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
+import warnings
+
+import numpy as np
+import pandas as pd
+
+import cudf
+from cudf.core.dtypes import CategoricalDtype
+
+
+class _Indexer:
+    # Indexer into a column (either a data column or index level).
+    #
+    # >>> df
+    #    a
+    # b
+    # 4  1
+    # 5  2
+    # 6  3
+    # >>> _Indexer("a", column=True).value(df)  # returns column "a" of df
+    # >>> _Indexer("b", index=True).value(df)  # returns index level "b" of df
+
+    def __init__(self, name, column=False, index=False):
+        self.name = name
+        self.column, self.index = column, index
+
+    def value(self, obj):
+        # get the column from `obj`
+        if self.column:
+            return obj._data[self.name]
+        else:
+            return obj._index._data[self.name]
+
+    def set_value(self, obj, value):
+        # set the colum in `obj`
+        if self.column:
+            obj._data[self.name] = value
+        else:
+            obj._index._data[self.name] = value
+
+    def get_numeric_index(self, obj):
+        # get the position of the column in `obj`
+        # (counting any index columns)
+        if self.column:
+            index_nlevels = obj.index.nlevels if obj._index is not None else 0
+            return index_nlevels + tuple(obj._data).index(self.name)
+        else:
+            return obj.index.names.index(self.name)
+
+
+def _coerce_to_tuple(obj):
+    if hasattr(obj, "__iter__") and not isinstance(obj, str):
+        return tuple(obj)
+    else:
+        return (obj,)
+
+
+def _coerce_to_list(obj):
+    return list(_coerce_to_tuple(obj))
+
+
+def _cast_join_categorical_keys_both(lcol, rcol, how):
+    # cast lcol and rcol to a common type when they are *both*
+    # categorical types.
+    #
+    # The commontype depends on both `how` and the specifics of the
+    # categorical variables to be merged.
+
+    ltype, rtype = lcol.dtype, rcol.dtype
+
+    # when both are ordered and both have the same categories,
+    # no casting required:
+    if ltype == rtype:
+        return lcol, rcol
+
+    # Merging categorical variables when only one side is ordered is
+    # ambiguous and not allowed.
+    if ltype.ordered != rtype.ordered:
+        raise TypeError(
+            "Merging on categorical variables with mismatched"
+            " ordering is ambiguous"
+        )
+
+    if ltype.ordered and rtype.ordered:
+        # if we get to here, categories must be what causes the
+        # dtype equality check to fail. And we can never merge
+        # two ordered categoricals with different categories
+        raise TypeError(
+            f"{how} merge between categoricals with "
+            "different categories is only valid when "
+            "neither side is ordered"
+        )
+
+    # the following should now always hold
+    assert not ltype.ordered and not rtype.ordered
+
+    if how == "inner":
+        # demote to underlying types -- we will promote them back later
+        return _cast_join_keys(ltype.categories, rtype.categories, how)
+    elif how in {"left", "leftanti", "leftsemi"}:
+        # always cast to left type
+        return lcol, rcol.astype(ltype)
+    else:
+        # merge categories
+        merged_categories = cudf.concat(
+            [ltype.categories, rtype.categories]
+        ).unique()
+        common_type = cudf.CategoricalDtype(
+            categories=merged_categories, ordered=False
+        )
+        return lcol.astype(common_type), rcol.astype(common_type)
+
+
+def _cast_join_categorical_keys(lcol, rcol, how):
+    # cast the keys lcol and rcol to a common dtype
+    # when at least one of them is a categorical type
+
+    l_is_cat = isinstance(lcol.dtype, CategoricalDtype)
+    r_is_cat = isinstance(rcol.dtype, CategoricalDtype)
+
+    if l_is_cat and r_is_cat:
+        # if both are categoricals, logic is complicated:
+        return _cast_join_categorical_keys_both(lcol, rcol, how)
+    elif l_is_cat or r_is_cat:
+        if l_is_cat and how in {"left", "leftsemi", "leftanti"}:
+            return (lcol, rcol.astype(lcol.dtype))
+        common_type = (
+            lcol.dtype.categories.dtype
+            if l_is_cat
+            else rcol.dtype.categories.dtype
+        )
+        return lcol.astype(common_type), rcol.astype(common_type)
+    else:
+        raise ValueError("Neither operand is categorical")
+
+
+def _cast_join_keys(lcol, rcol, how):
+    # cast the keys lcol and rcol to a common dtype
+
+    ltype = lcol.dtype
+    rtype = rcol.dtype
+
+    # if either side is categorical, different logic
+    if isinstance(ltype, CategoricalDtype) or isinstance(
+        rtype, CategoricalDtype
+    ):
+        return _cast_join_categorical_keys(lcol, rcol, how)
+
+    if pd.api.types.is_dtype_equal(ltype, rtype):
+        return lcol, rcol
+
+    if (np.issubdtype(ltype, np.number)) and (np.issubdtype(rtype, np.number)):
+        common_type = (
+            max(ltype, type)
+            if ltype.kind == rtype.kind
+            else np.find_common_type([], (ltype, rtype))
+        )
+
+    elif np.issubdtype(ltype, np.datetime64) and np.issubdtype(
+        rtype, np.datetime64
+    ):
+        common_type = max(ltype, rtype)
+
+    if how == "left":
+        if rcol.fillna(0).can_cast_safely(ltype):
+            return lcol, rcol.astype(ltype)
+        else:
+            warnings.warn(
+                "Can't safely cast column from {rtype} to {ltype}, "
+                "upcasting to {common_type}."
+            )
+
+    if common_type:
+        lcol, rcol = lcol.astype(common_type), rcol.astype(common_type)
+
+    return lcol, rcol
+
+
+def _libcudf_to_output_castrules(lcol, rcol, how):
+    """
+    Determine what dtype an output merge key column should be
+    cast to after it has been processed by libcudf. Determine
+    if a column should be promoted to a categorical datatype.
+    For inner merges between unordered categoricals, we get a
+    new categorical variable containing the intersection of
+    the two source variables. For left or right joins, we get
+    the original categorical variable from whichever was the
+    major operand of the join, e.g. left for a left join or
+    right for a right join. In the case of an outer join, the
+    result will be a new categorical variable with both sets
+    of categories.
+    """
+    merge_return_type = None
+
+    ltype = lcol.dtype
+    rtype = rcol.dtype
+
+    if pd.api.types.is_dtype_equal(ltype, rtype):
+        return ltype
+
+    merge_return_type = _cast_join_keys(lcol, rcol, how)
+
+    l_is_cat = isinstance(ltype, CategoricalDtype)
+    r_is_cat = isinstance(rtype, CategoricalDtype)
+
+    # we currently only need to do this for categorical variables
+    if how == "inner":
+        if l_is_cat and r_is_cat:
+            merge_return_type = "category"
+    elif how in {"left", "leftsemi", "leftanti"}:
+        if l_is_cat:
+            merge_return_type = ltype
+    elif how == "right":
+        if r_is_cat:
+            merge_return_type = rtype
+    elif how == "outer":
+        if l_is_cat and r_is_cat:
+            new_cats = cudf.concat(
+                [ltype.categories, rtype.categories]
+            ).unique()
+            merge_return_type = cudf.CategoricalDtype(
+                categories=new_cats, ordered=ltype.ordered
+            )
+    return merge_return_type
diff --git a/python/cudf/cudf/core/join/casting_logic.py b/python/cudf/cudf/core/join/casting_logic.py
deleted file mode 100644
index 7638288f75e..00000000000
--- a/python/cudf/cudf/core/join/casting_logic.py
+++ /dev/null
@@ -1,209 +0,0 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
-
-import warnings
-
-import numpy as np
-import pandas as pd
-
-import cudf
-from cudf.core.dtypes import CategoricalDtype
-
-
-def _input_to_libcudf_castrules_both_cat(lcol, rcol, how):
-    """
-    Based off the left and right operands, determine the libcudf
-    merge dtype or error for corner cases where the merge cannot
-    proceed. This function handles categorical variables.
-    Categorical variable typecasting logic depends on both `how`
-    and the specifics of the categorical variables to be merged.
-    Merging categorical variables when only one side is ordered
-    is ambiguous and not allowed. Merging when both categoricals
-    are ordered is allowed, but only when the categories are
-    exactly equal and have equal ordering, and will result in the
-    common dtype.
-    When both sides are unordered, the result categorical depends
-    on the kind of join:
-    - For inner joins, the result will be the intersection of the
-    categories
-    - For left or right joins, the result will be the the left or
-    right dtype respectively. This extends to semi and anti joins.
-    - For outer joins, the result will be the union of categories
-    from both sides.
-
-    """
-    ltype = lcol.dtype
-    rtype = rcol.dtype
-
-    # this function is only to be used to resolve the result when both
-    # sides are categorical
-    if not isinstance(ltype, CategoricalDtype) and isinstance(
-        rtype, CategoricalDtype
-    ):
-        raise TypeError("Both operands must be CategoricalDtype")
-
-    # true for every configuration
-    if ltype == rtype:
-        return ltype
-
-    # raise for any join where ordering doesn't match
-    if ltype.ordered != rtype.ordered:
-        raise TypeError(
-            "Merging on categorical variables with mismatched"
-            " ordering is ambiguous"
-        )
-    elif ltype.ordered and rtype.ordered:
-        # if we get to here, categories must be what causes the
-        # dtype equality check to fail. And we can never merge
-        # two ordered categoricals with different categories
-        raise TypeError(
-            f"{how} merge between categoricals with "
-            "different categories is only valid when "
-            "neither side is ordered"
-        )
-
-    elif how == "inner":
-        # neither ordered, so categories must be different
-        # demote to underlying types
-        return _input_to_libcudf_castrules_any(
-            ltype.categories, rtype.categories, how
-        )
-
-    elif how == "left":
-        return ltype
-    elif how == "right":
-        return rtype
-
-    elif how == "outer":
-        new_cats = cudf.concat([ltype.categories, rtype.categories]).unique()
-        return cudf.CategoricalDtype(categories=new_cats, ordered=False)
-
-
-def _input_to_libcudf_castrules_any_cat(lcol, rcol, how):
-
-    l_is_cat = isinstance(lcol.dtype, CategoricalDtype)
-    r_is_cat = isinstance(rcol.dtype, CategoricalDtype)
-
-    if l_is_cat and r_is_cat:
-        return _input_to_libcudf_castrules_both_cat(lcol, rcol, how)
-    elif l_is_cat or r_is_cat:
-        if l_is_cat and how == "left":
-            return lcol.dtype
-        if r_is_cat and how == "right":
-            return rcol.dtype
-        return (
-            lcol.dtype.categories.dtype
-            if l_is_cat
-            else rcol.dtype.categories.dtype
-        )
-    else:
-        raise ValueError("Neither operand is categorical")
-
-
-def _input_to_libcudf_castrules_any(lcol, rcol, how):
-    """
-    Determine what dtype the left and right hand
-    input columns must be cast to for a libcudf
-    join to proceed.
-    """
-
-    cast_warn = (
-        "can't safely cast column from {} with type"
-        " {} to {}, upcasting to {}"
-    )
-
-    ltype = lcol.dtype
-    rtype = rcol.dtype
-
-    # if either side is categorical, different logic
-    if isinstance(ltype, CategoricalDtype) or isinstance(
-        rtype, CategoricalDtype
-    ):
-        return _input_to_libcudf_castrules_any_cat(lcol, rcol, how)
-
-    libcudf_join_type = None
-    if pd.api.types.is_dtype_equal(ltype, rtype):
-        libcudf_join_type = ltype
-    elif how == "left":
-        check_col = rcol.fillna(0)
-        if not check_col.can_cast_safely(ltype):
-            libcudf_join_type = _input_to_libcudf_castrules_any(
-                lcol, rcol, "inner"
-            )
-            warnings.warn(
-                cast_warn.format("right", rtype, ltype, libcudf_join_type)
-            )
-        else:
-            libcudf_join_type = ltype
-    elif how == "right":
-        check_col = lcol.fillna(0)
-        if not check_col.can_cast_safely(rtype):
-            libcudf_join_type = _input_to_libcudf_castrules_any(
-                lcol, rcol, "inner"
-            )
-            warnings.warn(
-                cast_warn.format("left", ltype, rtype, libcudf_join_type)
-            )
-        else:
-            libcudf_join_type = rtype
-    elif how in {"inner", "outer"}:
-        if (np.issubdtype(ltype, np.number)) and (
-            np.issubdtype(rtype, np.number)
-        ):
-            if ltype.kind == rtype.kind:
-                # both ints or both floats
-                libcudf_join_type = max(ltype, rtype)
-            else:
-                libcudf_join_type = np.find_common_type([], [ltype, rtype])
-        elif np.issubdtype(ltype, np.datetime64) and np.issubdtype(
-            rtype, np.datetime64
-        ):
-            libcudf_join_type = max(ltype, rtype)
-    return libcudf_join_type
-
-
-def _libcudf_to_output_castrules(lcol, rcol, how):
-    """
-    Determine what dtype an output merge key column should be
-    cast to after it has been processed by libcudf. Determine
-    if a column should be promoted to a categorical datatype.
-    For inner merges between unordered categoricals, we get a
-    new categorical variable containing the intersection of
-    the two source variables. For left or right joins, we get
-    the original categorical variable from whichever was the
-    major operand of the join, e.g. left for a left join or
-    right for a right join. In the case of an outer join, the
-    result will be a new categorical variable with both sets
-    of categories.
-    """
-    merge_return_type = None
-
-    ltype = lcol.dtype
-    rtype = rcol.dtype
-
-    if pd.api.types.is_dtype_equal(ltype, rtype):
-        return ltype
-
-    merge_return_type = _input_to_libcudf_castrules_any(lcol, rcol, how)
-
-    l_is_cat = isinstance(ltype, CategoricalDtype)
-    r_is_cat = isinstance(rtype, CategoricalDtype)
-
-    # we currently only need to do this for categorical variables
-    if how == "inner":
-        if l_is_cat and r_is_cat:
-            merge_return_type = "category"
-    elif how == "left":
-        if l_is_cat:
-            merge_return_type = ltype
-    elif how == "right":
-        if r_is_cat:
-            merge_return_type = rtype
-    elif how == "outer":
-        if l_is_cat and r_is_cat:
-            new_cats = cudf.concat(
-                [ltype.categories, rtype.categories]
-            ).unique()
-            merge_return_type = cudf.CategoricalDtype(
-                categories=new_cats, ordered=ltype.ordered
-            )
-    return merge_return_type
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 414d8d0dff7..dcb06d3aee2 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -3,105 +3,62 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf.core.join.casting_logic import (
-    _input_to_libcudf_castrules_any,
+from cudf.core.join._join_helpers import (
+    _cast_join_keys,
+    _coerce_to_list,
+    _coerce_to_tuple,
+    _Indexer,
     _libcudf_to_output_castrules,
 )
 
 
-class ColumnView:
-    def __init__(self, name, column=False, index=False):
-        self.name = name
-        self.column, self.index = column, index
-
-    def get_numeric_index(self, obj):
-        # get the position of the column (including any index columns)
-        if self.column:
-            index_nlevels = obj.index.nlevels if obj._index is not None else 0
-            return index_nlevels + tuple(obj._data).index(self.name)
-        else:
-            return obj.index.names.index(self.name)
-
-    @property
-    def is_index_level(self):
-        # True if this is an index column
-        return self.index
-
-    def value(self, obj):
-        # get the column
-        if self.column:
-            return obj._data[self.name]
-        else:
-            return obj._index._data[self.name]
-
-    def set_value(self, obj, value):
-        # set the colum
-        if self.column:
-            obj._data[self.name] = value
-        else:
-            obj._index._data[self.name] = value
-
-
-JoinKeys = namedtuple("JoinKeys", ["left", "right"])
-
-
-def Merge(
+def merge(
     lhs,
     rhs,
-    on=None,
-    left_on=None,
-    right_on=None,
-    left_index=False,
-    right_index=False,
-    how="inner",
-    sort=False,
-    lsuffix="_x",
-    rsuffix="_y",
-    method=None,
-    indicator=None,
-    suffixes=None,
+    *,
+    on,
+    left_on,
+    right_on,
+    left_index,
+    right_index,
+    how,
+    sort,
+    lsuffix,
+    rsuffix,
+    method,
+    indicator,
+    suffixes,
 ):
-    if how not in {"leftsemi", "leftanti"}:
-        return MergeBase(
-            lhs,
-            rhs,
-            on=on,
-            left_on=left_on,
-            right_on=right_on,
-            left_index=left_index,
-            right_index=right_index,
-            how=how,
-            sort=sort,
-            lsuffix=lsuffix,
-            rsuffix=rsuffix,
-            method=method,
-            indicator=indicator,
-            suffixes=suffixes,
-        )
+    if how in {"leftsemi", "leftanti"}:
+        merge_cls = MergeSemi
     else:
-        return MergeSemi(
-            lhs,
-            rhs,
-            on=on,
-            left_on=left_on,
-            right_on=right_on,
-            left_index=left_index,
-            right_index=right_index,
-            how=how,
-            sort=sort,
-            lsuffix=lsuffix,
-            rsuffix=rsuffix,
-            method=method,
-            indicator=indicator,
-            suffixes=suffixes,
-        )
-
+        merge_cls = Merge
+    mergeobj = merge_cls(
+        lhs,
+        rhs,
+        on=on,
+        left_on=left_on,
+        right_on=right_on,
+        left_index=left_index,
+        right_index=right_index,
+        how=how,
+        sort=sort,
+        lsuffix=lsuffix,
+        rsuffix=rsuffix,
+        method=method,
+        indicator=indicator,
+    )
+    return mergeobj.perform_merge()
+
+
+class Merge(object):
+    JoinKeys = namedtuple("JoinKeys", ["left", "right"])
 
-class MergeBase(object):
     def __init__(
         self,
         lhs,
         rhs,
+        *,
         on=None,
         left_on=None,
         right_on=None,
@@ -171,11 +128,8 @@ def __init__(
             suffixes=suffixes,
         )
 
-        # warning: self.lhs and self.rhs are mutated both before
-        # and after the join
-        self.lhs = lhs.copy(deep=False)
-        self.rhs = rhs.copy(deep=False)
-
+        self.lhs = lhs
+        self.rhs = rhs
         self.on = on
         self.left_on = left_on
         self.right_on = right_on
@@ -188,6 +142,7 @@ def __init__(
         self.suffixes = suffixes
 
         self.out_class = cudf.DataFrame
+
         if isinstance(self.lhs, cudf.MultiIndex) or isinstance(
             self.rhs, cudf.MultiIndex
         ):
@@ -198,7 +153,6 @@ def __init__(
         self.compute_join_keys()
 
     def compute_join_keys(self):
-
         if (
             self.left_index
             or self.right_index
@@ -210,7 +164,7 @@ def compute_join_keys(self):
             if self.left_index:
                 left_keys.extend(
                     [
-                        ColumnView(name=on, index=True)
+                        _Indexer(name=on, index=True)
                         for on in self.lhs.index.names
                     ]
                 )
@@ -218,14 +172,14 @@ def compute_join_keys(self):
                 # TODO: require left_on or left_index to be specified
                 left_keys.extend(
                     [
-                        ColumnView(name=on, column=True)
+                        _Indexer(name=on, column=True)
                         for on in _coerce_to_tuple(self.left_on)
                     ]
                 )
             if self.right_index:
                 right_keys.extend(
                     [
-                        ColumnView(name=on, index=True)
+                        _Indexer(name=on, index=True)
                         for on in self.rhs.index.names
                     ]
                 )
@@ -233,7 +187,7 @@ def compute_join_keys(self):
                 # TODO: require right_on or right_index to be specified
                 right_keys.extend(
                     [
-                        ColumnView(name=on, column=True)
+                        _Indexer(name=on, column=True)
                         for on in _coerce_to_tuple(self.right_on)
                     ]
                 )
@@ -245,20 +199,18 @@ def compute_join_keys(self):
                 if self.on is not None
                 else set(self.lhs._data.keys()) & set(self.rhs._data.keys())
             )
-            left_keys = [ColumnView(name=on, column=True) for on in on_names]
-            right_keys = [ColumnView(name=on, column=True) for on in on_names]
+            left_keys = [_Indexer(name=on, column=True) for on in on_names]
+            right_keys = [_Indexer(name=on, column=True) for on in on_names]
 
         if len(left_keys) != len(right_keys):
             raise ValueError(
                 "Merge operands must have same number of join key columns"
             )
 
-        self._keys = JoinKeys(left=left_keys, right=right_keys)
+        self._keys = self.__class__.JoinKeys(left=left_keys, right=right_keys)
 
     def perform_merge(self):
-        lhs, rhs = self.match_key_dtypes(
-            self.lhs, self.rhs, _input_to_libcudf_castrules_any
-        )
+        lhs, rhs = self.match_key_dtypes(self.lhs, self.rhs, _cast_join_keys)
 
         left_key_indices = [
             key.get_numeric_index(lhs) for key in self._keys.left
@@ -376,10 +328,7 @@ def output_column_names(self):
         else:
             key_columns_with_same_name = []
             for lkey, rkey in zip(*self._keys):
-                if (lkey.is_index_level, rkey.is_index_level) == (
-                    False,
-                    False,
-                ):
+                if (lkey.index, rkey.index) == (False, False,):
                     if lkey.name == rkey.name:
                         key_columns_with_same_name.append(lkey.name)
         for name in common_names:
@@ -466,11 +415,9 @@ def match_key_dtypes(self, lhs, rhs, match_func):
         return out_lhs, out_rhs
 
 
-class MergeSemi(MergeBase):
+class MergeSemi(Merge):
     def perform_merge(self):
-        lhs, rhs = self.match_key_dtypes(
-            self.lhs, self.rhs, _input_to_libcudf_castrules_any
-        )
+        lhs, rhs = self.match_key_dtypes(self.lhs, self.rhs, _cast_join_keys)
 
         left_key_indices = [
             key.get_numeric_index(lhs) for key in self._keys.left
@@ -492,14 +439,3 @@ def perform_merge(self):
     def output_column_names(self):
         left_names, _ = super().output_column_names()
         return left_names, {}
-
-
-def _coerce_to_tuple(obj):
-    if hasattr(obj, "__iter__") and not isinstance(obj, str):
-        return tuple(obj)
-    else:
-        return (obj,)
-
-
-def _coerce_to_list(obj):
-    return list(_coerce_to_tuple(obj))

From 07cd05237fade5f13112f46daf1b7b3ec66c6e41 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Sat, 20 Feb 2021 10:23:42 -0500
Subject: [PATCH 053/138] Cleanup

---
 python/cudf/cudf/core/join/_join_helpers.py | 190 ++++++++------------
 python/cudf/cudf/core/join/join.py          | 124 +++++++------
 python/cudf/cudf/tests/test_joining.py      |   5 +-
 3 files changed, 140 insertions(+), 179 deletions(-)

diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 68c3e33ac25..2b4c655f057 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -49,15 +49,69 @@ def get_numeric_index(self, obj):
             return obj.index.names.index(self.name)
 
 
-def _coerce_to_tuple(obj):
-    if hasattr(obj, "__iter__") and not isinstance(obj, str):
-        return tuple(obj)
-    else:
-        return (obj,)
+def _cast_join_keys(lcol, rcol, how):
+    # cast the keys lcol and rcol to a common dtype
 
+    ltype = lcol.dtype
+    rtype = rcol.dtype
 
-def _coerce_to_list(obj):
-    return list(_coerce_to_tuple(obj))
+    # if either side is categorical, different logic
+    if isinstance(ltype, CategoricalDtype) or isinstance(
+        rtype, CategoricalDtype
+    ):
+        return _cast_join_categorical_keys(lcol, rcol, how)
+
+    if pd.api.types.is_dtype_equal(ltype, rtype):
+        return ltype
+
+    if (np.issubdtype(ltype, np.number)) and (np.issubdtype(rtype, np.number)):
+        common_type = (
+            max(ltype, rtype)
+            if ltype.kind == rtype.kind
+            else np.find_common_type([], (ltype, rtype))
+        )
+
+    elif np.issubdtype(ltype, np.datetime64) and np.issubdtype(
+        rtype, np.datetime64
+    ):
+        common_type = max(ltype, rtype)
+
+    if how == "left":
+        if rcol.fillna(0).can_cast_safely(ltype):
+            return ltype
+        else:
+            warnings.warn(
+                f"Can't safely cast column from {rtype} to {ltype}, "
+                "upcasting to {common_type}."
+            )
+
+    if common_type:
+        return common_type
+
+    return None
+
+
+def _cast_join_categorical_keys(lcol, rcol, how):
+    # cast the keys lcol and rcol to a common dtype
+    # when at least one of them is a categorical type
+
+    l_is_cat = isinstance(lcol.dtype, CategoricalDtype)
+    r_is_cat = isinstance(rcol.dtype, CategoricalDtype)
+
+    if l_is_cat and r_is_cat:
+        # if both are categoricals, logic is complicated:
+        return _cast_join_categorical_keys_both(lcol, rcol, how)
+    elif l_is_cat or r_is_cat:
+        if l_is_cat and how in {"left", "leftsemi", "leftanti"}:
+            return lcol.dtype
+        common_type = (
+            lcol.dtype.categories.dtype
+            if l_is_cat
+            else rcol.dtype.categories.dtype
+        )
+        return common_type
+    else:
+        raise ValueError("Neither operand is categorical")
 
 
 def _cast_join_categorical_keys_both(lcol, rcol, how):
@@ -72,7 +126,7 @@ def _cast_join_categorical_keys_both(lcol, rcol, how):
     # when both are ordered and both have the same categories,
     # no casting required:
     if ltype == rtype:
-        return lcol, rcol
+        return ltype
 
     # Merging categorical variables when only one side is ordered is
     # ambiguous and not allowed.
@@ -96,11 +150,11 @@ def _cast_join_categorical_keys_both(lcol, rcol, how):
     assert not ltype.ordered and not rtype.ordered
 
     if how == "inner":
-        # demote to underlying types -- we will promote them back later
+        # cast to category types -- we must cast them back later
         return _cast_join_keys(ltype.categories, rtype.categories, how)
     elif how in {"left", "leftanti", "leftsemi"}:
         # always cast to left type
-        return lcol, rcol.astype(ltype)
+        return ltype
     else:
         # merge categories
         merged_categories = cudf.concat(
@@ -109,117 +163,15 @@ def _cast_join_categorical_keys_both(lcol, rcol, how):
         common_type = cudf.CategoricalDtype(
             categories=merged_categories, ordered=False
         )
-        return lcol.astype(common_type), rcol.astype(common_type)
+        return common_type
 
 
-def _cast_join_categorical_keys(lcol, rcol, how):
-    # cast the keys lcol and rcol to a common dtype
-    # when at least one of them is a categorical type
-
-    l_is_cat = isinstance(lcol.dtype, CategoricalDtype)
-    r_is_cat = isinstance(rcol.dtype, CategoricalDtype)
-
-    if l_is_cat and r_is_cat:
-        # if both are categoricals, logic is complicated:
-        return _cast_join_categorical_keys_both(lcol, rcol, how)
-    elif l_is_cat or r_is_cat:
-        if l_is_cat and how in {"left", "leftsemi", "leftanti"}:
-            return (lcol, rcol.astype(lcol.dtype))
-        common_type = (
-            lcol.dtype.categories.dtype
-            if l_is_cat
-            else rcol.dtype.categories.dtype
-        )
-        return lcol.astype(common_type), rcol.astype(common_type)
+def _coerce_to_tuple(obj):
+    if hasattr(obj, "__iter__") and not isinstance(obj, str):
+        return tuple(obj)
     else:
-        raise ValueError("Neither operand is categorical")
-
-
-def _cast_join_keys(lcol, rcol, how):
-    # cast the keys lcol and rcol to a common dtype
-
-    ltype = lcol.dtype
-    rtype = rcol.dtype
-
-    # if either side is categorical, different logic
-    if isinstance(ltype, CategoricalDtype) or isinstance(
-        rtype, CategoricalDtype
-    ):
-        return _cast_join_categorical_keys(lcol, rcol, how)
-
-    if pd.api.types.is_dtype_equal(ltype, rtype):
-        return lcol, rcol
-
-    if (np.issubdtype(ltype, np.number)) and (np.issubdtype(rtype, np.number)):
-        common_type = (
-            max(ltype, type)
-            if ltype.kind == rtype.kind
-            else np.find_common_type([], (ltype, rtype))
-        )
-
-    elif np.issubdtype(ltype, np.datetime64) and np.issubdtype(
-        rtype, np.datetime64
-    ):
-        common_type = max(ltype, rtype)
-
-    if how == "left":
-        if rcol.fillna(0).can_cast_safely(ltype):
-            return lcol, rcol.astype(ltype)
-        else:
-            warnings.warn(
-                "Can't safely cast column from {rtype} to {ltype}, "
-                "upcasting to {common_type}."
-            )
-
-    if common_type:
-        lcol, rcol = lcol.astype(common_type), rcol.astype(common_type)
-
-    return lcol, rcol
-
-
-def _libcudf_to_output_castrules(lcol, rcol, how):
-    """
-    Determine what dtype an output merge key column should be
-    cast to after it has been processed by libcudf. Determine
-    if a column should be promoted to a categorical datatype.
-    For inner merges between unordered categoricals, we get a
-    new categorical variable containing the intersection of
-    the two source variables. For left or right joins, we get
-    the original categorical variable from whichever was the
-    major operand of the join, e.g. left for a left join or
-    right for a right join. In the case of an outer join, the
-    result will be a new categorical variable with both sets
-    of categories.
-    """
-    merge_return_type = None
-
-    ltype = lcol.dtype
-    rtype = rcol.dtype
-
-    if pd.api.types.is_dtype_equal(ltype, rtype):
-        return ltype
-
-    merge_return_type = _cast_join_keys(lcol, rcol, how)
+        return (obj,)
 
-    l_is_cat = isinstance(ltype, CategoricalDtype)
-    r_is_cat = isinstance(rtype, CategoricalDtype)
 
-    # we currently only need to do this for categorical variables
-    if how == "inner":
-        if l_is_cat and r_is_cat:
-            merge_return_type = "category"
-    elif how in {"left", "leftsemi", "leftanti"}:
-        if l_is_cat:
-            merge_return_type = ltype
-    elif how == "right":
-        if r_is_cat:
-            merge_return_type = rtype
-    elif how == "outer":
-        if l_is_cat and r_is_cat:
-            new_cats = cudf.concat(
-                [ltype.categories, rtype.categories]
-            ).unique()
-            merge_return_type = cudf.CategoricalDtype(
-                categories=new_cats, ordered=ltype.ordered
-            )
-    return merge_return_type
+def _coerce_to_list(obj):
+    return list(_coerce_to_tuple(obj))
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index dcb06d3aee2..41265ba6af5 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -8,7 +8,6 @@
     _coerce_to_list,
     _coerce_to_tuple,
     _Indexer,
-    _libcudf_to_output_castrules,
 )
 
 
@@ -47,6 +46,7 @@ def merge(
         rsuffix=rsuffix,
         method=method,
         indicator=indicator,
+        suffixes=suffixes,
     )
     return mergeobj.perform_merge()
 
@@ -59,18 +59,18 @@ def __init__(
         lhs,
         rhs,
         *,
-        on=None,
-        left_on=None,
-        right_on=None,
-        left_index=False,
-        right_index=False,
-        how="inner",
-        sort=False,
-        lsuffix="_x",
-        rsuffix="_y",
-        method=None,
-        indicator=None,
-        suffixes=None,
+        on,
+        left_on,
+        right_on,
+        left_index,
+        right_index,
+        how,
+        sort,
+        lsuffix,
+        rsuffix,
+        method,
+        indicator,
+        suffixes,
     ):
         """
         Manage the merging of two Frames.
@@ -197,7 +197,7 @@ def compute_join_keys(self):
             on_names = (
                 _coerce_to_tuple(self.on)
                 if self.on is not None
-                else set(self.lhs._data.keys()) & set(self.rhs._data.keys())
+                else set(self.lhs._data) & set(self.rhs._data)
             )
             left_keys = [_Indexer(name=on, column=True) for on in on_names]
             right_keys = [_Indexer(name=on, column=True) for on in on_names]
@@ -225,13 +225,10 @@ def perform_merge(self):
             right_on=right_key_indices,
             how=self.how,
         )
+        lhs, rhs = self.restore_categorical_keys(lhs, rhs)
         return self.construct_result(lhs, rhs, left_rows, right_rows)
 
     def construct_result(self, lhs, rhs, left_rows, right_rows):
-        lhs, rhs = self.match_key_dtypes(
-            self.lhs, self.rhs, _libcudf_to_output_castrules
-        )
-
         # first construct the index.
         if self.left_index and self.right_index:
             if self.how == "right":
@@ -274,53 +271,44 @@ def construct_result(self, lhs, rhs, left_rows, right_rows):
                         ),
                     )
 
-        return self.sort_result(result)
+        if self.sort:
+            result = self.sort_result(result)
+        return result
 
     def sort_result(self, result):
-        # If sort=True, Pandas sorts on the key columns in the
+        # Pandas sorts on the key columns in the
         # same order as given in 'on'. If the indices are used as
         # keys, the index will be sorted. If one index is specified,
         # the key columns on the other side will be used to sort.
-        if self.sort:
-            if self.on:
-                if isinstance(result, cudf.Index):
-                    return result.sort_values()
-                else:
-                    return result.sort_values(
-                        _coerce_to_list(self.on), ignore_index=True
-                    )
-            by = []
-            if self.left_index and self.right_index:
-                by.extend(result.index._data.columns)
-            if self.left_on:
-                by.extend(
-                    [
-                        result._data[col]
-                        for col in _coerce_to_list(self.left_on)
-                    ]
-                )
-            if self.right_on:
-                by.extend(
-                    [
-                        result._data[col]
-                        for col in _coerce_to_list(self.right_on)
-                    ]
+        if self.on:
+            if isinstance(result, cudf.Index):
+                return result.sort_values()
+            else:
+                return result.sort_values(
+                    _coerce_to_list(self.on), ignore_index=True
                 )
-            if by:
-                to_sort = cudf.DataFrame._from_columns(by)
-                sort_order = to_sort.argsort()
-                result = result.take(sort_order)
+        by = []
+        if self.left_index and self.right_index:
+            by.extend(result.index._data.columns)
+        if self.left_on:
+            by.extend(
+                [result._data[col] for col in _coerce_to_list(self.left_on)]
+            )
+        if self.right_on:
+            by.extend(
+                [result._data[col] for col in _coerce_to_list(self.right_on)]
+            )
+        if by:
+            to_sort = cudf.DataFrame._from_columns(by)
+            sort_order = to_sort.argsort()
+            result = result.take(sort_order)
         return result
 
     def output_column_names(self):
         # Return mappings of input column names to (possibly) suffixed
         # result column names
-        left_names = OrderedDict(
-            zip(self.lhs._data.keys(), self.lhs._data.keys())
-        )
-        right_names = OrderedDict(
-            zip(self.rhs._data.keys(), self.rhs._data.keys())
-        )
+        left_names = OrderedDict(zip(self.lhs._data, self.lhs._data))
+        right_names = OrderedDict(zip(self.rhs._data, self.rhs._data))
         common_names = set(left_names) & set(right_names)
 
         if self.on:
@@ -328,7 +316,7 @@ def output_column_names(self):
         else:
             key_columns_with_same_name = []
             for lkey, rkey in zip(*self._keys):
-                if (lkey.index, rkey.index) == (False, False,):
+                if (lkey.index, rkey.index) == (False, False):
                     if lkey.name == rkey.name:
                         key_columns_with_same_name.append(lkey.name)
         for name in common_names:
@@ -375,7 +363,7 @@ def validate_merge_params(
             raise ValueError("Can not merge on unnamed Series")
 
         # If nothing specified, must have common cols to use implicitly
-        same_named_columns = set(lhs._data.keys()) & set(rhs._data.keys())
+        same_named_columns = set(lhs._data) & set(rhs._data)
         if (
             not (left_index or right_index)
             and not (left_on or right_on)
@@ -414,6 +402,30 @@ def match_key_dtypes(self, lhs, rhs, match_func):
                 right_key.set_value(out_rhs, rcol.astype(dtype))
         return out_lhs, out_rhs
 
+    def restore_categorical_keys(self, lhs, rhs):
+        # For inner joins, any categorical keys were casted
+        # to the type of their categories.
+        # Here, we cast the keys back to categorical type
+        # before constructing the result
+
+        out_lhs = lhs.copy(deep=False)
+        out_rhs = rhs.copy(deep=False)
+
+        if self.how == "inner":
+            for left_key, right_key in zip(*self._keys):
+                if isinstance(
+                    left_key.value(self.lhs).dtype, cudf.CategoricalDtype
+                ) and isinstance(
+                    right_key.value(self.rhs).dtype, cudf.CategoricalDtype
+                ):
+                    left_key.set_value(
+                        out_lhs, left_key.value(out_lhs).astype("category")
+                    )
+                    right_key.set_value(
+                        out_rhs, right_key.value(out_rhs).astype("category")
+                    )
+        return out_lhs, out_rhs
+
 
 class MergeSemi(Merge):
     def perform_merge(self):
diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index 367e903d02e..078619afcaf 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -1147,10 +1147,7 @@ def test_typecast_on_join_overflow_unsafe(dtypes):
 
     with pytest.warns(
         UserWarning,
-        match=(
-            f"can't safely cast column"
-            f" from right with type {dtype_r} to {dtype_l}"
-        ),
+        match=(f"Can't safely cast column" f" from {dtype_r} to {dtype_l}"),
     ):
         merged = lhs.merge(rhs, on="a", how="left")  # noqa: F841
 

From bd6bf771fcafd1968fced6fcd6c27251825d3a1d Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Sat, 20 Feb 2021 10:29:45 -0500
Subject: [PATCH 054/138] Warnings

---
 python/cudf/cudf/tests/test_joining.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index 078619afcaf..42c90998e7f 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -850,8 +850,8 @@ def test_join_empty_table_dtype():
     "column_a",
     [
         (
-            pd.Series([None, 1, 2, 3, 4, 5, 6, 7]).astype(np.float),
-            pd.Series([8, 9, 10, 11, 12, None, 14, 15]).astype(np.float),
+            pd.Series([None, 1, 2, 3, 4, 5, 6, 7]).astype(np.float64),
+            pd.Series([8, 9, 10, 11, 12, None, 14, 15]).astype(np.float64),
         )
     ],
 )
@@ -859,8 +859,8 @@ def test_join_empty_table_dtype():
     "column_b",
     [
         (
-            pd.Series([0, 1, 0, None, 1, 0, 0, 0]).astype(np.float),
-            pd.Series([None, 1, 2, 1, 2, 2, 0, 0]).astype(np.float),
+            pd.Series([0, 1, 0, None, 1, 0, 0, 0]).astype(np.float64),
+            pd.Series([None, 1, 2, 1, 2, 2, 0, 0]).astype(np.float64),
         )
     ],
 )

From a40063e70fb5339829833403472b202b5f6255e1 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Feb 2021 10:25:01 -0500
Subject: [PATCH 055/138] Cleanup

---
 python/cudf/cudf/core/join/join.py | 131 +++++++++++++----------------
 1 file changed, 59 insertions(+), 72 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 41265ba6af5..71a7b716775 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -1,5 +1,8 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
+from __future__ import annotations
+
 from collections import OrderedDict, namedtuple
+from typing import TYPE_CHECKING
 
 import cudf
 from cudf import _lib as libcudf
@@ -10,6 +13,9 @@
     _Indexer,
 )
 
+if TYPE_CHECKING:
+    from cudf.core.frame import Frame
+
 
 def merge(
     lhs,
@@ -53,6 +59,7 @@ def merge(
 
 class Merge(object):
     JoinKeys = namedtuple("JoinKeys", ["left", "right"])
+    _joiner = libcudf.join.join
 
     def __init__(
         self,
@@ -218,7 +225,7 @@ def perform_merge(self):
         right_key_indices = [
             key.get_numeric_index(rhs) for key in self._keys.right
         ]
-        left_rows, right_rows = libcudf.join.join(
+        left_rows, right_rows = self._joiner(
             lhs,
             rhs,
             left_on=left_key_indices,
@@ -229,35 +236,60 @@ def perform_merge(self):
         return self.construct_result(lhs, rhs, left_rows, right_rows)
 
     def construct_result(self, lhs, rhs, left_rows, right_rows):
-        # first construct the index.
+        lhs = lhs._gather(left_rows, nullify=True)
+        rhs = rhs._gather(right_rows, nullify=True)
+
+        result = self.merge_results(lhs, rhs)
+
+        if self.sort:
+            result = self.sort_result(result)
+        return result
+
+    def merge_results(self, lhs: Frame, rhs: Frame) -> Frame:
+        lnames = OrderedDict(zip(lhs._data, lhs._data))
+        rnames = OrderedDict(zip(rhs._data, rhs._data))
+        common_names = set(lnames) & set(rnames)
+
+        if self.on:
+            key_columns_with_same_name = self.on
+        else:
+            key_columns_with_same_name = []
+            for lkey, rkey in zip(*self._keys):
+                if (lkey.index, rkey.index) == (False, False):
+                    if lkey.name == rkey.name:
+                        key_columns_with_same_name.append(lkey.name)
+
+        for name in common_names:
+            if name not in key_columns_with_same_name:
+                lnames[name] = f"{name}{self.lsuffix}"
+                rnames[name] = f"{name}{self.rsuffix}"
+            else:
+                del rnames[name]
+
+        # now construct the data:
+        data = cudf.core.column_accessor.ColumnAccessor()
+
+        for lcol in lnames:
+            data[lnames[lcol]] = lhs._data[lcol]
+        for rcol in rnames:
+            data[rnames[rcol]] = rhs._data[rcol]
+
+        # drop the index we won't be using:
         if self.left_index and self.right_index:
             if self.how == "right":
-                out_index = rhs.index._gather(left_rows, nullify=True)
+                index = rhs._index
             else:
-                out_index = lhs.index._gather(left_rows, nullify=True)
+                index = lhs._index
         elif self.left_index:
             # left_index and right_on
-            out_index = rhs.index._gather(right_rows, nullify=True)
+            index = rhs._index
         elif self.right_index:
             # right_index and left_on
-            out_index = lhs.index._gather(left_rows, nullify=True)
+            index = lhs._index
         else:
-            out_index = None
-
-        # now construct the data:
-        data = cudf.core.column_accessor.ColumnAccessor()
-        left_names, right_names = self.output_column_names()
-
-        for lcol in left_names:
-            data[left_names[lcol]] = lhs._data[lcol].take(
-                left_rows, nullify=True
-            )
-        for rcol in right_names:
-            data[right_names[rcol]] = rhs._data[rcol].take(
-                right_rows, nullify=True
-            )
+            index = None
 
-        result = self.out_class._from_data(data, index=out_index)
+        result = self.out_class._from_data(data=data, index=index)
 
         # if outer join, key columns with the same name are combined:
         if self.how == "outer":
@@ -265,14 +297,9 @@ def construct_result(self, lhs, rhs, left_rows, right_rows):
                 if lkey.name == rkey.name:
                     # fill nulls in the key column with values from the RHS
                     lkey.set_value(
-                        result,
-                        lkey.value(result).fillna(
-                            rkey.value(rhs).take(right_rows, nullify=True)
-                        ),
+                        result, lkey.value(result).fillna(rkey.value(rhs)),
                     )
 
-        if self.sort:
-            result = self.sort_result(result)
         return result
 
     def sort_result(self, result):
@@ -304,29 +331,6 @@ def sort_result(self, result):
             result = result.take(sort_order)
         return result
 
-    def output_column_names(self):
-        # Return mappings of input column names to (possibly) suffixed
-        # result column names
-        left_names = OrderedDict(zip(self.lhs._data, self.lhs._data))
-        right_names = OrderedDict(zip(self.rhs._data, self.rhs._data))
-        common_names = set(left_names) & set(right_names)
-
-        if self.on:
-            key_columns_with_same_name = self.on
-        else:
-            key_columns_with_same_name = []
-            for lkey, rkey in zip(*self._keys):
-                if (lkey.index, rkey.index) == (False, False):
-                    if lkey.name == rkey.name:
-                        key_columns_with_same_name.append(lkey.name)
-        for name in common_names:
-            if name not in key_columns_with_same_name:
-                left_names[name] = f"{name}{self.lsuffix}"
-                right_names[name] = f"{name}{self.rsuffix}"
-            else:
-                del right_names[name]
-        return left_names, right_names
-
     @staticmethod
     def validate_merge_params(
         lhs,
@@ -428,26 +432,9 @@ def restore_categorical_keys(self, lhs, rhs):
 
 
 class MergeSemi(Merge):
-    def perform_merge(self):
-        lhs, rhs = self.match_key_dtypes(self.lhs, self.rhs, _cast_join_keys)
-
-        left_key_indices = [
-            key.get_numeric_index(lhs) for key in self._keys.left
-        ]
-        right_key_indices = [
-            key.get_numeric_index(rhs) for key in self._keys.right
-        ]
-        left_rows = libcudf.join.semi_join(
-            lhs,
-            rhs,
-            left_on=left_key_indices,
-            right_on=right_key_indices,
-            how=self.how,
-        )
-        return self.construct_result(
-            lhs, rhs, left_rows, cudf.core.column.as_column([])
-        )
+    def _joiner(self, lhs, rhs, left_on, right_on, how):
+        left_rows = libcudf.join.semi_join(lhs, rhs, left_on, right_on, how)
+        return left_rows, cudf.core.column.as_column([], dtype="int32")
 
-    def output_column_names(self):
-        left_names, _ = super().output_column_names()
-        return left_names, {}
+    def merge_results(self, lhs, rhs):
+        return super().merge_results(lhs, cudf.core.frame.Frame())

From ccef9d09998959d85c99f6afa1d18cef9bd20883 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Feb 2021 11:27:13 -0500
Subject: [PATCH 056/138] Cleanup

---
 python/cudf/cudf/core/join/_join_helpers.py |  12 +--
 python/cudf/cudf/core/join/join.py          | 107 ++++++++++----------
 2 files changed, 59 insertions(+), 60 deletions(-)

diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 2b4c655f057..4d5bc1dd1c0 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -49,7 +49,7 @@ def get_numeric_index(self, obj):
             return obj.index.names.index(self.name)
 
 
-def _cast_join_keys(lcol, rcol, how):
+def _match_join_keys(lcol, rcol, how):
     # cast the keys lcol and rcol to a common dtype
 
     ltype = lcol.dtype
@@ -59,7 +59,7 @@ def _cast_join_keys(lcol, rcol, how):
     if isinstance(ltype, CategoricalDtype) or isinstance(
         rtype, CategoricalDtype
     ):
-        return _cast_join_categorical_keys(lcol, rcol, how)
+        return _match_join_categorical_keys(lcol, rcol, how)
 
     if pd.api.types.is_dtype_equal(ltype, rtype):
         return ltype
@@ -91,7 +91,7 @@ def _cast_join_keys(lcol, rcol, how):
     return None
 
 
-def _cast_join_categorical_keys(lcol, rcol, how):
+def _match_join_categorical_keys(lcol, rcol, how):
     # cast the keys lcol and rcol to a common dtype
     # when at least one of them is a categorical type
 
@@ -100,7 +100,7 @@ def _cast_join_categorical_keys(lcol, rcol, how):
 
     if l_is_cat and r_is_cat:
         # if both are categoricals, logic is complicated:
-        return _cast_join_categorical_keys_both(lcol, rcol, how)
+        return _match_join_categorical_keys_both(lcol, rcol, how)
     elif l_is_cat or r_is_cat:
         if l_is_cat and how in {"left", "leftsemi", "leftanti"}:
             return lcol.dtype
@@ -114,7 +114,7 @@ def _cast_join_categorical_keys(lcol, rcol, how):
         raise ValueError("Neither operand is categorical")
 
 
-def _cast_join_categorical_keys_both(lcol, rcol, how):
+def _match_join_categorical_keys_both(lcol, rcol, how):
     # cast lcol and rcol to a common type when they are *both*
     # categorical types.
     #
@@ -151,7 +151,7 @@ def _cast_join_categorical_keys_both(lcol, rcol, how):
 
     if how == "inner":
         # cast to category types -- we must cast them back later
-        return _cast_join_keys(ltype.categories, rtype.categories, how)
+        return _match_join_keys(ltype.categories, rtype.categories, how)
     elif how in {"left", "leftanti", "leftsemi"}:
         # always cast to left type
         return ltype
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 71a7b716775..d2505016f2c 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -7,10 +7,10 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf.core.join._join_helpers import (
-    _cast_join_keys,
     _coerce_to_list,
     _coerce_to_tuple,
     _Indexer,
+    _match_join_keys,
 )
 
 if TYPE_CHECKING:
@@ -121,7 +121,7 @@ def __init__(
             Left and right suffixes specified together, unpacked into lsuffix
             and rsuffix.
         """
-        self.validate_merge_params(
+        self._validate_merge_params(
             lhs,
             rhs,
             on=on,
@@ -157,9 +157,37 @@ def __init__(
         elif isinstance(self.lhs, cudf.Index):
             self.out_class = self.lhs.__class__
 
-        self.compute_join_keys()
+        self._compute_join_keys()
 
-    def compute_join_keys(self):
+    def perform_merge(self):
+        lhs, rhs = self._match_key_dtypes(self.lhs, self.rhs)
+
+        left_key_indices = [
+            key.get_numeric_index(lhs) for key in self._keys.left
+        ]
+        right_key_indices = [
+            key.get_numeric_index(rhs) for key in self._keys.right
+        ]
+
+        left_rows, right_rows = self._joiner(
+            lhs,
+            rhs,
+            left_on=left_key_indices,
+            right_on=right_key_indices,
+            how=self.how,
+        )
+        lhs, rhs = self._restore_categorical_keys(lhs, rhs)
+
+        left_result = lhs._gather(left_rows, nullify=True)
+        right_result = rhs._gather(right_rows, nullify=True)
+
+        result = self._merge_results(left_result, right_result)
+
+        if self.sort:
+            result = self._sort_result(result)
+        return result
+
+    def _compute_join_keys(self):
         if (
             self.left_index
             or self.right_index
@@ -216,38 +244,11 @@ def compute_join_keys(self):
 
         self._keys = self.__class__.JoinKeys(left=left_keys, right=right_keys)
 
-    def perform_merge(self):
-        lhs, rhs = self.match_key_dtypes(self.lhs, self.rhs, _cast_join_keys)
-
-        left_key_indices = [
-            key.get_numeric_index(lhs) for key in self._keys.left
-        ]
-        right_key_indices = [
-            key.get_numeric_index(rhs) for key in self._keys.right
-        ]
-        left_rows, right_rows = self._joiner(
-            lhs,
-            rhs,
-            left_on=left_key_indices,
-            right_on=right_key_indices,
-            how=self.how,
-        )
-        lhs, rhs = self.restore_categorical_keys(lhs, rhs)
-        return self.construct_result(lhs, rhs, left_rows, right_rows)
-
-    def construct_result(self, lhs, rhs, left_rows, right_rows):
-        lhs = lhs._gather(left_rows, nullify=True)
-        rhs = rhs._gather(right_rows, nullify=True)
-
-        result = self.merge_results(lhs, rhs)
-
-        if self.sort:
-            result = self.sort_result(result)
-        return result
+    def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
+        # merge the left result and right result into a single Frame
 
-    def merge_results(self, lhs: Frame, rhs: Frame) -> Frame:
-        lnames = OrderedDict(zip(lhs._data, lhs._data))
-        rnames = OrderedDict(zip(rhs._data, rhs._data))
+        lnames = OrderedDict(zip(left_result._data, left_result._data))
+        rnames = OrderedDict(zip(right_result._data, right_result._data))
         common_names = set(lnames) & set(rnames)
 
         if self.on:
@@ -270,22 +271,22 @@ def merge_results(self, lhs: Frame, rhs: Frame) -> Frame:
         data = cudf.core.column_accessor.ColumnAccessor()
 
         for lcol in lnames:
-            data[lnames[lcol]] = lhs._data[lcol]
+            data[lnames[lcol]] = left_result._data[lcol]
         for rcol in rnames:
-            data[rnames[rcol]] = rhs._data[rcol]
+            data[rnames[rcol]] = right_result._data[rcol]
 
         # drop the index we won't be using:
         if self.left_index and self.right_index:
             if self.how == "right":
-                index = rhs._index
+                index = right_result._index
             else:
-                index = lhs._index
+                index = left_result._index
         elif self.left_index:
             # left_index and right_on
-            index = rhs._index
+            index = right_result._index
         elif self.right_index:
             # right_index and left_on
-            index = lhs._index
+            index = left_result._index
         else:
             index = None
 
@@ -297,12 +298,13 @@ def merge_results(self, lhs: Frame, rhs: Frame) -> Frame:
                 if lkey.name == rkey.name:
                     # fill nulls in the key column with values from the RHS
                     lkey.set_value(
-                        result, lkey.value(result).fillna(rkey.value(rhs)),
+                        result,
+                        lkey.value(result).fillna(rkey.value(right_result)),
                     )
 
         return result
 
-    def sort_result(self, result):
+    def _sort_result(self, result):
         # Pandas sorts on the key columns in the
         # same order as given in 'on'. If the indices are used as
         # keys, the index will be sorted. If one index is specified,
@@ -332,7 +334,7 @@ def sort_result(self, result):
         return result
 
     @staticmethod
-    def validate_merge_params(
+    def _validate_merge_params(
         lhs,
         rhs,
         on,
@@ -392,25 +394,22 @@ def validate_merge_params(
                         "lsuffix and rsuffix are not defined"
                     )
 
-    def match_key_dtypes(self, lhs, rhs, match_func):
+    def _match_key_dtypes(self, lhs, rhs):
+        # Match the dtypes of the key columns from lhs and rhs
         out_lhs = lhs.copy(deep=False)
         out_rhs = rhs.copy(deep=False)
-        # match the dtypes of the key columns in
-        # self.lhs and self.rhs according to the matching
-        # function `match_func`
         for left_key, right_key in zip(*self._keys):
             lcol, rcol = left_key.value(lhs), right_key.value(rhs)
-            dtype = match_func(lcol, rcol, how=self.how)
+            dtype = _match_join_keys(lcol, rcol, how=self.how)
             if dtype:
                 left_key.set_value(out_lhs, lcol.astype(dtype))
                 right_key.set_value(out_rhs, rcol.astype(dtype))
         return out_lhs, out_rhs
 
-    def restore_categorical_keys(self, lhs, rhs):
+    def _restore_categorical_keys(self, lhs, rhs):
         # For inner joins, any categorical keys were casted
         # to the type of their categories.
-        # Here, we cast the keys back to categorical type
-        # before constructing the result
+        # Here, we cast the keys back to categorical type.
 
         out_lhs = lhs.copy(deep=False)
         out_rhs = rhs.copy(deep=False)
@@ -436,5 +435,5 @@ def _joiner(self, lhs, rhs, left_on, right_on, how):
         left_rows = libcudf.join.semi_join(lhs, rhs, left_on, right_on, how)
         return left_rows, cudf.core.column.as_column([], dtype="int32")
 
-    def merge_results(self, lhs, rhs):
-        return super().merge_results(lhs, cudf.core.frame.Frame())
+    def _merge_results(self, lhs, rhs):
+        return super()._merge_results(lhs, cudf.core.frame.Frame())

From 210244b89ec30a0bb3f86f3a7c079262364cc2c9 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Feb 2021 12:10:07 -0500
Subject: [PATCH 057/138] Cleanup

---
 python/cudf/cudf/_lib/join.pyx              |   4 +-
 python/cudf/cudf/core/join/_join_helpers.py |   8 +-
 python/cudf/cudf/core/join/join.py          | 119 ++++++++++++--------
 3 files changed, 79 insertions(+), 52 deletions(-)

diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 20c14f00957..0339d86384d 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -51,6 +51,8 @@ cpdef join(Table lhs, Table rhs, left_on, right_on, how=None):
 
 
 cpdef semi_join(Table lhs, Table rhs, left_on, right_on, how=None):
+    from cudf.core.column import as_column
+
     # left-semi and left-anti joins
     cdef vector[size_type] c_left_on = left_on
     cdef vector[size_type] c_right_on = right_on
@@ -70,4 +72,4 @@ cpdef semi_join(Table lhs, Table rhs, left_on, right_on, how=None):
         ))
     else:
         raise ValueError(f"Invalid join type {how}")
-    return Column.from_unique_ptr(move(c_result))
+    return Column.from_unique_ptr(move(c_result)), as_column([], dtype="int32")
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 4d5bc1dd1c0..f7a8622c80a 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -18,21 +18,21 @@ class _Indexer:
     # 4  1
     # 5  2
     # 6  3
-    # >>> _Indexer("a", column=True).value(df)  # returns column "a" of df
-    # >>> _Indexer("b", index=True).value(df)  # returns index level "b" of df
+    # >>> _Indexer("a", column=True).get(df)  # returns column "a" of df
+    # >>> _Indexer("b", index=True).get(df)  # returns index level "b" of df
 
     def __init__(self, name, column=False, index=False):
         self.name = name
         self.column, self.index = column, index
 
-    def value(self, obj):
+    def get(self, obj):
         # get the column from `obj`
         if self.column:
             return obj._data[self.name]
         else:
             return obj._index._data[self.name]
 
-    def set_value(self, obj, value):
+    def set(self, obj, value):
         # set the colum in `obj`
         if self.column:
             obj._data[self.name] = value
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index d2505016f2c..0d53184ced5 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -57,8 +57,26 @@ def merge(
     return mergeobj.perform_merge()
 
 
+_JoinKeys = namedtuple("JoinKeys", ["left", "right"])
+
+
 class Merge(object):
-    JoinKeys = namedtuple("JoinKeys", ["left", "right"])
+    # A namedtuple of indexers representing the left and right keys
+    _keys: _JoinKeys
+
+    # The joiner function must have the following signature:
+    #
+    #     def joiner(lhs, rhs, left_on, right_on, how=how):
+    #          ...
+    #
+    # Where:
+    #
+    # - `lhs` and `rhs` represent the left and right Frames to join
+    # - `left_on` and `right_on` represent the *numerical* indices
+    #   of the key columns  of lhs and rhs. This allows specifying
+    #   index levels as keys in an unambiguous way.
+    # - `how` is a string specifying the kind of join to perform
+    #   (useful if the joiner function can perform more than one join).
     _joiner = libcudf.join.join
 
     def __init__(
@@ -148,14 +166,14 @@ def __init__(
         self.rsuffix = rsuffix
         self.suffixes = suffixes
 
-        self.out_class = cudf.DataFrame
+        self._out_class = cudf.DataFrame
 
         if isinstance(self.lhs, cudf.MultiIndex) or isinstance(
             self.rhs, cudf.MultiIndex
         ):
-            self.out_class = cudf.MultiIndex
+            self._out_class = cudf.MultiIndex
         elif isinstance(self.lhs, cudf.Index):
-            self.out_class = self.lhs.__class__
+            self._out_class = self.lhs.__class__
 
         self._compute_join_keys()
 
@@ -188,6 +206,7 @@ def perform_merge(self):
         return result
 
     def _compute_join_keys(self):
+        # Computes self._keys
         if (
             self.left_index
             or self.right_index
@@ -242,14 +261,31 @@ def _compute_join_keys(self):
                 "Merge operands must have same number of join key columns"
             )
 
-        self._keys = self.__class__.JoinKeys(left=left_keys, right=right_keys)
+        self._keys = _JoinKeys(left=left_keys, right=right_keys)
 
     def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
-        # merge the left result and right result into a single Frame
+        # Merge the Frames `left_result` and `right_result` into a single
+        # `Frame`, suffixing column names if necessary.
+
+        # For outer joins, the key columns from left_result and
+        # right_result are combined if they have the same name.
+        # We will drop those keys from right_result later, so
+        # combine them now with keys from left_result.
+        if self.how == "outer":
+            for lkey, rkey in zip(*self._keys):
+                if lkey.name == rkey.name:
+                    # fill nulls in lhs from values in the rhs
+                    lkey.set(
+                        left_result,
+                        lkey.get(left_result).fillna(rkey.get(right_result)),
+                    )
+
+        # `left_names` and `right_names` are mappings of column names
+        # of `lhs` and `rhs` to the corresponding column names in the result
+        left_names = OrderedDict(zip(left_result._data, left_result._data))
+        right_names = OrderedDict(zip(right_result._data, right_result._data))
 
-        lnames = OrderedDict(zip(left_result._data, left_result._data))
-        rnames = OrderedDict(zip(right_result._data, right_result._data))
-        common_names = set(lnames) & set(rnames)
+        common_names = set(left_names) & set(right_names)
 
         if self.on:
             key_columns_with_same_name = self.on
@@ -260,22 +296,25 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
                     if lkey.name == rkey.name:
                         key_columns_with_same_name.append(lkey.name)
 
+        # For any columns with the same name:
+        # - if they are key columns, keep only the left column
+        # - if they are not key columns, use suffixes
         for name in common_names:
             if name not in key_columns_with_same_name:
-                lnames[name] = f"{name}{self.lsuffix}"
-                rnames[name] = f"{name}{self.rsuffix}"
+                left_names[name] = f"{name}{self.lsuffix}"
+                right_names[name] = f"{name}{self.rsuffix}"
             else:
-                del rnames[name]
+                del right_names[name]
 
-        # now construct the data:
+        # Assemble the data columns of the result:
         data = cudf.core.column_accessor.ColumnAccessor()
 
-        for lcol in lnames:
-            data[lnames[lcol]] = left_result._data[lcol]
-        for rcol in rnames:
-            data[rnames[rcol]] = right_result._data[rcol]
+        for lcol in left_names:
+            data[left_names[lcol]] = left_result._data[lcol]
+        for rcol in right_names:
+            data[right_names[rcol]] = right_result._data[rcol]
 
-        # drop the index we won't be using:
+        # Index of the result:
         if self.left_index and self.right_index:
             if self.how == "right":
                 index = right_result._index
@@ -290,17 +329,8 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
         else:
             index = None
 
-        result = self.out_class._from_data(data=data, index=index)
-
-        # if outer join, key columns with the same name are combined:
-        if self.how == "outer":
-            for lkey, rkey in zip(*self._keys):
-                if lkey.name == rkey.name:
-                    # fill nulls in the key column with values from the RHS
-                    lkey.set_value(
-                        result,
-                        lkey.value(result).fillna(rkey.value(right_result)),
-                    )
+        # Construct result from data and index:
+        result = self._out_class._from_data(data=data, index=index)
 
         return result
 
@@ -350,7 +380,6 @@ def _validate_merge_params(
         """
         Error for various invalid combinations of merge input parameters
         """
-
         # must actually support the requested merge type
         if how not in {"left", "inner", "outer", "leftanti", "leftsemi"}:
             raise NotImplementedError(f"{how} merge not supported yet")
@@ -399,41 +428,37 @@ def _match_key_dtypes(self, lhs, rhs):
         out_lhs = lhs.copy(deep=False)
         out_rhs = rhs.copy(deep=False)
         for left_key, right_key in zip(*self._keys):
-            lcol, rcol = left_key.value(lhs), right_key.value(rhs)
+            lcol, rcol = left_key.get(lhs), right_key.get(rhs)
             dtype = _match_join_keys(lcol, rcol, how=self.how)
             if dtype:
-                left_key.set_value(out_lhs, lcol.astype(dtype))
-                right_key.set_value(out_rhs, rcol.astype(dtype))
+                left_key.set(out_lhs, lcol.astype(dtype))
+                right_key.set(out_rhs, rcol.astype(dtype))
         return out_lhs, out_rhs
 
     def _restore_categorical_keys(self, lhs, rhs):
-        # For inner joins, any categorical keys were casted
-        # to the type of their categories.
-        # Here, we cast the keys back to categorical type.
-
+        # For inner joins, any categorical keys in `self.lhs` and `self.rhs`
+        # were casted to their category type to produce `lhs` and `rhs`.
+        # Here, we cast them back.
         out_lhs = lhs.copy(deep=False)
         out_rhs = rhs.copy(deep=False)
-
         if self.how == "inner":
             for left_key, right_key in zip(*self._keys):
                 if isinstance(
-                    left_key.value(self.lhs).dtype, cudf.CategoricalDtype
+                    left_key.get(self.lhs).dtype, cudf.CategoricalDtype
                 ) and isinstance(
-                    right_key.value(self.rhs).dtype, cudf.CategoricalDtype
+                    right_key.get(self.rhs).dtype, cudf.CategoricalDtype
                 ):
-                    left_key.set_value(
-                        out_lhs, left_key.value(out_lhs).astype("category")
+                    left_key.set(
+                        out_lhs, left_key.get(out_lhs).astype("category")
                     )
-                    right_key.set_value(
-                        out_rhs, right_key.value(out_rhs).astype("category")
+                    right_key.set(
+                        out_rhs, right_key.get(out_rhs).astype("category")
                     )
         return out_lhs, out_rhs
 
 
 class MergeSemi(Merge):
-    def _joiner(self, lhs, rhs, left_on, right_on, how):
-        left_rows = libcudf.join.semi_join(lhs, rhs, left_on, right_on, how)
-        return left_rows, cudf.core.column.as_column([], dtype="int32")
+    _joiner = libcudf.join.semi_join
 
     def _merge_results(self, lhs, rhs):
         return super()._merge_results(lhs, cudf.core.frame.Frame())

From b57348c88543a01a2ae618d375874924d7b07897 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Feb 2021 12:45:19 -0500
Subject: [PATCH 058/138] Add typing for join helpers

---
 python/cudf/cudf/core/column/column.py      |  3 +
 python/cudf/cudf/core/index.py              |  4 ++
 python/cudf/cudf/core/join/_join_helpers.py | 75 +++++++++++----------
 3 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 0f99395d919..8e8587b2dee 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -991,6 +991,9 @@ def distinct_count(
             raise NotImplementedError(msg)
         return cpp_distinct_count(self, ignore_nulls=dropna)
 
+    def can_cast_safely(self, to_dtype: Dtype) -> bool:
+        return False
+
     def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
         if is_categorical_dtype(dtype):
             return self.as_categorical_column(dtype, **kwargs)
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 78d43d0275b..cfb0dc4238c 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -12,6 +12,7 @@
 from pandas._config import get_option
 
 import cudf
+from cudf._typing import DtypeObj
 from cudf.core.abc import Serializable
 from cudf.core.column import (
     CategoricalColumn,
@@ -65,6 +66,9 @@ def _to_frame(this_index, index=True, name=None):
 
 
 class Index(Frame, Serializable):
+
+    dtype: DtypeObj
+
     def __new__(
         cls,
         data=None,
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index f7a8622c80a..d4b25951de2 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -1,6 +1,8 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import warnings
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import pandas as pd
@@ -8,6 +10,11 @@
 import cudf
 from cudf.core.dtypes import CategoricalDtype
 
+if TYPE_CHECKING:
+    from cudf._typing import Dtype
+    from cudf.core.column import ColumnBase
+    from cudf.core.frame import Frame
+
 
 class _Indexer:
     # Indexer into a column (either a data column or index level).
@@ -21,35 +28,38 @@ class _Indexer:
     # >>> _Indexer("a", column=True).get(df)  # returns column "a" of df
     # >>> _Indexer("b", index=True).get(df)  # returns index level "b" of df
 
-    def __init__(self, name, column=False, index=False):
+    def __init__(self, name: Any, column=False, index=False):
         self.name = name
         self.column, self.index = column, index
 
-    def get(self, obj):
+    def get(self, obj: Frame) -> ColumnBase:
         # get the column from `obj`
         if self.column:
             return obj._data[self.name]
-        else:
+        if obj._index is not None:
             return obj._index._data[self.name]
+        raise KeyError()
 
-    def set(self, obj, value):
+    def set(self, obj: Frame, value: ColumnBase):
         # set the colum in `obj`
         if self.column:
             obj._data[self.name] = value
-        else:
+        if obj._index is not None:
             obj._index._data[self.name] = value
+        raise KeyError()
 
-    def get_numeric_index(self, obj):
+    def get_numeric_index(self, obj: Frame) -> int:
         # get the position of the column in `obj`
         # (counting any index columns)
         if self.column:
-            index_nlevels = obj.index.nlevels if obj._index is not None else 0
+            index_nlevels = obj._index.nlevels if obj._index is not None else 0
             return index_nlevels + tuple(obj._data).index(self.name)
-        else:
-            return obj.index.names.index(self.name)
+        if obj._index is not None:
+            return obj._index.names.index(self.name)
+        raise KeyError()
 
 
-def _match_join_keys(lcol, rcol, how):
+def _match_join_keys(lcol: ColumnBase, rcol: ColumnBase, how: str) -> Dtype:
     # cast the keys lcol and rcol to a common dtype
 
     ltype = lcol.dtype
@@ -59,7 +69,7 @@ def _match_join_keys(lcol, rcol, how):
     if isinstance(ltype, CategoricalDtype) or isinstance(
         rtype, CategoricalDtype
     ):
-        return _match_join_categorical_keys(lcol, rcol, how)
+        return _match_categorical_dtypes(ltype, rtype, how)
 
     if pd.api.types.is_dtype_equal(ltype, rtype):
         return ltype
@@ -91,38 +101,31 @@ def _match_join_keys(lcol, rcol, how):
     return None
 
 
-def _match_join_categorical_keys(lcol, rcol, how):
+def _match_categorical_dtypes(ltype: Dtype, rtype: Dtype, how: str) -> Dtype:
     # cast the keys lcol and rcol to a common dtype
     # when at least one of them is a categorical type
 
-    l_is_cat = isinstance(lcol.dtype, CategoricalDtype)
-    r_is_cat = isinstance(rcol.dtype, CategoricalDtype)
-
-    if l_is_cat and r_is_cat:
+    if isinstance(ltype, CategoricalDtype) and isinstance(
+        rtype, CategoricalDtype
+    ):
         # if both are categoricals, logic is complicated:
-        return _match_join_categorical_keys_both(lcol, rcol, how)
-    elif l_is_cat or r_is_cat:
-        if l_is_cat and how in {"left", "leftsemi", "leftanti"}:
-            return lcol.dtype
-        common_type = (
-            lcol.dtype.categories.dtype
-            if l_is_cat
-            else rcol.dtype.categories.dtype
-        )
-        return common_type
-    else:
-        raise ValueError("Neither operand is categorical")
+        return _match_categorical_dtypes_both(ltype, rtype, how)
+
+    if isinstance(ltype, CategoricalDtype):
+        if how in {"left", "leftsemi", "leftanti"}:
+            return ltype
+        common_type = ltype.categories.dtype
+    elif isinstance(rtype, CategoricalDtype):
+        common_type = rtype.categories.dtype
+    return common_type
 
 
-def _match_join_categorical_keys_both(lcol, rcol, how):
-    # cast lcol and rcol to a common type when they are *both*
-    # categorical types.
-    #
+def _match_categorical_dtypes_both(
+    ltype: CategoricalDtype, rtype: CategoricalDtype, how: str
+) -> Dtype:
     # The commontype depends on both `how` and the specifics of the
     # categorical variables to be merged.
 
-    ltype, rtype = lcol.dtype, rcol.dtype
-
     # when both are ordered and both have the same categories,
     # no casting required:
     if ltype == rtype:
@@ -151,7 +154,9 @@ def _match_join_categorical_keys_both(lcol, rcol, how):
 
     if how == "inner":
         # cast to category types -- we must cast them back later
-        return _match_join_keys(ltype.categories, rtype.categories, how)
+        return _match_join_keys(
+            ltype.categories._values, rtype.categories._values, how
+        )
     elif how in {"left", "leftanti", "leftsemi"}:
         # always cast to left type
         return ltype

From 5c2c9b368d713653b1d25fad198cc2bf65e98c0a Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Feb 2021 13:52:18 -0500
Subject: [PATCH 059/138] Typing for Join class

---
 python/cudf/cudf/core/column/column.py      |  4 +++-
 python/cudf/cudf/core/column/numerical.py   |  4 +++-
 python/cudf/cudf/core/frame.py              | 17 ++++++++++----
 python/cudf/cudf/core/join/_join_helpers.py | 20 +++++++++-------
 python/cudf/cudf/core/join/join.py          | 26 +++++++++++----------
 5 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 8e8587b2dee..65e2a142992 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -979,7 +979,9 @@ def sort_by_values(
         ascending: bool = True,
         na_position: builtins.str = "last",
     ) -> Tuple[ColumnBase, "cudf.core.column.NumericalColumn"]:
-        col_inds = self.as_frame()._get_sorted_inds(ascending, na_position)
+        col_inds = self.as_frame()._get_sorted_inds(
+            ascending=ascending, na_position=na_position
+        )
         col_keys = self.take(col_inds)
         return col_keys, col_inds
 
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 0a8d93c913b..3add003efc1 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -333,7 +333,9 @@ def _numeric_quantile(
     ) -> NumericalColumn:
         quant = [float(q)] if not isinstance(q, (Sequence, np.ndarray)) else q
         # get sorted indices and exclude nulls
-        sorted_indices = self.as_frame()._get_sorted_inds(True, "first")
+        sorted_indices = self.as_frame()._get_sorted_inds(
+            ascending=True, na_position="first"
+        )
         sorted_indices = sorted_indices[self.null_count :]
 
         return cpp_quantile(self, quant, interpolation, sorted_indices, exact)
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 1dfb65ace38..ce554a6c3b9 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -569,7 +569,7 @@ def equals(self, other, **kwargs):
         else:
             return self._index.equals(other._index)
 
-    def _get_columns_by_label(self, labels, downcast):
+    def _get_columns_by_label(self, labels, downcast=False):
         """
         Returns columns of the Frame specified by `labels`
 
@@ -2720,12 +2720,15 @@ def searchsorted(
         else:
             return result
 
-    def _get_sorted_inds(self, ascending=True, na_position="last"):
+    def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
         """
         Sort by the values.
 
         Parameters
         ----------
+        by: list, optional
+            Labels specifyin columns to sort by. By default,
+            sort by all columns of `self`
         ascending : bool or list of bool, default True
             If True, sort values in ascending order, otherwise descending.
         na_position : {‘first’ or ‘last’}, default ‘last’
@@ -2760,11 +2763,17 @@ def _get_sorted_inds(self, ascending=True, na_position="last"):
             )
             na_position = 0
 
+        to_sort = (
+            self
+            if by is None
+            else self._get_columns_by_label(by, downcast=False)
+        )
+
         # If given a scalar need to construct a sequence of length # of columns
         if np.isscalar(ascending):
-            ascending = [ascending] * self._num_columns
+            ascending = [ascending] * to_sort._num_columns
 
-        return libcudf.sort.order_by(self, ascending, na_position)
+        return libcudf.sort.order_by(to_sort, ascending, na_position)
 
     def sin(self):
         """
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index d4b25951de2..a499d3d33de 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -36,17 +36,20 @@ def get(self, obj: Frame) -> ColumnBase:
         # get the column from `obj`
         if self.column:
             return obj._data[self.name]
-        if obj._index is not None:
-            return obj._index._data[self.name]
+        else:
+            if obj._index is not None:
+                return obj._index._data[self.name]
         raise KeyError()
 
     def set(self, obj: Frame, value: ColumnBase):
         # set the colum in `obj`
         if self.column:
             obj._data[self.name] = value
-        if obj._index is not None:
-            obj._index._data[self.name] = value
-        raise KeyError()
+        else:
+            if obj._index is not None:
+                obj._index._data[self.name] = value
+            else:
+                raise KeyError()
 
     def get_numeric_index(self, obj: Frame) -> int:
         # get the position of the column in `obj`
@@ -54,9 +57,10 @@ def get_numeric_index(self, obj: Frame) -> int:
         if self.column:
             index_nlevels = obj._index.nlevels if obj._index is not None else 0
             return index_nlevels + tuple(obj._data).index(self.name)
-        if obj._index is not None:
-            return obj._index.names.index(self.name)
-        raise KeyError()
+        else:
+            if obj._index is not None:
+                return obj._index.names.index(self.name)
+            raise KeyError()
 
 
 def _match_join_keys(lcol: ColumnBase, rcol: ColumnBase, how: str) -> Dtype:
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 0d53184ced5..61fe6bfc082 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -2,7 +2,7 @@
 from __future__ import annotations
 
 from collections import OrderedDict, namedtuple
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Tuple
 
 import cudf
 from cudf import _lib as libcudf
@@ -177,7 +177,7 @@ def __init__(
 
         self._compute_join_keys()
 
-    def perform_merge(self):
+    def perform_merge(self) -> Frame:
         lhs, rhs = self._match_key_dtypes(self.lhs, self.rhs)
 
         left_key_indices = [
@@ -334,21 +334,21 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
 
         return result
 
-    def _sort_result(self, result):
+    def _sort_result(self, result: Frame) -> Frame:
         # Pandas sorts on the key columns in the
         # same order as given in 'on'. If the indices are used as
         # keys, the index will be sorted. If one index is specified,
         # the key columns on the other side will be used to sort.
         if self.on:
             if isinstance(result, cudf.Index):
-                return result.sort_values()
+                sort_order = result._get_sorted_inds()
             else:
-                return result.sort_values(
-                    _coerce_to_list(self.on), ignore_index=True
-                )
+                sort_order = result._get_sorted_inds(_coerce_to_list(self.on))
+            return result._gather(sort_order, keep_index=False)
         by = []
         if self.left_index and self.right_index:
-            by.extend(result.index._data.columns)
+            if result._index is not None:
+                by.extend(result._index._data.columns)
         if self.left_on:
             by.extend(
                 [result._data[col] for col in _coerce_to_list(self.left_on)]
@@ -360,7 +360,7 @@ def _sort_result(self, result):
         if by:
             to_sort = cudf.DataFrame._from_columns(by)
             sort_order = to_sort.argsort()
-            result = result.take(sort_order)
+            result = result._gather(sort_order)
         return result
 
     @staticmethod
@@ -423,7 +423,7 @@ def _validate_merge_params(
                         "lsuffix and rsuffix are not defined"
                     )
 
-    def _match_key_dtypes(self, lhs, rhs):
+    def _match_key_dtypes(self, lhs: Frame, rhs: Frame) -> Tuple[Frame, Frame]:
         # Match the dtypes of the key columns from lhs and rhs
         out_lhs = lhs.copy(deep=False)
         out_rhs = rhs.copy(deep=False)
@@ -435,7 +435,9 @@ def _match_key_dtypes(self, lhs, rhs):
                 right_key.set(out_rhs, rcol.astype(dtype))
         return out_lhs, out_rhs
 
-    def _restore_categorical_keys(self, lhs, rhs):
+    def _restore_categorical_keys(
+        self, lhs: Frame, rhs: Frame
+    ) -> Tuple[Frame, Frame]:
         # For inner joins, any categorical keys in `self.lhs` and `self.rhs`
         # were casted to their category type to produce `lhs` and `rhs`.
         # Here, we cast them back.
@@ -460,5 +462,5 @@ def _restore_categorical_keys(self, lhs, rhs):
 class MergeSemi(Merge):
     _joiner = libcudf.join.semi_join
 
-    def _merge_results(self, lhs, rhs):
+    def _merge_results(self, lhs: Frame, rhs: Frame) -> Frame:
         return super()._merge_results(lhs, cudf.core.frame.Frame())

From 558aa15b7a3eba4a302e5648b9057c9c39e4455f Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Feb 2021 15:41:59 -0500
Subject: [PATCH 060/138] Simplify joiner API

---
 python/cudf/cudf/_lib/join.pyx              | 16 +++----
 python/cudf/cudf/core/join/_join_helpers.py | 28 +++++++----
 python/cudf/cudf/core/join/join.py          | 52 +++++++++------------
 3 files changed, 46 insertions(+), 50 deletions(-)

diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 0339d86384d..f1677e3f856 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -19,13 +19,11 @@ from cudf._lib.cpp.table.table_view cimport table_view
 cimport cudf._lib.cpp.join as cpp_join
 
 
-cpdef join(Table lhs, Table rhs, left_on, right_on, how=None):
+cpdef join(Table lhs, Table rhs, how=None):
     # left, inner and outer join
-    cdef vector[size_type] c_left_on = left_on
-    cdef vector[size_type] c_right_on = right_on
     cdef pair[unique_ptr[column], unique_ptr[column]] c_result
-    cdef table_view c_lhs = lhs.view().select(c_left_on)
-    cdef table_view c_rhs = rhs.view().select(c_right_on)
+    cdef table_view c_lhs = lhs.view()
+    cdef table_view c_rhs = rhs.view()
 
     if how == "inner":
         c_result = move(cpp_join.inner_join(
@@ -50,15 +48,13 @@ cpdef join(Table lhs, Table rhs, left_on, right_on, how=None):
     )
 
 
-cpdef semi_join(Table lhs, Table rhs, left_on, right_on, how=None):
+cpdef semi_join(Table lhs, Table rhs, how=None):
     from cudf.core.column import as_column
 
     # left-semi and left-anti joins
-    cdef vector[size_type] c_left_on = left_on
-    cdef vector[size_type] c_right_on = right_on
     cdef unique_ptr[column] c_result
-    cdef table_view c_lhs = lhs.view().select(c_left_on)
-    cdef table_view c_rhs = rhs.view().select(c_right_on)
+    cdef table_view c_lhs = lhs.view()
+    cdef table_view c_rhs = rhs.view()
 
     if how == "leftsemi":
         c_result = move(cpp_join.left_semi_join(
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index a499d3d33de..02a1a49564a 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -2,7 +2,7 @@
 from __future__ import annotations
 
 import warnings
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Iterable
 
 import numpy as np
 import pandas as pd
@@ -51,16 +51,24 @@ def set(self, obj: Frame, value: ColumnBase):
             else:
                 raise KeyError()
 
-    def get_numeric_index(self, obj: Frame) -> int:
-        # get the position of the column in `obj`
-        # (counting any index columns)
-        if self.column:
-            index_nlevels = obj._index.nlevels if obj._index is not None else 0
-            return index_nlevels + tuple(obj._data).index(self.name)
+
+def _frame_select_by_indexers(
+    frame: Frame, indexers: Iterable[_Indexer]
+) -> Frame:
+    # Select columns from the given `Frame` using `indexers`,
+    # and return a new `Frame`.
+    index_data = frame._data.__class__()
+    data = frame._data.__class__()
+
+    for idx in indexers:
+        if idx.index:
+            index_data[idx.name] = idx.get(frame)
         else:
-            if obj._index is not None:
-                return obj._index.names.index(self.name)
-            raise KeyError()
+            data[idx.name] = idx.get(frame)
+
+    result_index = cudf.Index._from_data(index_data) if index_data else None
+    result = cudf.core.frame.Frame(data=data, index=result_index)
+    return result
 
 
 def _match_join_keys(lcol: ColumnBase, rcol: ColumnBase, how: str) -> Dtype:
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 61fe6bfc082..1377ecf5df8 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -2,13 +2,14 @@
 from __future__ import annotations
 
 from collections import OrderedDict, namedtuple
-from typing import TYPE_CHECKING, Tuple
+from typing import TYPE_CHECKING, Callable, Tuple
 
 import cudf
 from cudf import _lib as libcudf
 from cudf.core.join._join_helpers import (
     _coerce_to_list,
     _coerce_to_tuple,
+    _frame_select_by_indexers,
     _Indexer,
     _match_join_keys,
 )
@@ -66,18 +67,17 @@ class Merge(object):
 
     # The joiner function must have the following signature:
     #
-    #     def joiner(lhs, rhs, left_on, right_on, how=how):
+    #     def joiner(lhs, rhs, how=how):
     #          ...
     #
-    # Where:
+    # where:
     #
-    # - `lhs` and `rhs` represent the left and right Frames to join
-    # - `left_on` and `right_on` represent the *numerical* indices
-    #   of the key columns  of lhs and rhs. This allows specifying
-    #   index levels as keys in an unambiguous way.
+    # - `lhs` and `rhs` are Frames composed of the left and right join keys
     # - `how` is a string specifying the kind of join to perform
-    #   (useful if the joiner function can perform more than one join).
-    _joiner = libcudf.join.join
+    #
+    # ...and it returns a tuple of two gather maps representing the rows
+    # to gather from the left- and right- side tables respectively.
+    _joiner: Callable = libcudf.join.join
 
     def __init__(
         self,
@@ -166,33 +166,28 @@ def __init__(
         self.rsuffix = rsuffix
         self.suffixes = suffixes
 
-        self._out_class = cudf.DataFrame
+        self._compute_join_keys()
+
+    @property
+    def _out_class(self):
+        out_class = cudf.DataFrame
 
         if isinstance(self.lhs, cudf.MultiIndex) or isinstance(
             self.rhs, cudf.MultiIndex
         ):
-            self._out_class = cudf.MultiIndex
+            out_class = cudf.MultiIndex
         elif isinstance(self.lhs, cudf.Index):
-            self._out_class = self.lhs.__class__
-
-        self._compute_join_keys()
+            out_class = self.lhs.__class__
+        return out_class
 
     def perform_merge(self) -> Frame:
         lhs, rhs = self._match_key_dtypes(self.lhs, self.rhs)
 
-        left_key_indices = [
-            key.get_numeric_index(lhs) for key in self._keys.left
-        ]
-        right_key_indices = [
-            key.get_numeric_index(rhs) for key in self._keys.right
-        ]
+        left_table = _frame_select_by_indexers(lhs, self._keys.left)
+        right_table = _frame_select_by_indexers(rhs, self._keys.right)
 
         left_rows, right_rows = self._joiner(
-            lhs,
-            rhs,
-            left_on=left_key_indices,
-            right_on=right_key_indices,
-            how=self.how,
+            left_table, right_table, how=self.how,
         )
         lhs, rhs = self._restore_categorical_keys(lhs, rhs)
 
@@ -307,7 +302,7 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
                 del right_names[name]
 
         # Assemble the data columns of the result:
-        data = cudf.core.column_accessor.ColumnAccessor()
+        data = left_result._data.__class__()
 
         for lcol in left_names:
             data[left_names[lcol]] = left_result._data[lcol]
@@ -316,10 +311,7 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
 
         # Index of the result:
         if self.left_index and self.right_index:
-            if self.how == "right":
-                index = right_result._index
-            else:
-                index = left_result._index
+            index = left_result._index
         elif self.left_index:
             # left_index and right_on
             index = right_result._index

From 31848962927b335d4db855baba0528e77f82906b Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Feb 2021 16:07:30 -0500
Subject: [PATCH 061/138] Example doc

---
 cpp/include/cudf/join.hpp | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 2707c60fa34..fcbaf0da795 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -32,10 +32,33 @@ namespace cudf {
  */
 
 /**
- * @brief Performs  an inner join on the specified columns of two
- * tables (`left`, `right`), and returns the row indices corresponding
- * to the result.
- */ // TODO: explain this better
+ * @brief Returns the row indices to use when constructing
+ * the result of performing an inner join between two tables.
+ *
+ * @code{.pseudo}
+ *     Left: {{0, 1, 2}}
+ *     Right: {{1, 2, 3}}
+ *     Result: {{1, 2}, {0, 1}}
+ *
+ *     Left: {{0, 1, 2}, {3, 4, 5}}
+ *     Right: {{1, 2, 3}, {4, 6, 7}}
+ *     Result: {{1}, {0}}
+ *
+ * @throw cudf::logic_error if number of elements in `left_on` or `right_on`
+ * mismatch.
+ * @throw cudf::logic_error if number of columns in either `left` or `right`
+ * table is 0 or exceeds MAX_JOIN_SIZE
+ *
+ * @param[in] left A table representing the keys of the left table of the join
+ * @param[in] right A table representing  the keys of the right table of the join
+ * @param[in] compare_nulls controls whether null join-key values
+ * should match or not.
+ * @param mr Device memory resource used to allocate the returned table and columns' device memory
+ *
+ * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
+ * the result of performing an inner join between two tables with `left_keys` and `right_keys`
+ * as the join keys .
+ */
 std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,

From d3535dcfaa897a674192946185a50bf4072c3d12 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 25 Feb 2021 16:08:36 -0500
Subject: [PATCH 062/138] Refactor join APIs to return a device_uvector

---
 cpp/include/cudf/join.hpp  |  16 ++---
 cpp/src/join/hash_join.cu  |  45 ++++--------
 cpp/src/join/hash_join.cuh |   8 +--
 cpp/src/join/join.cu       | 138 ++++++++++++++++---------------------
 cpp/src/join/semi_join.cu  |  44 +++++-------
 5 files changed, 103 insertions(+), 148 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index fcbaf0da795..b3988beaef6 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -59,7 +59,7 @@ namespace cudf {
  * the result of performing an inner join between two tables with `left_keys` and `right_keys`
  * as the join keys .
  */
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
@@ -135,7 +135,7 @@ std::unique_ptr<cudf::table> inner_join(
  * tables (`left`, `right`), and returns the row indices corresponding
  * to the result.
  */ // TODO: explain this better
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
@@ -213,7 +213,7 @@ std::unique_ptr<cudf::table> left_join(
  * tables (`left`, `right`), and returns the row indices corresponding
  * to the result.
  */ // TODO: explain this better
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
@@ -342,7 +342,7 @@ std::unique_ptr<cudf::table> left_semi_join(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** TODO: document */
-std::unique_ptr<cudf::column> left_semi_join(
+rmm::device_uvector<size_type> left_semi_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
@@ -404,7 +404,7 @@ std::unique_ptr<cudf::table> left_anti_join(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** TODO: document */
-std::unique_ptr<cudf::column> left_anti_join(
+rmm::device_uvector<size_type> left_anti_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
@@ -473,7 +473,7 @@ class hash_join {
    * tables (`left`, `right`), and returns the row indices corresponding
    * to the result.
    */ // TODO: explain this better
-  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
     cudf::table_view const& probe,
     null_equality compare_nulls         = null_equality::EQUAL,
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
@@ -484,7 +484,7 @@ class hash_join {
    * tables (`left`, `right`), and returns the row indices corresponding
    * to the result.
    */ // TODO: explain this better
-  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
     cudf::table_view const& probe,
     null_equality compare_nulls         = null_equality::EQUAL,
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
@@ -495,7 +495,7 @@ class hash_join {
    * tables (`left`, `right`), and returns the row indices corresponding
    * to the result.
    */ // TODO: explain this better
-  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
     cudf::table_view const& probe,
     null_equality compare_nulls         = null_equality::EQUAL,
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index d8fe8870001..cb0e5bc4901 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <thrust/uninitialized_fill.h>
 #include <join/hash_join.cuh>
 
 #include <cudf/detail/concatenate.cuh>
@@ -105,7 +106,12 @@ get_left_join_indices_complement(rmm::device_uvector<size_type> &right_indices,
                      0);
   } else {
     // Assume all the indices in invalid_index_map are invalid
-    rmm::device_vector<size_type> invalid_index_map(right_table_row_count, 1);
+    rmm::device_uvector<size_type> invalid_index_map(right_table_row_count, stream);
+    thrust::uninitialized_fill(thrust::cuda::par.on(stream.value()),
+                               invalid_index_map.begin(),
+                               invalid_index_map.end(),
+                               int32_t{1});
+
     // Functor to check for index validity since left joins can create invalid indices
     valid_range<size_type> valid(0, right_table_row_count);
 
@@ -295,7 +301,7 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const &build,
   _hash_table = build_join_hash_table(_build, compare_nulls, stream);
 }
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
                                       null_equality compare_nulls,
                                       rmm::cuda_stream_view stream,
@@ -305,7 +311,7 @@ hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
   return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(probe, compare_nulls, stream, mr);
 }
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
                                      null_equality compare_nulls,
                                      rmm::cuda_stream_view stream,
@@ -315,7 +321,7 @@ hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
   return compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(probe, compare_nulls, stream, mr);
 }
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
                                      null_equality compare_nulls,
                                      rmm::cuda_stream_view stream,
@@ -327,10 +333,10 @@ hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
 
 template <cudf::detail::join_kind JoinKind>
 std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
-hash_join::hash_join_impl::compute_hash_join_indices(cudf::table_view const &probe,
-                                                     null_equality compare_nulls,
-                                                     rmm::cuda_stream_view stream,
-                                                     rmm::mr::device_memory_resource *mr) const
+hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
+                                             null_equality compare_nulls,
+                                             rmm::cuda_stream_view stream,
+                                             rmm::mr::device_memory_resource *mr) const
 {
   CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty");
   CUDF_EXPECTS(probe.num_rows() < cudf::detail::MAX_JOIN_SIZE,
@@ -353,29 +359,6 @@ hash_join::hash_join_impl::compute_hash_join_indices(cudf::table_view const &pro
   return probe_join_indices<JoinKind>(probe, compare_nulls, stream);
 }
 
-template <cudf::detail::join_kind JoinKind>
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>
-hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
-                                             null_equality compare_nulls,
-                                             rmm::cuda_stream_view stream,
-                                             rmm::mr::device_memory_resource *mr) const
-{
-  auto join_indices = compute_hash_join_indices<JoinKind>(probe, compare_nulls, stream, mr);
-  auto join_size    = join_indices.first.size();
-  auto left_map     = std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
-                                                 join_size,
-                                                 join_indices.first.release(),
-                                                 rmm::device_buffer{},
-                                                 0);
-  auto right_map    = std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
-                                                  join_size,
-                                                  join_indices.second.release(),
-                                                  rmm::device_buffer{},
-                                                  0);
-  return std::make_pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>(
-    std::move(left_map), std::move(right_map));
-}
-
 template <cudf::detail::join_kind JoinKind>
 std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
 hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index d547d5190c4..cebf8fd612e 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -230,19 +230,19 @@ struct hash_join::hash_join_impl {
                  null_equality compare_nulls,
                  rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
-  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
     cudf::table_view const& probe,
     null_equality compare_nulls,
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
-  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
     cudf::table_view const& probe,
     null_equality compare_nulls,
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const;
 
-  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
     cudf::table_view const& probe,
     null_equality compare_nulls,
     rmm::cuda_stream_view stream,
@@ -257,7 +257,7 @@ struct hash_join::hash_join_impl {
                             rmm::mr::device_memory_resource* mr) const;
 
   template <cudf::detail::join_kind JoinKind>
-  std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> compute_hash_join(
+  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> compute_hash_join(
     cudf::table_view const& probe,
     null_equality compare_nulls,
     rmm::cuda_stream_view stream,
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 15b1f216928..a9f4b507efc 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -26,7 +26,7 @@
 namespace cudf {
 namespace detail {
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
   table_view const& left_input,
   table_view const& right_input,
   null_equality compare_nulls,
@@ -50,7 +50,7 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_jo
   if (right.num_rows() > left.num_rows()) {
     cudf::hash_join hj_obj(left, compare_nulls, stream);
     auto result = hj_obj.inner_join(right, compare_nulls, stream, mr);
-    return std::make_pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>>(
+    return std::make_pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>(
       std::move(result.second), std::move(result.first));
   } else {
     cudf::hash_join hj_obj(right, compare_nulls, stream);
@@ -82,48 +82,40 @@ std::unique_ptr<table> inner_join(table_view const& left_input,
   // build the hash map from the smaller table.
   if (right.num_rows() > left.num_rows()) {
     cudf::hash_join hj_obj(left.select(left_on), compare_nulls, stream);
-    auto join_indices      = hj_obj.inner_join(right.select(right_on), compare_nulls, stream, mr);
-    auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
-      join_indices.first->view(), join_indices.second->view());
-    std::unique_ptr<table> left_result =
-      detail::gather(left,
-                     join_indices_view.second.template begin<cudf::size_type>(),
-                     join_indices_view.second.template end<cudf::size_type>(),
-                     out_of_bounds_policy::DONT_CHECK,
-                     stream,
-                     mr);
-    std::unique_ptr<table> right_result =
-      detail::gather(right,
-                     join_indices_view.first.template begin<cudf::size_type>(),
-                     join_indices_view.first.template end<cudf::size_type>(),
-                     out_of_bounds_policy::DONT_CHECK,
-                     stream,
-                     mr);
+    auto join_indices = hj_obj.inner_join(right.select(right_on), compare_nulls, stream, mr);
+    std::unique_ptr<table> left_result  = detail::gather(left,
+                                                        join_indices.second.begin(),
+                                                        join_indices.second.end(),
+                                                        out_of_bounds_policy::DONT_CHECK,
+                                                        stream,
+                                                        mr);
+    std::unique_ptr<table> right_result = detail::gather(right,
+                                                         join_indices.first.begin(),
+                                                         join_indices.first.end(),
+                                                         out_of_bounds_policy::DONT_CHECK,
+                                                         stream,
+                                                         mr);
     return combine_table_pair(std::move(left_result), std::move(right_result));
   } else {
     cudf::hash_join hj_obj(right.select(right_on), compare_nulls, stream);
-    auto join_indices      = hj_obj.inner_join(left.select(left_on), compare_nulls, stream, mr);
-    auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
-      join_indices.first->view(), join_indices.second->view());
-    std::unique_ptr<table> left_result =
-      detail::gather(left,
-                     join_indices_view.first.template begin<cudf::size_type>(),
-                     join_indices_view.first.template end<cudf::size_type>(),
-                     out_of_bounds_policy::DONT_CHECK,
-                     stream,
-                     mr);
-    std::unique_ptr<table> right_result =
-      detail::gather(right,
-                     join_indices_view.second.template begin<cudf::size_type>(),
-                     join_indices_view.second.template end<cudf::size_type>(),
-                     out_of_bounds_policy::DONT_CHECK,
-                     stream,
-                     mr);
+    auto join_indices = hj_obj.inner_join(left.select(left_on), compare_nulls, stream, mr);
+    std::unique_ptr<table> left_result  = detail::gather(left,
+                                                        join_indices.first.begin(),
+                                                        join_indices.first.end(),
+                                                        out_of_bounds_policy::DONT_CHECK,
+                                                        stream,
+                                                        mr);
+    std::unique_ptr<table> right_result = detail::gather(right,
+                                                         join_indices.second.begin(),
+                                                         join_indices.second.end(),
+                                                         out_of_bounds_policy::DONT_CHECK,
+                                                         stream,
+                                                         mr);
     return combine_table_pair(std::move(left_result), std::move(right_result));
   }
 }
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
   table_view const& left_input,
   table_view const& right_input,
   null_equality compare_nulls,
@@ -171,27 +163,22 @@ std::unique_ptr<table> left_join(table_view const& left_input,
     return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
                                             std::move(probe_build_pair.second));
   }
-
-  auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
-    join_indices.first->view(), join_indices.second->view());
-  std::unique_ptr<table> left_result =
-    detail::gather(left,
-                   join_indices_view.first.template begin<cudf::size_type>(),
-                   join_indices_view.first.template end<cudf::size_type>(),
-                   out_of_bounds_policy::NULLIFY,
-                   stream,
-                   mr);
-  std::unique_ptr<table> right_result =
-    detail::gather(right,
-                   join_indices_view.second.template begin<cudf::size_type>(),
-                   join_indices_view.second.template end<cudf::size_type>(),
-                   out_of_bounds_policy::NULLIFY,
-                   stream,
-                   mr);
+  std::unique_ptr<table> left_result  = detail::gather(left,
+                                                      join_indices.first.begin(),
+                                                      join_indices.first.end(),
+                                                      out_of_bounds_policy::NULLIFY,
+                                                      stream,
+                                                      mr);
+  std::unique_ptr<table> right_result = detail::gather(right,
+                                                       join_indices.second.begin(),
+                                                       join_indices.second.end(),
+                                                       out_of_bounds_policy::NULLIFY,
+                                                       stream,
+                                                       mr);
   return combine_table_pair(std::move(left_result), std::move(right_result));
 }
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
   table_view const& left_input,
   table_view const& right_input,
   null_equality compare_nulls,
@@ -239,23 +226,18 @@ std::unique_ptr<table> full_join(table_view const& left_input,
     return cudf::detail::combine_table_pair(std::move(probe_build_pair.first),
                                             std::move(probe_build_pair.second));
   }
-
-  auto join_indices_view = std::make_pair<cudf::column_view, cudf::column_view>(
-    join_indices.first->view(), join_indices.second->view());
-  std::unique_ptr<table> left_result =
-    detail::gather(left,
-                   join_indices_view.first.template begin<cudf::size_type>(),
-                   join_indices_view.first.template end<cudf::size_type>(),
-                   out_of_bounds_policy::NULLIFY,
-                   stream,
-                   mr);
-  std::unique_ptr<table> right_result =
-    detail::gather(right,
-                   join_indices_view.second.template begin<cudf::size_type>(),
-                   join_indices_view.second.template end<cudf::size_type>(),
-                   out_of_bounds_policy::NULLIFY,
-                   stream,
-                   mr);
+  std::unique_ptr<table> left_result  = detail::gather(left,
+                                                      join_indices.first.begin(),
+                                                      join_indices.first.end(),
+                                                      out_of_bounds_policy::NULLIFY,
+                                                      stream,
+                                                      mr);
+  std::unique_ptr<table> right_result = detail::gather(right,
+                                                       join_indices.second.begin(),
+                                                       join_indices.second.end(),
+                                                       out_of_bounds_policy::NULLIFY,
+                                                       stream,
+                                                       mr);
   return combine_table_pair(std::move(left_result), std::move(right_result));
 }
 
@@ -270,7 +252,7 @@ hash_join::hash_join(cudf::table_view const& build,
 {
 }
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_join::inner_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::inner_join(
   cudf::table_view const& probe,
   null_equality compare_nulls,
   rmm::cuda_stream_view stream,
@@ -279,7 +261,7 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_joi
   return impl->inner_join(probe, compare_nulls, stream, mr);
 }
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_join::left_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::left_join(
   cudf::table_view const& probe,
   null_equality compare_nulls,
   rmm::cuda_stream_view stream,
@@ -288,7 +270,7 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_joi
   return impl->left_join(probe, compare_nulls, stream, mr);
 }
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_join::full_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::full_join(
   cudf::table_view const& probe,
   null_equality compare_nulls,
   rmm::cuda_stream_view stream,
@@ -299,7 +281,7 @@ std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> hash_joi
 
 // external APIs
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> inner_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
   table_view const& left,
   table_view const& right,
   null_equality compare_nulls,
@@ -321,7 +303,7 @@ std::unique_ptr<table> inner_join(table_view const& left,
     left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> left_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
   table_view const& left,
   table_view const& right,
   null_equality compare_nulls,
@@ -343,7 +325,7 @@ std::unique_ptr<table> left_join(table_view const& left,
     left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-std::pair<std::unique_ptr<cudf::column>, std::unique_ptr<cudf::column>> full_join(
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
   table_view const& left,
   table_view const& right,
   null_equality compare_nulls,
diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index e5966dd01e4..f1b797aa9f5 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -38,7 +38,7 @@ namespace cudf {
 namespace detail {
 
 template <join_kind JoinKind>
-std::unique_ptr<cudf::column> left_semi_anti_join(
+rmm::device_uvector<cudf::size_type> left_semi_anti_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
   null_equality compare_nulls,
@@ -49,18 +49,12 @@ std::unique_ptr<cudf::column> left_semi_anti_join(
   CUDF_EXPECTS(0 != right_keys.num_columns(), "Right table is empty");
 
   if (is_trivial_join(left_keys, right_keys, JoinKind)) {
-    return std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
-                                          0,
-                                          rmm::device_buffer{},
-                                          rmm::device_buffer{},
-                                          0);
+    return rmm::device_uvector<cudf::size_type>(0, stream);
   }
   if ((join_kind::LEFT_ANTI_JOIN == JoinKind) && (0 == right_keys.num_rows())) {
-    using ScalarType = cudf::scalar_type_t<cudf::size_type>;
-    auto zero = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32), stream.value());
-    zero->set_valid(true, stream);
-    static_cast<ScalarType*>(zero.get())->set_value(0, stream);
-    return cudf::detail::sequence(left_keys.num_rows(), *zero, stream);
+    auto result = rmm::device_uvector<cudf::size_type>(left_keys.num_rows(), stream);
+    thrust::sequence(thrust::cuda::par.on(stream.value()), result.begin(), result.end());
+    return std::move(result);
   }
 
   auto const left_num_rows  = left_keys.num_rows();
@@ -103,7 +97,7 @@ std::unique_ptr<cudf::column> left_semi_anti_join(
   // For semi join we want contains to be true, for anti join we want contains to be false
   bool join_type_boolean = (JoinKind == join_kind::LEFT_SEMI_JOIN);
 
-  rmm::device_uvector<size_type> gather_map(left_num_rows, stream);
+  rmm::device_uvector<cudf::size_type> gather_map(left_num_rows, stream);
 
   // gather_map_end will be the end of valid data in gather_map
   auto gather_map_end = thrust::copy_if(
@@ -117,11 +111,7 @@ std::unique_ptr<cudf::column> left_semi_anti_join(
     });
 
   auto join_size = thrust::distance(gather_map.begin(), gather_map_end);
-  return std::make_unique<cudf::column>(cudf::data_type(type_to_id<cudf::size_type>()),
-                                        join_size,
-                                        gather_map.release(),
-                                        rmm::device_buffer{},
-                                        0);
+  return std::move(gather_map);
 }
 
 /**
@@ -195,8 +185,8 @@ std::unique_ptr<cudf::table> left_semi_anti_join(
 
   auto const left_updated = scatter_columns(left_selected, left_on, left);
   return cudf::detail::gather(left_updated,
-                              gather_map->view().template begin<cudf::size_type>(),
-                              gather_map->view().template end<cudf::size_type>(),
+                              gather_map.begin(),
+                              gather_map.end(),
                               out_of_bounds_policy::DONT_CHECK,
                               stream,
                               mr);
@@ -216,10 +206,10 @@ std::unique_ptr<cudf::table> left_semi_join(cudf::table_view const& left,
     left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-std::unique_ptr<cudf::column> left_semi_join(cudf::table_view const& left,
-                                             cudf::table_view const& right,
-                                             null_equality compare_nulls,
-                                             rmm::mr::device_memory_resource* mr)
+rmm::device_uvector<cudf::size_type> left_semi_join(cudf::table_view const& left,
+                                                    cudf::table_view const& right,
+                                                    null_equality compare_nulls,
+                                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join<detail::join_kind::LEFT_SEMI_JOIN>(
@@ -238,10 +228,10 @@ std::unique_ptr<cudf::table> left_anti_join(cudf::table_view const& left,
     left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-std::unique_ptr<cudf::column> left_anti_join(cudf::table_view const& left,
-                                             cudf::table_view const& right,
-                                             null_equality compare_nulls,
-                                             rmm::mr::device_memory_resource* mr)
+rmm::device_uvector<cudf::size_type> left_anti_join(cudf::table_view const& left,
+                                                    cudf::table_view const& right,
+                                                    null_equality compare_nulls,
+                                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join<detail::join_kind::LEFT_ANTI_JOIN>(

From b82181d4a35a7d46b27939ae525feb45101dec24 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 3 Mar 2021 11:22:00 -0500
Subject: [PATCH 063/138] docs

---
 cpp/include/cudf/join.hpp | 269 ++++++++++++++++++++++----------------
 1 file changed, 158 insertions(+), 111 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index b3988beaef6..b57a9ca079e 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -32,7 +32,7 @@ namespace cudf {
  */
 
 /**
- * @brief Returns the row indices to use when constructing
+ * @brief Returns the row indices that can be used to construct
  * the result of performing an inner join between two tables.
  *
  * @code{.pseudo}
@@ -44,10 +44,8 @@ namespace cudf {
  *     Right: {{1, 2, 3}, {4, 6, 7}}
  *     Result: {{1}, {0}}
  *
- * @throw cudf::logic_error if number of elements in `left_on` or `right_on`
+ * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
  * mismatch.
- * @throw cudf::logic_error if number of columns in either `left` or `right`
- * table is 0 or exceeds MAX_JOIN_SIZE
  *
  * @param[in] left A table representing the keys of the left table of the join
  * @param[in] right A table representing  the keys of the right table of the join
@@ -73,26 +71,13 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_
  * in the columns being joined on match.
  *
  * @code{.pseudo}
- *          Left a: {0, 1, 2}
- *          Right b: {1, 2, 3}, a: {1, 2, 5}
+ *          Left: {{0, 1, 2}}
+ *          Right: {{1, 2, 3}, {1, 2, 5}}
  *          left_on: {0}
  *          right_on: {1}
- *          columns_in_common: { {0, 1} }
- * Result: { a: {1, 2}, b: {1, 2} }
- *
- *          Left a: {0, 1, 2}
- *          Right b: {1, 2, 3}, c: {1, 2, 5}
- *          left_on: {0}
- *          right_on: {0}
- *          columns_in_common: { }
- * Result: { a: {1, 2}, b: {1, 2}, c: {1, 2} }
+ * Result: {{1, 2}, {1, 2}, {1, 2}}
  * @endcode
  *
- * @throw cudf::logic_error if `columns_in_common` contains a pair of indices
- * (L, R) if L does not exist in `left_on` or R does not exist in `right_on`.
- * @throw cudf::logic_error if `columns_in_common` contains a pair of indices
- * (L, R) such that the location of `L` within `left_on` is not equal to
- * location of R within `right_on`
  * @throw cudf::logic_error if number of elements in `left_on` or `right_on`
  * mismatch.
  * @throw cudf::logic_error if number of columns in either `left` or `right`
@@ -108,19 +93,12 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_
  * @param[in] right_on The column indices from `right` to join on.
  * The column from `right` indicated by `right_on[i]` will be compared against the column
  * from `left` indicated by `left_on[i]`.
- * @param[in] columns_in_common is a vector of pairs of column indices into
- * `left` and `right`, respectively, that are "in common". For "common"
- * columns, only a single output column will be produced, which is gathered
- * from `left_on` columns. Else, for every column in `left_on` and `right_on`,
- * an output column will be produced.  For each of these pairs (L, R), L
- * should exist in `left_on` and R should exist in `right_on`.
  * @param[in] compare_nulls controls whether null join-key values
  * should match or not.
  * @param mr Device memory resource used to allocate the returned table and columns' device memory
  *
  * @return Result of joining `left` and `right` tables on the columns
- * specified by `left_on` and `right_on`. The resulting table will be joined columns of
- * `left(including common columns)+right(excluding common columns)`.
+ * specified by `left_on` and `right_on`.
  */
 std::unique_ptr<cudf::table> inner_join(
   cudf::table_view const& left,
@@ -130,11 +108,35 @@ std::unique_ptr<cudf::table> inner_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+
 /**
- * @brief Performs a left join on the specified columns of two
- * tables (`left`, `right`), and returns the row indices corresponding
- * to the result.
- */ // TODO: explain this better
+ * @brief Returns the row indices that can be used to construct
+ * the result of performing a left join between two tables.
+ * For rows in the right table that do not have a match in the
+ * left table, the row index is an unspecified out-of-bounds value.
+ *
+ * @code{.pseudo}
+ *     Left: {{0, 1, 2}}
+ *     Right: {{1, 2, 3}}
+ *     Result: {{0, 1, 2}, {None, 0, 1}}
+ *
+ *     Left: {{0, 1, 2}, {3, 4, 5}}
+ *     Right: {{1, 2, 3}, {4, 6, 7}}
+ *     Result: {{0, 1, 2}, {None, 0, None}}
+ *
+ * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
+ * mismatch.
+ *
+ * @param[in] left A table representing the keys of the left table of the join
+ * @param[in] right A table representing  the keys of the right table of the join
+ * @param[in] compare_nulls controls whether null join-key values
+ * should match or not.
+ * @param mr Device memory resource used to allocate the returned table and columns' device memory
+ *
+ * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
+ * the result of performing a left join between two tables with `left_keys` and `right_keys`
+ * as the join keys .
+ */
 std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
@@ -145,32 +147,25 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_j
  * @brief Performs a left join (also known as left outer join) on the
  * specified columns of two tables (`left`, `right`)
  *
- * Left Join returns all the rows from the left table and those rows from the
+ * Left join returns all the rows from the left table and those rows from the
  * right table that match on the joined columns.
  * For rows from the right table that do not have a match, the corresponding
  * values in the left columns will be null.
  *
  * @code{.pseudo}
- *          Left a: {0, 1, 2}
- *          Right b: {1, 2, 3}, a: {1 ,2 ,5}
+ *          Left: {{0, 1, 2}}
+ *          Right: {{1, 2, 3}, {1, 2 ,5}}
  *          left_on: {0}
  *          right_on: {1}
- *          columns_in_common: { {0, 1} }
- * Result: { a: {0, 1, 2}, b: {NULL, 1, 2} }
+ * Result: { {0, 1, 2}, {NULL, 1, 2}, {NULL, 1, 2} }
  *
- *          Left a: {0, 1, 2}
- *          Right b: {1, 2, 3}, c: {1, 2, 5}
+ *          Left: {{0, 1, 2}}
+ *          Right {{1, 2, 3}, {1, 2, 5}}
  *          left_on: {0}
  *          right_on: {0}
- *          columns_in_common: { }
- * Result: { a: {0, 1, 2}, b: {NULL, 1, 2}, c: {NULL, 1, 2} }
+ * Result: { {0, 1, 2}, {NULL, 1, 2}, {NULL, 1, 2} }
  * @endcode
  *
- * @throw cudf::logic_error if `columns_in_common` contains a pair of indices
- * (L, R) if L does not exist in `left_on` or R does not exist in `right_on`.
- * @throw cudf::logic_error if `columns_in_common` contains a pair of indices
- * (L, R) such that the location of `L` within `left_on` is not equal to
- * location of R within `right_on`
  * @throw cudf::logic_error if number of elements in `left_on` or `right_on`
  * mismatch.
  * @throw cudf::logic_error if number of columns in either `left` or `right`
@@ -186,19 +181,12 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_j
  * @param[in] right_on The column indices from `right` to join on.
  * The column from `right` indicated by `right_on[i]` will be compared against the column
  * from `left` indicated by `left_on[i]`.
- * @param[in] columns_in_common is a vector of pairs of column indices into
- * `left` and `right`, respectively, that are "in common". For "common"
- * columns, only a single output column will be produced, which is gathered
- * from `left_on` columns. Else, for every column in `left_on` and `right_on`,
- * an output column will be produced.  For each of these pairs (L, R), L
- * should exist in `left_on` and R should exist in `right_on`.
  * @param[in] compare_nulls controls whether null join-key values
  * should match or not.
  * @param mr Device memory resource used to allocate the returned table and columns' device memory
  *
  * @return Result of joining `left` and `right` tables on the columns
- * specified by `left_on` and `right_on`. The resulting table will be joined columns of
- * `left(including common columns)+right(excluding common columns)`.
+ * specified by `left_on` and `right_on`.
  */
 std::unique_ptr<cudf::table> left_join(
   cudf::table_view const& left,
@@ -208,11 +196,33 @@ std::unique_ptr<cudf::table> left_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+
 /**
- * @brief Performs a left join on the specified columns of two
- * tables (`left`, `right`), and returns the row indices corresponding
- * to the result.
- */ // TODO: explain this better
+ * @brief Returns the row indices that can be used to construct
+ * the result of performing a full join between two tables.
+ *
+ * @code{.pseudo}
+ *     Left: {{0, 1, 2}}
+ *     Right: {{1, 2, 3}}
+ *     Result: {{0, 1, 2, None}, {None, 0, 1, 2}}
+ *
+ *     Left: {{0, 1, 2}, {3, 4, 5}}
+ *     Right: {{1, 2, 3}, {4, 6, 7}}
+ *     Result: {{0, 1, 2, None, None}, {None, 0, None, 1, 2}}
+ *
+ * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
+ * mismatch.
+ *
+ * @param[in] left A table representing the keys of the left table of the join
+ * @param[in] right A table representing  the keys of the right table of the join
+ * @param[in] compare_nulls controls whether null join-key values
+ * should match or not.
+ * @param mr Device memory resource used to allocate the returned table and columns' device memory
+ *
+ * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
+ * the result of performing a full join between two tables with `left_keys` and `right_keys`
+ * as the join keys .
+ */
 std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
@@ -229,26 +239,19 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_j
  * values in the left columns will be null.
  *
  * @code{.pseudo}
- *          Left a: {0, 1, 2}
- *          Right b: {1, 2, 3}, c: {1, 2, 5}
+ *          Left: {{0, 1, 2}}
+ *          Right: {{1, 2, 3}, {1, 2, 5}}
  *          left_on: {0}
  *          right_on: {1}
- *          columns_in_common: { {0, 1} }
- * Result: { a: {0, 1, 2, NULL}, b: {NULL, 1, 2, 3}, c: {NULL, 1, 2, 5} }
+ * Result: { {0, 1, 2, NULL}, {NULL, 1, 2, 3}, {NULL, 1, 2, 5} }
  *
- *          Left a: {0, 1, 2}
- *          Right b: {1, 2, 3}, c: {1, 2, 5}
+ *          Left: {{0, 1, 2}}
+ *          Right: {{1, 2, 3}, {1, 2, 5}}
  *          left_on: {0}
  *          right_on: {0}
- *          columns_in_common: { }
- * Result: { a: {0, 1, 2, NULL}, b: {NULL, 1, 2, 3}, c: {NULL, 1, 2, 5} }
+ * Result: { {0, 1, 2, NULL}, {NULL, 1, 2, 3}, {NULL, 1, 2, 5} }
  * @endcode
  *
- * @throw cudf::logic_error if `columns_in_common` contains a pair of indices
- * (L, R) if L does not exist in `left_on` or R does not exist in `right_on`.
- * @throw cudf::logic_error if `columns_in_common` contains a pair of indices
- * (L, R) such that the location of `L` within `left_on` is not equal to
- * location of R within `right_on`
  * @throw cudf::logic_error if number of elements in `left_on` or `right_on`
  * mismatch.
  * @throw cudf::logic_error if number of columns in either `left` or `right`
@@ -264,19 +267,12 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_j
  * @param[in] right_on The column indices from `right` to join on.
  * The column from `right` indicated by `right_on[i]` will be compared against the column
  * from `left` indicated by `left_on[i]`.
- * @param[in] columns_in_common is a vector of pairs of column indices into
- * `left` and `right`, respectively, that are "in common". For "common"
- * columns, only a single output column will be produced, which is gathered
- * from `left_on` columns. Else, for every column in `left_on` and `right_on`,
- * an output column will be produced.  For each of these pairs (L, R), L
- * should exist in `left_on` and R should exist in `right_on`.
  * @param[in] compare_nulls controls whether null join-key values
  * should match or not.
  * @param mr Device memory resource used to allocate the returned table and columns' device memory
  *
  * @return Result of joining `left` and `right` tables on the columns
- * specified by `left_on` and `right_on`. The resulting table will be joined columns of
- * `left(including common columns)+right(excluding common columns)`.
+ * specified by `left_on` and `right_on`.
  */
 std::unique_ptr<cudf::table> full_join(
   cudf::table_view const& left,
@@ -294,24 +290,20 @@ std::unique_ptr<cudf::table> full_join(
  * returns rows that exist in the right table.
  *
  * @code{.pseudo}
- *          TableA a: {0, 1, 2}
- *          TableB b: {1, 2, 3}, a: {1, 2, 5}
+ *          TableA: {{0, 1, 2}}
+ *          TableB: {{1, 2, 3}, {1, 2, 5}}
  *          left_on: {0}
  *          right_on: {1}
- *          return_columns: { 0 }
- * Result: { a: {1, 2} }
+ * Result: { {1, 2} }
  *
- *          TableA a: {0, 1, 2}, c: {1, 2, 5}
- *          TableB b: {1, 2, 3}
+ *          TableA {{0, 1, 2}, {1, 2, 5}}
+ *          TableB {{1, 2, 3}}
  *          left_on: {0}
  *          right_on: {0}
- *          return_columns: { 1 }
- * Result: { c: {1, 2} }
+ * Result: { {1, 2}, {2, 5} }
  * @endcode
  *
- * @throw cudf::logic_error if the number of columns in either `left` or `right` table is 0
- * @throw cudf::logic_error if the number of returned columns is 0
- * @throw cudf::logic_error if the number of elements in `left_on` and `right_on` are not equal
+ * @throw cudf::logic_error if the number of columns in either `left_keys` or `right_keys` is 0
  *
  * @param[in] left             The left table
  * @param[in] right            The right table
@@ -323,15 +315,12 @@ std::unique_ptr<cudf::table> full_join(
  *                             The column from `right` indicated by `right_on[i]`
  *                             will be compared against the column from `left`
  *                             indicated by `left_on[i]`.
- * @param[in] return_columns   A vector of column indices from `left` to
- *                             include in the returned table.
  * @param[in] compare_nulls    Controls whether null join-key values should match or not.
  * @param[in] mr               Device memory resource used to allocate the returned table's
  *                             device memory
  *
  * @return                     Result of joining `left` and `right` tables on the columns
- *                             specified by `left_on` and `right_on`. The resulting table
- *                             will contain `return_columns` from `left` that match in right.
+ *                             specified by `left_on` and `right_on`.
  */
 std::unique_ptr<cudf::table> left_semi_join(
   cudf::table_view const& left,
@@ -341,7 +330,40 @@ std::unique_ptr<cudf::table> left_semi_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-/** TODO: document */
+
+/**
+ * @brief Returns the row indices that can be used to construct
+ * the result of performing a left semi join between two tables.
+ *
+ * @code{.pseudo}
+ *          TableA: {{0, 1, 2}}
+ *          TableB: {{1, 2, 3}, {1, 2, 5}}
+ *          left_on: {0}
+ *          right_on: {1}
+ * Result: {1, 2}
+ *
+ *          TableA {{0, 1, 2}, {1, 2, 5}}
+ *          TableB {{1, 2, 3}}
+ *          left_on: {0}
+ *          right_on: {0}
+ * Result: {1, 2}
+ * @endcode
+ *
+ * @throw cudf::logic_error if number of elements in `left_on` or `right_on`
+ * mismatch.
+ * @throw cudf::logic_error if number of columns in either `left` or `right`
+ * table is 0 or exceeds MAX_JOIN_SIZE
+ *
+ * @param[in] left A table representing the keys of the left table of the join
+ * @param[in] right A table representing  the keys of the right table of the join
+ * @param[in] compare_nulls controls whether null join-key values
+ * should match or not.
+ * @param mr Device memory resource used to allocate the returned table and columns' device memory
+ *
+ * @return A column `left_indices` that can be used to construct
+ * the result of performing a left semi join between two tables with
+ * `left_keys` and `right_keys` as the join keys .
+ */
 rmm::device_uvector<size_type> left_semi_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
@@ -356,24 +378,23 @@ rmm::device_uvector<size_type> left_semi_join(
  * returns rows that do not exist in the right table.
  *
  * @code{.pseudo}
- *          TableA a: {0, 1, 2}
- *          TableB b: {1, 2, 3}, a: {1, 2, 5}
+ *          TableA: {{0, 1, 2}}
+ *          TableB: {{1, 2, 3},  {1, 2, 5}}
  *          left_on: {0}
  *          right_on: {1}
- *          return_columns: { 0 }
- * Result: { a: {0} }
+ * Result: {{0}, {1}}
  *
- *          TableA a: {0, 1, 2}, c: {1, 2, 5}
- *          TableB b: {1, 2, 3}
+ *          TableA: {{0, 1, 2}, {1, 2, 5}}
+ *          TableB: {{1, 2, 3}}
  *          left_on: {0}
  *          right_on: {0}
- *          return_columns: { 1 }
- * Result: { c: {1} }
+ * Result: { {0} {1} }
  * @endcode
  *
- * @throw cudf::logic_error if the number of columns in either `left` or `right` table is 0
- * @throw cudf::logic_error if the number of returned columns is 0
- * @throw cudf::logic_error if the number of elements in `left_on` and `right_on` are not equal
+ * @throw cudf::logic_error if number of elements in `left_on` or `right_on`
+ * mismatch.
+ * @throw cudf::logic_error if number of columns in either `left` or `right`
+ * table is 0 or exceeds MAX_JOIN_SIZE
  *
  * @param[in] left             The left table
  * @param[in] right            The right table
@@ -385,15 +406,12 @@ rmm::device_uvector<size_type> left_semi_join(
  *                             The column from `right` indicated by `right_on[i]`
  *                             will be compared against the column from `left`
  *                             indicated by `left_on[i]`.
- * @param[in] return_columns   A vector of column indices from `left` to
- *                             include in the returned table.
  * @param[in] compare_nulls    Controls whether null join-key values should match or not.
  * @param[in] mr               Device memory resource used to allocate the returned table's
  *                             device memory
  *
  * @return                     Result of joining `left` and `right` tables on the columns
- *                             specified by `left_on` and `right_on`. The resulting table
- *                             will contain `return_columns` from `left` that match in right.
+ *                             specified by `left_on` and `right_on`.
  */
 std::unique_ptr<cudf::table> left_anti_join(
   cudf::table_view const& left,
@@ -403,7 +421,36 @@ std::unique_ptr<cudf::table> left_anti_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-/** TODO: document */
+/**
+ * @brief Returns the row indices that can be used to construct
+ * the result of performing a left anti join between two tables.
+ *
+ * @code{.pseudo}
+ *          TableA: {{0, 1, 2}}
+ *          TableB: {{1, 2, 3},  {1, 2, 5}}
+ *          left_on: {0}
+ *          right_on: {1}
+ * Result: {0}
+ *
+ *          TableA: {{0, 1, 2}, {1, 2, 5}}
+ *          TableB: {{1, 2, 3}}
+ *          left_on: {0}
+ *          right_on: {0}
+ * Result: {0}
+ * @endcode
+ *
+ * @throw cudf::logic_error if the number of columns in either `left_keys` or `right_keys` is 0
+ *
+ * @param[in] left A table representing the keys of the left table of the join
+ * @param[in] right A table representing  the keys of the right table of the join
+ * @param[in] compare_nulls controls whether null join-key values
+ * should match or not.
+ * @param mr Device memory resource used to allocate the returned table and columns' device memory
+ *
+ * @return A column `left_indices` that can be used to construct
+ * the result of performing a left anti join between two tables with
+ * `left_keys` and `right_keys` as the join keys .
+ */
 rmm::device_uvector<size_type> left_anti_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,

From 77d2bfdf1d5ecc62db76b42c883ebe42f566b3e4 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 3 Mar 2021 11:29:47 -0500
Subject: [PATCH 064/138] Finish up docs?

---
 cpp/include/cudf/join.hpp | 53 +++++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 13 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index b57a9ca079e..2f6981fb81c 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -507,7 +507,6 @@ class hash_join {
    * undefined.
    *
    * @param build The build table, from which the hash table is built.
-   * @param build_on The column indices from `build` to join on.
    * @param compare_nulls Controls whether null join-key values should match or not.
    * @param stream CUDA stream used for device memory operations and kernel launches
    */
@@ -516,10 +515,19 @@ class hash_join {
             rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
   /**
-   * @brief Performs  an inner join on the specified columns of two
-   * tables (`left`, `right`), and returns the row indices corresponding
-   * to the result.
-   */ // TODO: explain this better
+   * Returns the row indices that can be used to construct the result of performing
+   * an inner join between two tables. @see cudf::inner_join().
+   *
+   * @param probe The probe table, from which the tuples are probed.
+   * @param compare_nulls Controls whether null join-key values should match or not.
+   * @param mr Device memory resource used to allocate the returned table and columns' device
+   * memory.
+   * @param stream CUDA stream used for device memory operations and kernel launches
+   *
+   * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
+   * the result of performing an inner join between two tables with `build` and `probe`
+   * as the the join keys .
+   */
   std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
     cudf::table_view const& probe,
     null_equality compare_nulls         = null_equality::EQUAL,
@@ -527,21 +535,40 @@ class hash_join {
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
   /**
-   * @brief Performs a left join on the specified columns of two
-   * tables (`left`, `right`), and returns the row indices corresponding
-   * to the result.
-   */ // TODO: explain this better
+   * Returns the row indices that can be used to construct the result of performing
+   * a left join between two tables. @see cudf::left_join().
+   *
+   * @param probe The probe table, from which the tuples are probed.
+   * @param compare_nulls Controls whether null join-key values should match or not.
+   * @param mr Device memory resource used to allocate the returned table and columns' device
+   * memory.
+   * @param stream CUDA stream used for device memory operations and kernel launches
+   *
+   * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
+   * the result of performing a left join between two tables with `build` and `probe`
+   * as the the join keys .
+   */
   std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
     cudf::table_view const& probe,
     null_equality compare_nulls         = null_equality::EQUAL,
     rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
+
   /**
-   * @brief Performs a full join on the specified columns of two
-   * tables (`left`, `right`), and returns the row indices corresponding
-   * to the result.
-   */ // TODO: explain this better
+   * Returns the row indices that can be used to construct the result of performing
+   * a full join between two tables. @see cudf::full_join().
+   *
+   * @param probe The probe table, from which the tuples are probed.
+   * @param compare_nulls Controls whether null join-key values should match or not.
+   * @param mr Device memory resource used to allocate the returned table and columns' device
+   * memory.
+   * @param stream CUDA stream used for device memory operations and kernel launches
+   *
+   * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
+   * the result of performing a full join between two tables with `build` and `probe`
+   * as the the join keys .
+   */
   std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
     cudf::table_view const& probe,
     null_equality compare_nulls         = null_equality::EQUAL,

From 26a3fb0b32d27e0051955d13af42b4dd548c773c Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 4 Mar 2021 15:01:58 -0500
Subject: [PATCH 065/138] Fix join tests

---
 cpp/tests/join/join_tests.cpp | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index fbde179d33d..1b910bfd0cc 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -1072,8 +1072,15 @@ TEST_F(JoinTest, HashJoinSequentialProbes)
 
     Table t0(std::move(cols0));
 
-    auto result            = hash_join.full_join(t0);
-    auto result_table      = cudf::table_view({result.first->view(), result.second->view()});
+    auto result = hash_join.full_join(t0);
+
+    auto result_table =
+      cudf::table_view({cudf::column_view{cudf::data_type{cudf::type_id::INT32},
+                                          static_cast<cudf::size_type>(result.first.size()),
+                                          result.first.data()},
+                        cudf::column_view{cudf::data_type{cudf::type_id::INT32},
+                                          static_cast<cudf::size_type>(result.second.size()),
+                                          result.second.data()}});
     auto result_sort_order = cudf::sorted_order(result_table);
     auto sorted_result     = cudf::gather(result_table, *result_sort_order);
 
@@ -1098,8 +1105,14 @@ TEST_F(JoinTest, HashJoinSequentialProbes)
 
     Table t0(std::move(cols0));
 
-    auto result            = hash_join.left_join(t0);
-    auto result_table      = cudf::table_view({result.first->view(), result.second->view()});
+    auto result = hash_join.left_join(t0);
+    auto result_table =
+      cudf::table_view({cudf::column_view{cudf::data_type{cudf::type_id::INT32},
+                                          static_cast<cudf::size_type>(result.first.size()),
+                                          result.first.data()},
+                        cudf::column_view{cudf::data_type{cudf::type_id::INT32},
+                                          static_cast<cudf::size_type>(result.second.size()),
+                                          result.second.data()}});
     auto result_sort_order = cudf::sorted_order(result_table);
     auto sorted_result     = cudf::gather(result_table, *result_sort_order);
 
@@ -1124,8 +1137,14 @@ TEST_F(JoinTest, HashJoinSequentialProbes)
 
     Table t0(std::move(cols0));
 
-    auto result            = hash_join.inner_join(t0);
-    auto result_table      = cudf::table_view({result.first->view(), result.second->view()});
+    auto result = hash_join.inner_join(t0);
+    auto result_table =
+      cudf::table_view({cudf::column_view{cudf::data_type{cudf::type_id::INT32},
+                                          static_cast<cudf::size_type>(result.first.size()),
+                                          result.first.data()},
+                        cudf::column_view{cudf::data_type{cudf::type_id::INT32},
+                                          static_cast<cudf::size_type>(result.second.size()),
+                                          result.second.data()}});
     auto result_sort_order = cudf::sorted_order(result_table);
     auto sorted_result     = cudf::gather(result_table, *result_sort_order);
 

From 8a60d622b2bb9353fb79defb1650bdc294355f33 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 5 Mar 2021 10:50:22 -0500
Subject: [PATCH 066/138] Refactor join APIs to work with
 unique_ptr<rmm::device_uvector>>

---
 cpp/include/cudf/join.hpp          |  74 +++++++--------
 cpp/src/join/hash_join.cu          | 135 ++++++++++++++-------------
 cpp/src/join/hash_join.cuh         |  74 +++++++--------
 cpp/src/join/join.cu               | 142 +++++++++++++++--------------
 cpp/src/join/join_common_utils.hpp |   3 +-
 cpp/src/join/semi_join.cu          |  37 ++++----
 cpp/tests/join/join_tests.cpp      |  24 ++---
 7 files changed, 260 insertions(+), 229 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 2f6981fb81c..b1f06697522 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -57,11 +57,12 @@ namespace cudf {
  * the result of performing an inner join between two tables with `left_keys` and `right_keys`
  * as the join keys .
  */
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
-  cudf::table_view const& left_keys,
-  cudf::table_view const& right_keys,
-  null_equality compare_nulls         = null_equality::EQUAL,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+inner_join(cudf::table_view const& left_keys,
+           cudf::table_view const& right_keys,
+           null_equality compare_nulls         = null_equality::EQUAL,
+           rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
  * @brief Performs an inner join on the specified columns of two
@@ -108,7 +109,6 @@ std::unique_ptr<cudf::table> inner_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-
 /**
  * @brief Returns the row indices that can be used to construct
  * the result of performing a left join between two tables.
@@ -137,11 +137,12 @@ std::unique_ptr<cudf::table> inner_join(
  * the result of performing a left join between two tables with `left_keys` and `right_keys`
  * as the join keys .
  */
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
-  cudf::table_view const& left_keys,
-  cudf::table_view const& right_keys,
-  null_equality compare_nulls         = null_equality::EQUAL,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+left_join(cudf::table_view const& left_keys,
+          cudf::table_view const& right_keys,
+          null_equality compare_nulls         = null_equality::EQUAL,
+          rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
  * @brief Performs a left join (also known as left outer join) on the
@@ -196,7 +197,6 @@ std::unique_ptr<cudf::table> left_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-
 /**
  * @brief Returns the row indices that can be used to construct
  * the result of performing a full join between two tables.
@@ -223,11 +223,12 @@ std::unique_ptr<cudf::table> left_join(
  * the result of performing a full join between two tables with `left_keys` and `right_keys`
  * as the join keys .
  */
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
-  cudf::table_view const& left_keys,
-  cudf::table_view const& right_keys,
-  null_equality compare_nulls         = null_equality::EQUAL,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+full_join(cudf::table_view const& left_keys,
+          cudf::table_view const& right_keys,
+          null_equality compare_nulls         = null_equality::EQUAL,
+          rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
  * @brief Performs a full join (also known as full outer join) on the
@@ -330,7 +331,6 @@ std::unique_ptr<cudf::table> left_semi_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-
 /**
  * @brief Returns the row indices that can be used to construct
  * the result of performing a left semi join between two tables.
@@ -364,7 +364,7 @@ std::unique_ptr<cudf::table> left_semi_join(
  * the result of performing a left semi join between two tables with
  * `left_keys` and `right_keys` as the join keys .
  */
-rmm::device_uvector<size_type> left_semi_join(
+std::unique_ptr<rmm::device_uvector<size_type>> left_semi_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
@@ -451,7 +451,7 @@ std::unique_ptr<cudf::table> left_anti_join(
  * the result of performing a left anti join between two tables with
  * `left_keys` and `right_keys` as the join keys .
  */
-rmm::device_uvector<size_type> left_anti_join(
+std::unique_ptr<rmm::device_uvector<size_type>> left_anti_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
@@ -528,11 +528,12 @@ class hash_join {
    * the result of performing an inner join between two tables with `build` and `probe`
    * as the the join keys .
    */
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
-    cudf::table_view const& probe,
-    null_equality compare_nulls         = null_equality::EQUAL,
-    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+            std::unique_ptr<rmm::device_uvector<size_type>>>
+  inner_join(cudf::table_view const& probe,
+             null_equality compare_nulls         = null_equality::EQUAL,
+             rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+             rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
   /**
    * Returns the row indices that can be used to construct the result of performing
@@ -548,12 +549,12 @@ class hash_join {
    * the result of performing a left join between two tables with `build` and `probe`
    * as the the join keys .
    */
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
-    cudf::table_view const& probe,
-    null_equality compare_nulls         = null_equality::EQUAL,
-    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
-
+  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+            std::unique_ptr<rmm::device_uvector<size_type>>>
+  left_join(cudf::table_view const& probe,
+            null_equality compare_nulls         = null_equality::EQUAL,
+            rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+            rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
   /**
    * Returns the row indices that can be used to construct the result of performing
@@ -569,11 +570,12 @@ class hash_join {
    * the result of performing a full join between two tables with `build` and `probe`
    * as the the join keys .
    */
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
-    cudf::table_view const& probe,
-    null_equality compare_nulls         = null_equality::EQUAL,
-    rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
-    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
+  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+            std::unique_ptr<rmm::device_uvector<size_type>>>
+  full_join(cudf::table_view const& probe,
+            null_equality compare_nulls         = null_equality::EQUAL,
+            rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+            rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
  private:
   struct hash_join_impl;
diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index cb0e5bc4901..301726a978f 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -40,22 +40,24 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> get_empty_joined_table
 
 VectorPair concatenate_vector_pairs(VectorPair &a, VectorPair &b, rmm::cuda_stream_view stream)
 {
-  CUDF_EXPECTS((a.first.size() == a.second.size()),
+  CUDF_EXPECTS((a.first->size() == a.second->size()),
                "Mismatch between sizes of vectors in vector pair");
-  CUDF_EXPECTS((b.first.size() == b.second.size()),
+  CUDF_EXPECTS((b.first->size() == b.second->size()),
                "Mismatch between sizes of vectors in vector pair");
-  if (a.first.is_empty()) {
+  if (a.first->is_empty()) {
     return std::move(b);
-  } else if (b.first.is_empty()) {
+  } else if (b.first->is_empty()) {
     return std::move(a);
   }
-  auto original_size = a.first.size();
-  a.first.resize(a.first.size() + b.first.size(), stream);
-  a.second.resize(a.second.size() + b.second.size(), stream);
+  auto original_size = a.first->size();
+  a.first->resize(a.first->size() + b.first->size(), stream);
+  a.second->resize(a.second->size() + b.second->size(), stream);
   thrust::copy(
-    rmm::exec_policy(stream), b.first.begin(), b.first.end(), a.first.begin() + original_size);
-  thrust::copy(
-    rmm::exec_policy(stream), b.second.begin(), b.second.end(), a.second.begin() + original_size);
+    rmm::exec_policy(stream), b.first->begin(), b.first->end(), a.first->begin() + original_size);
+  thrust::copy(rmm::exec_policy(stream),
+               b.second->begin(),
+               b.second->end(),
+               a.second->begin() + original_size);
   return std::move(a);
 }
 
@@ -83,8 +85,9 @@ struct valid_range {
  *
  * @return Pair of vectors containing the left join indices complement
  */
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
-get_left_join_indices_complement(rmm::device_uvector<size_type> &right_indices,
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+get_left_join_indices_complement(std::unique_ptr<rmm::device_uvector<size_type>> &right_indices,
                                  size_type left_table_row_count,
                                  size_type right_table_row_count,
                                  rmm::cuda_stream_view stream)
@@ -92,7 +95,8 @@ get_left_join_indices_complement(rmm::device_uvector<size_type> &right_indices,
   // Get array of indices that do not appear in right_indices
 
   // Vector allocated for unmatched result
-  rmm::device_uvector<size_type> right_indices_complement(right_table_row_count, stream);
+  auto right_indices_complement =
+    std::make_unique<rmm::device_uvector<size_type>>(right_table_row_count, stream);
 
   // If left table is empty in a full join call then all rows of the right table
   // should be represented in the joined indices. This is an optimization since
@@ -101,15 +105,16 @@ get_left_join_indices_complement(rmm::device_uvector<size_type> &right_indices,
   // produce exactly the same result as the else path but will be faster.
   if (left_table_row_count == 0) {
     thrust::sequence(rmm::exec_policy(stream),
-                     right_indices_complement.begin(),
-                     right_indices_complement.end(),
+                     right_indices_complement->begin(),
+                     right_indices_complement->end(),
                      0);
   } else {
     // Assume all the indices in invalid_index_map are invalid
-    rmm::device_uvector<size_type> invalid_index_map(right_table_row_count, stream);
+    auto invalid_index_map =
+      std::make_unique<rmm::device_uvector<size_type>>(right_table_row_count, stream);
     thrust::uninitialized_fill(thrust::cuda::par.on(stream.value()),
-                               invalid_index_map.begin(),
-                               invalid_index_map.end(),
+                               invalid_index_map->begin(),
+                               invalid_index_map->end(),
                                int32_t{1});
 
     // Functor to check for index validity since left joins can create invalid indices
@@ -119,11 +124,11 @@ get_left_join_indices_complement(rmm::device_uvector<size_type> &right_indices,
     // Thus specifying that those locations are valid
     thrust::scatter_if(rmm::exec_policy(stream),
                        thrust::make_constant_iterator(0),
-                       thrust::make_constant_iterator(0) + right_indices.size(),
-                       right_indices.begin(),      // Index locations
-                       right_indices.begin(),      // Stencil - Check if index location is valid
-                       invalid_index_map.begin(),  // Output indices
-                       valid);                     // Stencil Predicate
+                       thrust::make_constant_iterator(0) + right_indices->size(),
+                       right_indices->begin(),      // Index locations
+                       right_indices->begin(),      // Stencil - Check if index location is valid
+                       invalid_index_map->begin(),  // Output indices
+                       valid);                      // Stencil Predicate
     size_type begin_counter = static_cast<size_type>(0);
     size_type end_counter   = static_cast<size_type>(right_table_row_count);
 
@@ -131,17 +136,18 @@ get_left_join_indices_complement(rmm::device_uvector<size_type> &right_indices,
     size_type indices_count = thrust::copy_if(rmm::exec_policy(stream),
                                               thrust::make_counting_iterator(begin_counter),
                                               thrust::make_counting_iterator(end_counter),
-                                              invalid_index_map.begin(),
-                                              right_indices_complement.begin(),
+                                              invalid_index_map->begin(),
+                                              right_indices_complement->begin(),
                                               thrust::identity<size_type>()) -
-                              right_indices_complement.begin();
-    right_indices_complement.resize(indices_count, stream);
+                              right_indices_complement->begin();
+    right_indices_complement->resize(indices_count, stream);
   }
 
-  rmm::device_uvector<size_type> left_invalid_indices(right_indices_complement.size(), stream);
+  auto left_invalid_indices =
+    std::make_unique<rmm::device_uvector<size_type>>(right_indices_complement->size(), stream);
   thrust::fill(rmm::exec_policy(stream),
-               left_invalid_indices.begin(),
-               left_invalid_indices.end(),
+               left_invalid_indices->begin(),
+               left_invalid_indices->end(),
                JoinNoneValue);
 
   return std::make_pair(std::move(left_invalid_indices), std::move(right_indices_complement));
@@ -212,20 +218,21 @@ std::unique_ptr<multimap_type, std::function<void(multimap_type *)>> build_join_
  * @return Join output indices vector pair.
  */
 template <join_kind JoinKind>
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> probe_join_hash_table(
-  cudf::table_device_view build_table,
-  cudf::table_device_view probe_table,
-  multimap_type const &hash_table,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream)
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+probe_join_hash_table(cudf::table_device_view build_table,
+                      cudf::table_device_view probe_table,
+                      multimap_type const &hash_table,
+                      null_equality compare_nulls,
+                      rmm::cuda_stream_view stream)
 {
   size_type estimated_size = estimate_join_output_size<JoinKind, multimap_type>(
     build_table, probe_table, hash_table, compare_nulls, stream);
 
   // If the estimated output size is zero, return immediately
   if (estimated_size == 0) {
-    return std::make_pair(rmm::device_uvector<size_type>{0, stream},
-                          rmm::device_uvector<size_type>{0, stream});
+    return std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream),
+                          std::make_unique<rmm::device_uvector<size_type>>(0, stream));
   }
 
   // Because we are approximating the number of joined elements, our approximation
@@ -235,12 +242,13 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> probe_
   rmm::device_scalar<size_type> write_index(0, stream);
   size_type join_size{0};
 
-  rmm::device_uvector<size_type> left_indices{0, stream};
-  rmm::device_uvector<size_type> right_indices{0, stream};
+  auto left_indices  = std::make_unique<rmm::device_uvector<size_type>>(0, stream);
+  auto right_indices = std::make_unique<rmm::device_uvector<size_type>>(0, stream);
+
   auto current_estimated_size = estimated_size;
   do {
-    left_indices.resize(estimated_size, stream);
-    right_indices.resize(estimated_size, stream);
+    left_indices->resize(estimated_size, stream);
+    right_indices->resize(estimated_size, stream);
 
     constexpr int block_size{DEFAULT_JOIN_BLOCK_SIZE};
     detail::grid_1d config(probe_table.num_rows(), block_size);
@@ -249,15 +257,16 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> probe_
     row_hash hash_probe{probe_table};
     row_equality equality{probe_table, build_table, compare_nulls == null_equality::EQUAL};
     probe_hash_table<JoinKind, multimap_type, block_size, DEFAULT_JOIN_CACHE_SIZE>
-      <<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(hash_table,
-                                                                               build_table,
-                                                                               probe_table,
-                                                                               hash_probe,
-                                                                               equality,
-                                                                               left_indices.data(),
-                                                                               right_indices.data(),
-                                                                               write_index.data(),
-                                                                               estimated_size);
+      <<<config.num_blocks, config.num_threads_per_block, 0, stream.value()>>>(
+        hash_table,
+        build_table,
+        probe_table,
+        hash_probe,
+        equality,
+        left_indices->data(),
+        right_indices->data(),
+        write_index.data(),
+        estimated_size);
 
     CHECK_CUDA(stream.value());
 
@@ -266,8 +275,8 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> probe_
     estimated_size *= 2;
   } while ((current_estimated_size < join_size));
 
-  left_indices.resize(join_size, stream);
-  right_indices.resize(join_size, stream);
+  left_indices->resize(join_size, stream);
+  right_indices->resize(join_size, stream);
   return std::make_pair(std::move(left_indices), std::move(right_indices));
 }
 
@@ -301,7 +310,8 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const &build,
   _hash_table = build_join_hash_table(_build, compare_nulls, stream);
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
 hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
                                       null_equality compare_nulls,
                                       rmm::cuda_stream_view stream,
@@ -311,7 +321,8 @@ hash_join::hash_join_impl::inner_join(cudf::table_view const &probe,
   return compute_hash_join<cudf::detail::join_kind::INNER_JOIN>(probe, compare_nulls, stream, mr);
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
 hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
                                      null_equality compare_nulls,
                                      rmm::cuda_stream_view stream,
@@ -321,7 +332,8 @@ hash_join::hash_join_impl::left_join(cudf::table_view const &probe,
   return compute_hash_join<cudf::detail::join_kind::LEFT_JOIN>(probe, compare_nulls, stream, mr);
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
 hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
                                      null_equality compare_nulls,
                                      rmm::cuda_stream_view stream,
@@ -332,7 +344,8 @@ hash_join::hash_join_impl::full_join(cudf::table_view const &probe,
 }
 
 template <cudf::detail::join_kind JoinKind>
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
 hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
                                              null_equality compare_nulls,
                                              rmm::cuda_stream_view stream,
@@ -345,8 +358,8 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
                "Mismatch in number of columns to be joined on");
 
   if (is_trivial_join(probe, _build, JoinKind)) {
-    return std::make_pair(rmm::device_uvector<size_type>{0, stream},
-                          rmm::device_uvector<size_type>{0, stream});
+    return std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream),
+                          std::make_unique<rmm::device_uvector<size_type>>(0, stream));
   }
 
   CUDF_EXPECTS(std::equal(std::cbegin(_build),
@@ -360,7 +373,8 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
 }
 
 template <cudf::detail::join_kind JoinKind>
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
 hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
                                               null_equality compare_nulls,
                                               rmm::cuda_stream_view stream) const
@@ -386,7 +400,6 @@ hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
       join_indices.second, probe.num_rows(), _build.num_rows(), stream);
     join_indices = detail::concatenate_vector_pairs(join_indices, complement_indices, stream);
   }
-
   return join_indices;
 }
 
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index cebf8fd612e..3bf20eb9433 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -184,13 +184,15 @@ size_type estimate_join_output_size(table_device_view build_table,
  *
  * @return Join output indices vector pair
  */
-inline std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
+inline std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+                 std::unique_ptr<rmm::device_uvector<size_type>>>
 get_trivial_left_join_indices(table_view const& left, rmm::cuda_stream_view stream)
 {
-  rmm::device_uvector<size_type> left_indices(left.num_rows(), stream);
-  thrust::sequence(rmm::exec_policy(stream), left_indices.begin(), left_indices.end(), 0);
-  rmm::device_uvector<size_type> right_indices(left.num_rows(), stream);
-  thrust::fill(rmm::exec_policy(stream), right_indices.begin(), right_indices.end(), JoinNoneValue);
+  auto left_indices = std::make_unique<rmm::device_uvector<size_type>>(left.num_rows(), stream);
+  thrust::sequence(rmm::exec_policy(stream), left_indices->begin(), left_indices->end(), 0);
+  auto right_indices = std::make_unique<rmm::device_uvector<size_type>>(left.num_rows(), stream);
+  thrust::fill(
+    rmm::exec_policy(stream), right_indices->begin(), right_indices->end(), JoinNoneValue);
   return std::make_pair(std::move(left_indices), std::move(right_indices));
 }
 
@@ -230,38 +232,35 @@ struct hash_join::hash_join_impl {
                  null_equality compare_nulls,
                  rmm::cuda_stream_view stream = rmm::cuda_stream_default);
 
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
-    cudf::table_view const& probe,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
-
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
-    cudf::table_view const& probe,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
-
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
-    cudf::table_view const& probe,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
+  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+            std::unique_ptr<rmm::device_uvector<size_type>>>
+  inner_join(cudf::table_view const& probe,
+             null_equality compare_nulls,
+             rmm::cuda_stream_view stream,
+             rmm::mr::device_memory_resource* mr) const;
+
+  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+            std::unique_ptr<rmm::device_uvector<size_type>>>
+  left_join(cudf::table_view const& probe,
+            null_equality compare_nulls,
+            rmm::cuda_stream_view stream,
+            rmm::mr::device_memory_resource* mr) const;
+
+  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+            std::unique_ptr<rmm::device_uvector<size_type>>>
+  full_join(cudf::table_view const& probe,
+            null_equality compare_nulls,
+            rmm::cuda_stream_view stream,
+            rmm::mr::device_memory_resource* mr) const;
 
  private:
   template <cudf::detail::join_kind JoinKind>
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>
-  compute_hash_join_indices(cudf::table_view const& probe,
-                            null_equality compare_nulls,
-                            rmm::cuda_stream_view stream,
-                            rmm::mr::device_memory_resource* mr) const;
-
-  template <cudf::detail::join_kind JoinKind>
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> compute_hash_join(
-    cudf::table_view const& probe,
-    null_equality compare_nulls,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr) const;
+  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+            std::unique_ptr<rmm::device_uvector<size_type>>>
+  compute_hash_join(cudf::table_view const& probe,
+                    null_equality compare_nulls,
+                    rmm::cuda_stream_view stream,
+                    rmm::mr::device_memory_resource* mr) const;
 
   /**
    * @brief Probes the `_hash_table` built from `_build` for tuples in `probe_table`,
@@ -279,8 +278,11 @@ struct hash_join::hash_join_impl {
    * @return Join output indices vector pair.
    */
   template <cudf::detail::join_kind JoinKind>
-  std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> probe_join_indices(
-    cudf::table_view const& probe, null_equality compare_nulls, rmm::cuda_stream_view stream) const;
+  std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+            std::unique_ptr<rmm::device_uvector<size_type>>>
+  probe_join_indices(cudf::table_view const& probe,
+                     null_equality compare_nulls,
+                     rmm::cuda_stream_view stream) const;
 };
 
 }  // namespace cudf
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index a9f4b507efc..bce72862220 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -26,12 +26,13 @@
 namespace cudf {
 namespace detail {
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
-  table_view const& left_input,
-  table_view const& right_input,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+inner_join(table_view const& left_input,
+           table_view const& right_input,
+           null_equality compare_nulls,
+           rmm::cuda_stream_view stream,
+           rmm::mr::device_memory_resource* mr)
 {
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
@@ -50,8 +51,9 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_
   if (right.num_rows() > left.num_rows()) {
     cudf::hash_join hj_obj(left, compare_nulls, stream);
     auto result = hj_obj.inner_join(right, compare_nulls, stream, mr);
-    return std::make_pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>(
-      std::move(result.second), std::move(result.first));
+    return std::make_pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+                          std::unique_ptr<rmm::device_uvector<size_type>>>(std::move(result.second),
+                                                                           std::move(result.first));
   } else {
     cudf::hash_join hj_obj(right, compare_nulls, stream);
     return hj_obj.inner_join(left, compare_nulls, stream, mr);
@@ -84,14 +86,14 @@ std::unique_ptr<table> inner_join(table_view const& left_input,
     cudf::hash_join hj_obj(left.select(left_on), compare_nulls, stream);
     auto join_indices = hj_obj.inner_join(right.select(right_on), compare_nulls, stream, mr);
     std::unique_ptr<table> left_result  = detail::gather(left,
-                                                        join_indices.second.begin(),
-                                                        join_indices.second.end(),
+                                                        join_indices.second->begin(),
+                                                        join_indices.second->end(),
                                                         out_of_bounds_policy::DONT_CHECK,
                                                         stream,
                                                         mr);
     std::unique_ptr<table> right_result = detail::gather(right,
-                                                         join_indices.first.begin(),
-                                                         join_indices.first.end(),
+                                                         join_indices.first->begin(),
+                                                         join_indices.first->end(),
                                                          out_of_bounds_policy::DONT_CHECK,
                                                          stream,
                                                          mr);
@@ -100,14 +102,14 @@ std::unique_ptr<table> inner_join(table_view const& left_input,
     cudf::hash_join hj_obj(right.select(right_on), compare_nulls, stream);
     auto join_indices = hj_obj.inner_join(left.select(left_on), compare_nulls, stream, mr);
     std::unique_ptr<table> left_result  = detail::gather(left,
-                                                        join_indices.first.begin(),
-                                                        join_indices.first.end(),
+                                                        join_indices.first->begin(),
+                                                        join_indices.first->end(),
                                                         out_of_bounds_policy::DONT_CHECK,
                                                         stream,
                                                         mr);
     std::unique_ptr<table> right_result = detail::gather(right,
-                                                         join_indices.second.begin(),
-                                                         join_indices.second.end(),
+                                                         join_indices.second->begin(),
+                                                         join_indices.second->end(),
                                                          out_of_bounds_policy::DONT_CHECK,
                                                          stream,
                                                          mr);
@@ -115,12 +117,13 @@ std::unique_ptr<table> inner_join(table_view const& left_input,
   }
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
-  table_view const& left_input,
-  table_view const& right_input,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+left_join(table_view const& left_input,
+          table_view const& right_input,
+          null_equality compare_nulls,
+          rmm::cuda_stream_view stream,
+          rmm::mr::device_memory_resource* mr)
 {
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
@@ -164,26 +167,27 @@ std::unique_ptr<table> left_join(table_view const& left_input,
                                             std::move(probe_build_pair.second));
   }
   std::unique_ptr<table> left_result  = detail::gather(left,
-                                                      join_indices.first.begin(),
-                                                      join_indices.first.end(),
+                                                      join_indices.first->begin(),
+                                                      join_indices.first->end(),
                                                       out_of_bounds_policy::NULLIFY,
                                                       stream,
                                                       mr);
   std::unique_ptr<table> right_result = detail::gather(right,
-                                                       join_indices.second.begin(),
-                                                       join_indices.second.end(),
+                                                       join_indices.second->begin(),
+                                                       join_indices.second->end(),
                                                        out_of_bounds_policy::NULLIFY,
                                                        stream,
                                                        mr);
   return combine_table_pair(std::move(left_result), std::move(right_result));
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
-  table_view const& left_input,
-  table_view const& right_input,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+full_join(table_view const& left_input,
+          table_view const& right_input,
+          null_equality compare_nulls,
+          rmm::cuda_stream_view stream,
+          rmm::mr::device_memory_resource* mr)
 {
   // Make sure any dictionary columns have matched key sets.
   // This will return any new dictionary columns created as well as updated table_views.
@@ -227,14 +231,14 @@ std::unique_ptr<table> full_join(table_view const& left_input,
                                             std::move(probe_build_pair.second));
   }
   std::unique_ptr<table> left_result  = detail::gather(left,
-                                                      join_indices.first.begin(),
-                                                      join_indices.first.end(),
+                                                      join_indices.first->begin(),
+                                                      join_indices.first->end(),
                                                       out_of_bounds_policy::NULLIFY,
                                                       stream,
                                                       mr);
   std::unique_ptr<table> right_result = detail::gather(right,
-                                                       join_indices.second.begin(),
-                                                       join_indices.second.end(),
+                                                       join_indices.second->begin(),
+                                                       join_indices.second->end(),
                                                        out_of_bounds_policy::NULLIFY,
                                                        stream,
                                                        mr);
@@ -252,40 +256,44 @@ hash_join::hash_join(cudf::table_view const& build,
 {
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::inner_join(
-  cudf::table_view const& probe,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr) const
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+hash_join::inner_join(cudf::table_view const& probe,
+                      null_equality compare_nulls,
+                      rmm::cuda_stream_view stream,
+                      rmm::mr::device_memory_resource* mr) const
 {
   return impl->inner_join(probe, compare_nulls, stream, mr);
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::left_join(
-  cudf::table_view const& probe,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr) const
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+hash_join::left_join(cudf::table_view const& probe,
+                     null_equality compare_nulls,
+                     rmm::cuda_stream_view stream,
+                     rmm::mr::device_memory_resource* mr) const
 {
   return impl->left_join(probe, compare_nulls, stream, mr);
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_join::full_join(
-  cudf::table_view const& probe,
-  null_equality compare_nulls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr) const
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+hash_join::full_join(cudf::table_view const& probe,
+                     null_equality compare_nulls,
+                     rmm::cuda_stream_view stream,
+                     rmm::mr::device_memory_resource* mr) const
 {
   return impl->full_join(probe, compare_nulls, stream, mr);
 }
 
 // external APIs
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> inner_join(
-  table_view const& left,
-  table_view const& right,
-  null_equality compare_nulls,
-  rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+inner_join(table_view const& left,
+           table_view const& right,
+           null_equality compare_nulls,
+           rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::inner_join(left, right, compare_nulls, rmm::cuda_stream_default, mr);
@@ -303,11 +311,12 @@ std::unique_ptr<table> inner_join(table_view const& left,
     left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> left_join(
-  table_view const& left,
-  table_view const& right,
-  null_equality compare_nulls,
-  rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+left_join(table_view const& left,
+          table_view const& right,
+          null_equality compare_nulls,
+          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::left_join(left, right, compare_nulls, rmm::cuda_stream_default, mr);
@@ -325,11 +334,12 @@ std::unique_ptr<table> left_join(table_view const& left,
     left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> full_join(
-  table_view const& left,
-  table_view const& right,
-  null_equality compare_nulls,
-  rmm::mr::device_memory_resource* mr)
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+          std::unique_ptr<rmm::device_uvector<size_type>>>
+full_join(table_view const& left,
+          table_view const& right,
+          null_equality compare_nulls,
+          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::full_join(left, right, compare_nulls, rmm::cuda_stream_default, mr);
diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp
index 1fcfffb96bb..ed33fab685c 100644
--- a/cpp/src/join/join_common_utils.hpp
+++ b/cpp/src/join/join_common_utils.hpp
@@ -33,7 +33,8 @@ constexpr int DEFAULT_JOIN_BLOCK_SIZE = 128;
 constexpr int DEFAULT_JOIN_CACHE_SIZE = 128;
 constexpr size_type JoinNoneValue     = std::numeric_limits<size_type>::min();
 
-using VectorPair = std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>>;
+using VectorPair = std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
+                             std::unique_ptr<rmm::device_uvector<size_type>>>;
 
 using multimap_type =
   concurrent_unordered_multimap<hash_value_type,
diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index f1b797aa9f5..3f1de67a05b 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -38,7 +38,7 @@ namespace cudf {
 namespace detail {
 
 template <join_kind JoinKind>
-rmm::device_uvector<cudf::size_type> left_semi_anti_join(
+std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_semi_anti_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
   null_equality compare_nulls,
@@ -49,11 +49,12 @@ rmm::device_uvector<cudf::size_type> left_semi_anti_join(
   CUDF_EXPECTS(0 != right_keys.num_columns(), "Right table is empty");
 
   if (is_trivial_join(left_keys, right_keys, JoinKind)) {
-    return rmm::device_uvector<cudf::size_type>(0, stream);
+    return std::make_unique<rmm::device_uvector<cudf::size_type>>(0, stream);
   }
   if ((join_kind::LEFT_ANTI_JOIN == JoinKind) && (0 == right_keys.num_rows())) {
-    auto result = rmm::device_uvector<cudf::size_type>(left_keys.num_rows(), stream);
-    thrust::sequence(thrust::cuda::par.on(stream.value()), result.begin(), result.end());
+    auto result =
+      std::make_unique<rmm::device_uvector<cudf::size_type>>(left_keys.num_rows(), stream);
+    thrust::sequence(thrust::cuda::par.on(stream.value()), result->begin(), result->end());
     return std::move(result);
   }
 
@@ -97,20 +98,20 @@ rmm::device_uvector<cudf::size_type> left_semi_anti_join(
   // For semi join we want contains to be true, for anti join we want contains to be false
   bool join_type_boolean = (JoinKind == join_kind::LEFT_SEMI_JOIN);
 
-  rmm::device_uvector<cudf::size_type> gather_map(left_num_rows, stream);
+  auto gather_map = std::make_unique<rmm::device_uvector<cudf::size_type>>(left_num_rows, stream);
 
   // gather_map_end will be the end of valid data in gather_map
   auto gather_map_end = thrust::copy_if(
     rmm::exec_policy(stream),
     thrust::make_counting_iterator<size_type>(0),
     thrust::make_counting_iterator<size_type>(left_num_rows),
-    gather_map.begin(),
+    gather_map->begin(),
     [hash_table, join_type_boolean, hash_probe, equality_probe] __device__(size_type idx) {
       auto pos = hash_table.find(idx, hash_probe, equality_probe);
       return (pos != hash_table.end()) == join_type_boolean;
     });
 
-  auto join_size = thrust::distance(gather_map.begin(), gather_map_end);
+  auto join_size = thrust::distance(gather_map->begin(), gather_map_end);
   return std::move(gather_map);
 }
 
@@ -185,8 +186,8 @@ std::unique_ptr<cudf::table> left_semi_anti_join(
 
   auto const left_updated = scatter_columns(left_selected, left_on, left);
   return cudf::detail::gather(left_updated,
-                              gather_map.begin(),
-                              gather_map.end(),
+                              gather_map->begin(),
+                              gather_map->end(),
                               out_of_bounds_policy::DONT_CHECK,
                               stream,
                               mr);
@@ -206,10 +207,11 @@ std::unique_ptr<cudf::table> left_semi_join(cudf::table_view const& left,
     left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-rmm::device_uvector<cudf::size_type> left_semi_join(cudf::table_view const& left,
-                                                    cudf::table_view const& right,
-                                                    null_equality compare_nulls,
-                                                    rmm::mr::device_memory_resource* mr)
+std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_semi_join(
+  cudf::table_view const& left,
+  cudf::table_view const& right,
+  null_equality compare_nulls,
+  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join<detail::join_kind::LEFT_SEMI_JOIN>(
@@ -228,10 +230,11 @@ std::unique_ptr<cudf::table> left_anti_join(cudf::table_view const& left,
     left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr);
 }
 
-rmm::device_uvector<cudf::size_type> left_anti_join(cudf::table_view const& left,
-                                                    cudf::table_view const& right,
-                                                    null_equality compare_nulls,
-                                                    rmm::mr::device_memory_resource* mr)
+std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_anti_join(
+  cudf::table_view const& left,
+  cudf::table_view const& right,
+  null_equality compare_nulls,
+  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join<detail::join_kind::LEFT_ANTI_JOIN>(
diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index 1b910bfd0cc..32192234c56 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -1076,11 +1076,11 @@ TEST_F(JoinTest, HashJoinSequentialProbes)
 
     auto result_table =
       cudf::table_view({cudf::column_view{cudf::data_type{cudf::type_id::INT32},
-                                          static_cast<cudf::size_type>(result.first.size()),
-                                          result.first.data()},
+                                          static_cast<cudf::size_type>(result.first->size()),
+                                          result.first->data()},
                         cudf::column_view{cudf::data_type{cudf::type_id::INT32},
-                                          static_cast<cudf::size_type>(result.second.size()),
-                                          result.second.data()}});
+                                          static_cast<cudf::size_type>(result.second->size()),
+                                          result.second->data()}});
     auto result_sort_order = cudf::sorted_order(result_table);
     auto sorted_result     = cudf::gather(result_table, *result_sort_order);
 
@@ -1108,11 +1108,11 @@ TEST_F(JoinTest, HashJoinSequentialProbes)
     auto result = hash_join.left_join(t0);
     auto result_table =
       cudf::table_view({cudf::column_view{cudf::data_type{cudf::type_id::INT32},
-                                          static_cast<cudf::size_type>(result.first.size()),
-                                          result.first.data()},
+                                          static_cast<cudf::size_type>(result.first->size()),
+                                          result.first->data()},
                         cudf::column_view{cudf::data_type{cudf::type_id::INT32},
-                                          static_cast<cudf::size_type>(result.second.size()),
-                                          result.second.data()}});
+                                          static_cast<cudf::size_type>(result.second->size()),
+                                          result.second->data()}});
     auto result_sort_order = cudf::sorted_order(result_table);
     auto sorted_result     = cudf::gather(result_table, *result_sort_order);
 
@@ -1140,11 +1140,11 @@ TEST_F(JoinTest, HashJoinSequentialProbes)
     auto result = hash_join.inner_join(t0);
     auto result_table =
       cudf::table_view({cudf::column_view{cudf::data_type{cudf::type_id::INT32},
-                                          static_cast<cudf::size_type>(result.first.size()),
-                                          result.first.data()},
+                                          static_cast<cudf::size_type>(result.first->size()),
+                                          result.first->data()},
                         cudf::column_view{cudf::data_type{cudf::type_id::INT32},
-                                          static_cast<cudf::size_type>(result.second.size()),
-                                          result.second.data()}});
+                                          static_cast<cudf::size_type>(result.second->size()),
+                                          result.second->data()}});
     auto result_sort_order = cudf::sorted_order(result_table);
     auto sorted_result     = cudf::gather(result_table, *result_sort_order);
 

From 387a9539778ce98c4f2a30464d15a354aef3eb6b Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 5 Mar 2021 11:12:25 -0500
Subject: [PATCH 067/138] Update join Cython

---
 python/cudf/cudf/_lib/cpp/join.pxd | 14 +++++++-----
 python/cudf/cudf/_lib/join.pyx     | 36 ++++++++++++++++++++++++------
 2 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/_lib/cpp/join.pxd b/python/cudf/cudf/_lib/cpp/join.pxd
index 6ebde3934c3..c221fea926d 100644
--- a/python/cudf/cudf/_lib/cpp/join.pxd
+++ b/python/cudf/cudf/_lib/cpp/join.pxd
@@ -10,30 +10,34 @@ from libcpp.memory cimport unique_ptr
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
+from cudf._lib.cpp.types cimport size_type
+from rmm._lib.device_uvector cimport device_uvector
 
 
+ctypedef unique_ptr[device_uvector[size_type]] gather_map_type
+
 cdef extern from "cudf/join.hpp" namespace "cudf" nogil:
-    cdef pair[unique_ptr[column], unique_ptr[column]] inner_join(
+    cdef pair[gather_map_type, gather_map_type] inner_join(
         const table_view left_keys,
         const table_view right_keys,
     ) except +
 
-    cdef pair[unique_ptr[column], unique_ptr[column]] left_join(
+    cdef pair[gather_map_type, gather_map_type] left_join(
         const table_view left_keys,
         const table_view right_keys,
     ) except +
 
-    cdef pair[unique_ptr[column], unique_ptr[column]] full_join(
+    cdef pair[gather_map_type, gather_map_type] full_join(
         const table_view left_keys,
         const table_view right_keys,
     ) except +
 
-    cdef unique_ptr[column] left_semi_join(
+    cdef gather_map_type left_semi_join(
         const table_view left_keys,
         const table_view right_keys,
     ) except +
 
-    cdef unique_ptr[column] left_anti_join(
+    cdef gather_map_type left_anti_join(
         const table_view left_keys,
         const table_view right_keys,
     ) except +
diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index f1677e3f856..03ad5fbeff7 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -3,7 +3,7 @@
 from collections import OrderedDict
 from itertools import chain
 
-from libcpp.memory cimport unique_ptr
+from libcpp.memory cimport unique_ptr, make_unique
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 from libcpp.pair cimport pair
@@ -13,7 +13,7 @@ from cudf._lib.column cimport Column
 from cudf._lib.table cimport Table, columns_from_ptr
 
 from cudf._lib.cpp.column.column cimport column
-from cudf._lib.cpp.types cimport size_type
+from cudf._lib.cpp.types cimport size_type, data_type, type_id
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 cimport cudf._lib.cpp.join as cpp_join
@@ -21,7 +21,7 @@ cimport cudf._lib.cpp.join as cpp_join
 
 cpdef join(Table lhs, Table rhs, how=None):
     # left, inner and outer join
-    cdef pair[unique_ptr[column], unique_ptr[column]] c_result
+    cdef pair[cpp_join.gather_map_type, cpp_join.gather_map_type] c_result
     cdef table_view c_lhs = lhs.view()
     cdef table_view c_rhs = rhs.view()
 
@@ -42,9 +42,21 @@ cpdef join(Table lhs, Table rhs, how=None):
         ))
     else:
         raise ValueError(f"Invalid join type {how}")
+
+    cdef size_type join_size = c_result.first.get()[0].size()
+    cdef unique_ptr[column] left_rows = make_unique[column](
+        data_type(type_id.INT32),
+        join_size,
+        c_result.first.get()[0].release()
+    )
+    cdef unique_ptr[column] right_rows = make_unique[column](
+        data_type(type_id.INT32),
+        join_size,
+        c_result.second.get()[0].release()
+    )
     return (
-        Column.from_unique_ptr(move(c_result.first)),
-        Column.from_unique_ptr(move(c_result.second))
+        Column.from_unique_ptr(move(left_rows)),
+        Column.from_unique_ptr(move(right_rows))
     )
 
 
@@ -52,7 +64,7 @@ cpdef semi_join(Table lhs, Table rhs, how=None):
     from cudf.core.column import as_column
 
     # left-semi and left-anti joins
-    cdef unique_ptr[column] c_result
+    cdef cpp_join.gather_map_type c_result
     cdef table_view c_lhs = lhs.view()
     cdef table_view c_rhs = rhs.view()
 
@@ -68,4 +80,14 @@ cpdef semi_join(Table lhs, Table rhs, how=None):
         ))
     else:
         raise ValueError(f"Invalid join type {how}")
-    return Column.from_unique_ptr(move(c_result)), as_column([], dtype="int32")
+
+    cdef size_type join_size = c_result.get()[0].size()
+    cdef unique_ptr[column] left_rows = make_unique[column](
+        data_type(type_id.INT32),
+        join_size,
+        c_result.get()[0].release()
+    )
+    return (
+        Column.from_unique_ptr(move(left_rows)),
+        as_column([], dtype="int32")
+    )

From 6cd6433dc036a4e44e0eda417c653979db8dd116 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 5 Mar 2021 12:08:45 -0500
Subject: [PATCH 068/138] Need to resize the gathermap

---
 cpp/src/join/semi_join.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index 3f1de67a05b..073ef9eb243 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -112,6 +112,7 @@ std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_semi_anti_join(
     });
 
   auto join_size = thrust::distance(gather_map->begin(), gather_map_end);
+  gather_map->resize(join_size, stream);
   return std::move(gather_map);
 }
 

From c67dcced44b1ff85756d638a6af7172847022a02 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 5 Mar 2021 12:14:50 -0500
Subject: [PATCH 069/138] Doc

---
 python/cudf/cudf/_lib/join.pyx | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 03ad5fbeff7..d187df8b5bd 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -19,8 +19,11 @@ from cudf._lib.cpp.table.table_view cimport table_view
 cimport cudf._lib.cpp.join as cpp_join
 
 
+# The functions below return the *gathermaps* that represent
+# the join result when joining on the keys `lhs` and `rhs`.
+
 cpdef join(Table lhs, Table rhs, how=None):
-    # left, inner and outer join
+
     cdef pair[cpp_join.gather_map_type, cpp_join.gather_map_type] c_result
     cdef table_view c_lhs = lhs.view()
     cdef table_view c_rhs = rhs.view()
@@ -61,6 +64,7 @@ cpdef join(Table lhs, Table rhs, how=None):
 
 
 cpdef semi_join(Table lhs, Table rhs, how=None):
+
     from cudf.core.column import as_column
 
     # left-semi and left-anti joins

From 30c22ed04a4c362b41ce900a43573ee332a56596 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 5 Mar 2021 13:28:35 -0500
Subject: [PATCH 070/138] Changelog

---
 CHANGELOG.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2bb46ea0de4..6b08a042615 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -212,7 +212,6 @@ Please see https://github.com/rapidsai/cudf/releases/tag/branch-0.19-latest for
 - Add static type checking via Mypy (#6381) @shwina
 - Update to official libcu++ on Github (#6275) @trxcllnt
 
-# cuDF 0.17.0 (Date TBD)
 # cuDF 0.17.0 (10 Dec 2020)
 
 ## New Features

From f73199deb820518c896375da58f636f240dfb977 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 9 Mar 2021 16:15:51 -0500
Subject: [PATCH 071/138] Add helper to convert gather_map_type->Column

---
 python/cudf/cudf/_lib/join.pyx | 37 ++++++++++++++--------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index d187df8b5bd..9e21db00e5b 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -46,21 +46,9 @@ cpdef join(Table lhs, Table rhs, how=None):
     else:
         raise ValueError(f"Invalid join type {how}")
 
-    cdef size_type join_size = c_result.first.get()[0].size()
-    cdef unique_ptr[column] left_rows = make_unique[column](
-        data_type(type_id.INT32),
-        join_size,
-        c_result.first.get()[0].release()
-    )
-    cdef unique_ptr[column] right_rows = make_unique[column](
-        data_type(type_id.INT32),
-        join_size,
-        c_result.second.get()[0].release()
-    )
-    return (
-        Column.from_unique_ptr(move(left_rows)),
-        Column.from_unique_ptr(move(right_rows))
-    )
+    cdef Column left_rows = _gather_map_as_column(move(c_result.first))
+    cdef Column right_rows = _gather_map_as_column(move(c_result.second))
+    return left_rows, right_rows
 
 
 cpdef semi_join(Table lhs, Table rhs, how=None):
@@ -85,13 +73,18 @@ cpdef semi_join(Table lhs, Table rhs, how=None):
     else:
         raise ValueError(f"Invalid join type {how}")
 
-    cdef size_type join_size = c_result.get()[0].size()
-    cdef unique_ptr[column] left_rows = make_unique[column](
-        data_type(type_id.INT32),
-        join_size,
-        c_result.get()[0].release()
-    )
+    cdef Column left_rows = _gather_map_as_column(move(c_result))
     return (
-        Column.from_unique_ptr(move(left_rows)),
+        left_rows,
         as_column([], dtype="int32")
     )
+
+
+cdef Column _gather_map_as_column(cpp_join.gather_map_type gather_map):
+    # helple to convert a gather map to a Column
+    cdef size_type size = gather_map.get()[0].size()
+    cdef unique_ptr[column] c_col = make_unique[column](
+        data_type(type_id.INT32),
+        size,
+        gather_map.get()[0].release())
+    return Column.from_unique_ptr(move(c_col))

From 393c06acd8488543c07ab35dd0f09c7f0e857c23 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Tue, 9 Mar 2021 16:22:56 -0500
Subject: [PATCH 072/138] Update python/cudf/cudf/core/frame.py

Co-authored-by: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
---
 python/cudf/cudf/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 090b2d848b9..f540c7144a5 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2742,7 +2742,7 @@ def _get_sorted_inds(self, by=None, ascending=True, na_position="last"):
         Parameters
         ----------
         by: list, optional
-            Labels specifyin columns to sort by. By default,
+            Labels specifying columns to sort by. By default,
             sort by all columns of `self`
         ascending : bool or list of bool, default True
             If True, sort values in ascending order, otherwise descending.

From e91f5543c7e3ceaae503e068783673a6562f34b3 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 9 Mar 2021 16:25:54 -0500
Subject: [PATCH 073/138] Cannot specify both column and  index

---
 python/cudf/cudf/core/join/_join_helpers.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 02a1a49564a..24ffe9040bf 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -29,6 +29,8 @@ class _Indexer:
     # >>> _Indexer("b", index=True).get(df)  # returns index level "b" of df
 
     def __init__(self, name: Any, column=False, index=False):
+        if column and index:
+            raise ValueError("Cannot specify both column and index")
         self.name = name
         self.column, self.index = column, index
 

From 01858966c7da5ec5133cc2d2d585fcf0e9a9dd74 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 9 Mar 2021 16:57:58 -0500
Subject: [PATCH 074/138] Vaildate how

---
 python/cudf/cudf/tests/test_joining.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index 6b969d66108..bce558aa46d 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -14,11 +14,13 @@
     assert_exceptions_equal,
 )
 
+_JOIN_TYPES = {"left", "inner", "outer", "right", "leftanti", "leftsemi"}
+
 
 def make_params():
     np.random.seed(0)
 
-    hows = "left,inner,outer,right,leftanti,leftsemi".split(",")
+    hows = _JOIN_TYPES
     methods = "hash,sort".split(",")
 
     # Test specific cases (1)
@@ -70,6 +72,8 @@ def pd_odd_joins(left, right, join_type):
 
 
 def assert_join_results_equal(expect, got, how, **kwargs):
+    if how not in _JOIN_TYPES:
+        raise ValueError(f"Unrecognized join type {how}")
     if how == "right":
         got = got[expect.columns]
 

From 1eb495d7c4b988211e7340c93ffc575f9cc11f18 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 9 Mar 2021 16:59:47 -0500
Subject: [PATCH 075/138] Can't use a set

---
 python/cudf/cudf/tests/test_joining.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index bce558aa46d..50141428b02 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -14,7 +14,7 @@
     assert_exceptions_equal,
 )
 
-_JOIN_TYPES = {"left", "inner", "outer", "right", "leftanti", "leftsemi"}
+_JOIN_TYPES = ("left", "inner", "outer", "right", "leftanti", "leftsemi")
 
 
 def make_params():

From 4f1f0725d189b58caa4e978f2f854e61036ce6d1 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Mar 2021 10:12:49 -0500
Subject: [PATCH 076/138] Avoid function local import

---
 python/cudf/cudf/_lib/join.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 9e21db00e5b..ee4c72483a7 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020, NVIDIA CORPORATION.
 
+import cudf
+
 from collections import OrderedDict
 from itertools import chain
 
@@ -53,8 +55,6 @@ cpdef join(Table lhs, Table rhs, how=None):
 
 cpdef semi_join(Table lhs, Table rhs, how=None):
 
-    from cudf.core.column import as_column
-
     # left-semi and left-anti joins
     cdef cpp_join.gather_map_type c_result
     cdef table_view c_lhs = lhs.view()
@@ -76,7 +76,7 @@ cpdef semi_join(Table lhs, Table rhs, how=None):
     cdef Column left_rows = _gather_map_as_column(move(c_result))
     return (
         left_rows,
-        as_column([], dtype="int32")
+        cudf.core.column.as_column([], dtype="int32")
     )
 
 

From 4aa8fec8f7973f9e0501433b52d2db9e8de8d1b7 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Mar 2021 14:14:57 -0500
Subject: [PATCH 077/138] False -> NotImplementedError

---
 python/cudf/cudf/core/column/column.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 48b337e4738..d3bafec9a3b 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1013,7 +1013,7 @@ def distinct_count(
         return cpp_distinct_count(self, ignore_nulls=dropna)
 
     def can_cast_safely(self, to_dtype: Dtype) -> bool:
-        return False
+        raise NotImplementedError()
 
     def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
         if is_categorical_dtype(dtype):

From ae0e5f9249898849cf983fdb864cda09f2f1fb4e Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Wed, 10 Mar 2021 14:19:32 -0500
Subject: [PATCH 078/138] Update cpp/include/cudf/join.hpp

Co-authored-by: Jake Hemstad <jhemstad@nvidia.com>
---
 cpp/include/cudf/join.hpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index b1f06697522..cfa717f9625 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -32,8 +32,7 @@ namespace cudf {
  */
 
 /**
- * @brief Returns the row indices that can be used to construct
- * the result of performing an inner join between two tables.
+ * @brief Returns the indices of the matching rows resulting from an inner join between the specified tables. 
  *
  * @code{.pseudo}
  *     Left: {{0, 1, 2}}

From f47cf7e500ce29856a57cc80f99b7c89cdc45932 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Mar 2021 18:06:21 -0500
Subject: [PATCH 079/138] Reuse some join logic

---
 cpp/include/cudf/table/table_view.hpp |  5 +++
 cpp/src/join/join.cu                  | 59 +++++++++------------------
 cpp/src/join/join_common_utils.hpp    |  2 +-
 3 files changed, 25 insertions(+), 41 deletions(-)

diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp
index 22f2073f73c..ff259ffdd6e 100644
--- a/cpp/include/cudf/table/table_view.hpp
+++ b/cpp/include/cudf/table/table_view.hpp
@@ -126,6 +126,11 @@ class table_view_base {
    */
   size_type num_rows() const noexcept { return _num_rows; }
 
+  /**
+   * @brief Returns true if `num_columns()` returns zero, or false otherwise
+   */
+  size_type is_empty() const noexcept { return num_columns() == 0; }
+
   table_view_base() = default;
 
   ~table_view_base() = default;
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index bce72862220..6fedd3077b2 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -79,42 +79,21 @@ std::unique_ptr<table> inner_join(table_view const& left_input,
   auto const left  = scatter_columns(matched.second.front(), left_on, left_input);
   auto const right = scatter_columns(matched.second.back(), right_on, right_input);
 
-  // For `inner_join`, we can freely choose either the `left` or `right` table to use for
-  // building/probing the hash map. Because building is typically more expensive than probing, we
-  // build the hash map from the smaller table.
-  if (right.num_rows() > left.num_rows()) {
-    cudf::hash_join hj_obj(left.select(left_on), compare_nulls, stream);
-    auto join_indices = hj_obj.inner_join(right.select(right_on), compare_nulls, stream, mr);
-    std::unique_ptr<table> left_result  = detail::gather(left,
-                                                        join_indices.second->begin(),
-                                                        join_indices.second->end(),
-                                                        out_of_bounds_policy::DONT_CHECK,
-                                                        stream,
-                                                        mr);
-    std::unique_ptr<table> right_result = detail::gather(right,
-                                                         join_indices.first->begin(),
-                                                         join_indices.first->end(),
-                                                         out_of_bounds_policy::DONT_CHECK,
-                                                         stream,
-                                                         mr);
-    return combine_table_pair(std::move(left_result), std::move(right_result));
-  } else {
-    cudf::hash_join hj_obj(right.select(right_on), compare_nulls, stream);
-    auto join_indices = hj_obj.inner_join(left.select(left_on), compare_nulls, stream, mr);
-    std::unique_ptr<table> left_result  = detail::gather(left,
-                                                        join_indices.first->begin(),
-                                                        join_indices.first->end(),
-                                                        out_of_bounds_policy::DONT_CHECK,
-                                                        stream,
-                                                        mr);
-    std::unique_ptr<table> right_result = detail::gather(right,
-                                                         join_indices.second->begin(),
-                                                         join_indices.second->end(),
-                                                         out_of_bounds_policy::DONT_CHECK,
-                                                         stream,
-                                                         mr);
-    return combine_table_pair(std::move(left_result), std::move(right_result));
-  }
+  auto join_indices =
+    inner_join(left.select(left_on), right.select(right_on), compare_nulls, stream, mr);
+  std::unique_ptr<table> left_result  = detail::gather(left,
+                                                      join_indices.first->begin(),
+                                                      join_indices.first->end(),
+                                                      out_of_bounds_policy::DONT_CHECK,
+                                                      stream,
+                                                      mr);
+  std::unique_ptr<table> right_result = detail::gather(right,
+                                                       join_indices.second->begin(),
+                                                       join_indices.second->end(),
+                                                       out_of_bounds_policy::DONT_CHECK,
+                                                       stream,
+                                                       mr);
+  return combine_table_pair(std::move(left_result), std::move(right_result));
 }
 
 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
@@ -157,8 +136,8 @@ std::unique_ptr<table> left_join(table_view const& left_input,
   table_view const left  = scatter_columns(matched.second.front(), left_on, left_input);
   table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
 
-  cudf::hash_join hj_obj(right.select(right_on), compare_nulls, stream);
-  auto join_indices = hj_obj.left_join(left.select(left_on), compare_nulls, stream, mr);
+  auto join_indices =
+    left_join(left.select(left_on), right.select(right_on), compare_nulls, stream, mr);
 
   if ((left_on.empty() || right_on.empty()) ||
       is_trivial_join(left, right, cudf::detail::join_kind::LEFT_JOIN)) {
@@ -221,8 +200,8 @@ std::unique_ptr<table> full_join(table_view const& left_input,
   table_view const left  = scatter_columns(matched.second.front(), left_on, left_input);
   table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
 
-  cudf::hash_join hj_obj(right.select(right_on), compare_nulls, stream);
-  auto join_indices = hj_obj.full_join(left.select(left_on), compare_nulls, stream, mr);
+  auto join_indices =
+    full_join(left.select(left_on), right.select(right_on), compare_nulls, stream, mr);
 
   if ((left_on.empty() || right_on.empty()) ||
       is_trivial_join(left, right, cudf::detail::join_kind::FULL_JOIN)) {
diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp
index ed33fab685c..9312704f065 100644
--- a/cpp/src/join/join_common_utils.hpp
+++ b/cpp/src/join/join_common_utils.hpp
@@ -55,7 +55,7 @@ enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_AN
 inline bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type)
 {
   // If there is nothing to join, then send empty table with all columns
-  if ((0 == left.num_columns()) || (0 == right.num_columns())) { return true; }
+  if (left.is_empty() || right.is_empty()) { return true; }
 
   // If left join and the left table is empty, return immediately
   if ((join_kind::LEFT_JOIN == join_type) && (0 == left.num_rows())) { return true; }

From 230ca0890fa9b1fb014bb40d3cfd9be06e1a8d3f Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 10 Mar 2021 18:07:47 -0500
Subject: [PATCH 080/138] Formatting

---
 cpp/src/join/join.cu | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 6fedd3077b2..8f513187ef4 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -51,9 +51,7 @@ inner_join(table_view const& left_input,
   if (right.num_rows() > left.num_rows()) {
     cudf::hash_join hj_obj(left, compare_nulls, stream);
     auto result = hj_obj.inner_join(right, compare_nulls, stream, mr);
-    return std::make_pair<std::unique_ptr<rmm::device_uvector<size_type>>,
-                          std::unique_ptr<rmm::device_uvector<size_type>>>(std::move(result.second),
-                                                                           std::move(result.first));
+    return std::make_pair(std::move(result.second), std::move(result.first));
   } else {
     cudf::hash_join hj_obj(right, compare_nulls, stream);
     return hj_obj.inner_join(left, compare_nulls, stream, mr);

From 498a62123940b12f14d821c96984f6e3a9d7aa99 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Thu, 11 Mar 2021 09:33:37 -0500
Subject: [PATCH 081/138] Update cpp/include/cudf/join.hpp

Co-authored-by: Jake Hemstad <jhemstad@nvidia.com>
---
 cpp/include/cudf/join.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index cfa717f9625..c284573a253 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -111,6 +111,7 @@ std::unique_ptr<cudf::table> inner_join(
 /**
  * @brief Returns the row indices that can be used to construct
  * the result of performing a left join between two tables.
+ *
  * For rows in the right table that do not have a match in the
  * left table, the row index is an unspecified out-of-bounds value.
  *

From 2de26f3059b78bd52bd85eafa58b8d5d60cf11cf Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 11 Mar 2021 10:33:45 -0500
Subject: [PATCH 082/138] Docs?

---
 cpp/include/cudf/join.hpp | 148 +++++++++++++++++++-------------------
 1 file changed, 76 insertions(+), 72 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index cfa717f9625..7c778a17609 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -32,7 +32,13 @@ namespace cudf {
  */
 
 /**
- * @brief Returns the indices of the matching rows resulting from an inner join between the specified tables. 
+ * @brief Returns a pair of row index vectors corresponding to an
+ * inner join between the specified tables.
+ *
+ * The first returned vector contains the row indices from the left
+ * table that have a match in the right table (in unspecified order).
+ * The corresponding values in the second returned vector are
+ * the matched row indices from the right table.
  *
  * @code{.pseudo}
  *     Left: {{0, 1, 2}}
@@ -46,13 +52,13 @@ namespace cudf {
  * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
  * mismatch.
  *
- * @param[in] left A table representing the keys of the left table of the join
- * @param[in] right A table representing  the keys of the right table of the join
+ * @param[in] left_keys The left table
+ * @param[in] right_keys The right table
  * @param[in] compare_nulls controls whether null join-key values
  * should match or not.
  * @param mr Device memory resource used to allocate the returned table and columns' device memory
  *
- * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
+ * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct
  * the result of performing an inner join between two tables with `left_keys` and `right_keys`
  * as the join keys .
  */
@@ -109,10 +115,14 @@ std::unique_ptr<cudf::table> inner_join(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief Returns the row indices that can be used to construct
- * the result of performing a left join between two tables.
- * For rows in the right table that do not have a match in the
- * left table, the row index is an unspecified out-of-bounds value.
+ * @brief Returns a pair of row index vectors corresponding to a
+ * left join between the specified tables.
+ *
+ * The first returned vector contains all the row indices from the left
+ * table (in unspecified order). The corresponding value in the
+ * second returned vector is either (1) the row index of the matched row
+ * from the right table, if there is a match  or  (2) an unspecified
+ * out-of-bounds value.
  *
  * @code{.pseudo}
  *     Left: {{0, 1, 2}}
@@ -126,13 +136,13 @@ std::unique_ptr<cudf::table> inner_join(
  * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
  * mismatch.
  *
- * @param[in] left A table representing the keys of the left table of the join
- * @param[in] right A table representing  the keys of the right table of the join
+ * @param[in] left_keys The left table
+ * @param[in] right_keys The right table
  * @param[in] compare_nulls controls whether null join-key values
  * should match or not.
  * @param mr Device memory resource used to allocate the returned table and columns' device memory
  *
- * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
+ * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct
  * the result of performing a left join between two tables with `left_keys` and `right_keys`
  * as the join keys .
  */
@@ -197,8 +207,13 @@ std::unique_ptr<cudf::table> left_join(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief Returns the row indices that can be used to construct
- * the result of performing a full join between two tables.
+ * @brief Returns a pair of row index vectors corresponding to a
+ * full join between the specified tables.
+ *
+ * Taken pairwise, the values from the returned vectors are one of:
+ * (1) row indices corresponding to matching rows from the left and
+ * right tables, (2) a row index and an unspecified out-of-bounds value,
+ * representing a row from one table without a match in the other.
  *
  * @code{.pseudo}
  *     Left: {{0, 1, 2}}
@@ -212,13 +227,13 @@ std::unique_ptr<cudf::table> left_join(
  * @throw cudf::logic_error if number of elements in `left_keys` or `right_keys`
  * mismatch.
  *
- * @param[in] left A table representing the keys of the left table of the join
- * @param[in] right A table representing  the keys of the right table of the join
+ * @param[in] left The left table
+ * @param[in] right The right table
  * @param[in] compare_nulls controls whether null join-key values
  * should match or not.
  * @param mr Device memory resource used to allocate the returned table and columns' device memory
  *
- * @return A pair of columns [`left_indices`, `right_indices`] that can be used to construct
+ * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct
  * the result of performing a full join between two tables with `left_keys` and `right_keys`
  * as the join keys .
  */
@@ -282,6 +297,39 @@ std::unique_ptr<cudf::table> full_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Returns a vector of row indices corresponding to a left semi join
+ * between the specified tables.
+ *
+ * The returned vector contains the row indices from the left table
+ * for which there is a matching row in the right table.
+ *
+ * @code{.pseudo}
+ *          TableA: {{0, 1, 2}}
+ *          TableB: {{1, 2, 3}}
+ *          right_on: {1}
+ * Result: {1, 2}
+ * @endcode
+ *
+ * @throw cudf::logic_error if number of columns in either
+ * `left_keys` or `right_keys` table is 0 or exceeds MAX_JOIN_SIZE
+ *
+ * @param[in] left_keys The left table
+ * @param[in] right_keys The right table
+ * @param[in] compare_nulls controls whether null join-key values
+ * should match or not.
+ * @param mr Device memory resource used to allocate the returned table and columns' device memory
+ *
+ * @return A vector `left_indices` that can be used to construct
+ * the result of performing a left semi join between two tables with
+ * `left_keys` and `right_keys` as the join keys .
+ */
+std::unique_ptr<rmm::device_uvector<size_type>> left_semi_join(
+  cudf::table_view const& left_keys,
+  cudf::table_view const& right_keys,
+  null_equality compare_nulls         = null_equality::EQUAL,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Performs a left semi join on the specified columns of two
  * tables (`left`, `right`)
@@ -331,39 +379,31 @@ std::unique_ptr<cudf::table> left_semi_join(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief Returns the row indices that can be used to construct
- * the result of performing a left semi join between two tables.
+ * @brief Returns a vector of row indices corresponding to a left anti join
+ * between the specified tables.
+ *
+ * The returned vector contains the row indices from the left table
+ * for which there is no matching row in the right table.
  *
  * @code{.pseudo}
  *          TableA: {{0, 1, 2}}
- *          TableB: {{1, 2, 3}, {1, 2, 5}}
- *          left_on: {0}
- *          right_on: {1}
- * Result: {1, 2}
- *
- *          TableA {{0, 1, 2}, {1, 2, 5}}
- *          TableB {{1, 2, 3}}
- *          left_on: {0}
- *          right_on: {0}
- * Result: {1, 2}
+ *          TableB: {{1, 2, 3}}
+ * Result: {0}
  * @endcode
  *
- * @throw cudf::logic_error if number of elements in `left_on` or `right_on`
- * mismatch.
- * @throw cudf::logic_error if number of columns in either `left` or `right`
- * table is 0 or exceeds MAX_JOIN_SIZE
+ * @throw cudf::logic_error if the number of columns in either `left_keys` or `right_keys` is 0
  *
- * @param[in] left A table representing the keys of the left table of the join
- * @param[in] right A table representing  the keys of the right table of the join
+ * @param[in] left_keys The left table
+ * @param[in] right_keys The right table
  * @param[in] compare_nulls controls whether null join-key values
  * should match or not.
  * @param mr Device memory resource used to allocate the returned table and columns' device memory
  *
  * @return A column `left_indices` that can be used to construct
- * the result of performing a left semi join between two tables with
+ * the result of performing a left anti join between two tables with
  * `left_keys` and `right_keys` as the join keys .
  */
-std::unique_ptr<rmm::device_uvector<size_type>> left_semi_join(
+std::unique_ptr<rmm::device_uvector<size_type>> left_anti_join(
   cudf::table_view const& left_keys,
   cudf::table_view const& right_keys,
   null_equality compare_nulls         = null_equality::EQUAL,
@@ -420,42 +460,6 @@ std::unique_ptr<cudf::table> left_anti_join(
   null_equality compare_nulls         = null_equality::EQUAL,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-/**
- * @brief Returns the row indices that can be used to construct
- * the result of performing a left anti join between two tables.
- *
- * @code{.pseudo}
- *          TableA: {{0, 1, 2}}
- *          TableB: {{1, 2, 3},  {1, 2, 5}}
- *          left_on: {0}
- *          right_on: {1}
- * Result: {0}
- *
- *          TableA: {{0, 1, 2}, {1, 2, 5}}
- *          TableB: {{1, 2, 3}}
- *          left_on: {0}
- *          right_on: {0}
- * Result: {0}
- * @endcode
- *
- * @throw cudf::logic_error if the number of columns in either `left_keys` or `right_keys` is 0
- *
- * @param[in] left A table representing the keys of the left table of the join
- * @param[in] right A table representing  the keys of the right table of the join
- * @param[in] compare_nulls controls whether null join-key values
- * should match or not.
- * @param mr Device memory resource used to allocate the returned table and columns' device memory
- *
- * @return A column `left_indices` that can be used to construct
- * the result of performing a left anti join between two tables with
- * `left_keys` and `right_keys` as the join keys .
- */
-std::unique_ptr<rmm::device_uvector<size_type>> left_anti_join(
-  cudf::table_view const& left_keys,
-  cudf::table_view const& right_keys,
-  null_equality compare_nulls         = null_equality::EQUAL,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
 /**
  * @brief Performs a cross join on two tables (`left`, `right`)
  *

From b7d8d8aeafd1af20c7eaeff0d60abd35e96f5290 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 11 Mar 2021 12:43:22 -0500
Subject: [PATCH 083/138] Use mr

---
 cpp/src/join/hash_join.cu  | 36 ++++++++++++++++++++----------------
 cpp/src/join/hash_join.cuh | 15 +++++++++++----
 cpp/src/join/join.cu       |  9 +++------
 cpp/src/join/semi_join.cu  |  7 ++++---
 4 files changed, 38 insertions(+), 29 deletions(-)

diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 301726a978f..aa84bc56cce 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -87,10 +87,12 @@ struct valid_range {
  */
 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
           std::unique_ptr<rmm::device_uvector<size_type>>>
-get_left_join_indices_complement(std::unique_ptr<rmm::device_uvector<size_type>> &right_indices,
-                                 size_type left_table_row_count,
-                                 size_type right_table_row_count,
-                                 rmm::cuda_stream_view stream)
+get_left_join_indices_complement(
+  std::unique_ptr<rmm::device_uvector<size_type>> &right_indices,
+  size_type left_table_row_count,
+  size_type right_table_row_count,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource())
 {
   // Get array of indices that do not appear in right_indices
 
@@ -224,15 +226,16 @@ probe_join_hash_table(cudf::table_device_view build_table,
                       cudf::table_device_view probe_table,
                       multimap_type const &hash_table,
                       null_equality compare_nulls,
-                      rmm::cuda_stream_view stream)
+                      rmm::cuda_stream_view stream,
+                      rmm::mr::device_memory_resource *mr)
 {
   size_type estimated_size = estimate_join_output_size<JoinKind, multimap_type>(
     build_table, probe_table, hash_table, compare_nulls, stream);
 
   // If the estimated output size is zero, return immediately
   if (estimated_size == 0) {
-    return std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream),
-                          std::make_unique<rmm::device_uvector<size_type>>(0, stream));
+    return std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr),
+                          std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr));
   }
 
   // Because we are approximating the number of joined elements, our approximation
@@ -242,8 +245,8 @@ probe_join_hash_table(cudf::table_device_view build_table,
   rmm::device_scalar<size_type> write_index(0, stream);
   size_type join_size{0};
 
-  auto left_indices  = std::make_unique<rmm::device_uvector<size_type>>(0, stream);
-  auto right_indices = std::make_unique<rmm::device_uvector<size_type>>(0, stream);
+  auto left_indices  = std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr);
+  auto right_indices = std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr);
 
   auto current_estimated_size = estimated_size;
   do {
@@ -358,8 +361,8 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
                "Mismatch in number of columns to be joined on");
 
   if (is_trivial_join(probe, _build, JoinKind)) {
-    return std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream),
-                          std::make_unique<rmm::device_uvector<size_type>>(0, stream));
+    return std::make_pair(std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr),
+                          std::make_unique<rmm::device_uvector<size_type>>(0, stream, mr));
   }
 
   CUDF_EXPECTS(std::equal(std::cbegin(_build),
@@ -369,7 +372,7 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const &probe,
                           [](const auto &b, const auto &p) { return b.type() == p.type(); }),
                "Mismatch in joining column data types");
 
-  return probe_join_indices<JoinKind>(probe, compare_nulls, stream);
+  return probe_join_indices<JoinKind>(probe, compare_nulls, stream, mr);
 }
 
 template <cudf::detail::join_kind JoinKind>
@@ -377,11 +380,12 @@ std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
           std::unique_ptr<rmm::device_uvector<size_type>>>
 hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
                                               null_equality compare_nulls,
-                                              rmm::cuda_stream_view stream) const
+                                              rmm::cuda_stream_view stream,
+                                              rmm::mr::device_memory_resource *mr) const
 {
   // Trivial left join case - exit early
   if (!_hash_table && JoinKind != cudf::detail::join_kind::INNER_JOIN) {
-    return get_trivial_left_join_indices(probe, stream);
+    return get_trivial_left_join_indices(probe, stream, mr);
   }
 
   CUDF_EXPECTS(_hash_table, "Hash table of hash join is null.");
@@ -393,11 +397,11 @@ hash_join::hash_join_impl::probe_join_indices(cudf::table_view const &probe,
                                                       ? cudf::detail::join_kind::LEFT_JOIN
                                                       : JoinKind;
   auto join_indices = cudf::detail::probe_join_hash_table<ProbeJoinKind>(
-    *build_table, *probe_table, *_hash_table, compare_nulls, stream);
+    *build_table, *probe_table, *_hash_table, compare_nulls, stream, mr);
 
   if (JoinKind == cudf::detail::join_kind::FULL_JOIN) {
     auto complement_indices = detail::get_left_join_indices_complement(
-      join_indices.second, probe.num_rows(), _build.num_rows(), stream);
+      join_indices.second, probe.num_rows(), _build.num_rows(), stream, mr);
     join_indices = detail::concatenate_vector_pairs(join_indices, complement_indices, stream);
   }
   return join_indices;
diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh
index 3bf20eb9433..c502ff18260 100644
--- a/cpp/src/join/hash_join.cuh
+++ b/cpp/src/join/hash_join.cuh
@@ -181,16 +181,21 @@ size_type estimate_join_output_size(table_device_view build_table,
  *
  * @param left Table of left columns to join
  * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the result
  *
  * @return Join output indices vector pair
  */
 inline std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
                  std::unique_ptr<rmm::device_uvector<size_type>>>
-get_trivial_left_join_indices(table_view const& left, rmm::cuda_stream_view stream)
+get_trivial_left_join_indices(
+  table_view const& left,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
-  auto left_indices = std::make_unique<rmm::device_uvector<size_type>>(left.num_rows(), stream);
+  auto left_indices = std::make_unique<rmm::device_uvector<size_type>>(left.num_rows(), stream, mr);
   thrust::sequence(rmm::exec_policy(stream), left_indices->begin(), left_indices->end(), 0);
-  auto right_indices = std::make_unique<rmm::device_uvector<size_type>>(left.num_rows(), stream);
+  auto right_indices =
+    std::make_unique<rmm::device_uvector<size_type>>(left.num_rows(), stream, mr);
   thrust::fill(
     rmm::exec_policy(stream), right_indices->begin(), right_indices->end(), JoinNoneValue);
   return std::make_pair(std::move(left_indices), std::move(right_indices));
@@ -274,6 +279,7 @@ struct hash_join::hash_join_impl {
    * @param probe_table Table of probe side columns to join.
    * @param compare_nulls Controls whether null join-key values should match or not.
    * @param stream CUDA stream used for device memory operations and kernel launches.
+   * @param mr Device memory resource used to allocate the returned vectors.
    *
    * @return Join output indices vector pair.
    */
@@ -282,7 +288,8 @@ struct hash_join::hash_join_impl {
             std::unique_ptr<rmm::device_uvector<size_type>>>
   probe_join_indices(cudf::table_view const& probe,
                      null_equality compare_nulls,
-                     rmm::cuda_stream_view stream) const;
+                     rmm::cuda_stream_view stream,
+                     rmm::mr::device_memory_resource* mr) const;
 };
 
 }  // namespace cudf
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index 8f513187ef4..f2e4bab02c6 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -77,8 +77,7 @@ std::unique_ptr<table> inner_join(table_view const& left_input,
   auto const left  = scatter_columns(matched.second.front(), left_on, left_input);
   auto const right = scatter_columns(matched.second.back(), right_on, right_input);
 
-  auto join_indices =
-    inner_join(left.select(left_on), right.select(right_on), compare_nulls, stream, mr);
+  auto join_indices = inner_join(left.select(left_on), right.select(right_on), compare_nulls, mr);
   std::unique_ptr<table> left_result  = detail::gather(left,
                                                       join_indices.first->begin(),
                                                       join_indices.first->end(),
@@ -134,8 +133,7 @@ std::unique_ptr<table> left_join(table_view const& left_input,
   table_view const left  = scatter_columns(matched.second.front(), left_on, left_input);
   table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
 
-  auto join_indices =
-    left_join(left.select(left_on), right.select(right_on), compare_nulls, stream, mr);
+  auto join_indices = left_join(left.select(left_on), right.select(right_on), compare_nulls);
 
   if ((left_on.empty() || right_on.empty()) ||
       is_trivial_join(left, right, cudf::detail::join_kind::LEFT_JOIN)) {
@@ -198,8 +196,7 @@ std::unique_ptr<table> full_join(table_view const& left_input,
   table_view const left  = scatter_columns(matched.second.front(), left_on, left_input);
   table_view const right = scatter_columns(matched.second.back(), right_on, right_input);
 
-  auto join_indices =
-    full_join(left.select(left_on), right.select(right_on), compare_nulls, stream, mr);
+  auto join_indices = full_join(left.select(left_on), right.select(right_on), compare_nulls);
 
   if ((left_on.empty() || right_on.empty()) ||
       is_trivial_join(left, right, cudf::detail::join_kind::FULL_JOIN)) {
diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index 073ef9eb243..fea9ea45fd3 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -49,11 +49,11 @@ std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_semi_anti_join(
   CUDF_EXPECTS(0 != right_keys.num_columns(), "Right table is empty");
 
   if (is_trivial_join(left_keys, right_keys, JoinKind)) {
-    return std::make_unique<rmm::device_uvector<cudf::size_type>>(0, stream);
+    return std::make_unique<rmm::device_uvector<cudf::size_type>>(0, stream, mr);
   }
   if ((join_kind::LEFT_ANTI_JOIN == JoinKind) && (0 == right_keys.num_rows())) {
     auto result =
-      std::make_unique<rmm::device_uvector<cudf::size_type>>(left_keys.num_rows(), stream);
+      std::make_unique<rmm::device_uvector<cudf::size_type>>(left_keys.num_rows(), stream, mr);
     thrust::sequence(thrust::cuda::par.on(stream.value()), result->begin(), result->end());
     return std::move(result);
   }
@@ -98,7 +98,8 @@ std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_semi_anti_join(
   // For semi join we want contains to be true, for anti join we want contains to be false
   bool join_type_boolean = (JoinKind == join_kind::LEFT_SEMI_JOIN);
 
-  auto gather_map = std::make_unique<rmm::device_uvector<cudf::size_type>>(left_num_rows, stream);
+  auto gather_map =
+    std::make_unique<rmm::device_uvector<cudf::size_type>>(left_num_rows, stream, mr);
 
   // gather_map_end will be the end of valid data in gather_map
   auto gather_map_end = thrust::copy_if(

From 9efc7614c690c106383d34bce04dd1d9931dcf63 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 15 Mar 2021 17:58:27 -0400
Subject: [PATCH 084/138] Docs

---
 python/cudf/cudf/core/join/join.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 1377ecf5df8..698a99751d4 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -262,10 +262,11 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
         # Merge the Frames `left_result` and `right_result` into a single
         # `Frame`, suffixing column names if necessary.
 
-        # For outer joins, the key columns from left_result and
-        # right_result are combined if they have the same name.
-        # We will drop those keys from right_result later, so
-        # combine them now with keys from left_result.
+        # If two key columns have the same name, a single output column appears
+        # in the result. For all other join types, the key column from the rhs
+        # is simply dropped. For outer joins, the two key columns are combined
+        # by filling nulls in the left key column with corresponding values
+        # from the right key column:
         if self.how == "outer":
             for lkey, rkey in zip(*self._keys):
                 if lkey.name == rkey.name:
@@ -275,11 +276,17 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
                         lkey.get(left_result).fillna(rkey.get(right_result)),
                     )
 
-        # `left_names` and `right_names` are mappings of column names
-        # of `lhs` and `rhs` to the corresponding column names in the result
+        # Compute the result column names:
+        # left_names and right_names will be a mappings of input column names
+        # to the corresponding names in the final result.
         left_names = OrderedDict(zip(left_result._data, left_result._data))
         right_names = OrderedDict(zip(right_result._data, right_result._data))
 
+        # For any columns from left_result and right_result that have the same
+        # name:
+        # - if they are key columns, keep only the left column
+        # - if they are not key columns, use suffixes to differentiate them
+        #   in the final result
         common_names = set(left_names) & set(right_names)
 
         if self.on:
@@ -291,9 +298,6 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
                     if lkey.name == rkey.name:
                         key_columns_with_same_name.append(lkey.name)
 
-        # For any columns with the same name:
-        # - if they are key columns, keep only the left column
-        # - if they are not key columns, use suffixes
         for name in common_names:
             if name not in key_columns_with_same_name:
                 left_names[name] = f"{name}{self.lsuffix}"

From 8779bc7c16ea2304bfa0ba0d32d7962e8e10abc8 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 16 Mar 2021 10:10:17 -0400
Subject: [PATCH 085/138] Simplify suffix handling

---
 python/cudf/cudf/core/dataframe.py |   9 +--
 python/cudf/cudf/core/frame.py     | 105 +++--------------------------
 python/cudf/cudf/core/join/join.py |  28 +-------
 python/cudf/cudf/core/series.py    |  19 ++++--
 4 files changed, 28 insertions(+), 133 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 18a7f052d62..0f5adf8aba1 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -4221,12 +4221,9 @@ def merge(
         else:
             lsuffix, rsuffix = suffixes
 
-        lhs = self.copy(deep=False)
-        rhs = right.copy(deep=False)
-
         # Compute merge
-        gdf_result = super(DataFrame, lhs)._merge(
-            rhs,
+        gdf_result = super()._merge(
+            right,
             on=on,
             left_on=left_on,
             right_on=right_on,
@@ -4234,8 +4231,6 @@ def merge(
             right_index=right_index,
             how=how,
             sort=sort,
-            lsuffix=lsuffix,
-            rsuffix=rsuffix,
             method=method,
             indicator=indicator,
             suffixes=suffixes,
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index f540c7144a5..fb18d9c3cf9 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -3319,77 +3319,6 @@ def sqrt(self):
         """
         return self._unaryop("sqrt")
 
-    @staticmethod
-    def _validate_merge_cfg(
-        lhs,
-        rhs,
-        left_on,
-        right_on,
-        on,
-        how,
-        left_index=False,
-        right_index=False,
-        lsuffix=None,
-        rsuffix=None,
-    ):
-        """
-        Error for various combinations of merge input parameters
-        """
-        len_left_on = len(left_on) if left_on is not None else 0
-        len_right_on = len(right_on) if right_on is not None else 0
-
-        # must actually support the requested merge type
-        if how not in ["left", "inner", "outer", "leftanti", "leftsemi"]:
-            raise NotImplementedError(f"{how} merge not supported yet")
-
-        # Passing 'on' with 'left_on' or 'right_on' is potentially ambiguous
-        if on:
-            if left_on or right_on:
-                raise ValueError(
-                    'Can only pass argument "on" OR "left_on" '
-                    'and "right_on", not a combination of both.'
-                )
-
-        # Require same total number of columns to join on in both operands
-        if not (len_left_on + left_index * len(lhs.index.names)) == (
-            len_right_on + right_index * len(rhs.index.names)
-        ):
-            raise ValueError(
-                "Merge operands must have same number of join key columns"
-            )
-
-        # If nothing specified, must have common cols to use implicitly
-        same_named_columns = set(lhs._data.keys()) & set(rhs._data.keys())
-        if not (left_index or right_index):
-            if not (left_on or right_on):
-                if len(same_named_columns) == 0:
-                    raise ValueError("No common columns to perform merge on")
-
-        for name in same_named_columns:
-            if not (
-                name in left_on
-                and name in right_on
-                and (left_on.index(name) == right_on.index(name))
-            ):
-                if not (lsuffix or rsuffix):
-                    raise ValueError(
-                        "there are overlapping columns but "
-                        "lsuffix and rsuffix are not defined"
-                    )
-
-        if on:
-            on_keys = [on] if not isinstance(on, list) else on
-            for key in on_keys:
-                if not (key in lhs._data.keys() and key in rhs._data.keys()):
-                    raise KeyError(f"Key {on} not in both operands")
-        else:
-            for key in left_on:
-                if key not in lhs._data.keys():
-                    raise KeyError(f'Key "{key}" not in left operand')
-            for key in right_on:
-                if key not in rhs._data.keys():
-                    raise KeyError(f'Key "{key}" not in right operand')
-
     def _merge(
         self,
         right,
@@ -3400,34 +3329,24 @@ def _merge(
         right_index=False,
         how="inner",
         sort=False,
-        lsuffix=None,
-        rsuffix=None,
         method="hash",
         indicator=False,
         suffixes=("_x", "_y"),
     ):
-        # Merge doesn't support right, so just swap
-        if how == "right":
-            return right._merge(
-                self,
-                on=on,
-                left_on=right_on,
-                right_on=left_on,
-                left_index=right_index,
-                right_index=left_index,
-                how="left",
-                sort=sort,
-                lsuffix=rsuffix,
-                rsuffix=lsuffix,
-                method=method,
-                indicator=indicator,
-                suffixes=suffixes,
-            )
         from cudf.core.join.join import merge
 
+        lhs, rhs = self, right
+        if how == "right":
+            # Merge doesn't support right, so just swap
+            how = "left"
+            lhs, rhs = right, self
+            left_on, right_on = right_on, left_on
+            left_index, right_index = right_index, left_index
+            suffixes = (suffixes[1], suffixes[0])
+
         return merge(
-            self,
-            right,
+            lhs,
+            rhs,
             on=on,
             left_on=left_on,
             right_on=right_on,
@@ -3435,8 +3354,6 @@ def _merge(
             right_index=right_index,
             how=how,
             sort=sort,
-            lsuffix=lsuffix,
-            rsuffix=rsuffix,
             method=method,
             indicator=indicator,
             suffixes=suffixes,
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 698a99751d4..d95a7d292f4 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -29,8 +29,6 @@ def merge(
     right_index,
     how,
     sort,
-    lsuffix,
-    rsuffix,
     method,
     indicator,
     suffixes,
@@ -49,8 +47,6 @@ def merge(
         right_index=right_index,
         how=how,
         sort=sort,
-        lsuffix=lsuffix,
-        rsuffix=rsuffix,
         method=method,
         indicator=indicator,
         suffixes=suffixes,
@@ -91,8 +87,6 @@ def __init__(
         right_index,
         how,
         sort,
-        lsuffix,
-        rsuffix,
         method,
         indicator,
         suffixes,
@@ -127,14 +121,6 @@ def __init__(
         sort : bool
             Boolean flag indicating if the output Frame is to be
             sorted on the output's join keys, in left to right order.
-        lsuffix : string
-            The suffix to be appended to left hand column names that
-            are found to exist in the right frame, but are not specified
-            as join keys themselves.
-        rsuffix : string
-            The suffix to be appended to right hand column names that
-            are found to exist in the left frame, but are not specified
-            as join keys themselves.
         suffixes : list like
             Left and right suffixes specified together, unpacked into lsuffix
             and rsuffix.
@@ -148,11 +134,8 @@ def __init__(
             left_index=left_index,
             right_index=right_index,
             how=how,
-            lsuffix=lsuffix,
-            rsuffix=rsuffix,
             suffixes=suffixes,
         )
-
         self.lhs = lhs
         self.rhs = rhs
         self.on = on
@@ -162,10 +145,8 @@ def __init__(
         self.right_index = right_index
         self.how = how
         self.sort = sort
-        self.lsuffix = lsuffix
-        self.rsuffix = rsuffix
-        self.suffixes = suffixes
-
+        if suffixes:
+            self.lsuffix, self.rsuffix = suffixes
         self._compute_join_keys()
 
     @property
@@ -369,8 +350,6 @@ def _validate_merge_params(
         left_index,
         right_index,
         how,
-        lsuffix,
-        rsuffix,
         suffixes,
     ):
         """
@@ -402,8 +381,7 @@ def _validate_merge_params(
         ):
             raise ValueError("No common columns to perform merge on")
 
-        if suffixes:
-            lsuffix, rsuffix = suffixes
+        lsuffix, rsuffix = suffixes
         for name in same_named_columns:
             if name == left_on == right_on:
                 continue
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index f80c6a9b452..5d9ded90a24 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -4492,17 +4492,24 @@ def merge(
         method="hash",
         suffixes=("_x", "_y"),
     ):
-
         if left_on not in (self.name, None):
             raise ValueError(
                 "Series to other merge uses series name as key implicitly"
             )
 
-        lhs = self.copy(deep=False)
-        rhs = other.copy(deep=False)
+        if lsuffix or rsuffix:
+            raise ValueError(
+                "The lsuffix and rsuffix keywords have been replaced with the "
+                "``suffixes=`` keyword.  "
+                "Please provide the following instead: \n\n"
+                "    suffixes=('%s', '%s')"
+                % (lsuffix or "_x", rsuffix or "_y")
+            )
+        else:
+            lsuffix, rsuffix = suffixes
 
-        result = super(Series, lhs)._merge(
-            rhs,
+        result = super()._merge(
+            other,
             on=on,
             left_on=left_on,
             right_on=right_on,
@@ -4510,8 +4517,6 @@ def merge(
             right_index=right_index,
             how=how,
             sort=sort,
-            lsuffix=lsuffix,
-            rsuffix=rsuffix,
             method=method,
             indicator=False,
             suffixes=suffixes,

From 4c651ac3899574c70e88a6b7de9e8b989420050a Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 17 Mar 2021 10:14:24 -0400
Subject: [PATCH 086/138] Simplify joiner requirements

---
 python/cudf/cudf/core/join/join.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index d95a7d292f4..76eb32f6f74 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
 from __future__ import annotations
 
+import functools
 from collections import OrderedDict, namedtuple
 from typing import TYPE_CHECKING, Callable, Tuple
 
@@ -63,17 +64,14 @@ class Merge(object):
 
     # The joiner function must have the following signature:
     #
-    #     def joiner(lhs, rhs, how=how):
+    #     def joiner(lhs, rhs):
     #          ...
     #
-    # where:
-    #
-    # - `lhs` and `rhs` are Frames composed of the left and right join keys
-    # - `how` is a string specifying the kind of join to perform
-    #
-    # ...and it returns a tuple of two gather maps representing the rows
-    # to gather from the left- and right- side tables respectively.
-    _joiner: Callable = libcudf.join.join
+    # where `lhs` and `rhs` are Frames composed of the left and right
+    # join key, and `joiner` returns a tuple of two gather maps
+    # representing the rows to gather from the left- and right- side
+    # tables respectively.
+    _joiner: Callable
 
     def __init__(
         self,
@@ -136,6 +134,8 @@ def __init__(
             how=how,
             suffixes=suffixes,
         )
+        self._joiner = functools.partial(libcudf.join.join, how=how)
+
         self.lhs = lhs
         self.rhs = rhs
         self.on = on
@@ -434,7 +434,11 @@ def _restore_categorical_keys(
 
 
 class MergeSemi(Merge):
-    _joiner = libcudf.join.semi_join
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._joiner = functools.partial(
+            libcudf.join.semi_join, how=kwargs["how"]
+        )
 
     def _merge_results(self, lhs: Frame, rhs: Frame) -> Frame:
         return super()._merge_results(lhs, cudf.core.frame.Frame())

From b4f4d7c85b0c72220099e3516c693fb451a51b23 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 17 Mar 2021 10:31:35 -0400
Subject: [PATCH 087/138] Do less work in SemiJoin._merge_results

---
 python/cudf/cudf/core/join/join.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 76eb32f6f74..3ca0111c74d 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -441,4 +441,7 @@ def __init__(self, *args, **kwargs):
         )
 
     def _merge_results(self, lhs: Frame, rhs: Frame) -> Frame:
-        return super()._merge_results(lhs, cudf.core.frame.Frame())
+        if issubclass(self._out_class, cudf.Index):
+            return self._out_class._from_data(lhs)
+        else:
+            return self._out_class._from_data(lhs._data, index=lhs._index)

From d353c92c510a5cad9d11b90986fb13a9df9f80df Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 17 Mar 2021 10:33:36 -0400
Subject: [PATCH 088/138] Doc

---
 python/cudf/cudf/core/join/join.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 3ca0111c74d..20eb5101d4c 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -441,6 +441,7 @@ def __init__(self, *args, **kwargs):
         )
 
     def _merge_results(self, lhs: Frame, rhs: Frame) -> Frame:
+        # semi-join result includes only lhs columns
         if issubclass(self._out_class, cudf.Index):
             return self._out_class._from_data(lhs)
         else:

From 580a346d95e218b173b10a0f37596e5cfa3a949b Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 17 Mar 2021 10:35:37 -0400
Subject: [PATCH 089/138] Doc

---
 python/cudf/cudf/core/join/join.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 20eb5101d4c..f799171c2a8 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -151,6 +151,7 @@ def __init__(
 
     @property
     def _out_class(self):
+        # type of the result
         out_class = cudf.DataFrame
 
         if isinstance(self.lhs, cudf.MultiIndex) or isinstance(

From 328dafdb548f6dfdf47df4b60d67652a4b8c971d Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 17 Mar 2021 11:40:03 -0400
Subject: [PATCH 090/138] Return None from semi_join

---
 python/cudf/cudf/_lib/join.pyx     |  4 +---
 python/cudf/cudf/core/join/join.py | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index ee4c72483a7..69b8004cede 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -25,7 +25,6 @@ cimport cudf._lib.cpp.join as cpp_join
 # the join result when joining on the keys `lhs` and `rhs`.
 
 cpdef join(Table lhs, Table rhs, how=None):
-
     cdef pair[cpp_join.gather_map_type, cpp_join.gather_map_type] c_result
     cdef table_view c_lhs = lhs.view()
     cdef table_view c_rhs = rhs.view()
@@ -54,7 +53,6 @@ cpdef join(Table lhs, Table rhs, how=None):
 
 
 cpdef semi_join(Table lhs, Table rhs, how=None):
-
     # left-semi and left-anti joins
     cdef cpp_join.gather_map_type c_result
     cdef table_view c_lhs = lhs.view()
@@ -76,7 +74,7 @@ cpdef semi_join(Table lhs, Table rhs, how=None):
     cdef Column left_rows = _gather_map_as_column(move(c_result))
     return (
         left_rows,
-        cudf.core.column.as_column([], dtype="int32")
+        None
     )
 
 
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index f799171c2a8..0d45abb76c2 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -64,11 +64,14 @@ class Merge(object):
 
     # The joiner function must have the following signature:
     #
-    #     def joiner(lhs, rhs):
+    #     def joiner(
+    #         lhs: Frame,
+    #         rhs: Frame
+    #     ) -> Tuple[Optional[Column], Optional[Column]]:
     #          ...
     #
     # where `lhs` and `rhs` are Frames composed of the left and right
-    # join key, and `joiner` returns a tuple of two gather maps
+    # join key. The `joiner` returns a tuple of two Columns
     # representing the rows to gather from the left- and right- side
     # tables respectively.
     _joiner: Callable
@@ -173,8 +176,13 @@ def perform_merge(self) -> Frame:
         )
         lhs, rhs = self._restore_categorical_keys(lhs, rhs)
 
-        left_result = lhs._gather(left_rows, nullify=True)
-        right_result = rhs._gather(right_rows, nullify=True)
+        left_result = cudf.core.frame.Frame()
+        right_result = cudf.core.frame.Frame()
+
+        if left_rows is not None:
+            left_result = lhs._gather(left_rows, nullify=True)
+        if right_rows is not None:
+            right_result = rhs._gather(right_rows, nullify=True)
 
         result = self._merge_results(left_result, right_result)
 

From 297d20abe71c5463557eca071573fda35046166a Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Wed, 17 Mar 2021 13:06:28 -0400
Subject: [PATCH 091/138] Init common_type

---
 python/cudf/cudf/core/join/_join_helpers.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 24ffe9040bf..209a6c5c4e4 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -74,8 +74,9 @@ def _frame_select_by_indexers(
 
 
 def _match_join_keys(lcol: ColumnBase, rcol: ColumnBase, how: str) -> Dtype:
-    # cast the keys lcol and rcol to a common dtype
+    common_type = None
 
+    # cast the keys lcol and rcol to a common dtype
     ltype = lcol.dtype
     rtype = rcol.dtype
 
@@ -109,10 +110,7 @@ def _match_join_keys(lcol: ColumnBase, rcol: ColumnBase, how: str) -> Dtype:
                 "upcasting to {common_type}."
             )
 
-    if common_type:
-        return common_type
-
-    return None
+    return common_type
 
 
 def _match_categorical_dtypes(ltype: Dtype, rtype: Dtype, how: str) -> Dtype:

From 935648b01220deac0dc306014ecd82f5191fc6f5 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Mar 2021 13:48:34 -0700
Subject: [PATCH 092/138] Move validation directly into set_by_label and use a
 raw dict to store the columns in the accessor.

---
 python/cudf/cudf/core/column_accessor.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index ad1a0c80ef5..a1de373eb37 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 import itertools
-from collections import OrderedDict
 from collections.abc import MutableMapping
 from typing import (
     TYPE_CHECKING,
@@ -18,8 +17,8 @@
 import pandas as pd
 
 import cudf
+from cudf.core import column
 from cudf.utils.utils import (
-    OrderedColumnDict,
     cached_property,
     to_flat_dict,
     to_nested_dict,
@@ -31,7 +30,7 @@
 
 class ColumnAccessor(MutableMapping):
 
-    _data: "OrderedDict[Any, ColumnBase]"
+    _data: "dict[Any, ColumnBase]"
     multiindex: bool
     _level_names: Tuple[Any, ...]
 
@@ -64,7 +63,7 @@ def __init__(
             self.multiindex = multiindex
             self._level_names = level_names
 
-        self._data = OrderedColumnDict(data)
+        self._data = dict(data)
         self.multiindex = multiindex
         self._level_names = level_names
 
@@ -280,6 +279,15 @@ def set_by_label(self, key: Any, value: Any):
         value : column-like
         """
         key = self._pad_key(key)
+
+        # Convert all types to columns and ensure that values are of equal
+        # length.
+        value = column.as_column(value)
+        if len(self._data) > 0:
+            first = next(iter(self._data.values()))
+            if len(value) != len(first):
+                raise ValueError("All columns must be of equal length")
+
         self._data[key] = value
         self._clear_cache()
 

From 806a3ef7740414bf16e21ea8b112982537a6f5ad Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Mar 2021 13:54:27 -0700
Subject: [PATCH 093/138] Remove all references to OrderedColumnDict.

---
 python/cudf/cudf/_lib/table.pyx    |  8 ++++----
 python/cudf/cudf/core/dataframe.py |  3 +--
 python/cudf/cudf/core/frame.py     |  4 ++--
 python/cudf/cudf/utils/utils.py    | 30 ------------------------------
 4 files changed, 7 insertions(+), 38 deletions(-)

diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx
index dba0abb9cf0..f97b45d8abf 100644
--- a/python/cudf/cudf/_lib/table.pyx
+++ b/python/cudf/cudf/_lib/table.pyx
@@ -34,8 +34,8 @@ cdef class Table:
 
         Parameters
         ----------
-        data : OrderedColumnDict
-            An OrderedColumnDict mapping column names to Columns
+        data : dict
+            An dict mapping column names to Columns
         index : Table
             A Table representing the (optional) index columns.
         """
@@ -109,7 +109,7 @@ cdef class Table:
                 it += 1
             index = Table(dict(zip(index_names, index_columns)))
 
-        # Construct the data OrderedColumnDict
+        # Construct the data dict
         data_columns = []
         for _ in column_names:
             data_columns.append(Column.from_unique_ptr(move(dereference(it))))
@@ -154,7 +154,7 @@ cdef class Table:
                 column_idx += 1
             index = Table(dict(zip(index_names, index_columns)))
 
-        # Construct the data OrderedColumnDict
+        # Construct the data dict
         cdef size_type source_column_idx = 0
         data_columns = []
         for _ in column_names:
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 18a7f052d62..a04dbb826a8 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -52,7 +52,6 @@
     is_struct_dtype,
     numeric_normalize_types,
 )
-from cudf.utils.utils import OrderedColumnDict
 
 T = TypeVar("T", bound="DataFrame")
 
@@ -4599,7 +4598,7 @@ def hash_columns(self, columns=None):
             table_to_hash = self
         else:
             cols = [self[k]._column for k in columns]
-            table_to_hash = Frame(data=OrderedColumnDict(zip(columns, cols)))
+            table_to_hash = Frame(data=dict(zip(columns, cols)))
 
         return Series(table_to_hash._hash()).values
 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 926aad368b0..e33fda3ee09 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -39,8 +39,8 @@ class Frame(libcudf.table.Table):
 
     Parameters
     ----------
-    data : OrderedColumnDict
-        An OrderedColumnDict mapping column names to Columns
+    data : dict
+        An dict mapping column names to Columns
     index : Table
         A Frame representing the (optional) index columns.
     """
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index 03a39f6fb4b..ba9fa734248 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -280,36 +280,6 @@ def __get__(self, instance, cls):
             return value
 
 
-class ColumnValuesMappingMixin:
-    """
-    Coerce provided values for the mapping to Columns.
-    """
-
-    def __setitem__(self, key, value):
-
-        value = column.as_column(value)
-        super().__setitem__(key, value)
-
-
-class EqualLengthValuesMappingMixin:
-    """
-    Require all values in the mapping to have the same length.
-    """
-
-    def __setitem__(self, key, value):
-        if len(self) > 0:
-            first = next(iter(self.values()))
-            if len(value) != len(first):
-                raise ValueError("All values must be of equal length")
-        super().__setitem__(key, value)
-
-
-class OrderedColumnDict(
-    ColumnValuesMappingMixin, EqualLengthValuesMappingMixin, OrderedDict
-):
-    pass
-
-
 class NestedMappingMixin:
     """
     Make missing values of a mapping empty instances

From 40a7b173bb3a86bd8d2473121aa519765c442e7f Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Mar 2021 13:58:35 -0700
Subject: [PATCH 094/138] Move validation to separate method and use in both
 set_by_label and constructor.

---
 python/cudf/cudf/core/column_accessor.py | 25 +++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index a1de373eb37..c6b9236f0d0 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -63,7 +63,7 @@ def __init__(
             self.multiindex = multiindex
             self._level_names = level_names
 
-        self._data = dict(data)
+        self._data = {k: self._convert_and_validate(v) for k, v in data.items()}
         self.multiindex = multiindex
         self._level_names = level_names
 
@@ -269,6 +269,18 @@ def select_by_index(self, index: Any) -> ColumnAccessor:
             data, multiindex=self.multiindex, level_names=self.level_names,
         )
 
+    def _convert_and_validate(self, value: Any):
+        # Make sure that the provided value can be stored as a column. This
+        # method will convert the column to an appropriate type and make sure
+        # that it is the same type as other columns in the accessor.
+
+        value = column.as_column(value)
+        if len(self._data) > 0:
+            first = next(iter(self._data.values()))
+            if len(value) != len(first):
+                raise ValueError("All columns must be of equal length")
+        return value
+
     def set_by_label(self, key: Any, value: Any):
         """
         Add (or modify) column by name.
@@ -279,16 +291,7 @@ def set_by_label(self, key: Any, value: Any):
         value : column-like
         """
         key = self._pad_key(key)
-
-        # Convert all types to columns and ensure that values are of equal
-        # length.
-        value = column.as_column(value)
-        if len(self._data) > 0:
-            first = next(iter(self._data.values()))
-            if len(value) != len(first):
-                raise ValueError("All columns must be of equal length")
-
-        self._data[key] = value
+        self._data[key] = self._convert_and_validate(value)
         self._clear_cache()
 
     def _select_by_label_list_like(self, key: Any) -> ColumnAccessor:

From a1c576ebe2cb9e5e344a9aaa00a7c2ef4044c5c6 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Mar 2021 14:02:25 -0700
Subject: [PATCH 095/138] Format with black.

---
 python/cudf/cudf/core/column_accessor.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index c6b9236f0d0..fe8058c31ce 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -63,7 +63,9 @@ def __init__(
             self.multiindex = multiindex
             self._level_names = level_names
 
-        self._data = {k: self._convert_and_validate(v) for k, v in data.items()}
+        self._data = {
+            k: self._convert_and_validate(v) for k, v in data.items()
+        }
         self.multiindex = multiindex
         self._level_names = level_names
 

From 788d9d6a0bd1e8254dba31b3e085ed56abec0160 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Mar 2021 14:08:21 -0700
Subject: [PATCH 096/138] Expose parameter to make validation optional.

---
 python/cudf/cudf/core/column_accessor.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index fe8058c31ce..38832396f1f 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -283,17 +283,24 @@ def _convert_and_validate(self, value: Any):
                 raise ValueError("All columns must be of equal length")
         return value
 
-    def set_by_label(self, key: Any, value: Any):
+    def set_by_label(self, key: Any, value: Any, validate: bool = True):
         """
         Add (or modify) column by name.
 
         Parameters
         ----------
-        key : name of the column
+        key
+            name of the column
         value : column-like
+            The value to insert into the column.
+        validate : bool
+            If True, the provided value will be coerced to a column and
+            validated before setting (Default value = True).
         """
         key = self._pad_key(key)
-        self._data[key] = self._convert_and_validate(value)
+        if validate:
+            value = self._convert_and_validate(value)
+        self._data[key] = value
         self._clear_cache()
 
     def _select_by_label_list_like(self, key: Any) -> ColumnAccessor:

From 6a64285f4e36a7437c550cf57ccd30dcf694f2e0 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Mar 2021 14:13:55 -0700
Subject: [PATCH 097/138] Coerce constructor input to dict before calling
 items.

---
 python/cudf/cudf/core/column_accessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 38832396f1f..034c74393b1 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -64,7 +64,7 @@ def __init__(
             self._level_names = level_names
 
         self._data = {
-            k: self._convert_and_validate(v) for k, v in data.items()
+            k: self._convert_and_validate(v) for k, v in dict(data).items()
         }
         self.multiindex = multiindex
         self._level_names = level_names

From e7d09812a3ee18a3ce8cbe3208c9a526d353a38d Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Mar 2021 14:21:47 -0700
Subject: [PATCH 098/138] Make construction safe.

---
 python/cudf/cudf/core/column_accessor.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 034c74393b1..2b5ed21b010 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -63,9 +63,12 @@ def __init__(
             self.multiindex = multiindex
             self._level_names = level_names
 
-        self._data = {
-            k: self._convert_and_validate(v) for k, v in dict(data).items()
-        }
+        # Explicitly initialize an empty data dict so that we can validate each
+        # new column.
+        self._data = {}
+        for k, v in dict(data).items():
+            self._data[k] =  self._convert_and_validate(v)
+
         self.multiindex = multiindex
         self._level_names = level_names
 
@@ -277,7 +280,7 @@ def _convert_and_validate(self, value: Any):
         # that it is the same type as other columns in the accessor.
 
         value = column.as_column(value)
-        if len(self._data) > 0:
+        if hasattr(self, '_data') and len(self._data) > 0:
             first = next(iter(self._data.values()))
             if len(value) != len(first):
                 raise ValueError("All columns must be of equal length")

From c39932c4dc46e35c262440b76d16d0ed3733e8c3 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Mar 2021 15:39:01 -0700
Subject: [PATCH 099/138] Final cleanup and documentation.

---
 python/cudf/cudf/core/column_accessor.py | 52 ++++++++++++++----------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 2b5ed21b010..c175a6d9da7 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -33,6 +33,7 @@ class ColumnAccessor(MutableMapping):
     _data: "dict[Any, ColumnBase]"
     multiindex: bool
     _level_names: Tuple[Any, ...]
+    _column_length: int
 
     def __init__(
         self,
@@ -62,15 +63,30 @@ def __init__(
             self._data = data._data
             self.multiindex = multiindex
             self._level_names = level_names
+            self._column_length = column_length
+        else:
+            # This code path is performance-critical for copies and should be
+            # modified with care.
+            self._data = {}
+            if data:
+                data = dict(data)
+                # Faster than next(iter(data.values()))
+                column_length = len(data[next(iter(data))])
+                for k, v in data.items():
+                    # Much faster to avoid the function call if possible; the
+                    # extra isinstance is negligible if we do have to make a
+                    # column from something else.
+                    if not isinstance(v, column.ColumnBase):
+                        v = column.as_column(v)
+                    if len(v) != column_length:
+                        raise ValueError("All columns must be of equal length")
+                    self._data[k] = v
+                self._column_length = column_length
+            else:
+                self._column_length = None
 
-        # Explicitly initialize an empty data dict so that we can validate each
-        # new column.
-        self._data = {}
-        for k, v in dict(data).items():
-            self._data[k] =  self._convert_and_validate(v)
-
-        self.multiindex = multiindex
-        self._level_names = level_names
+            self.multiindex = multiindex
+            self._level_names = level_names
 
     def __iter__(self):
         return self._data.__iter__()
@@ -274,18 +290,6 @@ def select_by_index(self, index: Any) -> ColumnAccessor:
             data, multiindex=self.multiindex, level_names=self.level_names,
         )
 
-    def _convert_and_validate(self, value: Any):
-        # Make sure that the provided value can be stored as a column. This
-        # method will convert the column to an appropriate type and make sure
-        # that it is the same type as other columns in the accessor.
-
-        value = column.as_column(value)
-        if hasattr(self, '_data') and len(self._data) > 0:
-            first = next(iter(self._data.values()))
-            if len(value) != len(first):
-                raise ValueError("All columns must be of equal length")
-        return value
-
     def set_by_label(self, key: Any, value: Any, validate: bool = True):
         """
         Add (or modify) column by name.
@@ -302,7 +306,13 @@ def set_by_label(self, key: Any, value: Any, validate: bool = True):
         """
         key = self._pad_key(key)
         if validate:
-            value = self._convert_and_validate(value)
+            value = column.as_column(value)
+            if len(self._data) > 0:
+                if len(value) != self._column_length:
+                    raise ValueError("All columns must be of equal length")
+            else:
+                self._column_length = len(value)
+
         self._data[key] = value
         self._clear_cache()
 

From 4ff09fcf66566ac9aaf3b6df75cf2c60e96c060e Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 19 Mar 2021 15:57:35 -0700
Subject: [PATCH 100/138] Address style issues.

---
 python/cudf/cudf/core/column_accessor.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index c175a6d9da7..6988efeafa7 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -8,6 +8,7 @@
     TYPE_CHECKING,
     Any,
     Callable,
+    Dict,
     Mapping,
     Optional,
     Tuple,
@@ -30,7 +31,7 @@
 
 class ColumnAccessor(MutableMapping):
 
-    _data: "dict[Any, ColumnBase]"
+    _data: "Dict[Any, ColumnBase]"
     multiindex: bool
     _level_names: Tuple[Any, ...]
     _column_length: int
@@ -63,7 +64,7 @@ def __init__(
             self._data = data._data
             self.multiindex = multiindex
             self._level_names = level_names
-            self._column_length = column_length
+            self._column_length = data._column_length
         else:
             # This code path is performance-critical for copies and should be
             # modified with care.
@@ -82,8 +83,6 @@ def __init__(
                         raise ValueError("All columns must be of equal length")
                     self._data[k] = v
                 self._column_length = column_length
-            else:
-                self._column_length = None
 
             self.multiindex = multiindex
             self._level_names = level_names

From 0178127205b44383bf4d6a2c3d424512aa80b033 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Mar 2021 17:28:57 -0400
Subject: [PATCH 101/138] CA  fix

---
 python/cudf/cudf/core/column_accessor.py | 29 ++++++++++++++----------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 84a21d78266..bd3e801fbec 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -19,11 +19,7 @@
 
 import cudf
 from cudf.core import column
-from cudf.utils.utils import (
-    cached_property,
-    to_flat_dict,
-    to_nested_dict,
-)
+from cudf.utils.utils import cached_property, to_flat_dict, to_nested_dict
 
 if TYPE_CHECKING:
     from cudf.core.column import ColumnBase
@@ -34,7 +30,6 @@ class ColumnAccessor(MutableMapping):
     _data: "Dict[Any, ColumnBase]"
     multiindex: bool
     _level_names: Tuple[Any, ...]
-    _column_length: int
 
     def __init__(
         self,
@@ -64,15 +59,13 @@ def __init__(
             self._data = data._data
             self.multiindex = multiindex
             self._level_names = level_names
-            self._column_length = data._column_length
         else:
             # This code path is performance-critical for copies and should be
             # modified with care.
             self._data = {}
             if data:
                 data = dict(data)
-                # Faster than next(iter(data.values()))
-                column_length = len(data[next(iter(data))])
+                column_length = _length_of_first_value(data)
                 for k, v in data.items():
                     # Much faster to avoid the function call if possible; the
                     # extra isinstance is negligible if we do have to make a
@@ -82,8 +75,6 @@ def __init__(
                     if len(v) != column_length:
                         raise ValueError("All columns must be of equal length")
                     self._data[k] = v
-                self._column_length = column_length
-
             self.multiindex = multiindex
             self._level_names = level_names
 
@@ -144,6 +135,10 @@ def nrows(self) -> int:
         else:
             return len(next(iter(self.values())))
 
+    @cached_property
+    def _column_length(self) -> int:
+        return _length_of_first_value(self._data)
+
     @cached_property
     def names(self) -> Tuple[Any, ...]:
         return tuple(self.keys())
@@ -164,7 +159,12 @@ def _grouped_data(self) -> MutableMapping:
             return self._data
 
     def _clear_cache(self):
-        cached_properties = "columns", "names", "_grouped_data"
+        cached_properties = (
+            "columns",
+            "names",
+            "_grouped_data",
+            "_column_length",
+        )
         for attr in cached_properties:
             try:
                 self.__delattr__(attr)
@@ -473,3 +473,8 @@ def _compare_keys(target: Any, key: Any) -> bool:
         if k1 != k2:
             return False
     return True
+
+
+def _length_of_first_value(data: Dict[Any, Any]) -> int:
+    # faster than next(iter(data.values())):
+    return 0 if not data else len(data[next(iter(data))])

From c8d23641df2ac735725523bc19bad8dc25d36a6e Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Mar 2021 17:48:32 -0400
Subject: [PATCH 102/138] Don't validate on gathers

---
 python/cudf/cudf/core/join/join.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 0d45abb76c2..862c50d8ca7 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -299,9 +299,13 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
         data = left_result._data.__class__()
 
         for lcol in left_names:
-            data[left_names[lcol]] = left_result._data[lcol]
+            data.set_by_label(
+                left_names[lcol], left_result._data[lcol], validate=False
+            )
         for rcol in right_names:
-            data[right_names[rcol]] = right_result._data[rcol]
+            data.set_by_label(
+                right_names[rcol], right_result._data[rcol], validate=False
+            )
 
         # Index of the result:
         if self.left_index and self.right_index:

From efea63dd02a6d143a72bc8e76d012d24a27a8af6 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Mar 2021 17:53:07 -0400
Subject: [PATCH 103/138] Prioritize numeric columns

---
 python/cudf/cudf/core/column/column.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index b2b2874eeb4..dd06d97d105 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1017,7 +1017,9 @@ def distinct_count(
         return cpp_distinct_count(self, ignore_nulls=dropna)
 
     def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
-        if is_categorical_dtype(dtype):
+        if is_numerical_dtype(dtype):
+            return self.as_numerical_column(dtype)
+        elif is_categorical_dtype(dtype):
             return self.as_categorical_column(dtype, **kwargs)
         elif pd.api.types.pandas_dtype(dtype).type in {
             np.str_,
@@ -1548,6 +1550,16 @@ def build_column(
     """
     dtype = pd.api.types.pandas_dtype(dtype)
 
+    if is_numerical_dtype(dtype):
+        assert data is not None
+        return cudf.core.column.NumericalColumn(
+            data=data,
+            dtype=dtype,
+            mask=mask,
+            size=size,
+            offset=offset,
+            null_count=null_count,
+        )
     if is_categorical_dtype(dtype):
         if not len(children) == 1:
             raise ValueError(
@@ -1634,15 +1646,7 @@ def build_column(
             children=children,
         )
     else:
-        assert data is not None
-        return cudf.core.column.NumericalColumn(
-            data=data,
-            dtype=dtype,
-            mask=mask,
-            size=size,
-            offset=offset,
-            null_count=null_count,
-        )
+        raise TypeError(f"Unrecognized dtype: {dtype}")
 
 
 def build_categorical_column(

From c3b6444787e29a4536104adad1dc3508b7e5a9dd Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 22 Mar 2021 14:56:08 -0700
Subject: [PATCH 104/138] Lazily compute and delete column length on demand.

---
 python/cudf/cudf/core/column_accessor.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 84a21d78266..f0677618d76 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -64,7 +64,6 @@ def __init__(
             self._data = data._data
             self.multiindex = multiindex
             self._level_names = level_names
-            self._column_length = data._column_length
         else:
             # This code path is performance-critical for copies and should be
             # modified with care.
@@ -82,7 +81,6 @@ def __init__(
                     if len(v) != column_length:
                         raise ValueError("All columns must be of equal length")
                     self._data[k] = v
-                self._column_length = column_length
 
             self.multiindex = multiindex
             self._level_names = level_names
@@ -163,6 +161,13 @@ def _grouped_data(self) -> MutableMapping:
         else:
             return self._data
 
+    @cached_property
+    def _column_length(self):
+        try:
+            return len(self._data[next(iter(self._data))])
+        except StopIteration:
+            return 0
+
     def _clear_cache(self):
         cached_properties = "columns", "names", "_grouped_data"
         for attr in cached_properties:
@@ -171,6 +176,10 @@ def _clear_cache(self):
             except AttributeError:
                 pass
 
+        # Column length should only be cleared if no data is present.
+        if len(self._data) == 0 and hasattr(self, "_column_length"):
+            del self._column_length
+
     def to_pandas_index(self) -> pd.Index:
         """"
         Convert the keys of the ColumnAccessor to a Pandas Index object.

From 01b2cf572d596da54423fdff36beefe1da382bb3 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 22 Mar 2021 14:59:42 -0700
Subject: [PATCH 105/138] Remove redundant clear cache in setitem.

---
 python/cudf/cudf/core/column_accessor.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index f0677618d76..77445dae3c7 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -93,7 +93,6 @@ def __getitem__(self, key: Any) -> ColumnBase:
 
     def __setitem__(self, key: Any, value: Any):
         self.set_by_label(key, value)
-        self._clear_cache()
 
     def __delitem__(self, key: Any):
         self._data.__delitem__(key)

From 88992581ec04b4092e9ef02edbd03350c31af0fa Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 22 Mar 2021 15:06:38 -0700
Subject: [PATCH 106/138] Remove mypy annotation for column length.

---
 python/cudf/cudf/core/column_accessor.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 77445dae3c7..44484927985 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -34,7 +34,6 @@ class ColumnAccessor(MutableMapping):
     _data: "Dict[Any, ColumnBase]"
     multiindex: bool
     _level_names: Tuple[Any, ...]
-    _column_length: int
 
     def __init__(
         self,

From c6cd41528d008d5fce038bc3c45484a12fca304f Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Mar 2021 18:54:03 -0400
Subject: [PATCH 107/138] Optimize casting logic

---
 python/cudf/cudf/core/column/categorical.py |  3 ++
 python/cudf/cudf/core/join/_join_helpers.py | 50 ++++++++++++---------
 python/cudf/cudf/core/join/join.py          | 11 +++--
 3 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index 39c278d2abf..bb1bf3c5d5c 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -750,6 +750,9 @@ def _set_categories(
             ordered=ordered,
         )
 
+    def _decategorize(self) -> ColumnBase:
+        return self._column._get_decategorized_column()
+
 
 class CategoricalColumn(column.ColumnBase):
     """Implements operations for Columns of Categorical type
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 209a6c5c4e4..544bc385358 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -2,7 +2,7 @@
 from __future__ import annotations
 
 import warnings
-from typing import TYPE_CHECKING, Any, Iterable
+from typing import TYPE_CHECKING, Any, Iterable, Tuple
 
 import numpy as np
 import pandas as pd
@@ -11,8 +11,7 @@
 from cudf.core.dtypes import CategoricalDtype
 
 if TYPE_CHECKING:
-    from cudf._typing import Dtype
-    from cudf.core.column import ColumnBase
+    from cudf.core.column import CategoricalColumn, ColumnBase
     from cudf.core.frame import Frame
 
 
@@ -73,7 +72,13 @@ def _frame_select_by_indexers(
     return result
 
 
-def _match_join_keys(lcol: ColumnBase, rcol: ColumnBase, how: str) -> Dtype:
+def _match_join_keys(
+    lcol: ColumnBase, rcol: ColumnBase, how: str
+) -> Tuple[ColumnBase, ColumnBase]:
+    # returns the common dtype that lcol and rcol should be casted to,
+    # before they can be used as left and right join keys.
+    # If no casting is necessary, returns None
+
     common_type = None
 
     # cast the keys lcol and rcol to a common dtype
@@ -84,10 +89,10 @@ def _match_join_keys(lcol: ColumnBase, rcol: ColumnBase, how: str) -> Dtype:
     if isinstance(ltype, CategoricalDtype) or isinstance(
         rtype, CategoricalDtype
     ):
-        return _match_categorical_dtypes(ltype, rtype, how)
+        return _match_categorical_dtypes(lcol, rcol, how)
 
     if pd.api.types.is_dtype_equal(ltype, rtype):
-        return ltype
+        return lcol, rcol
 
     if (np.issubdtype(ltype, np.number)) and (np.issubdtype(rtype, np.number)):
         common_type = (
@@ -103,45 +108,50 @@ def _match_join_keys(lcol: ColumnBase, rcol: ColumnBase, how: str) -> Dtype:
 
     if how == "left":
         if rcol.fillna(0).can_cast_safely(ltype):
-            return ltype
+            return lcol, rcol.astype(ltype)
         else:
             warnings.warn(
                 f"Can't safely cast column from {rtype} to {ltype}, "
                 "upcasting to {common_type}."
             )
 
-    return common_type
+    return lcol.astype(common_type), rcol.astype(common_type)
 
 
-def _match_categorical_dtypes(ltype: Dtype, rtype: Dtype, how: str) -> Dtype:
+def _match_categorical_dtypes(
+    lcol: ColumnBase, rcol: ColumnBase, how: str
+) -> Tuple[ColumnBase, ColumnBase]:
     # cast the keys lcol and rcol to a common dtype
     # when at least one of them is a categorical type
+    ltype, rtype = lcol.dtype, rcol.dtype
 
-    if isinstance(ltype, CategoricalDtype) and isinstance(
-        rtype, CategoricalDtype
+    if isinstance(lcol, CategoricalColumn) and isinstance(
+        rcol, CategoricalColumn
     ):
         # if both are categoricals, logic is complicated:
-        return _match_categorical_dtypes_both(ltype, rtype, how)
+        return _match_categorical_dtypes_both(lcol, rcol, how)
 
     if isinstance(ltype, CategoricalDtype):
         if how in {"left", "leftsemi", "leftanti"}:
-            return ltype
+            return lcol, rcol.astype(ltype)
         common_type = ltype.categories.dtype
     elif isinstance(rtype, CategoricalDtype):
         common_type = rtype.categories.dtype
-    return common_type
+    return lcol.astype(common_type), rcol.astype(common_type)
 
 
 def _match_categorical_dtypes_both(
-    ltype: CategoricalDtype, rtype: CategoricalDtype, how: str
-) -> Dtype:
+    lcol: CategoricalColumn, rcol: CategoricalColumn, how: str
+) -> Tuple[ColumnBase, ColumnBase]:
     # The commontype depends on both `how` and the specifics of the
     # categorical variables to be merged.
 
+    ltype, rtype = lcol.dtype, rcol.dtype
+
     # when both are ordered and both have the same categories,
     # no casting required:
     if ltype == rtype:
-        return ltype
+        return lcol, rcol
 
     # Merging categorical variables when only one side is ordered is
     # ambiguous and not allowed.
@@ -167,11 +177,11 @@ def _match_categorical_dtypes_both(
     if how == "inner":
         # cast to category types -- we must cast them back later
         return _match_join_keys(
-            ltype.categories._values, rtype.categories._values, how
+            lcol.cat()._decategorize(), rcol.cat()._decategorize(), how,
         )
     elif how in {"left", "leftanti", "leftsemi"}:
         # always cast to left type
-        return ltype
+        return lcol, rcol.astype(ltype)
     else:
         # merge categories
         merged_categories = cudf.concat(
@@ -180,7 +190,7 @@ def _match_categorical_dtypes_both(
         common_type = cudf.CategoricalDtype(
             categories=merged_categories, ordered=False
         )
-        return common_type
+        return lcol.astype(common_type), rcol.astype(common_type)
 
 
 def _coerce_to_tuple(obj):
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 862c50d8ca7..6e35f35f1c3 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -416,10 +416,13 @@ def _match_key_dtypes(self, lhs: Frame, rhs: Frame) -> Tuple[Frame, Frame]:
         out_rhs = rhs.copy(deep=False)
         for left_key, right_key in zip(*self._keys):
             lcol, rcol = left_key.get(lhs), right_key.get(rhs)
-            dtype = _match_join_keys(lcol, rcol, how=self.how)
-            if dtype:
-                left_key.set(out_lhs, lcol.astype(dtype))
-                right_key.set(out_rhs, rcol.astype(dtype))
+            lcol_casted, rcol_casted = _match_join_keys(
+                lcol, rcol, how=self.how
+            )
+            if lcol is not lcol_casted:
+                left_key.set(out_lhs, lcol_casted)
+            if rcol is not rcol_casted:
+                right_key.set(out_rhs, rcol_casted)
         return out_lhs, out_rhs
 
     def _restore_categorical_keys(

From 7f8e1cd60525f3a06e064f8fa4bc4d93bb383700 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Mar 2021 19:01:50 -0400
Subject: [PATCH 108/138] Undo

---
 python/cudf/cudf/core/column_accessor.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 2582f7be287..a527713099f 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -167,7 +167,6 @@ def _clear_cache(self):
             "columns",
             "names",
             "_grouped_data",
-            "_column_length",
         )
         for attr in cached_properties:
             try:

From f2e4609f63389ba44b65284feaba155d4ba9721a Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Mar 2021 19:04:48 -0400
Subject: [PATCH 109/138] Don't validate when copying type metadata

---
 python/cudf/cudf/core/frame.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index e6898b8c606..ecff3dee573 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2408,7 +2408,9 @@ def _copy_type_metadata(
         for name, col, other_col in zip(
             self._data.keys(), self._data.values(), other._data.values()
         ):
-            self._data[name] = other_col._copy_type_metadata(col)
+            self._data.set_by_label(
+                name, other_col._copy_type_metadata(col), validate=False
+            )
 
         if include_index:
             if self._index is not None and other._index is not None:

From 83cc407ced4eb88cc399c6cae5cd7ee3bae55c5d Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Mar 2021 19:11:48 -0400
Subject: [PATCH 110/138] ImportError

---
 python/cudf/cudf/core/join/_join_helpers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 544bc385358..1fb380f8697 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -125,8 +125,8 @@ def _match_categorical_dtypes(
     # when at least one of them is a categorical type
     ltype, rtype = lcol.dtype, rcol.dtype
 
-    if isinstance(lcol, CategoricalColumn) and isinstance(
-        rcol, CategoricalColumn
+    if isinstance(lcol, cudf.core.column.CategoricalColumn) and isinstance(
+        rcol, cudf.core.column.CategoricalColumn
     ):
         # if both are categoricals, logic is complicated:
         return _match_categorical_dtypes_both(lcol, rcol, how)

From 72598fbcacb046d7485b74f1e801772f4006a526 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Mar 2021 19:33:43 -0400
Subject: [PATCH 111/138] Prioritize numeric dtypes in is_numerical_dtype

---
 python/cudf/cudf/utils/dtypes.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 1438421bb12..375eccce310 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -144,16 +144,16 @@ def numeric_normalize_types(*args):
 
 
 def is_numerical_dtype(obj):
-    if is_categorical_dtype(obj):
-        return False
-    if is_list_dtype(obj):
+    if np.issubdtype(obj, np.bool_):
+        return True
+    elif np.issubdtype(obj, np.floating):
+        return True
+    elif np.issubdtype(obj, np.signedinteger):
+        return True
+    elif np.issubdtype(obj, np.unsignedinteger):
+        return True
+    else:
         return False
-    return (
-        np.issubdtype(obj, np.bool_)
-        or np.issubdtype(obj, np.floating)
-        or np.issubdtype(obj, np.signedinteger)
-        or np.issubdtype(obj, np.unsignedinteger)
-    )
 
 
 def is_string_dtype(obj):

From fa220b6d86801ed98c415d48305eb74c0afc9d2e Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Mar 2021 19:47:20 -0400
Subject: [PATCH 112/138] Add unsafe CA ctor

---
 python/cudf/cudf/_lib/table.pyx          |  4 +++-
 python/cudf/cudf/core/column_accessor.py | 13 +++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx
index f97b45d8abf..8b83de1e31c 100644
--- a/python/cudf/cudf/_lib/table.pyx
+++ b/python/cudf/cudf/_lib/table.pyx
@@ -114,7 +114,9 @@ cdef class Table:
         for _ in column_names:
             data_columns.append(Column.from_unique_ptr(move(dereference(it))))
             it += 1
-        data = dict(zip(column_names, data_columns))
+        data = ColumnAccessor._init_unsafe(
+            dict(zip(column_names, data_columns))
+        )
 
         return Table(data=data, index=index)
 
diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index a527713099f..50c7dbd8812 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -80,6 +80,19 @@ def __init__(
             self.multiindex = multiindex
             self._level_names = level_names
 
+    @classmethod
+    def _init_unsafe(
+        cls,
+        data: Dict[Any, ColumnBase],
+        multiindex: bool = False,
+        level_names=None,
+    ) -> ColumnAccessor:
+        obj = cls()
+        obj._data = data
+        obj.multiindex = multiindex
+        obj._level_names = level_names
+        return obj
+
     def __iter__(self):
         return self._data.__iter__()
 

From f7dc417dec0519aca0d866b7f0dedcb9ad3a2d05 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Mar 2021 19:48:15 -0400
Subject: [PATCH 113/138] Revert "Prioritize numeric dtypes in
 is_numerical_dtype"

This reverts commit 72598fbcacb046d7485b74f1e801772f4006a526.
---
 python/cudf/cudf/utils/dtypes.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 375eccce310..1438421bb12 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -144,16 +144,16 @@ def numeric_normalize_types(*args):
 
 
 def is_numerical_dtype(obj):
-    if np.issubdtype(obj, np.bool_):
-        return True
-    elif np.issubdtype(obj, np.floating):
-        return True
-    elif np.issubdtype(obj, np.signedinteger):
-        return True
-    elif np.issubdtype(obj, np.unsignedinteger):
-        return True
-    else:
+    if is_categorical_dtype(obj):
+        return False
+    if is_list_dtype(obj):
         return False
+    return (
+        np.issubdtype(obj, np.bool_)
+        or np.issubdtype(obj, np.floating)
+        or np.issubdtype(obj, np.signedinteger)
+        or np.issubdtype(obj, np.unsignedinteger)
+    )
 
 
 def is_string_dtype(obj):

From 3760077f4f004129ca099b9d0ce8861bf3d87520 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Mon, 22 Mar 2021 19:51:47 -0400
Subject: [PATCH 114/138] Revert "Prioritize numeric dtypes in
 is_numerical_dtype"

This reverts commit 72598fbcacb046d7485b74f1e801772f4006a526.
---
 python/cudf/cudf/utils/dtypes.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 375eccce310..1438421bb12 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -144,16 +144,16 @@ def numeric_normalize_types(*args):
 
 
 def is_numerical_dtype(obj):
-    if np.issubdtype(obj, np.bool_):
-        return True
-    elif np.issubdtype(obj, np.floating):
-        return True
-    elif np.issubdtype(obj, np.signedinteger):
-        return True
-    elif np.issubdtype(obj, np.unsignedinteger):
-        return True
-    else:
+    if is_categorical_dtype(obj):
+        return False
+    if is_list_dtype(obj):
         return False
+    return (
+        np.issubdtype(obj, np.bool_)
+        or np.issubdtype(obj, np.floating)
+        or np.issubdtype(obj, np.signedinteger)
+        or np.issubdtype(obj, np.unsignedinteger)
+    )
 
 
 def is_string_dtype(obj):

From de9ca28f86b46a6fe9cd93be58e865cd6a8afd96 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 22 Mar 2021 19:49:47 -0700
Subject: [PATCH 115/138] Change error message back so that tests pass.

---
 python/cudf/cudf/core/column_accessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 44484927985..d2bab50a8ba 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -78,7 +78,7 @@ def __init__(
                     if not isinstance(v, column.ColumnBase):
                         v = column.as_column(v)
                     if len(v) != column_length:
-                        raise ValueError("All columns must be of equal length")
+                        raise ValueError("All values must be of equal length")
                     self._data[k] = v
 
             self.multiindex = multiindex

From e35d03b339dc008e8f264dec9d86a8417f3a77db Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 23 Mar 2021 10:47:05 -0400
Subject: [PATCH 116/138] Faster is_numerical_dtype

---
 python/cudf/cudf/utils/dtypes.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 1438421bb12..8aa0e05bb07 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -148,11 +148,18 @@ def is_numerical_dtype(obj):
         return False
     if is_list_dtype(obj):
         return False
+    # convert to an np.dtype object first,
+    # otherwise each of the np.issubdtype() calls
+    # below will be slow.
+    try:
+        dtype = np.dtype(obj)
+    except TypeError:
+        return False
     return (
-        np.issubdtype(obj, np.bool_)
-        or np.issubdtype(obj, np.floating)
-        or np.issubdtype(obj, np.signedinteger)
-        or np.issubdtype(obj, np.unsignedinteger)
+        np.issubdtype(dtype, np.bool_)
+        or np.issubdtype(dtype, np.floating)
+        or np.issubdtype(dtype, np.signedinteger)
+        or np.issubdtype(dtype, np.unsignedinteger)
     )
 
 

From e2fd53369a554cfa887a137261baafdd94854bcd Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 23 Mar 2021 10:51:45 -0400
Subject: [PATCH 117/138] Faster is_numerical_dtype

---
 python/cudf/cudf/utils/dtypes.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 8aa0e05bb07..225450d84b3 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -148,19 +148,11 @@ def is_numerical_dtype(obj):
         return False
     if is_list_dtype(obj):
         return False
-    # convert to an np.dtype object first,
-    # otherwise each of the np.issubdtype() calls
-    # below will be slow.
     try:
         dtype = np.dtype(obj)
     except TypeError:
         return False
-    return (
-        np.issubdtype(dtype, np.bool_)
-        or np.issubdtype(dtype, np.floating)
-        or np.issubdtype(dtype, np.signedinteger)
-        or np.issubdtype(dtype, np.unsignedinteger)
-    )
+    return dtype.kind in "biuf"
 
 
 def is_string_dtype(obj):

From 64ca702d44d2c75463c19bfd2e7a762e1b7d7717 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 23 Mar 2021 11:12:39 -0400
Subject: [PATCH 118/138] Even faster is_numerical_dtype

---
 python/cudf/cudf/utils/dtypes.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 225450d84b3..4080d9cff9c 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -144,10 +144,6 @@ def numeric_normalize_types(*args):
 
 
 def is_numerical_dtype(obj):
-    if is_categorical_dtype(obj):
-        return False
-    if is_list_dtype(obj):
-        return False
     try:
         dtype = np.dtype(obj)
     except TypeError:

From 749edf18897ae1667eb6fa34972f76c25e2bec5f Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 23 Mar 2021 11:23:03 -0400
Subject: [PATCH 119/138] Enable fast path for constructing a Buffer from a
 DeviceBuffer

---
 python/cudf/cudf/core/buffer.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py
index 350346a87f9..9fc5570e35a 100644
--- a/python/cudf/cudf/core/buffer.py
+++ b/python/cudf/cudf/core/buffer.py
@@ -42,6 +42,10 @@ def __init__(
             self.ptr = data.ptr
             self.size = data.size
             self._owner = owner or data._owner
+        elif isinstance(data, rmm.DeviceBuffer):
+            self.ptr = data.ptr
+            self.size = data.size
+            self._owner = data
         elif hasattr(data, "__array_interface__") or hasattr(
             data, "__cuda_array_interface__"
         ):

From ca772b8ca46c3ae11b7232c33599387f0b42af65 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 23 Mar 2021 12:02:31 -0400
Subject: [PATCH 120/138] Small fix

---
 python/cudf/cudf/core/join/join.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 0d45abb76c2..d066d40d052 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -452,6 +452,6 @@ def __init__(self, *args, **kwargs):
     def _merge_results(self, lhs: Frame, rhs: Frame) -> Frame:
         # semi-join result includes only lhs columns
         if issubclass(self._out_class, cudf.Index):
-            return self._out_class._from_data(lhs)
+            return self._out_class._from_data(lhs._data)
         else:
             return self._out_class._from_data(lhs._data, index=lhs._index)

From 739ec57975ae1fa2817633da577989162c01ef93 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 23 Mar 2021 09:49:20 -0700
Subject: [PATCH 121/138] Add validation option to insert and standardize error
 message.

---
 python/cudf/cudf/core/column_accessor.py | 11 +++++++++--
 python/cudf/cudf/tests/test_dataframe.py |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index d2bab50a8ba..add0570fc8f 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -78,7 +78,7 @@ def __init__(
                     if not isinstance(v, column.ColumnBase):
                         v = column.as_column(v)
                     if len(v) != column_length:
-                        raise ValueError("All values must be of equal length")
+                        raise ValueError("All columns must be of equal length")
                     self._data[k] = v
 
             self.multiindex = multiindex
@@ -195,7 +195,7 @@ def to_pandas_index(self) -> pd.Index:
             result = pd.Index(self.names, name=self.name, tupleize_cols=False)
         return result
 
-    def insert(self, name: Any, value: Any, loc: int = -1):
+    def insert(self, name: Any, value: Any, loc: int = -1, validate: bool = True):
         """
         Insert column into the ColumnAccessor at the specified location.
 
@@ -225,6 +225,13 @@ def insert(self, name: Any, value: Any, loc: int = -1):
         if name in self._data:
             raise ValueError(f"Cannot insert '{name}', already exists")
         if loc == len(self._data):
+            if validate:
+                value = column.as_column(value)
+                if len(self._data) > 0:
+                    if len(value) != self._column_length:
+                        raise ValueError("All columns must be of equal length")
+                else:
+                    self._column_length = len(value)
             self._data[name] = value
         else:
             new_keys = self.names[:loc] + (name,) + self.names[loc:]
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index b3ba439cb15..76a02d5e74a 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -5222,7 +5222,7 @@ def test_memory_usage_multi():
 def test_setitem_diff_size_list(list_input, key):
     gdf = cudf.datasets.randomdata(5)
     with pytest.raises(
-        ValueError, match=("All values must be of equal length")
+        ValueError, match=("All columns must be of equal length")
     ):
         gdf[key] = list_input
 

From 498b70ed8b337fd412759504770d85acfe57094b Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 23 Mar 2021 10:22:15 -0700
Subject: [PATCH 122/138] Fix style.

---
 python/cudf/cudf/core/column_accessor.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index add0570fc8f..0c580132290 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -195,7 +195,9 @@ def to_pandas_index(self) -> pd.Index:
             result = pd.Index(self.names, name=self.name, tupleize_cols=False)
         return result
 
-    def insert(self, name: Any, value: Any, loc: int = -1, validate: bool = True):
+    def insert(
+        self, name: Any, value: Any, loc: int = -1, validate: bool = True
+    ):
         """
         Insert column into the ColumnAccessor at the specified location.
 

From 01e13fa62bba2ad0b4b34e5574c2152291d65ee2 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 23 Mar 2021 17:08:04 -0400
Subject: [PATCH 123/138] Undo formatting change

---
 python/cudf/cudf/core/column_accessor.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index e2233423db4..68ce4c4c070 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -176,11 +176,7 @@ def _column_length(self):
             return 0
 
     def _clear_cache(self):
-        cached_properties = (
-            "columns",
-            "names",
-            "_grouped_data",
-        )
+        cached_properties = ("columns", "names", "_grouped_data")
         for attr in cached_properties:
             try:
                 self.__delattr__(attr)

From 89a03013ef99452e7b12bb6380d98f3cde0635ba Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 23 Mar 2021 17:10:16 -0400
Subject: [PATCH 124/138] Add TODO

---
 python/cudf/cudf/utils/dtypes.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 4080d9cff9c..8875a36dba8 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -144,6 +144,8 @@ def numeric_normalize_types(*args):
 
 
 def is_numerical_dtype(obj):
+    # TODO: we should handle objects with a `.dtype` attribute,
+    # e.g., arrays, here.
     try:
         dtype = np.dtype(obj)
     except TypeError:

From 5e73de76451740ce5b52694c1c501f92e7429d25 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Tue, 23 Mar 2021 20:21:36 -0400
Subject: [PATCH 125/138] init->create + doc

---
 python/cudf/cudf/_lib/table.pyx          | 2 +-
 python/cudf/cudf/core/column_accessor.py | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/table.pyx b/python/cudf/cudf/_lib/table.pyx
index 8b83de1e31c..0d6e9c16e8c 100644
--- a/python/cudf/cudf/_lib/table.pyx
+++ b/python/cudf/cudf/_lib/table.pyx
@@ -114,7 +114,7 @@ cdef class Table:
         for _ in column_names:
             data_columns.append(Column.from_unique_ptr(move(dereference(it))))
             it += 1
-        data = ColumnAccessor._init_unsafe(
+        data = ColumnAccessor._create_unsafe(
             dict(zip(column_names, data_columns))
         )
 
diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 68ce4c4c070..33bae5c1328 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -81,12 +81,14 @@ def __init__(
             self._level_names = level_names
 
     @classmethod
-    def _init_unsafe(
+    def _create_unsafe(
         cls,
         data: Dict[Any, ColumnBase],
         multiindex: bool = False,
         level_names=None,
     ) -> ColumnAccessor:
+        # create a ColumnAccessor without verifying column
+        # type or size
         obj = cls()
         obj._data = data
         obj.multiindex = multiindex

From ca116a37c872bc59fd76edc534fa75b7e4b30727 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 25 Mar 2021 09:40:31 -0400
Subject: [PATCH 126/138] Only gather the index if necessary

---
 python/cudf/cudf/core/join/join.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index d3bd835bb80..0b1c68bd64b 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -179,10 +179,15 @@ def perform_merge(self) -> Frame:
         left_result = cudf.core.frame.Frame()
         right_result = cudf.core.frame.Frame()
 
+        gather_index = self.left_index or self.right_index
         if left_rows is not None:
-            left_result = lhs._gather(left_rows, nullify=True)
+            left_result = lhs._gather(
+                left_rows, nullify=True, keep_index=gather_index
+            )
         if right_rows is not None:
-            right_result = rhs._gather(right_rows, nullify=True)
+            right_result = rhs._gather(
+                right_rows, nullify=True, keep_index=gather_index
+            )
 
         result = self._merge_results(left_result, right_result)
 

From ce03918eb2c792ee35a2b49c0e62d10ef4c6c66d Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 25 Mar 2021 09:48:23 -0400
Subject: [PATCH 127/138] Don't copy type metadata for the index unless we need
 to

---
 python/cudf/cudf/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 330398e302f..f1181feb692 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -623,7 +623,7 @@ def _gather(self, gather_map, keep_index=True, nullify=False):
                 nullify=nullify,
             )
         )
-        result._copy_type_metadata(self)
+        result._copy_type_metadata(self, include_index=keep_index)
         if keep_index and self._index is not None:
             result._index.names = self._index.names
         return result

From b7c6b198316eff1b0b64d3ddb5691723a6bb6f36 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Thu, 25 Mar 2021 10:21:33 -0400
Subject: [PATCH 128/138] Use validate=False in a few more places

---
 python/cudf/cudf/core/join/_join_helpers.py | 12 +++++++-----
 python/cudf/cudf/core/join/join.py          | 13 +++++++++----
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 1fb380f8697..7d322fdbc91 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -42,13 +42,15 @@ def get(self, obj: Frame) -> ColumnBase:
                 return obj._index._data[self.name]
         raise KeyError()
 
-    def set(self, obj: Frame, value: ColumnBase):
+    def set(self, obj: Frame, value: ColumnBase, validate=False):
         # set the colum in `obj`
         if self.column:
-            obj._data[self.name] = value
+            obj._data.set_by_label(self.name, value, validate=validate)
         else:
             if obj._index is not None:
-                obj._index._data[self.name] = value
+                obj._index._data.set_by_label(
+                    self.name, value, validate=validate
+                )
             else:
                 raise KeyError()
 
@@ -63,9 +65,9 @@ def _frame_select_by_indexers(
 
     for idx in indexers:
         if idx.index:
-            index_data[idx.name] = idx.get(frame)
+            index_data.set_by_label(idx.name, idx.get(frame), validate=False)
         else:
-            data[idx.name] = idx.get(frame)
+            data.set_by_label(idx.name, idx.get(frame), validate=False)
 
     result_index = cudf.Index._from_data(index_data) if index_data else None
     result = cudf.core.frame.Frame(data=data, index=result_index)
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 0b1c68bd64b..e1e1028f803 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -269,6 +269,7 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
                     lkey.set(
                         left_result,
                         lkey.get(left_result).fillna(rkey.get(right_result)),
+                        validate=False,
                     )
 
         # Compute the result column names:
@@ -425,9 +426,9 @@ def _match_key_dtypes(self, lhs: Frame, rhs: Frame) -> Tuple[Frame, Frame]:
                 lcol, rcol, how=self.how
             )
             if lcol is not lcol_casted:
-                left_key.set(out_lhs, lcol_casted)
+                left_key.set(out_lhs, lcol_casted, validate=False)
             if rcol is not rcol_casted:
-                right_key.set(out_rhs, rcol_casted)
+                right_key.set(out_rhs, rcol_casted, validate=False)
         return out_lhs, out_rhs
 
     def _restore_categorical_keys(
@@ -446,10 +447,14 @@ def _restore_categorical_keys(
                     right_key.get(self.rhs).dtype, cudf.CategoricalDtype
                 ):
                     left_key.set(
-                        out_lhs, left_key.get(out_lhs).astype("category")
+                        out_lhs,
+                        left_key.get(out_lhs).astype("category"),
+                        validate=False,
                     )
                     right_key.set(
-                        out_rhs, right_key.get(out_rhs).astype("category")
+                        out_rhs,
+                        right_key.get(out_rhs).astype("category"),
+                        validate=False,
                     )
         return out_lhs, out_rhs
 

From 671a0e096a4a44b6472db9e5c9a31cb986260452 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 26 Mar 2021 13:06:43 -0400
Subject: [PATCH 129/138] Import

---
 python/cudf/cudf/core/frame.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index f1181feb692..32c958f506f 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -3355,8 +3355,6 @@ def _merge(
         indicator=False,
         suffixes=("_x", "_y"),
     ):
-        from cudf.core.join.join import merge
-
         lhs, rhs = self, right
         if how == "right":
             # Merge doesn't support right, so just swap
@@ -3366,7 +3364,7 @@ def _merge(
             left_index, right_index = right_index, left_index
             suffixes = (suffixes[1], suffixes[0])
 
-        return merge(
+        return cudf.core.join.merge(
             lhs,
             rhs,
             on=on,

From 797087b57021f7e2e175cf6dff89aca49fb082a6 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 26 Mar 2021 14:07:06 -0400
Subject: [PATCH 130/138] Review

---
 python/cudf/cudf/core/frame.py              |  3 ++-
 python/cudf/cudf/core/join/_join_helpers.py |  4 ---
 python/cudf/cudf/core/join/join.py          | 30 ++++++++++++---------
 3 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 32c958f506f..fb746d6c794 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -20,6 +20,7 @@
 from cudf import _lib as libcudf
 from cudf._typing import ColumnLike, DataFrameOrSeries
 from cudf.core.column import as_column, build_categorical_column, column_empty
+from cudf.core.join import merge
 from cudf.utils.dtypes import (
     is_categorical_dtype,
     is_column_like,
@@ -3364,7 +3365,7 @@ def _merge(
             left_index, right_index = right_index, left_index
             suffixes = (suffixes[1], suffixes[0])
 
-        return cudf.core.join.merge(
+        return merge(
             lhs,
             rhs,
             on=on,
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 7d322fdbc91..0a20067b41d 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -200,7 +200,3 @@ def _coerce_to_tuple(obj):
         return tuple(obj)
     else:
         return (obj,)
-
-
-def _coerce_to_list(obj):
-    return list(_coerce_to_tuple(obj))
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index e1e1028f803..2bb959bc0ad 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -2,13 +2,12 @@
 from __future__ import annotations
 
 import functools
-from collections import OrderedDict, namedtuple
+from collections import namedtuple
 from typing import TYPE_CHECKING, Callable, Tuple
 
 import cudf
 from cudf import _lib as libcudf
 from cudf.core.join._join_helpers import (
-    _coerce_to_list,
     _coerce_to_tuple,
     _frame_select_by_indexers,
     _Indexer,
@@ -275,8 +274,8 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
         # Compute the result column names:
         # left_names and right_names will be a mappings of input column names
         # to the corresponding names in the final result.
-        left_names = OrderedDict(zip(left_result._data, left_result._data))
-        right_names = OrderedDict(zip(right_result._data, right_result._data))
+        left_names = dict(zip(left_result._data, left_result._data))
+        right_names = dict(zip(right_result._data, right_result._data))
 
         # For any columns from left_result and right_result that have the same
         # name:
@@ -288,12 +287,14 @@ def _merge_results(self, left_result: Frame, right_result: Frame) -> Frame:
         if self.on:
             key_columns_with_same_name = self.on
         else:
-            key_columns_with_same_name = []
-            for lkey, rkey in zip(*self._keys):
-                if (lkey.index, rkey.index) == (False, False):
-                    if lkey.name == rkey.name:
-                        key_columns_with_same_name.append(lkey.name)
-
+            key_columns_with_same_name = [
+                lkey.name
+                for lkey, rkey in zip(*self._keys)
+                if (
+                    (lkey.index, rkey.index) == (False, False)
+                    and lkey.name == rkey.name
+                )
+            ]
         for name in common_names:
             if name not in key_columns_with_same_name:
                 left_names[name] = f"{name}{self.lsuffix}"
@@ -339,7 +340,10 @@ def _sort_result(self, result: Frame) -> Frame:
             if isinstance(result, cudf.Index):
                 sort_order = result._get_sorted_inds()
             else:
-                sort_order = result._get_sorted_inds(_coerce_to_list(self.on))
+                # need a list instead of a tuple here because
+                # _get_sorted_inds calls down to ColumnAccessor.get_by_label
+                # which handles lists and tuples differently
+                sort_order = result._get_sorted_inds(list(self.on))
             return result._gather(sort_order, keep_index=False)
         by = []
         if self.left_index and self.right_index:
@@ -347,11 +351,11 @@ def _sort_result(self, result: Frame) -> Frame:
                 by.extend(result._index._data.columns)
         if self.left_on:
             by.extend(
-                [result._data[col] for col in _coerce_to_list(self.left_on)]
+                [result._data[col] for col in _coerce_to_tuple(self.left_on)]
             )
         if self.right_on:
             by.extend(
-                [result._data[col] for col in _coerce_to_list(self.right_on)]
+                [result._data[col] for col in _coerce_to_tuple(self.right_on)]
             )
         if by:
             to_sort = cudf.DataFrame._from_columns(by)

From 5ad531fa0858e688bfea0434e54117f040dd6dd7 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 26 Mar 2021 14:10:38 -0400
Subject: [PATCH 131/138] Coerce to tuple first

---
 python/cudf/cudf/core/join/join.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index 2bb959bc0ad..1a4826d0570 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -343,7 +343,9 @@ def _sort_result(self, result: Frame) -> Frame:
                 # need a list instead of a tuple here because
                 # _get_sorted_inds calls down to ColumnAccessor.get_by_label
                 # which handles lists and tuples differently
-                sort_order = result._get_sorted_inds(list(self.on))
+                sort_order = result._get_sorted_inds(
+                    list(_coerce_to_tuple(self.on))
+                )
             return result._gather(sort_order, keep_index=False)
         by = []
         if self.left_index and self.right_index:

From f7e94fb0264eb590afddf1c256f5945d387c4b28 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 26 Mar 2021 14:13:05 -0400
Subject: [PATCH 132/138] Replace hasattr with isinstance

---
 python/cudf/cudf/core/join/_join_helpers.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 0a20067b41d..3807f408369 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
 from __future__ import annotations
 
+import collections
 import warnings
 from typing import TYPE_CHECKING, Any, Iterable, Tuple
 
@@ -196,7 +197,7 @@ def _match_categorical_dtypes_both(
 
 
 def _coerce_to_tuple(obj):
-    if hasattr(obj, "__iter__") and not isinstance(obj, str):
+    if isinstance(obj, collections.abc.Iterable) and not isinstance(obj, str):
         return tuple(obj)
     else:
         return (obj,)

From 1cb944804d5dd4d54066d2547a4f69da151228bb Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 26 Mar 2021 15:07:13 -0400
Subject: [PATCH 133/138] Handle renamed indexes

---
 python/cudf/cudf/core/multiindex.py    | 12 ++++++++++++
 python/cudf/cudf/tests/test_joining.py | 13 +++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index a82735be901..e6cee7c1038 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -190,6 +190,18 @@ def names(self):
     def names(self, value):
         value = [None] * self.nlevels if value is None else value
         assert len(value) == self.nlevels
+
+        if len(value) == len(set(value)):
+            # IMPORTANT: if the provided names are unique,
+            # we reconstruct self._data with the names as keys.
+            # If they are not unique, the keys of self._data
+            # and self._names will be different, which can lead
+            # to unexpected behaviour in some cases. This is
+            # definitely buggy, but we can't disallow non-unique
+            # names either...
+            self._data = self._data._create_unsafe(
+                dict(zip(value, self._data.values()))
+            )
         self._names = pd.core.indexes.frozen.FrozenList(value)
 
     def rename(self, names, inplace=False):
diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index 50141428b02..9164bfe98d1 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -1725,3 +1725,16 @@ def test_merge_with_lists(how):
     got = gd_left.merge(gd_right, on="a")
 
     assert_join_results_equal(expect, got, how=how)
+
+
+def test_join_renamed_index():
+    df = cudf.DataFrame(
+        {0: [1, 2, 3, 4, 5], 1: [1, 2, 3, 4, 5], "c": [1, 2, 3, 4, 5]}
+    ).set_index([0, 1])
+    df.index.names = ["a", "b"]  # doesn't actually change df._index._data
+
+    expect = df.to_pandas().merge(
+        df.to_pandas(), left_index=True, right_index=True
+    )
+    got = df.merge(df, left_index=True, right_index=True, how="inner")
+    assert_join_results_equal(expect, got, how="inner")

From cc89360b7b44356023287e16b81c1b8201e1a5b8 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 26 Mar 2021 15:10:04 -0400
Subject: [PATCH 134/138] Fix to names setter

---
 python/cudf/cudf/core/multiindex.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index e6cee7c1038..1c1e48e7372 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -199,8 +199,9 @@ def names(self, value):
             # to unexpected behaviour in some cases. This is
             # definitely buggy, but we can't disallow non-unique
             # names either...
-            self._data = self._data._create_unsafe(
-                dict(zip(value, self._data.values()))
+            self._data = self._data.__class__._create_unsafe(
+                dict(zip(value, self._data.values())),
+                level_names=self._data.level_names,
             )
         self._names = pd.core.indexes.frozen.FrozenList(value)
 

From 9cebf2ee4f6b8faa8e97633cfb02c15d929df9c3 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Fri, 26 Mar 2021 15:56:52 -0400
Subject: [PATCH 135/138] Update cpp/src/join/hash_join.cu

Co-authored-by: Mark Harris <mharris@nvidia.com>
---
 cpp/src/join/hash_join.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 90ecae4e3f4..3f11d6d3306 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -114,7 +114,7 @@ get_left_join_indices_complement(
     // Assume all the indices in invalid_index_map are invalid
     auto invalid_index_map =
       std::make_unique<rmm::device_uvector<size_type>>(right_table_row_count, stream);
-    thrust::uninitialized_fill(thrust::cuda::par.on(stream.value()),
+    thrust::uninitialized_fill(rmm::exec_policy(stream),
                                invalid_index_map->begin(),
                                invalid_index_map->end(),
                                int32_t{1});

From 1584b861a90d59ea408f5696874ca68fd08f1147 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 26 Mar 2021 16:00:11 -0400
Subject: [PATCH 136/138] Better example

---
 cpp/include/cudf/join.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index 7c778a17609..fcc0bcd444e 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -78,10 +78,10 @@ inner_join(cudf::table_view const& left_keys,
  *
  * @code{.pseudo}
  *          Left: {{0, 1, 2}}
- *          Right: {{1, 2, 3}, {1, 2, 5}}
+ *          Right: {{4, 9, 3}, {1, 2, 5}}
  *          left_on: {0}
  *          right_on: {1}
- * Result: {{1, 2}, {1, 2}, {1, 2}}
+ * Result: {{1, 2}, {4, 9}, {1, 2}}
  * @endcode
  *
  * @throw cudf::logic_error if number of elements in `left_on` or `right_on`

From 3977b793451d7687e17f4712a67227f850d87ef1 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 26 Mar 2021 16:09:33 -0400
Subject: [PATCH 137/138] Remove std::moves

---
 cpp/src/join/semi_join.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index fea9ea45fd3..80a1ef9e204 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -55,7 +55,7 @@ std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_semi_anti_join(
     auto result =
       std::make_unique<rmm::device_uvector<cudf::size_type>>(left_keys.num_rows(), stream, mr);
     thrust::sequence(thrust::cuda::par.on(stream.value()), result->begin(), result->end());
-    return std::move(result);
+    return result;
   }
 
   auto const left_num_rows  = left_keys.num_rows();
@@ -114,7 +114,7 @@ std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_semi_anti_join(
 
   auto join_size = thrust::distance(gather_map->begin(), gather_map_end);
   gather_map->resize(join_size, stream);
-  return std::move(gather_map);
+  return gather_map;
 }
 
 /**

From 7bf65611212a4f3c2e7454168d05d6b23d93c85b Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <shwina@users.noreply.github.com>
Date: Fri, 26 Mar 2021 16:29:57 -0400
Subject: [PATCH 138/138] Fix formatting error

---
 cpp/src/join/hash_join.cu | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu
index 3f11d6d3306..5a6ad8892de 100644
--- a/cpp/src/join/hash_join.cu
+++ b/cpp/src/join/hash_join.cu
@@ -114,10 +114,8 @@ get_left_join_indices_complement(
     // Assume all the indices in invalid_index_map are invalid
     auto invalid_index_map =
       std::make_unique<rmm::device_uvector<size_type>>(right_table_row_count, stream);
-    thrust::uninitialized_fill(rmm::exec_policy(stream),
-                               invalid_index_map->begin(),
-                               invalid_index_map->end(),
-                               int32_t{1});
+    thrust::uninitialized_fill(
+      rmm::exec_policy(stream), invalid_index_map->begin(), invalid_index_map->end(), int32_t{1});
 
     // Functor to check for index validity since left joins can create invalid indices
     valid_range<size_type> valid(0, right_table_row_count);