From dc1435ba924410f94975411a8934414df11de8a7 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Wed, 27 Apr 2022 06:18:09 +0530
Subject: [PATCH 1/9] Use structured bindings instead of std::tie (#10726)

Addresses part of https://github.com/rapidsai/cudf/issues/10350
Take advantage of the C++17 structured bindings feature.
There are a few usages where `std::tie` is used on existing variables and for ordered operator== comparisons; these usages are not replaced.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Conor Hoekstra (https://github.com/codereport)

URL: https://github.com/rapidsai/cudf/pull/10726
---
 cpp/include/cudf_test/column_wrapper.hpp      | 30 +++++------
 cpp/src/io/parquet/page_enc.cu                | 14 ++----
 cpp/src/io/parquet/reader_impl.cu             |  6 +--
 cpp/src/quantiles/quantile.cu                 |  5 +-
 cpp/src/reshape/interleave_columns.cu         |  5 +-
 cpp/src/rolling/rolling_collect_list.cuh      |  4 +-
 cpp/src/strings/copying/concatenate.cu        |  4 +-
 .../partitioning/hash_partition_test.cpp      | 48 ++++++-------------
 cpp/tests/rolling/grouped_rolling_test.cpp    | 12 ++---
 cpp/tests/rolling/rolling_test.cpp            |  6 +--
 cpp/tests/sort/rank_test.cpp                  |  3 +-
 cpp/tests/transform/row_bit_count_test.cu     | 24 +++-------
 cpp/tests/unary/cast_tests.cpp                |  4 +-
 13 files changed, 48 insertions(+), 117 deletions(-)

diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp
index 8a5d4e5efcc..158c4ff20bb 100644
--- a/cpp/include/cudf_test/column_wrapper.hpp
+++ b/cpp/include/cudf_test/column_wrapper.hpp
@@ -702,13 +702,11 @@ class strings_column_wrapper : public detail::column_wrapper {
   template <typename StringsIterator>
   strings_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{}
   {
-    std::vector<char> chars;
-    std::vector<size_type> offsets;
-    auto all_valid = thrust::make_constant_iterator(true);
-    std::tie(chars, offsets) = detail::make_chars_and_offsets(begin, end, all_valid);
-    auto d_chars = cudf::detail::make_device_uvector_sync(chars);
-    auto d_offsets = cudf::detail::make_device_uvector_sync(offsets);
-    wrapped = cudf::make_strings_column(d_chars, d_offsets);
+    auto all_valid = thrust::make_constant_iterator(true);
+    auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, all_valid);
+    auto d_chars = cudf::detail::make_device_uvector_sync(chars);
+    auto d_offsets = cudf::detail::make_device_uvector_sync(offsets);
+    wrapped = cudf::make_strings_column(d_chars, d_offsets);
   }
 
 /**
@@ -744,14 +742,12 @@ class strings_column_wrapper : public detail::column_wrapper {
     : column_wrapper{}
   {
     size_type num_strings = std::distance(begin, end);
-    std::vector<char> chars;
-    std::vector<size_type> offsets;
-    std::tie(chars, offsets) = detail::make_chars_and_offsets(begin, end, v);
-    auto null_mask = detail::make_null_mask_vector(v, v + num_strings);
-    auto d_chars = cudf::detail::make_device_uvector_sync(chars);
-    auto d_offsets = cudf::detail::make_device_uvector_sync(offsets);
-    auto d_bitmask = cudf::detail::make_device_uvector_sync(null_mask);
-    wrapped = cudf::make_strings_column(d_chars, d_offsets, d_bitmask);
+    auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v);
+    auto null_mask = detail::make_null_mask_vector(v, v + num_strings);
+    auto d_chars = cudf::detail::make_device_uvector_sync(chars);
+    auto d_offsets = cudf::detail::make_device_uvector_sync(offsets);
+    auto d_bitmask = cudf::detail::make_device_uvector_sync(null_mask);
+    wrapped = 
cudf::make_strings_column(d_chars, d_offsets, d_bitmask); } /** @@ -1479,9 +1475,7 @@ class lists_column_wrapper : public detail::column_wrapper { int32_t const expected_depth = hierarchy_and_depth.second; // preprocess columns so that every column_view in 'cols' is an equivalent hierarchy - std::vector> stubs; - std::vector cols; - std::tie(cols, stubs) = preprocess_columns(elements, expected_hierarchy, expected_depth); + auto [cols, stubs] = preprocess_columns(elements, expected_hierarchy, expected_depth); // generate offsets size_type count = 0; diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu index 52b1d93d2a0..61bd29399cd 100644 --- a/cpp/src/io/parquet/page_enc.cu +++ b/cpp/src/io/parquet/page_enc.cu @@ -1663,9 +1663,7 @@ dremel_data get_dremel_data(column_view h_col, } } - std::unique_ptr device_view_owners; - column_device_view* d_nesting_levels; - std::tie(device_view_owners, d_nesting_levels) = + auto [device_view_owners, d_nesting_levels] = contiguous_copy_column_device_views(nesting_levels, stream); thrust::exclusive_scan( @@ -1735,10 +1733,7 @@ dremel_data get_dremel_data(column_view h_col, auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; // Get empties at this level - rmm::device_uvector empties(0, stream); - rmm::device_uvector empties_idx(0, stream); - size_t empties_size; - std::tie(empties, empties_idx, empties_size) = + auto [empties, empties_idx, empties_size] = get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); // Merge empty at deepest parent level with the rep, def level vals at leaf level @@ -1819,10 +1814,7 @@ dremel_data get_dremel_data(column_view h_col, auto offset_size_at_level = column_ends[level] - column_offsets[level] + 1; // Get empties at this level - rmm::device_uvector empties(0, stream); - rmm::device_uvector empties_idx(0, stream); - size_t empties_size; - std::tie(empties, empties_idx, empties_size) = + auto [empties, empties_idx, empties_size] = get_empties(nesting_levels[level], column_offsets[level], column_ends[level]); auto offset_transformer = [new_child_offsets = new_offsets.data(), diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu index 46b3206f731..cfca0bad518 100644 --- a/cpp/src/io/parquet/reader_impl.cu +++ b/cpp/src/io/parquet/reader_impl.cu @@ -1729,11 +1729,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, continue; } - int32_t type_width; - int32_t clock_rate; - int8_t converted_type; - - std::tie(type_width, clock_rate, converted_type) = + auto [type_width, clock_rate, converted_type] = conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()), _timestamp_type.id(), schema.type, diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index a71fc862bf3..f38d0a921b7 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -113,10 +113,7 @@ struct quantile_functor { ordered_indices, [input = *d_input] __device__(size_type idx) { return input.is_valid_nocheck(idx); }); - rmm::device_buffer mask; - size_type null_count; - - std::tie(mask, null_count) = valid_if( + auto [mask, null_count] = valid_if( q_device.begin(), q_device.end(), [sorted_validity, interp = interp, size = size] __device__(double q) { diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 9954cb4a299..d9e858e8e40 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -258,10 +258,7 @@ struct 
interleave_columns_impl()>> { func_value, func_validity); - rmm::device_buffer mask; - size_type null_count; - - std::tie(mask, null_count) = valid_if(index_begin, index_end, func_validity, stream, mr); + auto [mask, null_count] = valid_if(index_begin, index_end, func_validity, stream, mr); output->set_null_mask(std::move(mask), null_count); diff --git a/cpp/src/rolling/rolling_collect_list.cuh b/cpp/src/rolling/rolling_collect_list.cuh index 94703e320d0..13de4693e54 100644 --- a/cpp/src/rolling/rolling_collect_list.cuh +++ b/cpp/src/rolling/rolling_collect_list.cuh @@ -207,9 +207,7 @@ std::unique_ptr rolling_collect_list(column_view const& input, stream, mr); - rmm::device_buffer null_mask; - size_type null_count; - std::tie(null_mask, null_count) = valid_if( + auto [null_mask, null_count] = valid_if( thrust::make_counting_iterator(0), thrust::make_counting_iterator(input.size()), [preceding_begin, following_begin, min_periods] __device__(auto i) { diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index fedb8d38a08..0ab7ef5ff2b 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -72,9 +72,7 @@ auto create_strings_device_views(host_span views, rmm::cuda_s { CUDF_FUNC_RANGE(); // Assemble contiguous array of device views - std::unique_ptr device_view_owners; - column_device_view* device_views_ptr; - std::tie(device_view_owners, device_views_ptr) = + auto [device_view_owners, device_views_ptr] = contiguous_copy_column_device_views(views, stream); // Compute the partition offsets and size of offset column diff --git a/cpp/tests/partitioning/hash_partition_test.cpp b/cpp/tests/partitioning/hash_partition_test.cpp index befd9884b11..3ec6ae97595 100644 --- a/cpp/tests/partitioning/hash_partition_test.cpp +++ b/cpp/tests/partitioning/hash_partition_test.cpp @@ -67,9 +67,7 @@ TEST_F(HashPartition, ZeroPartitions) auto columns_to_hash = std::vector({2}); cudf::size_type const num_partitions = 0; - std::unique_ptr output; - std::vector offsets; - std::tie(output, offsets) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output, offsets] = cudf::hash_partition(input, columns_to_hash, num_partitions); // Expect empty table with same number of columns and zero partitions EXPECT_EQ(input.num_columns(), output->num_columns()); @@ -87,9 +85,7 @@ TEST_F(HashPartition, ZeroRows) auto columns_to_hash = std::vector({2}); cudf::size_type const num_partitions = 3; - std::unique_ptr output; - std::vector offsets; - std::tie(output, offsets) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output, offsets] = cudf::hash_partition(input, columns_to_hash, num_partitions); // Expect empty table with same number of columns and zero partitions EXPECT_EQ(input.num_columns(), output->num_columns()); @@ -104,9 +100,7 @@ TEST_F(HashPartition, ZeroColumns) auto columns_to_hash = std::vector({}); cudf::size_type const num_partitions = 3; - std::unique_ptr output; - std::vector offsets; - std::tie(output, offsets) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output, offsets] = cudf::hash_partition(input, columns_to_hash, num_partitions); // Expect empty table with same number of columns and zero partitions EXPECT_EQ(input.num_columns(), output->num_columns()); @@ -124,10 +118,8 @@ TEST_F(HashPartition, MixedColumnTypes) auto columns_to_hash = std::vector({0, 2}); cudf::size_type const num_partitions = 3; - std::unique_ptr output1, output2; - std::vector offsets1, 
offsets2; - std::tie(output1, offsets1) = cudf::hash_partition(input, columns_to_hash, num_partitions); - std::tie(output2, offsets2) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output1, offsets1] = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output2, offsets2] = cudf::hash_partition(input, columns_to_hash, num_partitions); // Expect output to have size num_partitions EXPECT_EQ(static_cast(num_partitions), offsets1.size()); @@ -148,9 +140,7 @@ TEST_F(HashPartition, NullableStrings) std::vector const columns_to_hash({0}); cudf::size_type const num_partitions = 3; - std::unique_ptr result; - std::vector offsets; - std::tie(result, offsets) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [result, offsets] = cudf::hash_partition(input, columns_to_hash, num_partitions); auto const& col = result->get_column(0); EXPECT_EQ(0, col.null_count()); @@ -167,11 +157,9 @@ TEST_F(HashPartition, ColumnsToHash) auto columns_to_hash = std::vector({0}); cudf::size_type const num_partitions = 3; - std::unique_ptr first_result, second_result; - std::vector first_offsets, second_offsets; - std::tie(first_result, first_offsets) = + auto [first_result, first_offsets] = cudf::hash_partition(first_input, columns_to_hash, num_partitions); - std::tie(second_result, second_offsets) = + auto [second_result, second_offsets] = cudf::hash_partition(second_input, columns_to_hash, num_partitions); // Expect offsets to be equal and num_partitions in length @@ -228,11 +216,9 @@ TEST_F(HashPartition, CustomSeedValue) auto columns_to_hash = std::vector({0, 2}); cudf::size_type const num_partitions = 3; - std::unique_ptr output1, output2; - std::vector offsets1, offsets2; - std::tie(output1, offsets1) = cudf::hash_partition( + auto [output1, offsets1] = cudf::hash_partition( input, columns_to_hash, num_partitions, cudf::hash_id::HASH_MURMUR3, 12345); - std::tie(output2, offsets2) = cudf::hash_partition( + auto [output2, offsets2] = cudf::hash_partition( input, columns_to_hash, num_partitions, cudf::hash_id::HASH_MURMUR3, 12345); // Expect output to have size num_partitions @@ -260,9 +246,7 @@ TYPED_TEST(HashPartitionFixedWidth, NullableFixedWidth) std::vector const columns_to_hash({0}); cudf::size_type const num_partitions = 3; - std::unique_ptr result; - std::vector offsets; - std::tie(result, offsets) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [result, offsets] = cudf::hash_partition(input, columns_to_hash, num_partitions); auto const& col = result->get_column(0); EXPECT_EQ(0, col.null_count()); @@ -294,10 +278,8 @@ void run_fixed_width_test(size_t cols, auto columns_to_hash = std::vector(cols); std::iota(columns_to_hash.begin(), columns_to_hash.end(), 0); - std::unique_ptr output1, output2; - std::vector offsets1, offsets2; - std::tie(output1, offsets1) = cudf::hash_partition(input, columns_to_hash, num_partitions); - std::tie(output2, offsets2) = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output1, offsets1] = cudf::hash_partition(input, columns_to_hash, num_partitions); + auto [output2, offsets2] = cudf::hash_partition(input, columns_to_hash, num_partitions); // Expect output to have size num_partitions EXPECT_EQ(static_cast(num_partitions), offsets1.size()); @@ -367,9 +349,7 @@ TEST_F(HashPartition, FixedPointColumnsToHash) auto columns_to_hash = std::vector({0}); cudf::size_type const num_partitions = 1; - std::unique_ptr first_result; - std::vector first_offsets; - std::tie(first_result, 
first_offsets) = + auto [first_result, first_offsets] = cudf::hash_partition(first_input, columns_to_hash, num_partitions); // Expect offsets to be equal and num_partitions in length diff --git a/cpp/tests/rolling/grouped_rolling_test.cpp b/cpp/tests/rolling/grouped_rolling_test.cpp index f484661eee8..3a69c13c889 100644 --- a/cpp/tests/rolling/grouped_rolling_test.cpp +++ b/cpp/tests/rolling/grouped_rolling_test.cpp @@ -340,10 +340,8 @@ class GroupedRollingTest : public cudf::test::BaseFixture { thrust::host_vector ref_valid(num_rows); // input data and mask - thrust::host_vector in_col; - std::vector in_valid; - std::tie(in_col, in_valid) = cudf::test::to_host(input); - bitmask_type* valid_mask = in_valid.data(); + auto [in_col, in_valid] = cudf::test::to_host(input); + bitmask_type* valid_mask = in_valid.data(); agg_op op; for (size_type i = 0; i < num_rows; i++) { @@ -973,10 +971,8 @@ class GroupedTimeRangeRollingTest : public cudf::test::BaseFixture { thrust::host_vector ref_valid(num_rows); // input data and mask - thrust::host_vector in_col; - std::vector in_valid; - std::tie(in_col, in_valid) = cudf::test::to_host(input); - bitmask_type* valid_mask = in_valid.data(); + auto [in_col, in_valid] = cudf::test::to_host(input); + bitmask_type* valid_mask = in_valid.data(); agg_op op; for (size_type i = 0; i < num_rows; i++) { diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index c54fe073e3a..9549569d9f6 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -552,10 +552,8 @@ class RollingTest : public cudf::test::BaseFixture { thrust::host_vector ref_valid(num_rows); // input data and mask - thrust::host_vector in_col; - std::vector in_valid; - std::tie(in_col, in_valid) = cudf::test::to_host(input); - bitmask_type* valid_mask = in_valid.data(); + auto [in_col, in_valid] = cudf::test::to_host(input); + bitmask_type* valid_mask = in_valid.data(); agg_op op; for (size_type i = 0; i < num_rows; i++) { diff --git a/cpp/tests/sort/rank_test.cpp b/cpp/tests/sort/rank_test.cpp index c4d0b6b04f4..28c9b40de11 100644 --- a/cpp/tests/sort/rank_test.cpp +++ b/cpp/tests/sort/rank_test.cpp @@ -91,8 +91,7 @@ struct Rank : public BaseFixture { test_case_t{table_view{{col1, col2, col3}}, table_view{{col1_rank, col2_rank, col3_rank}}}, }) { - table_view input, output; - std::tie(input, output) = test_case; + auto [input, output] = test_case; run_rank_test(input, output, diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu index 8ed50b6eae0..9c3326cf575 100644 --- a/cpp/tests/transform/row_bit_count_test.cu +++ b/cpp/tests/transform/row_bit_count_test.cu @@ -123,9 +123,7 @@ TYPED_TEST(RowBitCountTyped, Lists) { using T = TypeParam; - std::unique_ptr col; - std::unique_ptr expected_sizes; - std::tie(col, expected_sizes) = build_list_column(); + auto [col, expected_sizes] = build_list_column(); table_view t({*col}); auto result = cudf::row_bit_count(t); @@ -326,9 +324,7 @@ TEST_F(RowBitCount, StructsNoNulls) TEST_F(RowBitCount, StructsNulls) { - std::unique_ptr struct_col; - std::unique_ptr expected_sizes; - std::tie(struct_col, expected_sizes) = build_struct_column(); + auto [struct_col, expected_sizes] = build_struct_column(); table_view t({*struct_col}); auto result = cudf::row_bit_count(t); @@ -440,9 +436,7 @@ TEST_F(RowBitCount, NestedTypes) { // List, float, List, int16> { - std::unique_ptr col_no_nulls; - std::unique_ptr expected_sizes; - std::tie(col_no_nulls, expected_sizes) = + 
auto [col_no_nulls, expected_sizes] = build_nested_and_expected_column({1, 1, 1, 1, 1, 1, 1, 1}); table_view no_nulls_t({*col_no_nulls}); auto no_nulls_result = cudf::row_bit_count(no_nulls_t); @@ -600,19 +594,13 @@ struct sum_functor { TEST_F(RowBitCount, Table) { // complex nested column - std::unique_ptr col0; - std::unique_ptr col0_sizes; - std::tie(col0, col0_sizes) = build_nested_and_expected_column({1, 1, 1, 1, 1, 1, 1, 1}); + auto [col0, col0_sizes] = build_nested_and_expected_column({1, 1, 1, 1, 1, 1, 1, 1}); // struct column - std::unique_ptr col1; - std::unique_ptr col1_sizes; - std::tie(col1, col1_sizes) = build_struct_column(); + auto [col1, col1_sizes] = build_struct_column(); // list column - std::unique_ptr col2; - std::unique_ptr col2_sizes; - std::tie(col2, col2_sizes) = build_list_column(); + auto [col2, col2_sizes] = build_list_column(); table_view t({*col0, *col1, *col2}); auto result = cudf::row_bit_count(t); diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp index f53498bccec..16fb02e06bc 100644 --- a/cpp/tests/unary/cast_tests.cpp +++ b/cpp/tests/unary/cast_tests.cpp @@ -174,9 +174,7 @@ void validate_cast_result(cudf::column_view expected, cudf::column_view actual) { using namespace cudf::test; // round-trip through the host because sizeof(T) may not equal sizeof(R) - thrust::host_vector h_data; - std::vector null_mask; - std::tie(h_data, null_mask) = to_host(expected); + auto [h_data, null_mask] = to_host(expected); if (null_mask.empty()) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(make_column(h_data), actual); } else { From 09995a5d3f5b1dd83584529e44ccf774d7f6efe2 Mon Sep 17 00:00:00 2001 From: ChrisJar Date: Wed, 27 Apr 2022 13:13:29 -0500 Subject: [PATCH 2/9] Add bindings for index_of with column search key (#10696) This adds bindings for `index_of` to enable using `list.index` with a Series of search keys. 
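For reference, a minimal C++ sketch of the libcudf overload these bindings call into — the column overload of `cudf::lists::index_of` from `cudf/lists/contains.hpp`, whose signature is pinned by the Cython declaration added below; the wrapper function and its names here are illustrative only, not part of the change:

```cpp
#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/lists/contains.hpp>
#include <cudf/lists/lists_column_view.hpp>

#include <memory>

// Illustrative wrapper (not cudf API): for each row i, find the first
// occurrence of search_keys[i] inside the list list_column[i]. The result is
// an INT32 column: -1 where the key is absent, and null where either the row
// or its search key is null.
std::unique_ptr<cudf::column> row_wise_index_of(cudf::column_view const& list_column,
                                                cudf::column_view const& search_keys)
{
  return cudf::lists::index_of(cudf::lists_column_view{list_column}, search_keys);
}
```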
Closes #10692

cc: @randerzander

Authors:
  - https://github.com/ChrisJar

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/10696
---
 python/cudf/cudf/_lib/cpp/lists/contains.pxd |  5 ++
 python/cudf/cudf/_lib/lists.pyx              | 20 ++++++-
 python/cudf/cudf/core/column/lists.py        | 61 ++++++++++++++++++--
 python/cudf/cudf/tests/test_list.py          | 55 ++++++++++++++++--
 4 files changed, 129 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/_lib/cpp/lists/contains.pxd b/python/cudf/cudf/_lib/cpp/lists/contains.pxd
index 46aea37643f..e3cb01721a0 100644
--- a/python/cudf/cudf/_lib/cpp/lists/contains.pxd
+++ b/python/cudf/cudf/_lib/cpp/lists/contains.pxd
@@ -18,3 +18,8 @@ cdef extern from "cudf/lists/contains.hpp" namespace "cudf::lists" nogil:
         lists_column_view lists,
         scalar search_key,
     ) except +
+
+    cdef unique_ptr[column] index_of(
+        lists_column_view lists,
+        column_view search_keys,
+    ) except +
diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx
index e5a705ab603..025fb0665d3 100644
--- a/python/cudf/cudf/_lib/lists.pyx
+++ b/python/cudf/cudf/_lib/lists.pyx
@@ -176,7 +176,7 @@ def contains_scalar(Column col, object py_search_key):
     return result
 
 
-def index_of(Column col, object py_search_key):
+def index_of_scalar(Column col, object py_search_key):
 
     cdef DeviceScalar search_key = py_search_key.device_value
 
@@ -195,6 +195,24 @@ def index_of(Column col, object py_search_key):
     return Column.from_unique_ptr(move(c_result))
 
 
+def index_of_column(Column col, Column search_keys):
+
+    cdef column_view keys_view = search_keys.view()
+
+    cdef shared_ptr[lists_column_view] list_view = (
+        make_shared[lists_column_view](col.view())
+    )
+
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(cpp_index_of(
+            list_view.get()[0],
+            keys_view,
+        ))
+    return Column.from_unique_ptr(move(c_result))
+
+
 def concatenate_rows(list source_columns):
 
     cdef unique_ptr[column] c_result
 
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index df6aaa91a2b..2964378d114 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -17,7 +17,8 @@
     drop_list_duplicates,
     extract_element_column,
     extract_element_scalar,
-    index_of,
+    index_of_column,
+    index_of_scalar,
     sort_lists,
 )
 from cudf._lib.strings.convert.convert_lists import format_list_column
@@ -463,10 +464,61 @@ def contains(self, search_key: ScalarLike) -> ParentType:
                 raise
         return res
 
-    def index(self, search_key: ScalarLike) -> ParentType:
-        search_key = cudf.Scalar(search_key)
+    def index(self, search_key: Union[ScalarLike, ColumnLike]) -> ParentType:
+        """
+        Returns integers representing the index of the search key for each row.
+
+        If ``search_key`` is a sequence, it must be the same length as the
+        Series and ``search_key[i]`` represents the search key for the
+        ``i``-th row of the Series.
+
+        If the search key is not contained in a row, -1 is returned. If either
+        the row or the search key is null, <NA> is returned. If the search key
+        is contained multiple times, the smallest matching index is returned. 
+
+        Parameters
+        ----------
+        search_key : scalar or sequence of scalars
+            Element or elements being searched for in each row of the list
+            column
+
+        Returns
+        -------
+        Series or Index
+
+        Examples
+        --------
+        >>> s = cudf.Series([[1, 2, 3], [3, 4, 5], [4, 5, 6]])
+        >>> s.list.index(4)
+        0   -1
+        1    1
+        2    0
+        dtype: int32
+
+        >>> s = cudf.Series([["a", "b", "c"], ["x", "y", "z"]])
+        >>> s.list.index(["b", "z"])
+        0    1
+        1    2
+        dtype: int32
+
+        >>> s = cudf.Series([[4, 5, 6], None, [-3, -2, -1]])
+        >>> s.list.index([None, 3, -2])
+        0    <NA>
+        1    <NA>
+        2       1
+        dtype: int32
+        """
+
         try:
-            res = self._return_or_inplace(index_of(self._column, search_key))
+            if is_scalar(search_key):
+                return self._return_or_inplace(
+                    index_of_scalar(self._column, cudf.Scalar(search_key))
+                )
+            else:
+                return self._return_or_inplace(
+                    index_of_column(self._column, as_column(search_key))
+                )
+
         except RuntimeError as e:
             if (
                 "Type/Scale of search key does not "
@@ -474,7 +526,6 @@ def index(self, search_key: ScalarLike) -> ParentType:
             ):
                 raise TypeError(str(e)) from e
             raise
-        return res
 
     @property
     def leaves(self) -> ParentType:
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index c21e1a0f61f..09eee3520e5 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -11,6 +11,7 @@
 import cudf
 from cudf import NA
 from cudf._lib.copying import get_element
+from cudf.api.types import is_scalar
 from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
@@ -425,7 +426,7 @@ def test_contains_invalid(data, scalar):
 
 
 @pytest.mark.parametrize(
-    "data, scalar, expect",
+    "data, search_key, expect",
     [
         (
             [[1, 2, 3], [], [3, 4, 5]],
             3,
             [2, -1, 0],
         ),
         (
             [[None, "x", None, "y"], ["z", "i", "j"]],
             "y",
             [3, -1],
         ),
+        (
+            [["h", "a", None], ["t", "g"]],
+            ["a", "b"],
+            [1, -1],
+        ),
+        (
+            [None, ["h", "i"], ["p", "k", "z"]],
+            ["x", None, "z"],
+            [None, None, 2],
+        ),
         (
             [["d", None, "e"], [None, "f"], []],
             cudf.Scalar(cudf.NA, "O"),
             [None, None, None],
         ),
         (
             [None, [10, 9, 8], [5, 8, None]],
             cudf.Scalar(cudf.NA, "int64"),
             [None, None, None],
         ),
     ],
 )
-def test_index(data, scalar, expect):
+def test_index(data, search_key, expect):
     sr = cudf.Series(data)
     expect = cudf.Series(expect, dtype="int32")
-    got = sr.list.index(cudf.Scalar(scalar, sr.dtype.element_type))
+    if is_scalar(search_key):
+        got = sr.list.index(cudf.Scalar(search_key, sr.dtype.element_type))
+    else:
+        got = sr.list.index(
+            cudf.Series(search_key, dtype=sr.dtype.element_type)
+        )
+
     assert_eq(expect, got)
 
 
 @pytest.mark.parametrize(
-    "data, scalar",
+    "data, search_key",
     [
         (
             [[9, None, 8], [], [7, 6, 5]],
             "c",
         ),
         (
             [["a", "b", "c"], None, [None, "d"]],
             2,
         ),
+        (
+            [["e", "s"], ["t", "w"]],
+            [5, 6],
+        ),
     ],
 )
-def test_index_invalid(data, scalar):
+def test_index_invalid_type(data, search_key):
     sr = cudf.Series(data)
     with pytest.raises(
         TypeError,
         match="Type/Scale of search key does not "
         "match list column element type.",
     ):
-        sr.list.index(scalar)
+        sr.list.index(search_key)
+
+
+@pytest.mark.parametrize(
+    "data, search_key",
+    [
+        (
+            [[5, 8], [2, 6]],
+            [8, 2, 4],
+        ),
+        (
+            [["h", "j"], ["p", None], ["t", "z"]],
+            ["j", "a"],
+        ),
+    ],
+)
+def test_index_invalid_length(data, search_key):
+    sr = cudf.Series(data)
+    with pytest.raises(
+        RuntimeError,
+        match="Number of search keys must match list column size.",
+    ):
+        sr.list.index(search_key)
 
 
 @pytest.mark.parametrize(

From 1f8a03e69704562dfac38de40b7172650280c6ea Mon Sep 17 00:00:00 2001
From: Karthikeyan 
<6488848+karthikeyann@users.noreply.github.com>
Date: Wed, 27 Apr 2022 23:57:20 +0530
Subject: [PATCH 3/9] Replace std::make_pair with std::pair (C++17 CTAD)
 (#10727)

Addresses part of https://github.com/rapidsai/cudf/issues/10350
Take advantage of the C++17 feature CTAD (class template argument deduction).

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/10727
---
 cpp/benchmarks/reduction/segment_reduce.cu    |  2 +-
 cpp/docs/DEVELOPER_GUIDE.md                   |  4 ++--
 cpp/include/cudf/detail/null_mask.cuh         |  2 +-
 cpp/include/cudf/detail/valid_if.cuh          |  2 +-
 cpp/include/cudf/strings/detail/utilities.cuh |  2 +-
 .../cudf/table/experimental/row_operators.cuh | 13 ++++++-------
 cpp/include/cudf_test/column_wrapper.hpp      | 16 ++++++++--------
 cpp/src/bitmask/null_mask.cu                  |  8 ++++----
 cpp/src/copying/contiguous_split.cu           |  4 ++--
 cpp/src/groupby/groupby.cu                    | 17 ++++++++---------
 cpp/src/groupby/hash/groupby.cu               |  2 +-
 cpp/src/groupby/sort/aggregate.cpp            |  2 +-
 cpp/src/groupby/sort/group_collect.cu         |  7 +++----
 cpp/src/groupby/sort/scan.cpp                 |  2 +-
 cpp/src/io/orc/aggregate_orc_metadata.cpp     |  6 +++---
 cpp/src/io/orc/reader_impl.cu                 | 18 +++++++++---------
 cpp/src/io/parquet/chunk_dict.cu              |  2 +-
 cpp/src/io/parquet/writer_impl.cu             |  8 ++++----
 cpp/src/join/conditional_join.cu              | 16 ++++++++--------
 cpp/src/join/hash_join.cu                     | 12 ++++++------
 cpp/src/join/join.cu                          |  2 +-
 cpp/src/join/join_utils.cu                    |  4 ++--
 cpp/src/join/mixed_join.cu                    | 16 ++++++++--------
 .../lists/combine/concatenate_list_elements.cu|  2 +-
 cpp/src/lists/copying/scatter_helper.cu       |  6 +++---
 cpp/src/partitioning/partitioning.cu          | 11 +++++------
 cpp/src/partitioning/round_robin.cu           | 10 +++++-----
 cpp/src/replace/clamp.cu                      |  2 +-
 cpp/src/strings/convert/convert_datetime.cu   |  2 +-
 cpp/src/strings/json/json_path.cu             |  4 ++--
 cpp/src/strings/repeat_strings.cu             |  6 +++---
 cpp/src/structs/utilities.cpp                 |  6 +++---
 cpp/src/text/subword/data_normalizer.cu       |  8 ++++----
 cpp/src/transform/bools_to_mask.cu            |  8 ++++----
 cpp/src/transform/encode.cu                   |  2 +-
 cpp/src/transform/nans_to_nulls.cu            |  8 +++-----
 cpp/src/transform/one_hot_encode.cu           | 10 ++++------
 cpp/src/transpose/transpose.cu                |  4 ++--
 cpp/tests/groupby/m2_tests.cpp                |  5 ++---
 cpp/tests/groupby/merge_lists_tests.cpp       |  5 ++---
 cpp/tests/groupby/merge_m2_tests.cpp          | 12 +++++-------
 cpp/tests/groupby/merge_sets_tests.cpp        |  5 ++---
 cpp/tests/interop/to_arrow_test.cpp           |  2 +-
 cpp/tests/join/conditional_join_tests.cu      |  6 +++---
 cpp/tests/join/join_tests.cpp                 |  2 +-
 cpp/tests/join/mixed_join_tests.cu            |  6 +++---
 cpp/tests/merge/merge_test.cpp                |  4 ++--
 cpp/tests/search/search_struct_test.cpp       |  2 +-
 .../stream_compaction/distinct_count_tests.cpp|  2 +-
 .../stream_compaction/unique_count_tests.cpp  |  2 +-
 cpp/tests/strings/translate_tests.cpp         |  2 +-
 51 files changed, 149 insertions(+), 162 deletions(-)

diff --git a/cpp/benchmarks/reduction/segment_reduce.cu b/cpp/benchmarks/reduction/segment_reduce.cu
index 3723147d95c..08fc4622b43 100644
--- a/cpp/benchmarks/reduction/segment_reduce.cu
+++ b/cpp/benchmarks/reduction/segment_reduce.cu
@@ -82,7 +82,7 @@ std::pair, thrust::device_vector> make_test_d
 
   thrust::device_vector d_offsets(offset_it, offset_it + num_segments + 1);
 
-  return std::make_pair(std::move((input->release())[0]), d_offsets);
+  return std::pair(std::move((input->release())[0]), d_offsets);
 }
 
 template 
diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md
index 
165edd443f6..84f69f559a8 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -572,7 +572,7 @@ The preferred style for how inputs are passed in and outputs are returned is the Sometimes it is necessary for functions to have multiple outputs. There are a few ways this can be done in C++ (including creating a `struct` for the output). One convenient way to do this is -using `std::tie` and `std::make_pair`. Note that objects passed to `std::make_pair` will invoke +using `std::tie` and `std::pair`. Note that objects passed to `std::pair` will invoke either the copy constructor or the move constructor of the object, and it may be preferable to move non-trivially copyable objects (and required for types with deleted copy constructors, like `std::unique_ptr`). @@ -585,7 +585,7 @@ std::pair return_two_tables(void){ // Do stuff with out0, out1 // Return a std::pair of the two outputs - return std::make_pair(std::move(out0), std::move(out1)); + return std::pair(std::move(out0), std::move(out1)); } cudf::table out0; diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index 7aec56fdc51..6a6cdd43004 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -133,7 +133,7 @@ std::pair bitmask_binop( stream, mr); - return std::make_pair(std::move(dest_mask), null_count); + return std::pair(std::move(dest_mask), null_count); } /** diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index aa4421bb4ed..f91f51b2161 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -110,7 +110,7 @@ std::pair valid_if( null_count = size - valid_count.value(stream); } - return std::make_pair(std::move(null_mask), null_count); + return std::pair(std::move(null_mask), null_count); } /** diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh index bb7f29a4172..e6dba5147b5 100644 --- a/cpp/include/cudf/strings/detail/utilities.cuh +++ b/cpp/include/cudf/strings/detail/utilities.cuh @@ -156,7 +156,7 @@ auto make_strings_children( for_each_fn(size_and_exec_fn); } - return std::make_pair(std::move(offsets_column), std::move(chars_column)); + return std::pair(std::move(offsets_column), std::move(chars_column)); } /** diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index 88e31744fdf..32b71e660ac 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -165,14 +165,13 @@ class device_row_comparator { bool const rhs_is_null{_rhs.is_null(rhs_element_index)}; if (lhs_is_null or rhs_is_null) { // at least one is null - return cuda::std::make_pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), - _depth); + return cuda::std::pair(null_compare(lhs_is_null, rhs_is_null, _null_precedence), _depth); } } - return cuda::std::make_pair(relational_compare(_lhs.element(lhs_element_index), - _rhs.element(rhs_element_index)), - std::numeric_limits::max()); + return cuda::std::pair(relational_compare(_lhs.element(lhs_element_index), + _rhs.element(rhs_element_index)), + std::numeric_limits::max()); } template {{}, -1}, - [](auto acc, lists_column_wrapper const& lcw) { - return lcw.depth > acc.second ? 
std::make_pair(lcw.get_view(), lcw.depth) : acc; - }); + auto const hierarchy_and_depth = + std::accumulate(elements.begin(), + elements.end(), + std::pair{{}, -1}, + [](auto acc, lists_column_wrapper const& lcw) { + return lcw.depth > acc.second ? std::pair(lcw.get_view(), lcw.depth) : acc; + }); column_view expected_hierarchy = hierarchy_and_depth.first; int32_t const expected_depth = hierarchy_and_depth.second; diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 756cf3421c9..ec14f8e6ded 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -445,7 +445,7 @@ std::pair bitmask_and(table_view const& view, CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; if (view.num_rows() == 0 or view.num_columns() == 0) { - return std::make_pair(std::move(null_mask), 0); + return std::pair(std::move(null_mask), 0); } std::vector masks; @@ -467,7 +467,7 @@ std::pair bitmask_and(table_view const& view, mr); } - return std::make_pair(std::move(null_mask), 0); + return std::pair(std::move(null_mask), 0); } // Returns the bitwise OR of the null masks of all columns in the table view @@ -478,7 +478,7 @@ std::pair bitmask_or(table_view const& view, CUDF_FUNC_RANGE(); rmm::device_buffer null_mask{0, stream, mr}; if (view.num_rows() == 0 or view.num_columns() == 0) { - return std::make_pair(std::move(null_mask), 0); + return std::pair(std::move(null_mask), 0); } std::vector masks; @@ -500,7 +500,7 @@ std::pair bitmask_or(table_view const& view, mr); } - return std::make_pair(std::move(null_mask), 0); + return std::pair(std::move(null_mask), 0); } } // namespace detail diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 514374d450d..35e7eba974f 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -688,9 +688,9 @@ BufInfo build_output_columns(InputIter begin, ? 
0 : (current_info->num_rows - current_info->valid_count); ++current_info; - return std::make_pair(ptr, null_count); + return std::pair(ptr, null_count); } - return std::make_pair(static_cast(nullptr), 0); + return std::pair(static_cast(nullptr), 0); }(); // size/data pointer for the column diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 57bb222aaa0..79882239b38 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -83,8 +83,7 @@ std::pair, std::vector> groupby::disp "Unsupported groupby key type does not support equality comparison"); auto [grouped_keys, results] = detail::hash::groupby(flattened_keys, requests, _include_null_keys, stream, mr); - return std::make_pair(unflatten_nested_columns(std::move(grouped_keys), _keys), - std::move(results)); + return std::pair(unflatten_nested_columns(std::move(grouped_keys), _keys), std::move(results)); } else { return sort_aggregate(requests, stream, mr); } @@ -193,7 +192,7 @@ std::pair, std::vector> groupby::aggr verify_valid_requests(requests); - if (_keys.num_rows() == 0) { return std::make_pair(empty_like(_keys), empty_results(requests)); } + if (_keys.num_rows() == 0) { return std::pair(empty_like(_keys), empty_results(requests)); } return dispatch_aggregation(requests, rmm::cuda_stream_default, mr); } @@ -211,7 +210,7 @@ std::pair, std::vector> groupby::scan verify_valid_requests(requests); - if (_keys.num_rows() == 0) { return std::make_pair(empty_like(_keys), empty_results(requests)); } + if (_keys.num_rows() == 0) { return std::pair(empty_like(_keys), empty_results(requests)); } return sort_scan(requests, rmm::cuda_stream_default, mr); } @@ -250,7 +249,7 @@ std::pair, std::unique_ptr> groupby::replace_nulls CUDF_EXPECTS(static_cast(replace_policies.size()) == values.num_columns(), "Size mismatch between num_columns and replace_policies."); - if (values.is_empty()) { return std::make_pair(empty_like(_keys), empty_like(values)); } + if (values.is_empty()) { return std::pair(empty_like(_keys), empty_like(values)); } auto const stream = rmm::cuda_stream_default; auto const& group_labels = helper().group_labels(stream); @@ -269,8 +268,8 @@ std::pair, std::unique_ptr
> groupby::replace_nulls : std::move(grouped_values); }); - return std::make_pair(std::move(helper().sorted_keys(stream, mr)), - std::make_unique
(std::move(results))); + return std::pair(std::move(helper().sorted_keys(stream, mr)), + std::make_unique
(std::move(results))); } // Get the sort helper object @@ -310,8 +309,8 @@ std::pair, std::unique_ptr
> groupby::shift( grouped_values->view(), group_offsets, offsets[i], fill_values[i].get(), stream, mr); }); - return std::make_pair(helper().sorted_keys(stream, mr), - std::make_unique(std::move(results))); + return std::pair(helper().sorted_keys(stream, mr), + std::make_unique(std::move(results))); } } // namespace groupby diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index f225afaec71..e22b3a4f3a4 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -672,7 +672,7 @@ std::pair, std::vector> groupby( std::unique_ptr
unique_keys = groupby(keys, requests, &cache, has_nulls(keys), include_null_keys, stream, mr); - return std::make_pair(std::move(unique_keys), extract_results(requests, cache, stream, mr)); + return std::pair(std::move(unique_keys), extract_results(requests, cache, stream, mr)); } } // namespace hash } // namespace detail diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 4904aa42723..02036ff0bbf 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -778,7 +778,7 @@ std::pair, std::vector> groupby::sort auto results = detail::extract_results(requests, cache, stream, mr); - return std::make_pair(helper().unique_keys(stream, mr), std::move(results)); + return std::pair(helper().unique_keys(stream, mr), std::move(results)); } } // namespace groupby } // namespace cudf diff --git a/cpp/src/groupby/sort/group_collect.cu b/cpp/src/groupby/sort/group_collect.cu index 8b8a03f35a5..000a595ea2f 100644 --- a/cpp/src/groupby/sort/group_collect.cu +++ b/cpp/src/groupby/sort/group_collect.cu @@ -82,8 +82,7 @@ std::pair, std::unique_ptr> purge_null_entries( auto null_purged_offsets = strings::detail::make_offsets_child_column( null_purged_sizes.cbegin(), null_purged_sizes.cend(), stream, mr); - return std::make_pair, std::unique_ptr>( - std::move(null_purged_values), std::move(null_purged_offsets)); + return std::pair(std::move(null_purged_values), std::move(null_purged_offsets)); } std::unique_ptr group_collect(column_view const& values, @@ -109,8 +108,8 @@ std::unique_ptr group_collect(column_view const& values, return cudf::groupby::detail::purge_null_entries( values, offsets_column->view(), num_groups, stream, mr); } else { - return std::make_pair(std::make_unique(values, stream, mr), - std::move(offsets_column)); + return std::pair(std::make_unique(values, stream, mr), + std::move(offsets_column)); } }(); diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 8c4959da35b..20edc1b3f50 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -185,7 +185,7 @@ std::pair, std::vector> groupby::sort auto results = detail::extract_results(requests, cache, stream, mr); - return std::make_pair(helper().sorted_keys(stream, mr), std::move(results)); + return std::pair(helper().sorted_keys(stream, mr), std::move(results)); } } // namespace groupby } // namespace cudf diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp index a4ae9999a19..47244279599 100644 --- a/cpp/src/io/orc/aggregate_orc_metadata.cpp +++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -177,7 +177,7 @@ std::vector aggregate_orc_metadata::select_stri per_file_metadata[src_file_idx].ff.stripes.size()), "Invalid stripe index"); stripe_infos.push_back( - std::make_pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr)); + std::pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr)); row_count += per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; } selected_stripes_mapping.push_back({static_cast(src_file_idx), stripe_infos}); @@ -206,7 +206,7 @@ std::vector aggregate_orc_metadata::select_stri count += per_file_metadata[src_file_idx].ff.stripes[stripe_idx].numberOfRows; if (count > row_start || count == 0) { stripe_infos.push_back( - std::make_pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr)); + std::pair(&per_file_metadata[src_file_idx].ff.stripes[stripe_idx], nullptr)); } else { stripe_skip_rows = count; } diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 83c23774362..a768d568178 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -108,20 +108,20 @@ constexpr std::pair get_index_type_and_pos( case orc::DATA: skip_count += 1; skip_count |= (skip_count & 0xff) << 8; - return std::make_pair(gpu::CI_DATA, skip_count); + return std::pair(gpu::CI_DATA, skip_count); case orc::LENGTH: case orc::SECONDARY: skip_count += 1; skip_count |= (skip_count & 0xff) << 16; - return std::make_pair(gpu::CI_DATA2, skip_count); - case orc::DICTIONARY_DATA: return std::make_pair(gpu::CI_DICTIONARY, skip_count); + return std::pair(gpu::CI_DATA2, skip_count); + case orc::DICTIONARY_DATA: return std::pair(gpu::CI_DICTIONARY, skip_count); case orc::PRESENT: skip_count += (non_child ? 1 : 0); - return std::make_pair(gpu::CI_PRESENT, skip_count); - case orc::ROW_INDEX: return std::make_pair(gpu::CI_INDEX, skip_count); + return std::pair(gpu::CI_PRESENT, skip_count); + case orc::ROW_INDEX: return std::pair(gpu::CI_INDEX, skip_count); default: // Skip this stream as it's not strictly required - return std::make_pair(gpu::CI_NUM_STREAMS, 0); + return std::pair(gpu::CI_NUM_STREAMS, 0); } } @@ -1120,9 +1120,9 @@ table_with_metadata reader::impl::read(size_type skip_rows, if (_metadata.per_file_metadata[stripe_source_mapping.source_idx] .source->is_device_read_preferred(len)) { read_tasks.push_back( - std::make_pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] - .source->device_read_async(offset, len, d_dst, stream), - len)); + std::pair(_metadata.per_file_metadata[stripe_source_mapping.source_idx] + .source->device_read_async(offset, len, d_dst, stream), + len)); } else { const auto buffer = diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu index 9075a319ab3..93e76a6ac23 100644 --- a/cpp/src/io/parquet/chunk_dict.cu +++ b/cpp/src/io/parquet/chunk_dict.cu @@ -75,7 +75,7 @@ struct map_insert_fn { if constexpr (column_device_view::has_element_accessor()) { auto hash_fn = hash_functor{col}; auto equality_fn = equality_functor{col}; - return map.insert(std::make_pair(i, i), hash_fn, equality_fn); + return map.insert(std::pair(i, i), hash_fn, equality_fn); } else { CUDF_UNREACHABLE("Unsupported type to insert in map"); } diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 92d436e4566..75a50714407 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -876,7 +876,7 @@ auto build_chunk_dictionaries(hostdevice_2dvector& chunks, std::vector> dict_data; std::vector> 
dict_index; - if (h_chunks.size() == 0) { return std::make_pair(std::move(dict_data), std::move(dict_index)); } + if (h_chunks.size() == 0) { return std::pair(std::move(dict_data), std::move(dict_index)); } // Allocate slots for each chunk std::vector> hash_maps_storage; @@ -912,7 +912,7 @@ auto build_chunk_dictionaries(hostdevice_2dvector& chunks, // We don't use dictionary if the indices are > 16 bits because that's the maximum bitpacking // bitsize we efficiently support - if (nbits > 16) { return std::make_pair(false, 0); } + if (nbits > 16) { return std::pair(false, 0); } // Only these bit sizes are allowed for RLE encoding because it's compute optimized constexpr auto allowed_bitsizes = std::array{1, 2, 4, 8, 12, 16}; @@ -925,7 +925,7 @@ auto build_chunk_dictionaries(hostdevice_2dvector& chunks, bool use_dict = (ck.plain_data_size > dict_enc_size); if (not use_dict) { rle_bits = 0; } - return std::make_pair(use_dict, rle_bits); + return std::pair(use_dict, rle_bits); }(); } @@ -946,7 +946,7 @@ auto build_chunk_dictionaries(hostdevice_2dvector& chunks, gpu::collect_map_entries(chunks.device_view().flat_view(), stream); gpu::get_dictionary_indices(frags, stream); - return std::make_pair(std::move(dict_data), std::move(dict_index)); + return std::pair(std::move(dict_data), std::move(dict_index)); } void writer::impl::init_encoder_pages(hostdevice_2dvector& chunks, diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu index 9bf7e6a7a43..ae1561b422b 100644 --- a/cpp/src/join/conditional_join.cu +++ b/cpp/src/join/conditional_join.cu @@ -59,8 +59,8 @@ conditional_join(table_view const& left, // Inner and left semi joins return empty output because no matches can exist. case join_kind::INNER_JOIN: case join_kind::LEFT_SEMI_JOIN: - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); default: CUDF_FAIL("Invalid join kind."); break; } } else if (left_num_rows == 0) { @@ -70,12 +70,12 @@ conditional_join(table_view const& left, case join_kind::LEFT_ANTI_JOIN: case join_kind::INNER_JOIN: case join_kind::LEFT_SEMI_JOIN: - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); // Full joins need to return the trivial complement. case join_kind::FULL_JOIN: { auto ret_flipped = get_trivial_left_join_indices(right, stream); - return std::make_pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); + return std::pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); } default: CUDF_FAIL("Invalid join kind."); break; } @@ -139,8 +139,8 @@ conditional_join(table_view const& left, // all other cases (inner, left semi, and left anti joins) if we reach this // point we can safely return an empty result. 
if (join_size == 0) { - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); } rmm::device_scalar write_index(0, stream); @@ -176,7 +176,7 @@ conditional_join(table_view const& left, swap_tables); } - auto join_indices = std::make_pair(std::move(left_indices), std::move(right_indices)); + auto join_indices = std::pair(std::move(left_indices), std::move(right_indices)); // For full joins, get the indices in the right table that were not joined to // by any row in the left table. diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 086e1e49986..8d2888fd761 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -44,7 +44,7 @@ std::pair, std::unique_ptr
> get_empty_joined_table { std::unique_ptr
empty_probe = empty_like(probe); std::unique_ptr
empty_build = empty_like(build); - return std::make_pair(std::move(empty_probe), std::move(empty_build)); + return std::pair(std::move(empty_probe), std::move(empty_build)); } /** @@ -88,8 +88,8 @@ probe_join_hash_table(cudf::table_device_view build_table, // If output size is zero, return immediately if (join_size == 0) { - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); } auto left_indices = std::make_unique>(join_size, stream, mr); @@ -125,7 +125,7 @@ probe_join_hash_table(cudf::table_device_view build_table, hash_table.pair_retrieve( iter, iter + probe_table_num_rows, out1_zip_begin, out2_zip_begin, equality, stream.value()); } - return std::make_pair(std::move(left_indices), std::move(right_indices)); + return std::pair(std::move(left_indices), std::move(right_indices)); } /** @@ -390,8 +390,8 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const& probe, "Mismatch in number of columns to be joined on"); if (is_trivial_join(flattened_probe_table, _build, JoinKind)) { - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); } CUDF_EXPECTS(std::equal(std::cbegin(_build), diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu index 7a478ca2eb3..15aed83b641 100644 --- a/cpp/src/join/join.cu +++ b/cpp/src/join/join.cu @@ -52,7 +52,7 @@ inner_join(table_view const& left_input, if (right.num_rows() > left.num_rows()) { cudf::hash_join hj_obj(left, compare_nulls, stream); auto [right_result, left_result] = hj_obj.inner_join(right, std::nullopt, stream, mr); - return std::make_pair(std::move(left_result), std::move(right_result)); + return std::pair(std::move(left_result), std::move(right_result)); } else { cudf::hash_join hj_obj(right, compare_nulls, stream); return hj_obj.inner_join(left, std::nullopt, stream, mr); diff --git a/cpp/src/join/join_utils.cu b/cpp/src/join/join_utils.cu index 151db830962..1eb2d4cf4a7 100644 --- a/cpp/src/join/join_utils.cu +++ b/cpp/src/join/join_utils.cu @@ -61,7 +61,7 @@ get_trivial_left_join_indices(table_view const& left, std::make_unique>(left.num_rows(), stream, mr); thrust::uninitialized_fill( rmm::exec_policy(stream), right_indices->begin(), right_indices->end(), JoinNoneValue); - return std::make_pair(std::move(left_indices), std::move(right_indices)); + return std::pair(std::move(left_indices), std::move(right_indices)); } VectorPair concatenate_vector_pairs(VectorPair& a, VectorPair& b, rmm::cuda_stream_view stream) @@ -151,7 +151,7 @@ get_left_join_indices_complement(std::unique_ptr> left_invalid_indices->end(), JoinNoneValue); - return std::make_pair(std::move(left_invalid_indices), std::move(right_indices_complement)); + return std::pair(std::move(left_invalid_indices), std::move(right_indices_complement)); } } // namespace detail diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index f9cbb2b5441..b540c013f47 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -81,8 +81,8 @@ mixed_join( case join_kind::FULL_JOIN: return get_trivial_left_join_indices(left_conditional, stream); // Inner joins return empty output because no matches can exist. 
case join_kind::INNER_JOIN: - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); default: CUDF_FAIL("Invalid join kind."); break; } } else if (left_num_rows == 0) { @@ -90,12 +90,12 @@ mixed_join( // Left and inner joins all return empty sets. case join_kind::LEFT_JOIN: case join_kind::INNER_JOIN: - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); // Full joins need to return the trivial complement. case join_kind::FULL_JOIN: { auto ret_flipped = get_trivial_left_join_indices(right_conditional, stream); - return std::make_pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); + return std::pair(std::move(ret_flipped.second), std::move(ret_flipped.first)); } default: CUDF_FAIL("Invalid join kind."); break; } @@ -208,8 +208,8 @@ mixed_join( // all other cases (inner, left semi, and left anti joins) if we reach this // point we can safely return an empty result. if (join_size == 0) { - return std::make_pair(std::make_unique>(0, stream, mr), - std::make_unique>(0, stream, mr)); + return std::pair(std::make_unique>(0, stream, mr), + std::make_unique>(0, stream, mr)); } // Given the number of matches per row, we need to compute the offsets for insertion. @@ -258,7 +258,7 @@ mixed_join( swap_tables); } - auto join_indices = std::make_pair(std::move(left_indices), std::move(right_indices)); + auto join_indices = std::pair(std::move(left_indices), std::move(right_indices)); // For full joins, get the indices in the right table that were not joined to // by any row in the left table. diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu index fecdec0b1b2..f4d8e7678b1 100644 --- a/cpp/src/lists/combine/concatenate_list_elements.cu +++ b/cpp/src/lists/combine/concatenate_list_elements.cu @@ -81,7 +81,7 @@ std::unique_ptr concatenate_lists_ignore_null(column_view const& input, auto [null_mask, null_count] = [&] { if (!build_null_mask) - return std::make_pair(cudf::detail::copy_bitmask(input, stream, mr), input.null_count()); + return std::pair(cudf::detail::copy_bitmask(input, stream, mr), input.null_count()); // The output row will be null only if all lists on the input row are null. auto const lists_dv_ptr = column_device_view::create(lists_column_view(input).child(), stream); diff --git a/cpp/src/lists/copying/scatter_helper.cu b/cpp/src/lists/copying/scatter_helper.cu index fecf6e1c1a1..7220e8b5980 100644 --- a/cpp/src/lists/copying/scatter_helper.cu +++ b/cpp/src/lists/copying/scatter_helper.cu @@ -175,7 +175,7 @@ struct list_child_constructor { source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() ? construct_child_nullmask( list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); + : std::pair(rmm::device_buffer{}, 0); auto child_column = cudf::make_fixed_width_column(source_lists_column_view.child().type(), num_child_rows, @@ -348,7 +348,7 @@ struct list_child_constructor { source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() ? 
construct_child_nullmask( list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); + : std::pair(rmm::device_buffer{}, 0); return cudf::make_lists_column(num_child_rows, std::move(child_offsets), @@ -444,7 +444,7 @@ struct list_child_constructor { source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() ? construct_child_nullmask( list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) - : std::make_pair(rmm::device_buffer{}, 0); + : std::pair(rmm::device_buffer{}, 0); return cudf::make_structs_column(num_child_rows, std::move(child_columns), diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index 09f07a1ca8c..0371065a2e5 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -595,8 +595,7 @@ std::pair, std::vector> hash_partition_table( } stream.synchronize(); // Async D2H copy must finish before returning host vec - return std::make_pair(std::make_unique
(std::move(output_cols)), - std::move(partition_offsets)); + return std::pair(std::make_unique
(std::move(output_cols)), std::move(partition_offsets)); } else { // Compute a scatter map from input to output such that the output rows are // sorted by partition number @@ -613,7 +612,7 @@ std::pair, std::vector> hash_partition_table( input, row_partition_numbers.begin(), row_partition_numbers.end(), input, false, stream, mr); stream.synchronize(); // Async D2H copy must finish before returning host vec - return std::make_pair(std::move(output), std::move(partition_offsets)); + return std::pair(std::move(output), std::move(partition_offsets)); } } @@ -700,7 +699,7 @@ struct dispatch_map_type { auto scattered = cudf::detail::scatter(t, scatter_map.begin(), scatter_map.end(), t, false, stream, mr); - return std::make_pair(std::move(scattered), std::move(partition_offsets)); + return std::pair(std::move(scattered), std::move(partition_offsets)); } template @@ -728,7 +727,7 @@ std::pair, std::vector> hash_partition( // Return empty result if there are no partitions or nothing to hash if (num_partitions <= 0 || input.num_rows() == 0 || table_to_hash.num_columns() == 0) { - return std::make_pair(empty_like(input), std::vector{}); + return std::pair(empty_like(input), std::vector{}); } if (has_nulls(table_to_hash)) { @@ -753,7 +752,7 @@ std::pair, std::vector> partition( CUDF_EXPECTS(not partition_map.has_nulls(), "Unexpected null values in partition_map."); if (num_partitions == 0 or t.num_rows() == 0) { - return std::make_pair(empty_like(t), std::vector{}); + return std::pair(empty_like(t), std::vector{}); } return cudf::type_dispatcher( diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu index 193bb5a4353..9cfad602db0 100644 --- a/cpp/src/partitioning/round_robin.cu +++ b/cpp/src/partitioning/round_robin.cu @@ -104,8 +104,8 @@ std::pair, std::vector> degenerate stream, mr); - return std::make_pair(std::move(uniq_tbl), - cudf::detail::make_std_vector_sync(partition_offsets, stream)); + return std::pair(std::move(uniq_tbl), + cudf::detail::make_std_vector_sync(partition_offsets, stream)); } else { //( num_partitions > nrows ) rmm::device_uvector d_row_indices(nrows, stream); @@ -140,8 +140,8 @@ std::pair, std::vector> degenerate nedges_iter_begin + num_partitions, partition_offsets.begin()); - return std::make_pair(std::move(uniq_tbl), - cudf::detail::make_std_vector_sync(partition_offsets, stream)); + return std::pair(std::move(uniq_tbl), + cudf::detail::make_std_vector_sync(partition_offsets, stream)); } } } // namespace @@ -230,7 +230,7 @@ std::pair, std::vector> round_robin_part auto uniq_tbl = cudf::detail::gather( input, iter_begin, iter_begin + nrows, cudf::out_of_bounds_policy::DONT_CHECK, stream, mr); - auto ret_pair = std::make_pair(std::move(uniq_tbl), std::vector(num_partitions)); + auto ret_pair = std::pair(std::move(uniq_tbl), std::vector(num_partitions)); // this has the effect of rotating the set of partition sizes // right by start_partition positions: diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index 8b696854c25..73b224b0c99 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -76,7 +76,7 @@ std::pair, std::unique_ptr> form_offsets_and_cha cudf::detail::get_value(offsets_column->view(), strings_count, stream); auto chars_column = cudf::strings::detail::create_chars_child_column(bytes, stream, mr); - return std::make_pair(std::move(offsets_column), std::move(chars_column)); + return std::pair(std::move(offsets_column), std::move(chars_column)); } template diff --git 
a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index 70a6252e9b3..9473bed963e 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -1086,7 +1086,7 @@ struct dispatch_from_timestamps_fn { thrust::make_counting_iterator(0), d_timestamps.size(), pfn); - return std::make_pair(std::move(offsets_column), std::move(chars_column)); + return std::pair(std::move(offsets_column), std::move(chars_column)); } template diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index 30e8770c3c2..995b6223ddc 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -670,8 +670,8 @@ std::pair>, int> build_comma auto const is_empty = h_operators.size() == 1 && h_operators[0].type == path_operator_type::END; return is_empty - ? std::make_pair(thrust::nullopt, 0) - : std::make_pair( + ? std::pair(thrust::nullopt, 0) + : std::pair( thrust::make_optional(cudf::detail::make_device_uvector_sync(h_operators, stream)), max_stack_depth); } diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu index d496b46bc36..7a3e0fb0243 100644 --- a/cpp/src/strings/repeat_strings.cu +++ b/cpp/src/strings/repeat_strings.cu @@ -283,7 +283,7 @@ auto make_strings_children(Func fn, for_each_fn(fn); } - return std::make_pair(std::move(offsets_column), std::move(chars_column)); + return std::pair(std::move(offsets_column), std::move(chars_column)); } } // namespace @@ -345,7 +345,7 @@ std::pair, int64_t> repeat_strings_output_sizes( auto const strings_count = input.size(); if (strings_count == 0) { - return std::make_pair(make_empty_column(type_to_id()), int64_t{0}); + return std::pair(make_empty_column(type_to_id()), int64_t{0}); } auto output_sizes = make_numeric_column( @@ -374,7 +374,7 @@ std::pair, int64_t> repeat_strings_output_sizes( int64_t{0}, thrust::plus{}); - return std::make_pair(std::move(output_sizes), total_bytes); + return std::pair(std::move(output_sizes), total_bytes); } } // namespace detail diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 852a32bed3d..a2c173cae5f 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -371,7 +371,7 @@ std::tuple> superimpose_paren auto [new_child_mask, null_count] = [&] { if (not child.nullable()) { // Adopt parent STRUCT's null mask. - return std::make_pair(structs_column.null_mask(), 0); + return std::pair(structs_column.null_mask(), 0); } // Both STRUCT and child are nullable. AND() for the child's new null mask. 
@@ -387,8 +387,8 @@ std::tuple> superimpose_paren stream, mr); ret_validity_buffers.push_back(std::move(new_mask)); - return std::make_pair( - reinterpret_cast(ret_validity_buffers.back().data()), null_count); + return std::pair(reinterpret_cast(ret_validity_buffers.back().data()), + null_count); }(); return cudf::column_view( diff --git a/cpp/src/text/subword/data_normalizer.cu b/cpp/src/text/subword/data_normalizer.cu index 2ed59c3ae0c..71f9e3f7043 100644 --- a/cpp/src/text/subword/data_normalizer.cu +++ b/cpp/src/text/subword/data_normalizer.cu @@ -278,8 +278,8 @@ uvector_pair data_normalizer::normalize(char const* d_strings, rmm::cuda_stream_view stream) const { if (num_strings == 0) - return std::make_pair(std::make_unique>(0, stream), - std::make_unique>(0, stream)); + return std::pair(std::make_unique>(0, stream), + std::make_unique>(0, stream)); // copy offsets to working memory size_t const num_offsets = num_strings + 1; @@ -294,8 +294,8 @@ uvector_pair data_normalizer::normalize(char const* d_strings, }); uint32_t const bytes_count = d_strings_offsets->element(num_strings, stream); if (bytes_count == 0) // if no bytes, nothing to do - return std::make_pair(std::make_unique>(0, stream), - std::make_unique>(0, stream)); + return std::pair(std::make_unique>(0, stream), + std::make_unique>(0, stream)); cudf::detail::grid_1d const grid{static_cast(bytes_count), THREADS_PER_BLOCK, 1}; size_t const threads_on_device = grid.num_threads_per_block * grid.num_blocks; diff --git a/cpp/src/transform/bools_to_mask.cu b/cpp/src/transform/bools_to_mask.cu index 2cf4771890b..a1f49a5685f 100644 --- a/cpp/src/transform/bools_to_mask.cu +++ b/cpp/src/transform/bools_to_mask.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,7 +34,7 @@ std::pair, cudf::size_type> bools_to_mask( { CUDF_EXPECTS(input.type().id() == type_id::BOOL8, "Input is not of type bool"); - if (input.is_empty()) { return std::make_pair(std::make_unique(), 0); } + if (input.is_empty()) { return std::pair(std::make_unique(), 0); } auto input_device_view_ptr = column_device_view::create(input, stream); auto input_device_view = *input_device_view_ptr; @@ -45,12 +45,12 @@ std::pair, cudf::size_type> bools_to_mask( auto mask = detail::valid_if(input_begin, input_begin + input.size(), pred, stream, mr); - return std::make_pair(std::make_unique(std::move(mask.first)), mask.second); + return std::pair(std::make_unique(std::move(mask.first)), mask.second); } else { auto mask = detail::valid_if( input_device_view.begin(), input_device_view.end(), pred, stream, mr); - return std::make_pair(std::make_unique(std::move(mask.first)), mask.second); + return std::pair(std::make_unique(std::move(mask.first)), mask.second); } } diff --git a/cpp/src/transform/encode.cu b/cpp/src/transform/encode.cu index 04821b09eab..60769665fca 100644 --- a/cpp/src/transform/encode.cu +++ b/cpp/src/transform/encode.cu @@ -57,7 +57,7 @@ std::pair, std::unique_ptr> encode( auto indices_column = cudf::detail::lower_bound( sorted_unique_keys->view(), input_table, column_order, null_precedence, stream, mr); - return std::make_pair(std::move(sorted_unique_keys), std::move(indices_column)); + return std::pair(std::move(sorted_unique_keys), std::move(indices_column)); } } // namespace detail diff --git a/cpp/src/transform/nans_to_nulls.cu b/cpp/src/transform/nans_to_nulls.cu index ee63e6d366f..42d41b44779 100644 --- a/cpp/src/transform/nans_to_nulls.cu +++ b/cpp/src/transform/nans_to_nulls.cu @@ -53,8 +53,7 @@ struct dispatch_nan_to_null { stream, mr); - return std::make_pair(std::make_unique(std::move(mask.first)), - mask.second); + return std::pair(std::make_unique(std::move(mask.first)), mask.second); } else { auto pred = [input_device_view] __device__(cudf::size_type idx) { return not(std::isnan(input_device_view.element(idx))); @@ -66,8 +65,7 @@ struct dispatch_nan_to_null { stream, mr); - return std::make_pair(std::make_unique(std::move(mask.first)), - mask.second); + return std::pair(std::make_unique(std::move(mask.first)), mask.second); } } @@ -85,7 +83,7 @@ struct dispatch_nan_to_null { std::pair, cudf::size_type> nans_to_nulls( column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (input.is_empty()) { return std::make_pair(std::make_unique(), 0); } + if (input.is_empty()) { return std::pair(std::make_unique(), 0); } return cudf::type_dispatcher(input.type(), dispatch_nan_to_null{}, input, stream, mr); } diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 16aee349bb5..b1a8858f847 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -89,7 +89,7 @@ struct one_hot_encode_launcher { auto views = cudf::split(all_encodings->view(), split_indices); table_view encodings_view{views}; - return std::make_pair(std::move(all_encodings), encodings_view); + return std::pair(std::move(all_encodings), encodings_view); } template , table_view> one_hot_encode(column_view const& { CUDF_EXPECTS(input.type() == categories.type(), "Mismatch type between input and categories."); - if (categories.is_empty()) { - return std::make_pair(make_empty_column(type_id::BOOL8), table_view{}); - } + if (categories.is_empty()) { return std::pair(make_empty_column(type_id::BOOL8), table_view{}); } if (input.is_empty()) { auto empty_data = make_empty_column(type_id::BOOL8); std::vector views(categories.size(), empty_data->view()); - return std::make_pair(std::move(empty_data), table_view{views}); + return std::pair(std::move(empty_data), table_view{views}); } return type_dispatcher(input.type(), one_hot_encode_launcher{}, input, categories, stream, mr); diff --git a/cpp/src/transpose/transpose.cu b/cpp/src/transpose/transpose.cu index b5b00b11a0f..a87cf60a252 100644 --- a/cpp/src/transpose/transpose.cu +++ b/cpp/src/transpose/transpose.cu @@ -37,7 +37,7 @@ std::pair, table_view> transpose(table_view const& input { // If there are no rows in the input, return successfully if (input.num_columns() == 0 || input.num_rows() == 0) { - return std::make_pair(std::make_unique(), table_view{}); + return std::pair(std::make_unique(), table_view{}); } // Check datatype homogeneity @@ -54,7 +54,7 @@ std::pair, table_view> transpose(table_view const& input auto splits = std::vector(splits_iter, splits_iter + input.num_rows() - 1); auto output_column_views = split(output_column->view(), splits, stream); - return std::make_pair(std::move(output_column), table_view(output_column_views)); + return std::pair(std::move(output_column), table_view(output_column_views)); } } // namespace detail diff --git a/cpp/tests/groupby/m2_tests.cpp b/cpp/tests/groupby/m2_tests.cpp index be7d6c1ce05..6f5a04e3752 100644 --- a/cpp/tests/groupby/m2_tests.cpp +++ b/cpp/tests/groupby/m2_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,8 +48,7 @@ auto compute_M2(cudf::column_view const& keys, cudf::column_view const& values) auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys})); auto result = gb_obj.aggregate(requests); - return std::make_pair(std::move(result.first->release()[0]), - std::move(result.second[0].results[0])); + return std::pair(std::move(result.first->release()[0]), std::move(result.second[0].results[0])); } } // namespace diff --git a/cpp/tests/groupby/merge_lists_tests.cpp b/cpp/tests/groupby/merge_lists_tests.cpp index 7c24c6267ca..593bb7c50af 100644 --- a/cpp/tests/groupby/merge_lists_tests.cpp +++ b/cpp/tests/groupby/merge_lists_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,8 +47,7 @@ auto merge_lists(vcol_views const& keys_cols, vcol_views const& values_cols) auto gb_obj = cudf::groupby::groupby(cudf::table_view({*keys})); auto result = gb_obj.aggregate(requests); - return std::make_pair(std::move(result.first->release()[0]), - std::move(result.second[0].results[0])); + return std::pair(std::move(result.first->release()[0]), std::move(result.second[0].results[0])); } } // namespace diff --git a/cpp/tests/groupby/merge_m2_tests.cpp b/cpp/tests/groupby/merge_m2_tests.cpp index 60067e78022..79ffebf146c 100644 --- a/cpp/tests/groupby/merge_m2_tests.cpp +++ b/cpp/tests/groupby/merge_m2_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,10 +67,9 @@ auto compute_partial_results(cudf::column_view const& keys, cudf::column_view co auto [out_keys, out_results] = gb_obj.aggregate(requests); auto const num_output_rows = out_keys->num_rows(); - return std::make_pair( - std::move(out_keys->release()[0]), - cudf::make_structs_column( - num_output_rows, std::move(out_results[0].results), 0, rmm::device_buffer{})); + return std::pair(std::move(out_keys->release()[0]), + cudf::make_structs_column( + num_output_rows, std::move(out_results[0].results), 0, rmm::device_buffer{})); } /** @@ -93,8 +92,7 @@ auto merge_M2(vcol_views const& keys_cols, vcol_views const& values_cols) auto gb_obj = cudf::groupby::groupby(cudf::table_view({*keys})); auto result = gb_obj.aggregate(requests); - return std::make_pair(std::move(result.first->release()[0]), - std::move(result.second[0].results[0])); + return std::pair(std::move(result.first->release()[0]), std::move(result.second[0].results[0])); } } // namespace diff --git a/cpp/tests/groupby/merge_sets_tests.cpp b/cpp/tests/groupby/merge_sets_tests.cpp index 1e2f0c9fa9e..57f67f6b81a 100644 --- a/cpp/tests/groupby/merge_sets_tests.cpp +++ b/cpp/tests/groupby/merge_sets_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,8 +47,7 @@ auto merge_sets(vcol_views const& keys_cols, vcol_views const& values_cols) auto gb_obj = cudf::groupby::groupby(cudf::table_view({*keys})); auto result = gb_obj.aggregate(requests); - return std::make_pair(std::move(result.first->release()[0]), - std::move(result.second[0].results[0])); + return std::pair(std::move(result.first->release()[0]), std::move(result.second[0].results[0])); } } // namespace diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index d1dc60119b6..4b481ade83f 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -148,7 +148,7 @@ std::pair, std::shared_ptr> get_table auto schema = std::make_shared(schema_vector); - return std::make_pair( + return std::pair( std::make_unique(std::move(columns)), arrow::Table::Make( schema, {int64array, string_array, dict_array, boolarray, list_array, struct_array})); diff --git a/cpp/tests/join/conditional_join_tests.cu b/cpp/tests/join/conditional_join_tests.cu index 73b355d496d..13852027bf0 100644 --- a/cpp/tests/join/conditional_join_tests.cu +++ b/cpp/tests/join/conditional_join_tests.cu @@ -93,7 +93,7 @@ std::pair, std::vector> gen_random_repeated_columns( std::mt19937 gen(rd()); std::shuffle(left.begin(), left.end(), gen); std::shuffle(right.begin(), right.end(), gen); - return std::make_pair(std::move(left), std::move(right)); + return std::pair(std::move(left), std::move(right)); } // Generate a single pair of left/right nullable columns of random data @@ -120,8 +120,8 @@ gen_random_nullable_repeated_columns(unsigned int N = 10000, unsigned int num_re return uniform_dist(gen) > 0.5; }); - return std::make_pair(std::make_pair(std::move(left), std::move(left_nulls)), - std::make_pair(std::move(right), std::move(right_nulls))); + return std::pair(std::pair(std::move(left), std::move(left_nulls)), + std::pair(std::move(right), std::move(right_nulls))); } } // namespace diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index f560ce7f20c..8ed50c8fb39 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -67,7 +67,7 @@ struct JoinTest : public cudf::test::BaseFixture { auto gold_sort_order = cudf::sorted_order(gold); auto sorted_gold = cudf::gather(gold, *gold_sort_order); - return std::make_pair(std::move(sorted_gold), std::move(sorted_result)); + return std::pair(std::move(sorted_gold), std::move(sorted_result)); } }; diff --git a/cpp/tests/join/mixed_join_tests.cu b/cpp/tests/join/mixed_join_tests.cu index df5b1f5c14a..edcf1d1be27 100644 --- a/cpp/tests/join/mixed_join_tests.cu +++ b/cpp/tests/join/mixed_join_tests.cu @@ -94,7 +94,7 @@ std::pair, std::vector> gen_random_repeated_columns( std::mt19937 gen(rd()); std::shuffle(left.begin(), left.end(), gen); std::shuffle(right.begin(), right.end(), gen); - return std::make_pair(std::move(left), std::move(right)); + return std::pair(std::move(left), std::move(right)); } // Generate a single pair of left/right nullable columns of random data @@ -121,8 +121,8 @@ gen_random_nullable_repeated_columns(unsigned int N = 10000, unsigned int num_re return uniform_dist(gen) > 0.5; }); - return std::make_pair(std::make_pair(std::move(left), std::move(left_nulls)), - std::make_pair(std::move(right), std::move(right_nulls))); + return std::pair(std::pair(std::move(left), std::move(left_nulls)), + std::pair(std::move(right), std::move(right_nulls))); } } // namespace diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index 
ea26cad3b59..129d1ad66f3 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -652,8 +652,8 @@ TYPED_TEST(MergeTest_, NMerge1KeyColumns) std::vector> facts{}; std::vector tables{}; for (int i = 0; i < num_tables; ++i) { - facts.emplace_back(std::make_pair(PairT0(sequence0, sequence0 + inputRows), - PairT1(sequence1, sequence1 + inputRows))); + facts.emplace_back(std::pair(PairT0(sequence0, sequence0 + inputRows), + PairT1(sequence1, sequence1 + inputRows))); tables.push_back(cudf::table_view{{facts.back().first, facts.back().second}}); } std::vector key_cols{0}; diff --git a/cpp/tests/search/search_struct_test.cpp b/cpp/tests/search/search_struct_test.cpp index a1f0b1d81cf..159b082890a 100644 --- a/cpp/tests/search/search_struct_test.cpp +++ b/cpp/tests/search/search_struct_test.cpp @@ -57,7 +57,7 @@ auto search_bounds(cudf::column_view const& t_col_view, auto const values = cudf::table_view{std::vector{values_col->view()}}; auto result_lower_bound = cudf::lower_bound(t, values, column_orders, null_precedence); auto result_upper_bound = cudf::upper_bound(t, values, column_orders, null_precedence); - return std::make_pair(std::move(result_lower_bound), std::move(result_upper_bound)); + return std::pair(std::move(result_lower_bound), std::move(result_upper_bound)); } auto search_bounds(std::unique_ptr const& t_col, diff --git a/cpp/tests/stream_compaction/distinct_count_tests.cpp b/cpp/tests/stream_compaction/distinct_count_tests.cpp index 0529539c4b2..31bbd43c78d 100644 --- a/cpp/tests/stream_compaction/distinct_count_tests.cpp +++ b/cpp/tests/stream_compaction/distinct_count_tests.cpp @@ -71,7 +71,7 @@ TYPED_TEST(TypedDistinctCount, TableNoNull) std::vector> pair_input; std::transform( input1.begin(), input1.end(), input2.begin(), std::back_inserter(pair_input), [](T a, T b) { - return std::make_pair(a, b); + return std::pair(a, b); }); cudf::test::fixed_width_column_wrapper input_col1(input1.begin(), input1.end()); diff --git a/cpp/tests/stream_compaction/unique_count_tests.cpp b/cpp/tests/stream_compaction/unique_count_tests.cpp index 3285cd1a711..591fe042592 100644 --- a/cpp/tests/stream_compaction/unique_count_tests.cpp +++ b/cpp/tests/stream_compaction/unique_count_tests.cpp @@ -71,7 +71,7 @@ TYPED_TEST(TypedUniqueCount, TableNoNull) std::vector> pair_input; std::transform( input1.begin(), input1.end(), input2.begin(), std::back_inserter(pair_input), [](T a, T b) { - return std::make_pair(a, b); + return std::pair(a, b); }); cudf::test::fixed_width_column_wrapper input_col1(input1.begin(), input1.end()); diff --git a/cpp/tests/strings/translate_tests.cpp b/cpp/tests/strings/translate_tests.cpp index e928065dca4..53c6982b880 100644 --- a/cpp/tests/strings/translate_tests.cpp +++ b/cpp/tests/strings/translate_tests.cpp @@ -38,7 +38,7 @@ std::pair make_entry(const char* from, const c cudf::char_utf8 out = 0; cudf::strings::detail::to_char_utf8(from, in); if (to) cudf::strings::detail::to_char_utf8(to, out); - return std::make_pair(in, out); + return std::pair(in, out); } TEST_F(StringsTranslateTest, Translate) From 3d92bf257bcfb46fe5386821d7f81d4b9f4e6dd5 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 27 Apr 2022 17:55:33 -0400 Subject: [PATCH 4/9] Fix scatter for all-empty-string column case (#10724) Closes #10717 Fixes bug introduced with changes in #10673 which uses the `cudf::make_strings_column` that accepts a span of `string_view` objects with a null-placeholder. 
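For context, a minimal sketch of the two sentinel values involved (illustrative only, not part of the patch itself):

```cpp
#include <cudf/strings/string_view.hpp>

// Sentinel that the span-based make_strings_column factory interprets
// as "this row is null":
cudf::string_view const null_placeholder{nullptr, 0};

// A default-constructed string_view is an empty but valid string; the fix
// below maps empty source strings to this value so that they cannot be
// mistaken for the null placeholder.
cudf::string_view const empty_string_view{};
```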
The placeholder can be unintentionally created in `create_string_vector_from_column` when given a strings column where all the rows are empty. The utility is fixed to prevent creating the placeholder for empty strings. A gtest was added to scatter from/to an all-empty strings column to verify this behavior. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) - Robert Maynard (https://github.com/robertmaynard) URL: https://github.com/rapidsai/cudf/pull/10724 --- cpp/include/cudf/strings/detail/scatter.cuh | 7 ++++++- cpp/src/lists/copying/scatter_helper.cu | 15 +++++++++----- cpp/src/strings/utilities.cu | 22 +++++++++++++-------- cpp/tests/copying/scatter_tests.cpp | 13 +++++++++++- 4 files changed, 42 insertions(+), 15 deletions(-) diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh index f167206f36b..cfede60c771 100644 --- a/cpp/include/cudf/strings/detail/scatter.cuh +++ b/cpp/include/cudf/strings/detail/scatter.cuh @@ -67,8 +67,13 @@ std::unique_ptr scatter( // create vector of string_view's to scatter into rmm::device_uvector target_vector = create_string_vector_from_column(target, stream); + // this ensures empty strings are not mapped to nulls in the make_strings_column function + auto const size = thrust::distance(begin, end); + auto itr = thrust::make_transform_iterator( + begin, [] __device__(string_view const sv) { return sv.empty() ? string_view{} : sv; }); + // do the scatter - thrust::scatter(rmm::exec_policy(stream), begin, end, scatter_map, target_vector.begin()); + thrust::scatter(rmm::exec_policy(stream), itr, itr + size, scatter_map, target_vector.begin()); // build the output column auto sv_span = cudf::device_span(target_vector); diff --git a/cpp/src/lists/copying/scatter_helper.cu b/cpp/src/lists/copying/scatter_helper.cu index 7220e8b5980..38f738b4035 100644 --- a/cpp/src/lists/copying/scatter_helper.cu +++ b/cpp/src/lists/copying/scatter_helper.cu @@ -232,6 +232,8 @@ struct list_child_constructor { auto string_views = rmm::device_uvector(num_child_rows, stream); + auto const null_string_view = string_view{nullptr, 0}; // placeholder for factory function + thrust::transform( rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -241,7 +243,8 @@ struct list_child_constructor { offset_size = list_offsets.size(), d_list_vector = list_vector.begin(), source_lists, - target_lists] __device__(auto index) { + target_lists, + null_string_view] __device__(auto index) { auto const list_index_iter = thrust::upper_bound(thrust::seq, offset_begin, offset_begin + offset_size, index); auto const list_index = @@ -254,14 +257,16 @@ struct list_child_constructor { auto child_strings_column = lists_column.child(); auto strings_offset = lists_offsets_ptr[row_index] + intra_index; - return child_strings_column.is_null(strings_offset) - ? string_view{nullptr, 0} - : child_strings_column.template element(strings_offset); + if (child_strings_column.is_null(strings_offset)) { return null_string_view; } + auto const d_str = child_strings_column.template element(strings_offset); + // ensure a string from an all-empty column is not mapped to the null placeholder + auto const empty_string_view = string_view{}; + return d_str.empty() ? empty_string_view : d_str; }); // string_views should now have been populated with source and target references. 
auto sv_span = cudf::device_span(string_views); - return cudf::make_strings_column(sv_span, string_view{nullptr, 0}, stream, mr); + return cudf::make_strings_column(sv_span, null_string_view, stream, mr); } /** diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index a7ef2afb47f..ac073f8efbc 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -46,14 +46,20 @@ rmm::device_uvector create_string_vector_from_column( auto strings_vector = rmm::device_uvector(input.size(), stream, mr); - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.size()), - strings_vector.begin(), - [d_strings = *d_strings] __device__(size_type idx) { - return d_strings.is_null(idx) ? string_view{nullptr, 0} : d_strings.element(idx); - }); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.size()), + strings_vector.begin(), + [d_strings = *d_strings] __device__(size_type idx) { + // placeholder for factory function that takes a span of string_views + auto const null_string_view = string_view{nullptr, 0}; + if (d_strings.is_null(idx)) { return null_string_view; } + auto const d_str = d_strings.element(idx); + // special case when the entire column is filled with empty strings: + // here the empty d_str may have a d_str.data() == nullptr + auto const empty_string_view = string_view{}; + return d_str.empty() ? empty_string_view : d_str; + }); return strings_vector; } diff --git a/cpp/tests/copying/scatter_tests.cpp b/cpp/tests/copying/scatter_tests.cpp index 28ebb6cbcb6..306ab8a3d5c 100644 --- a/cpp/tests/copying/scatter_tests.cpp +++ b/cpp/tests/copying/scatter_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -573,6 +573,17 @@ TEST_F(ScatterStringsTests, ScatterScalarNoNulls) CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table); } +TEST_F(ScatterStringsTests, EmptyStrings) +{ + cudf::test::strings_column_wrapper input{"", "", ""}; + cudf::table_view t({input}); + + // Test for issue 10717: all-empty-string column scatter + auto map = cudf::test::fixed_width_column_wrapper({0}); + auto result = cudf::scatter(t, map, t); + CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), t); +} + template class BooleanMaskScatter : public cudf::test::BaseFixture { }; From f0b91179b38ba7224a33a9b1390230f8575f886a Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 27 Apr 2022 17:40:48 -0500 Subject: [PATCH 5/9] Revise CONTRIBUTING.md (#10644) I have revised the `CONTRIBUTING.md` file to address several pieces that are out of date. I also revised a good portion of the text and updated external references. Finally, I wrapped the lines at 100 characters to align with other Markdown files in the C++ docs. I would prefer to adopt a convention of one sentence per line if reviewers agree, but went with the 100 character wrapping for now to be consistent with other docs. 
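For quick reference, the pre-commit workflow the revised document describes amounts to the following (the commands are the ones added in the diff below):

```bash
# Install pre-commit via conda (or pip), then run the hooks on demand:
conda install -c conda-forge pre-commit
pre-commit run

# Optionally install the hooks so they run automatically on every commit:
pre-commit install
```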
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - https://github.com/brandon-b-miller - Jason Lowe (https://github.com/jlowe) - Jake Hemstad (https://github.com/jrhemstad) URL: https://github.com/rapidsai/cudf/pull/10644 --- CONTRIBUTING.md | 359 +++++++++++++++++++++++------------------------- 1 file changed, 171 insertions(+), 188 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6d1c0528832..db8a8d88b99 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,69 +1,79 @@ # Contributing to cuDF -Contributions to cuDF fall into the following three categories. - -1. To report a bug, request a new feature, or report a problem with - documentation, please file an [issue](https://github.com/rapidsai/cudf/issues/new/choose) - describing in detail the problem or new feature. The RAPIDS team evaluates - and triages issues, and schedules them for a release. If you believe the - issue needs priority attention, please comment on the issue to notify the - team. -2. To propose and implement a new Feature, please file a new feature request - [issue](https://github.com/rapidsai/cudf/issues/new/choose). Describe the - intended feature and discuss the design and implementation with the team and - community. Once the team agrees that the plan looks good, go ahead and - implement it, using the [code contributions](#code-contributions) guide below. -3. To implement a feature or bug-fix for an existing outstanding issue, please - Follow the [code contributions](#code-contributions) guide below. If you - need more context on a particular issue, please ask in a comment. - -As contributors and maintainers to this project, -you are expected to abide by cuDF's code of conduct. -More information can be found at: [Contributor Code of Conduct](https://docs.rapids.ai/resources/conduct/). +Contributions to cuDF fall into the following categories: + +1. To report a bug, request a new feature, or report a problem with documentation, please file an + [issue](https://github.com/rapidsai/cudf/issues/new/choose) describing the problem or new feature + in detail. The RAPIDS team evaluates and triages issues, and schedules them for a release. If you + believe the issue needs priority attention, please comment on the issue to notify the team. +2. To propose and implement a new feature, please file a new feature request + [issue](https://github.com/rapidsai/cudf/issues/new/choose). Describe the intended feature and + discuss the design and implementation with the team and community. Once the team agrees that the + plan looks good, go ahead and implement it, using the [code contributions](#code-contributions) + guide below. +3. To implement a feature or bug fix for an existing issue, please follow the [code + contributions](#code-contributions) guide below. If you need more context on a particular issue, + please ask in a comment. + +As contributors and maintainers to this project, you are expected to abide by cuDF's code of +conduct. More information can be found at: +[Contributor Code of Conduct](https://docs.rapids.ai/resources/conduct/). ## Code contributions ### Your first issue -1. Follow the guide at the bottom of this page for [Setting Up Your Build Environment](#setting-up-your-build-environment). -2. Find an issue to work on. 
The best way is to look for the [good first issue](https://github.com/rapidsai/cudf/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) - or [help wanted](https://github.com/rapidsai/cudf/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels. +1. Follow the guide at the bottom of this page for + [Setting up your build environment](#setting-up-your-build-environment). +2. Find an issue to work on. The best way is to look for the + [good first issue](https://github.com/rapidsai/cudf/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) + or [help wanted](https://github.com/rapidsai/cudf/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) + labels. 3. Comment on the issue stating that you are going to work on it. -4. Code! Make sure to update unit tests! -5. When done, [create your pull request](https://github.com/rapidsai/cudf/compare). -6. Verify that CI passes all [status checks](https://help.github.com/articles/about-status-checks/). Fix if needed. -7. Wait for other developers to review your code and update code as needed. -8. Once reviewed and approved, a RAPIDS developer will merge your pull request. - -Remember, if you are unsure about anything, don't hesitate to comment on issues -and ask for clarifications! +4. Create a fork of the cudf repository and check out a branch with a name that + describes your planned work. For example, `fix-documentation`. +5. Write code to address the issue or implement the feature. +6. Add unit tests and unit benchmarks. +7. [Create your pull request](https://github.com/rapidsai/cudf/compare). +8. Verify that CI passes all [status checks](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks). + Fix if needed. +9. Wait for other developers to review your code and update code as needed. +10. Once reviewed and approved, a RAPIDS developer will merge your pull request. + +If you are unsure about anything, don't hesitate to comment on issues and ask for clarification! ### Seasoned developers -Once you have gotten your feet wet and are more comfortable with the code, you -can look at the prioritized issues for our next release in our [project boards](https://github.com/rapidsai/cudf/projects). - -> **Pro Tip:** Always look at the release board with the highest number for -issues to work on. This is where RAPIDS developers also focus their efforts. +Once you have gotten your feet wet and are more comfortable with the code, you can look at the +prioritized issues for our next release in our +[project boards](https://github.com/rapidsai/cudf/projects). -Look at the unassigned issues, and find an issue to which you are comfortable -contributing. Start with _Step 3_ above, commenting on the issue to let -others know you are working on it. If you have any questions related to the -implementation of the issue, ask them in the issue instead of the PR. +**Note:** Always look at the release board that is +[currently under development](https://docs.rapids.ai/maintainers) for issues to work on. This is +where RAPIDS developers also focus their efforts. -## Setting Up Your Build Environment +Look at the unassigned issues, and find an issue to which you are comfortable contributing. Start +with _Step 3_ above, commenting on the issue to let others know you are working on it. 
If you have +any questions related to the implementation of the issue, ask them in the issue instead of the PR. -The following instructions are for developers and contributors to cuDF OSS development. These instructions are tested on Linux Ubuntu 16.04 & 18.04. Use these instructions to build cuDF from source and contribute to its development. Other operating systems may be compatible, but are not currently tested. +## Setting up your build environment +The following instructions are for developers and contributors to cuDF development. These +instructions are tested on Ubuntu Linux LTS releases. Use these instructions to build cuDF from +source and contribute to its development. Other operating systems may be compatible, but are not +currently tested. +Building cudf with the provided conda environment is recommended for users who wish to enable all +library features. The following instructions are for building with a conda environment. Dependencies +for a minimal build of libcudf without using conda are also listed below. ### General requirements Compilers: -* `gcc` version 9.3+ -* `nvcc` version 11.5+ -* `cmake` version 3.20.1+ +* `gcc` version 9.3+ +* `nvcc` version 11.5+ +* `cmake` version 3.20.1+ CUDA/GPU: @@ -71,127 +81,166 @@ CUDA/GPU: * NVIDIA driver 450.80.02+ * Pascal architecture or better -You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). +You can obtain CUDA from +[https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). -### Create the build Environment +### Create the build environment + +- Clone the repository: -- Clone the repository and submodules ```bash CUDF_HOME=$(pwd)/cudf git clone https://github.com/rapidsai/cudf.git $CUDF_HOME cd $CUDF_HOME -git submodule update --init --remote --recursive ``` + +#### Building with a conda environment + +**Note:** Using a conda environment is the easiest way to satisfy the library's dependencies. +Instructions for a minimal build environment without conda are included below. + - Create the conda development environment `cudf_dev`: + ```bash # create the conda environment (assuming in base `cudf` directory) -# note: RAPIDS currently doesn't support `channel_priority: strict`; use `channel_priority: flexible` instead +# note: RAPIDS currently doesn't support `channel_priority: strict`; +# use `channel_priority: flexible` instead conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda11.5.yml # activate the environment conda activate cudf_dev ``` -- For other CUDA versions, check the corresponding cudf_dev_cuda*.yml file in conda/environments + +- **Note**: the conda environment files are updated frequently, so the + development environment may also need to be updated if dependency versions or + pinnings are changed. + +- For other CUDA versions, check the corresponding `cudf_dev_cuda*.yml` file in + `conda/environments/`. + +#### Building without a conda environment + +- libcudf has the following minimal dependencies (in addition to those listed in the [General + requirements](#general-requirements)). The packages listed below use Ubuntu package names: + + - `build-essential` + - `libssl-dev` + - `libz-dev` + - `libpython3-dev` (required if building cudf) ### Build cuDF from source -- A `build.sh` script is provided in `$CUDF_HOME`. Running the script with no additional arguments will install the `libcudf`, `cudf` and `dask_cudf` libraries. 
By default, the libraries are installed to the `$CONDA_PREFIX` directory. To install into a different location, set the location in `$INSTALL_PREFIX`. Finally, note that the script depends on the `nvcc` executable being on your path, or defined in `$CUDACXX`. +- A `build.sh` script is provided in `$CUDF_HOME`. Running the script with no additional arguments + will install the `libcudf`, `cudf` and `dask_cudf` libraries. By default, the libraries are + installed to the `$CONDA_PREFIX` directory. To install into a different location, set the location + in `$INSTALL_PREFIX`. Finally, note that the script depends on the `nvcc` executable being on your + path, or defined in `$CUDACXX`. + ```bash cd $CUDF_HOME # Choose one of the following commands, depending on whether -# you want to build and install the libcudf C++ library only, +# you want to build and install the libcudf C++ library only, # or include the cudf and/or dask_cudf Python libraries: ./build.sh # libcudf, cudf and dask_cudf ./build.sh libcudf # libcudf only -./build.sh libcudf cudf # libcudf and cudf only +./build.sh libcudf cudf # libcudf and cudf only ``` -- Other libraries like `cudf-kafka` and `custreamz` can be installed with this script. For the complete list of libraries as well as details about the script usage, run the `help` command: + +- Other libraries like `cudf-kafka` and `custreamz` can be installed with this script. For the + complete list of libraries as well as details about the script usage, run the `help` command: + ```bash -./build.sh --help +./build.sh --help ``` ### Build, install and test cuDF libraries for contributors -The general workflow is provided below. Please, also see the last section about [code formatting](###code-formatting). +The general workflow is provided below. Please also see the last section about +[code formatting](#code-formatting). 
#### `libcudf` (C++) -If you're only interested in building the library (and not the unit tests): - +- If you're only interested in building the library (and not the unit tests): + ```bash cd $CUDF_HOME ./build.sh libcudf ``` -If, in addition, you want to build tests: + +- If, in addition, you want to build tests: ```bash ./build.sh libcudf tests ``` -To run the tests: + +- To run the tests: ```bash -make test +make test ``` #### `cudf` (Python) - First, build the `libcudf` C++ library following the steps above -- To build and install in edit/develop `cudf` python package: +- To build and install in edit/develop `cudf` Python package: ```bash cd $CUDF_HOME/python/cudf python setup.py build_ext --inplace python setup.py develop ``` -- To run `cudf` tests : +- To run `cudf` tests: ```bash cd $CUDF_HOME/python -py.test -v cudf/cudf/tests +pytest -v cudf/cudf/tests ``` #### `dask-cudf` (Python) - First, build the `libcudf` C++ and `cudf` Python libraries following the steps above -- To install in edit/develop mode the `dask-cudf` python package: +- To install the `dask-cudf` Python package in editable/develop mode: ```bash cd $CUDF_HOME/python/dask_cudf python setup.py build_ext --inplace python setup.py develop ``` -- To run `dask_cudf` tests : +- To run `dask_cudf` tests: ```bash cd $CUDF_HOME/python -py.test -v dask_cudf +pytest -v dask_cudf ``` #### `libcudf_kafka` (C++) -If you're only interested in building the library (and not the unit tests): - +- If you're only interested in building the library (and not the unit tests): + ```bash cd $CUDF_HOME ./build.sh libcudf_kafka ``` -If, in addition, you want to build tests: + +- If, in addition, you want to build tests: ```bash ./build.sh libcudf_kafka tests ``` -To run the tests: + +- To run the tests: ```bash -make test +make test ``` #### `cudf-kafka` (Python) -- First, build the `libcudf` and `libcudf_kafka` following the steps above +- First, build the `libcudf` and `libcudf_kafka` libraries following the steps above + +- To install the `cudf-kafka` Python package in editable/develop mode: -- To install in edit/develop mode the `cudf-kafka` python package: ```bash cd $CUDF_HOME/python/cudf_kafka python setup.py build_ext --inplace @@ -202,7 +251,8 @@ python setup.py develop - First, build `libcudf`, `libcudf_kafka`, and `cudf_kafka` following the steps above -- To install in edit/develop mode the `custreamz` python package: +- To install the `custreamz` Python package in editable/develop mode: + ```bash cd $CUDF_HOME/python/custreamz python setup.py build_ext --inplace @@ -210,40 +260,45 @@ python setup.py develop ``` - To run `custreamz` tests : + ```bash cd $CUDF_HOME/python -py.test -v custreamz +pytest -v custreamz ``` #### `cudf` (Java): - First, build the `libcudf` C++ library following the steps above -- Then, refer to [Java README](https://github.com/rapidsai/cudf/blob/branch-21.10/java/README.md) - +- Then, refer to the [Java README](java/README.md) -Done! You are ready to develop for the cuDF OSS project. But please go to [code formatting](###code-formatting) to ensure that you contributing code follows the expected format. +Done! You are ready to develop for the cuDF project. Please review the project's +[code formatting guidelines](#code-formatting). ## Debugging cuDF -### Building Debug mode from source +### Building in debug mode from source -Follow the [above instructions](####build-cudf-from-source) to build from source and add `-g` to the `./build.sh` command. 
+Follow the instructions to [build from source](#build-cudf-from-source) and add `-g` to the +`./build.sh` command. For example: + ```bash ./build.sh libcudf -g ``` -This builds `libcudf` in Debug mode which enables some `assert` safety checks and includes symbols in the library for debugging. +This builds `libcudf` in debug mode which enables some `assert` safety checks and includes symbols +in the library for debugging. All other steps for installing `libcudf` into your environment are the same. ### Debugging with `cuda-gdb` and `cuda-memcheck` -When you have a debug build of `libcudf` installed, debugging with the `cuda-gdb` and `cuda-memcheck` is easy. +When you have a debug build of `libcudf` installed, debugging with the `cuda-gdb` and +`cuda-memcheck` is easy. -If you are debugging a Python script, simply run the following: +If you are debugging a Python script, run the following: ```bash cuda-gdb -ex r --args python .py @@ -255,143 +310,71 @@ cuda-memcheck python .py ### Device debug symbols -The device debug symbols are not automatically added with the cmake `Debug` -build type because it causes a runtime delay of several minutes when loading -the libcudf.so library. +The device debug symbols are not automatically added with the cmake `Debug` build type because it +causes a runtime delay of several minutes when loading the libcudf.so library. -Therefore, it is recommended to add device debug symbols only to specific files by -setting the `-G` compile option locally in your `cpp/CMakeLists.txt` for that file. -Here is an example of adding the `-G` option to the compile command for -`src/copying/copy.cu` source file: +Therefore, it is recommended to add device debug symbols only to specific files by setting the `-G` +compile option locally in your `cpp/CMakeLists.txt` for that file. Here is an example of adding the +`-G` option to the compile command for `src/copying/copy.cu` source file: -``` +```cmake set_source_files_properties(src/copying/copy.cu PROPERTIES COMPILE_OPTIONS "-G") ``` -This will add the device debug symbols for this object file in libcudf.so. -You can then use `cuda-dbg` to debug into the kernels in that source file. - -### Building and Testing on a gpuCI image locally - -Before submitting a pull request, you can do a local build and test on your machine that mimics our gpuCI environment using the `ci/local/build.sh` script. -For detailed information on usage of this script, see [here](ci/local/README.md). - +This will add the device debug symbols for this object file in `libcudf.so`. You can then use +`cuda-dbg` to debug into the kernels in that source file. -## Automated Build in Docker Container +## Code Formatting -A Dockerfile is provided with a preconfigured conda environment for building and installing cuDF from source based off of the main branch. +### C++/CUDA -### Prerequisites +cuDF uses [`clang-format`](https://clang.llvm.org/docs/ClangFormat.html). -* Install [nvidia-docker2](https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) for Docker + GPU support -* Verify NVIDIA driver is `450.80.02` or higher -* Ensure CUDA 11.0+ is installed - -### Usage +In order to format the C++/CUDA files, navigate to the root (`cudf`) directory and run: -From cudf project root run the following, to build with defaults: -```bash -docker build --tag cudf . 
-``` -After the container is built run the container: ```bash -docker run --runtime=nvidia -it cudf bash -``` -Activate the conda environment `cudf` to use the newly built cuDF and libcudf libraries: -``` -root@3f689ba9c842:/# source activate cudf -(cudf) root@3f689ba9c842:/# python -c "import cudf" -(cudf) root@3f689ba9c842:/# +python3 ./cpp/scripts/run-clang-format.py -inplace ``` -### Customizing the Build - -Several build arguments are available to customize the build process of the -container. These are specified by using the Docker [build-arg](https://docs.docker.com/engine/reference/commandline/build/#set-build-time-variables---build-arg) -flag. Below is a list of the available arguments and their purpose: +Additionally, many editors have plugins or extensions that you can set up to automatically run +`clang-format` either manually or on file save. -| Build Argument | Default Value | Other Value(s) | Purpose | -| --- | --- | --- | --- | -| `CUDA_VERSION` | 11.0 | 11.2.2 | set CUDA version | -| `LINUX_VERSION` | ubuntu18.04 | ubuntu20.04 | set Ubuntu version | -| `CC` & `CXX` | 9 | 10 | set gcc/g++ version | -| `CUDF_REPO` | This repo | Forks of cuDF | set git URL to use for `git clone` | -| `CUDF_BRANCH` | main | Any branch name | set git branch to checkout of `CUDF_REPO` | -| `NUMBA_VERSION` | newest | >=0.40.0 | set numba version | -| `NUMPY_VERSION` | newest | >=1.14.3 | set numpy version | -| `PANDAS_VERSION` | newest | >=0.23.4 | set pandas version | -| `PYARROW_VERSION` | 1.0.1 | Not supported | set pyarrow version | -| `CMAKE_VERSION` | newest | >=3.18 | set cmake version | -| `CYTHON_VERSION` | 0.29 | Not supported | set Cython version | -| `PYTHON_VERSION` | 3.7 | 3.8 | set python version | +### Python / Pre-commit hooks +cuDF uses [pre-commit](https://pre-commit.com/) to execute code linters and formatters such as +[Black](https://black.readthedocs.io/en/stable/), [isort](https://pycqa.github.io/isort/), and +[flake8](https://flake8.pycqa.org/en/latest/). These tools ensure a consistent code format +throughout the project. Using pre-commit ensures that linter versions and options are aligned for +all developers. Additionally, there is a CI check in place to enforce that committed code follows +our standards. -### Code Formatting - - -#### Python - -cuDF uses [Black](https://black.readthedocs.io/en/stable/), -[isort](https://readthedocs.org/projects/isort/), and -[flake8](http://flake8.pycqa.org/en/latest/) to ensure a consistent code format -throughout the project. They have been installed during the `cudf_dev` environment creation. - -These tools are used to auto-format the Python code, as well as check the Cython -code in the repository. Additionally, there is a CI check in place to enforce -that committed code follows our standards. You can use the tools to -automatically format your python code by running: +To use `pre-commit`, install via `conda` or `pip`: ```bash -isort --atomic python/**/*.py -black python +conda install -c conda-forge pre-commit ``` -and then check the syntax of your Python and Cython code by running: - ```bash -flake8 python -flake8 --config=python/.flake8.cython -``` - -Additionally, many editors have plugins that will apply `isort` and `Black` as -you edit files, as well as use `flake8` to report any style / syntax issues. 
- -#### C++/CUDA - -cuDF uses [`clang-format`](https://clang.llvm.org/docs/ClangFormat.html) - -In order to format the C++/CUDA files, navigate to the root (`cudf`) directory and run: -``` -python3 ./cpp/scripts/run-clang-format.py -inplace +pip install pre-commit ``` -Additionally, many editors have plugins or extensions that you can set up to automatically run `clang-format` either manually or on file save. - -#### Pre-commit hooks - -Optionally, you may wish to setup [pre-commit hooks](https://pre-commit.com/) -to automatically run `isort`, `Black`, `flake8` and `clang-format` when you make a git commit. -This can be done by installing `pre-commit` via `conda` or `pip`: +Then run pre-commit hooks before committing code: ```bash -conda install -c conda-forge pre_commit +pre-commit run ``` -```bash -pip install pre-commit -``` - -and then running: +Optionally, you may set up the pre-commit hooks to run automatically when you make a git commit. This can be done by running: ```bash pre-commit install ``` -from the root of the cuDF repository. Now `isort`, `Black`, `flake8` and `clang-format` will be -run each time you commit changes. +Now code linters and formatters will be run each time you commit changes. ---- +You can skip these checks with `git commit --no-verify` or with the short version `git commit -n`. ## Attribution + Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md Portions adopted from https://github.com/dask/dask/blob/master/docs/source/develop.rst From 03d419d96753d29cf24226ab661377da23eef969 Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Thu, 28 Apr 2022 08:27:45 -0500 Subject: [PATCH 6/9] Prepare dask_cudf test_parquet.py for upcoming API changes (#10709) This is a relatively-simple PR to clean up `dask_cudf`'s `to/read_parquet` tests. These changes are mostly meant to avoid **future** test failures that will arise after impending changes are implemented in up-stream Dask. These changes include: - The default value for `write_metadata_file` will become `False` for `to_parquet` (because writing the _metadata file scales very poorly) - The default value for `split_row_groups` will become `False` (because this setting is typically optimal when the file are not too large). Users with larger-than-memory files will need to specify `split_row_groups=True/int` explicitly. - The `gather_statistics` argument will be removed in favor of a more descriptive `calculate_divisions` argument. This PR also removes the long-deprecated `row_groups_per_part` argument from `dask_cudf.read_parquet` (established replacement is `split_row_groups`). 
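As a concrete illustration of the `split_row_groups` usage discussed above (the dataset path is a placeholder):

```python
import dask_cudf

# split_row_groups=False: map each Parquet file to one output partition.
df = dask_cudf.read_parquet("/path/to/dataset/", split_row_groups=False)

# split_row_groups=True: map each row-group to its own partition; useful
# when individual files are larger than available GPU memory.
df = dask_cudf.read_parquet("/path/to/dataset/", split_row_groups=True)

# An integer caps the number of row-groups per output partition.
df = dask_cudf.read_parquet("/path/to/dataset/", split_row_groups=10)
```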
Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) - Ray Douglass (https://github.com/raydouglass) - gpuCI (https://github.com/GPUtester) - Mike Wendt (https://github.com/mike-wendt) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Benjamin Zaitlen (https://github.com/quasiben) - GALI PREM SAGAR (https://github.com/galipremsagar) - Randy Gelhausen (https://github.com/randerzander) URL: https://github.com/rapidsai/cudf/pull/10709 --- python/dask_cudf/dask_cudf/io/parquet.py | 212 +++++++++++------- .../dask_cudf/io/tests/test_parquet.py | 106 ++++++--- 2 files changed, 197 insertions(+), 121 deletions(-) diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 042759f68cf..b201626becf 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -177,65 +177,98 @@ def read_partition( strings_to_cats = kwargs.get("strings_to_categorical", False) read_kwargs = kwargs.get("read", {}) read_kwargs.update(open_file_options or {}) - - # Assume multi-piece read - paths = [] - rgs = [] - last_partition_keys = None - dfs = [] - - for i, piece in enumerate(pieces): - - (path, row_group, partition_keys) = piece - row_group = None if row_group == [None] else row_group - - if i > 0 and partition_keys != last_partition_keys: - dfs.append( - cls._read_paths( - paths, - fs, - columns=read_columns, - row_groups=rgs if rgs else None, - strings_to_categorical=strings_to_cats, - partitions=partitions, - partitioning=partitioning, - partition_keys=last_partition_keys, - **read_kwargs, + check_file_size = read_kwargs.pop("check_file_size", None) + + # Wrap reading logic in a `try` block so that we can + # inform the user that the `read_parquet` partition + # size is too large for the available memory + try: + + # Assume multi-piece read + paths = [] + rgs = [] + last_partition_keys = None + dfs = [] + + for i, piece in enumerate(pieces): + + (path, row_group, partition_keys) = piece + row_group = None if row_group == [None] else row_group + + # File-size check to help "protect" users from change + # to up-stream `split_row_groups` default. We only + # check the file size if this partition corresponds + # to a full file, and `check_file_size` is defined + if check_file_size and len(pieces) == 1 and row_group is None: + file_size = fs.size(path) + if file_size > check_file_size: + warnings.warn( + f"A large parquet file ({file_size}B) is being " + f"used to create a DataFrame partition in " + f"read_parquet. This may cause out of memory " + f"exceptions in operations downstream. See the " + f"notes on split_row_groups in the read_parquet " + f"documentation. Setting split_row_groups " + f"explicitly will silence this warning." 
+ ) + + if i > 0 and partition_keys != last_partition_keys: + dfs.append( + cls._read_paths( + paths, + fs, + columns=read_columns, + row_groups=rgs if rgs else None, + strings_to_categorical=strings_to_cats, + partitions=partitions, + partitioning=partitioning, + partition_keys=last_partition_keys, + **read_kwargs, + ) ) + paths = rgs = [] + last_partition_keys = None + paths.append(path) + rgs.append( + [row_group] + if not isinstance(row_group, list) + and row_group is not None + else row_group ) - paths = rgs = [] - last_partition_keys = None - paths.append(path) - rgs.append( - [row_group] - if not isinstance(row_group, list) and row_group is not None - else row_group - ) - last_partition_keys = partition_keys + last_partition_keys = partition_keys - dfs.append( - cls._read_paths( - paths, - fs, - columns=read_columns, - row_groups=rgs if rgs else None, - strings_to_categorical=strings_to_cats, - partitions=partitions, - partitioning=partitioning, - partition_keys=last_partition_keys, - **read_kwargs, + dfs.append( + cls._read_paths( + paths, + fs, + columns=read_columns, + row_groups=rgs if rgs else None, + strings_to_categorical=strings_to_cats, + partitions=partitions, + partitioning=partitioning, + partition_keys=last_partition_keys, + **read_kwargs, + ) ) - ) - df = cudf.concat(dfs) if len(dfs) > 1 else dfs[0] - - # Re-set "object" dtypes align with pa schema - set_object_dtypes_from_pa_schema(df, schema) + df = cudf.concat(dfs) if len(dfs) > 1 else dfs[0] - if index and (index[0] in df.columns): - df = df.set_index(index[0]) - elif index is False and df.index.names != (None,): - # If index=False, we shouldn't have a named index - df.reset_index(inplace=True) + # Re-set "object" dtypes align with pa schema + set_object_dtypes_from_pa_schema(df, schema) + + if index and (index[0] in df.columns): + df = df.set_index(index[0]) + elif index is False and df.index.names != (None,): + # If index=False, we shouldn't have a named index + df.reset_index(inplace=True) + + except MemoryError as err: + raise MemoryError( + "Parquet data was larger than the available GPU memory!\n\n" + "See the notes on split_row_groups in the read_parquet " + "documentation.\n\n" + "Original Error: " + str(err) + ) + raise err return df @@ -349,25 +382,34 @@ def set_object_dtypes_from_pa_schema(df, schema): df._data[col_name] = col.astype(typ) -def read_parquet( - path, - columns=None, - split_row_groups=None, - row_groups_per_part=None, - **kwargs, -): +def read_parquet(path, columns=None, **kwargs): """Read parquet files into a Dask DataFrame - Calls ``dask.dataframe.read_parquet`` to cordinate the execution of - ``cudf.read_parquet``, and ultimately read multiple partitions into - a single Dask dataframe. The Dask version must supply an - ``ArrowDatasetEngine`` class to support full functionality. - See ``cudf.read_parquet`` and Dask documentation for further details. + Calls ``dask.dataframe.read_parquet`` with ``engine=CudfEngine`` + to cordinate the execution of ``cudf.read_parquet``, and to + ultimately create a ``dask_cudf.DataFrame`` collection. + + See the ``dask.dataframe.read_parquet`` documentation for + all available options. 
Examples -------- - >>> import dask_cudf - >>> df = dask_cudf.read_parquet("/path/to/dataset/") # doctest: +SKIP + >>> from dask_cudf import read_parquet + >>> df = read_parquet("/path/to/dataset/") # doctest: +SKIP + + When dealing with one or more large parquet files having an + in-memory footprint >15% device memory, the ``split_row_groups`` + argument should be used to map Parquet **row-groups** to DataFrame + partitions (instead of **files** to partitions). For example, the + following code will map each row-group to a distinct partition: + + >>> df = read_parquet(..., split_row_groups=True) # doctest: +SKIP + + To map **multiple** row-groups to each partition, an integer can be + passed to ``split_row_groups`` to specify the **maximum** number of + row-groups allowed in each output partition: + + >>> df = read_parquet(..., split_row_groups=10) # doctest: +SKIP See Also -------- @@ -376,22 +418,24 @@ def read_parquet( if isinstance(columns, str): columns = [columns] - if row_groups_per_part: - warnings.warn( - "row_groups_per_part is deprecated. " - "Pass an integer value to split_row_groups instead.", - FutureWarning, - ) - if split_row_groups is None: - split_row_groups = row_groups_per_part - - return dd.read_parquet( - path, - columns=columns, - split_row_groups=split_row_groups, - engine=CudfEngine, - **kwargs, - ) + # Set "check_file_size" option to determine whether we + # should check the parquet-file size. This check is meant + # to "protect" users from `split_row_groups` default changes + check_file_size = kwargs.pop("check_file_size", 500_000_000) + if ( + check_file_size + and ("split_row_groups" not in kwargs) + and ("chunksize" not in kwargs) + ): + # User is not specifying `split_row_groups` or `chunksize`, + # so we should warn them if/when a file is ~>0.5GB on disk. + # They can set `split_row_groups` explicitly to silence/skip + # this check + if "read" not in kwargs: + kwargs["read"] = {} + kwargs["read"]["check_file_size"] = check_file_size + + return dd.read_parquet(path, columns=columns, engine=CudfEngine, **kwargs) to_parquet = partial(dd.to_parquet, engine=CudfEngine) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index d9b8ee4595a..ef5741b0539 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -36,42 +36,55 @@ ddf = dd.from_pandas(df, npartitions=npartitions) -@pytest.mark.parametrize("stats", [True, False]) -def test_roundtrip_from_dask(tmpdir, stats): +# Helper function to make it easier to handle the +# upcoming deprecation of `gather_statistics`. 
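+# (`gather_statistics` controls whether parquet metadata/statistics are
+# collected up front, which is what allows output divisions to be
+# computed.)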
+# See: https://github.com/dask/dask/issues/8937 +# TODO: This function should be used to switch to +# the "new" `calculate_divisions` kwarg (for newer +# Dask versions) once it is introduced +def _divisions(setting): + return {"gather_statistics": setting} + + +@pytest.mark.parametrize("write_metadata_file", [True, False]) +@pytest.mark.parametrize("divisions", [True, False]) +def test_roundtrip_from_dask(tmpdir, divisions, write_metadata_file): tmpdir = str(tmpdir) - ddf.to_parquet(tmpdir, engine="pyarrow") + ddf.to_parquet( + tmpdir, write_metadata_file=write_metadata_file, engine="pyarrow" + ) files = sorted( (os.path.join(tmpdir, f) for f in os.listdir(tmpdir)), key=natural_sort_key, ) # Read list of parquet files - ddf2 = dask_cudf.read_parquet(files, gather_statistics=stats) - dd.assert_eq(ddf, ddf2, check_divisions=stats) + ddf2 = dask_cudf.read_parquet(files, **_divisions(divisions)) + dd.assert_eq(ddf, ddf2, check_divisions=divisions) # Specify columns=['x'] ddf2 = dask_cudf.read_parquet( - files, columns=["x"], gather_statistics=stats + files, columns=["x"], **_divisions(divisions) ) - dd.assert_eq(ddf[["x"]], ddf2, check_divisions=stats) + dd.assert_eq(ddf[["x"]], ddf2, check_divisions=divisions) # Specify columns='y' - ddf2 = dask_cudf.read_parquet(files, columns="y", gather_statistics=stats) - dd.assert_eq(ddf[["y"]], ddf2, check_divisions=stats) + ddf2 = dask_cudf.read_parquet(files, columns="y", **_divisions(divisions)) + dd.assert_eq(ddf[["y"]], ddf2, check_divisions=divisions) # Now include metadata - ddf2 = dask_cudf.read_parquet(tmpdir, gather_statistics=stats) - dd.assert_eq(ddf, ddf2, check_divisions=stats) + ddf2 = dask_cudf.read_parquet(tmpdir, **_divisions(divisions)) + dd.assert_eq(ddf, ddf2, check_divisions=divisions) # Specify columns=['x'] (with metadata) ddf2 = dask_cudf.read_parquet( - tmpdir, columns=["x"], gather_statistics=stats + tmpdir, columns=["x"], **_divisions(divisions) ) - dd.assert_eq(ddf[["x"]], ddf2, check_divisions=stats) + dd.assert_eq(ddf[["x"]], ddf2, check_divisions=divisions) # Specify columns='y' (with metadata) - ddf2 = dask_cudf.read_parquet(tmpdir, columns="y", gather_statistics=stats) - dd.assert_eq(ddf[["y"]], ddf2, check_divisions=stats) + ddf2 = dask_cudf.read_parquet(tmpdir, columns="y", **_divisions(divisions)) + dd.assert_eq(ddf[["y"]], ddf2, check_divisions=divisions) def test_roundtrip_from_dask_index_false(tmpdir): @@ -99,8 +112,8 @@ def test_roundtrip_from_dask_cudf(tmpdir, write_meta): gddf = dask_cudf.from_dask_dataframe(ddf) gddf.to_parquet(tmpdir, write_metadata_file=write_meta) - gddf2 = dask_cudf.read_parquet(tmpdir) - dd.assert_eq(gddf, gddf2, check_divisions=write_meta) + gddf2 = dask_cudf.read_parquet(tmpdir, **_divisions(True)) + dd.assert_eq(gddf, gddf2) def test_roundtrip_none_rangeindex(tmpdir): @@ -161,21 +174,21 @@ def test_dask_timeseries_from_pandas(tmpdir): @pytest.mark.parametrize("index", [False, None]) -@pytest.mark.parametrize("stats", [False, True]) -def test_dask_timeseries_from_dask(tmpdir, index, stats): +@pytest.mark.parametrize("divisions", [False, True]) +def test_dask_timeseries_from_dask(tmpdir, index, divisions): fn = str(tmpdir) ddf2 = dask.datasets.timeseries(freq="D") ddf2.to_parquet(fn, engine="pyarrow", write_index=index) - read_df = dask_cudf.read_parquet(fn, index=index, gather_statistics=stats) + read_df = dask_cudf.read_parquet(fn, index=index, **_divisions(divisions)) dd.assert_eq( - ddf2, read_df, check_divisions=(stats and index), check_index=index + ddf2, read_df, 
check_divisions=(divisions and index), check_index=index ) @pytest.mark.parametrize("index", [False, None]) -@pytest.mark.parametrize("stats", [False, True]) -def test_dask_timeseries_from_daskcudf(tmpdir, index, stats): +@pytest.mark.parametrize("divisions", [False, True]) +def test_dask_timeseries_from_daskcudf(tmpdir, index, divisions): fn = str(tmpdir) ddf2 = dask_cudf.from_cudf( @@ -183,9 +196,9 @@ def test_dask_timeseries_from_daskcudf(tmpdir, index, stats): ) ddf2.name = ddf2.name.astype("object") ddf2.to_parquet(fn, write_index=index) - read_df = dask_cudf.read_parquet(fn, index=index, gather_statistics=stats) + read_df = dask_cudf.read_parquet(fn, index=index, **_divisions(divisions)) dd.assert_eq( - ddf2, read_df, check_divisions=(stats and index), check_index=index + ddf2, read_df, check_divisions=(divisions and index), check_index=index ) @@ -212,17 +225,23 @@ def test_filters(tmpdir): ddf.to_parquet(tmp_path, engine="pyarrow") - a = dask_cudf.read_parquet(tmp_path, filters=[("x", ">", 4)]) + a = dask_cudf.read_parquet( + tmp_path, filters=[("x", ">", 4)], split_row_groups=True + ) assert a.npartitions == 3 assert (a.x > 3).all().compute() - b = dask_cudf.read_parquet(tmp_path, filters=[("y", "==", "c")]) + b = dask_cudf.read_parquet( + tmp_path, filters=[("y", "==", "c")], split_row_groups=True + ) assert b.npartitions == 1 b = b.compute().to_pandas() assert (b.y == "c").all() c = dask_cudf.read_parquet( - tmp_path, filters=[("y", "==", "c"), ("x", ">", 6)] + tmp_path, + filters=[("y", "==", "c"), ("x", ">", 6)], + split_row_groups=True, ) assert c.npartitions <= 1 assert not len(c) @@ -237,13 +256,17 @@ def test_filters_at_row_group_level(tmpdir): ddf.to_parquet(tmp_path, engine="pyarrow", row_group_size=10 / 5) - a = dask_cudf.read_parquet(tmp_path, filters=[("x", "==", 1)]) + a = dask_cudf.read_parquet( + tmp_path, filters=[("x", "==", 1)], split_row_groups=True + ) assert a.npartitions == 1 assert (a.shape[0] == 2).compute() ddf.to_parquet(tmp_path, engine="pyarrow", row_group_size=1) - b = dask_cudf.read_parquet(tmp_path, filters=[("x", "==", 1)]) + b = dask_cudf.read_parquet( + tmp_path, filters=[("x", "==", 1)], split_row_groups=True + ) assert b.npartitions == 1 assert (b.shape[0] == 1).compute() @@ -341,7 +364,7 @@ def test_chunksize(tmpdir, chunksize, metadata): path, chunksize=chunksize, split_row_groups=True, - gather_statistics=True, + **_divisions(True), ) ddf2.compute(scheduler="synchronous") @@ -360,8 +383,8 @@ def test_chunksize(tmpdir, chunksize, metadata): path, chunksize=chunksize, split_row_groups=True, - gather_statistics=True, aggregate_files=True, + **_divisions(True), ) dd.assert_eq(ddf1, ddf3, check_divisions=False) @@ -382,7 +405,7 @@ def test_chunksize(tmpdir, chunksize, metadata): @pytest.mark.parametrize("row_groups", [1, 3, 10, 12]) @pytest.mark.parametrize("index", [False, True]) -def test_row_groups_per_part(tmpdir, row_groups, index): +def test_split_row_groups(tmpdir, row_groups, index): nparts = 2 df_size = 100 row_group_size = 5 @@ -410,7 +433,7 @@ def test_row_groups_per_part(tmpdir, row_groups, index): ddf2 = dask_cudf.read_parquet( str(tmpdir), - row_groups_per_part=row_groups, + split_row_groups=row_groups, ) dd.assert_eq(ddf1, ddf2, check_divisions=False) @@ -448,9 +471,9 @@ def test_create_metadata_file(tmpdir, partition_on): # with the _metadata file present ddf2 = dask_cudf.read_parquet( tmpdir, - gather_statistics=True, split_row_groups=False, index="myindex", + **_divisions(True), ) if partition_on: ddf1 = df1.sort_values("b") @@ 
-481,7 +504,7 @@ def test_create_metadata_file_inconsistent_schema(tmpdir): # New pyarrow-dataset base can handle an inconsistent # schema (even without a _metadata file), but computing # and dtype validation may fail - ddf1 = dask_cudf.read_parquet(str(tmpdir), gather_statistics=True) + ddf1 = dask_cudf.read_parquet(str(tmpdir), **_divisions(True)) # Add global metadata file. # Dask-CuDF can do this without requiring schema @@ -490,7 +513,7 @@ def test_create_metadata_file_inconsistent_schema(tmpdir): # Check that we can still read the ddf # with the _metadata file present - ddf2 = dask_cudf.read_parquet(str(tmpdir), gather_statistics=True) + ddf2 = dask_cudf.read_parquet(str(tmpdir), **_divisions(True)) # Check that the result is the same with and # without the _metadata file. Note that we must @@ -538,3 +561,12 @@ def test_cudf_list_struct_write(tmpdir): ddf.to_parquet(temp_file) new_ddf = dask_cudf.read_parquet(temp_file) dd.assert_eq(df, new_ddf) + + +def test_check_file_size(tmpdir): + # Test simple file-size check to help warn users + # of upstream change to `split_row_groups` default + fn = str(tmpdir.join("test.parquet")) + cudf.DataFrame({"a": np.arange(1000)}).to_parquet(fn) + with pytest.warns(match="large parquet file"): + dask_cudf.read_parquet(fn, check_file_size=1).compute() From a43fb9eafb15b50bf5de21ac0bdebd3b490f511e Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Thu, 28 Apr 2022 11:04:12 -0400 Subject: [PATCH 7/9] Implement DataFrame.eval using libcudf ASTs (#8022) This PR exposes `libcudf`'s expression parsing functionality in `cudf` and uses it to implement `DataFrame.eval`. The implementation is mostly feature-complete, but there are a few limitations relative to the `pandas` API and a couple of gotchas around type casting. The implementation is reasonably performant, improving upon an equivalent `df.apply` even accounting for JIT-compilation overhead. This implementation provides a stepping stone to leveraging `libcudf`'s AST implementation for more complex tasks in `cudf` such as conditional joins. The most significant issue with the current implementation is the lack of casting between integral types, meaning that operations can only be performed between columns of the _exact_ same dtype. For example, operations between int8 and int16 would fail. This becomes particularly problematic for constants e.g. `df.eval('x+1')`. The best paths to improve this are at the C++ level of the expression evaluation, so I think we'll have to live with this limitation for now if we want to move forward. 
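To make the casting limitation concrete, here is a minimal illustrative sketch (hypothetical column names; not part of the patch) of the behavior described above:

```python
import cudf

df = cudf.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})  # both int64

df.eval("x + y")  # works: operands have the exact same dtype
df.eval("x + 1")  # works: integer literals are constructed as int64

df_small = df.astype({"x": "int8"})
# df_small.eval("x + y") would fail: int8 vs int64, no implicit cast.
# The workaround is to normalize dtypes before evaluating:
df_small.astype({"x": "int64"}).eval("x + y")
```

Similarly, a float constant such as `x + 1.0` evaluates against float64 columns, so narrower float columns need an explicit cast first.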
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Ashwin Srinath (https://github.com/shwina) - Bradley Dice (https://github.com/bdice) - https://github.com/brandon-b-miller URL: https://github.com/rapidsai/cudf/pull/8022 --- cpp/include/cudf/ast/expressions.hpp | 6 +- python/cudf/cudf/_lib/__init__.py | 3 +- python/cudf/cudf/_lib/cpp/expressions.pxd | 88 +++++++ python/cudf/cudf/_lib/cpp/transform.pxd | 8 +- python/cudf/cudf/_lib/expressions.pxd | 38 +++ python/cudf/cudf/_lib/expressions.pyx | 130 ++++++++++ python/cudf/cudf/_lib/transform.pyx | 37 +++ .../cudf/cudf/core/_internals/expressions.py | 222 ++++++++++++++++++ python/cudf/cudf/core/dataframe.py | 160 +++++++++++++ python/cudf/cudf/tests/test_dataframe.py | 87 +++++++ 10 files changed, 775 insertions(+), 4 deletions(-) create mode 100644 python/cudf/cudf/_lib/cpp/expressions.pxd create mode 100644 python/cudf/cudf/_lib/expressions.pxd create mode 100644 python/cudf/cudf/_lib/expressions.pyx create mode 100644 python/cudf/cudf/core/_internals/expressions.py diff --git a/cpp/include/cudf/ast/expressions.hpp b/cpp/include/cudf/ast/expressions.hpp index eb98e0e0bee..96c99e054a5 100644 --- a/cpp/include/cudf/ast/expressions.hpp +++ b/cpp/include/cudf/ast/expressions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,8 @@ #include #include +#include + namespace cudf { namespace ast { @@ -53,7 +55,7 @@ struct expression { /** * @brief Enum of supported operators. */ -enum class ast_operator { +enum class ast_operator : int32_t { // Binary operators ADD, ///< operator + SUB, ///< operator - diff --git a/python/cudf/cudf/_lib/__init__.py b/python/cudf/cudf/_lib/__init__.py index bd25aa53405..542262b7908 100644 --- a/python/cudf/cudf/_lib/__init__.py +++ b/python/cudf/cudf/_lib/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import numpy as np from . import ( @@ -8,6 +8,7 @@ copying, csv, datetime, + expressions, filling, gpuarrow, groupby, diff --git a/python/cudf/cudf/_lib/cpp/expressions.pxd b/python/cudf/cudf/_lib/cpp/expressions.pxd new file mode 100644 index 00000000000..1721f8aa734 --- /dev/null +++ b/python/cudf/cudf/_lib/cpp/expressions.pxd @@ -0,0 +1,88 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. 
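+# Cython declarations mirroring the C++ expression API declared in
+# cudf/ast/expressions.hpp.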
+ +from libcpp.memory cimport unique_ptr + +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.scalar.scalar cimport ( + duration_scalar, + numeric_scalar, + timestamp_scalar, +) +from cudf._lib.cpp.table.table_view cimport table_view +from cudf._lib.cpp.types cimport size_type + + +cdef extern from "cudf/ast/expressions.hpp" namespace "cudf::ast" nogil: + ctypedef enum ast_operator: + # Binary operators + ADD "cudf::ast::ast_operator::ADD" + SUB "cudf::ast::ast_operator::SUB" + MUL "cudf::ast::ast_operator::MUL" + DIV "cudf::ast::ast_operator::DIV" + TRUE_DIV "cudf::ast::ast_operator::TRUE_DIV" + FLOOR_DIV "cudf::ast::ast_operator::FLOOR_DIV" + MOD "cudf::ast::ast_operator::MOD" + PYMOD "cudf::ast::ast_operator::PYMOD" + POW "cudf::ast::ast_operator::POW" + EQUAL "cudf::ast::ast_operator::EQUAL" + NULL_EQUAL "cudf::ast::ast_operator::NULL_EQUAL" + NOT_EQUAL "cudf::ast::ast_operator::NOT_EQUAL" + LESS "cudf::ast::ast_operator::LESS" + GREATER "cudf::ast::ast_operator::GREATER" + LESS_EQUAL "cudf::ast::ast_operator::LESS_EQUAL" + GREATER_EQUAL "cudf::ast::ast_operator::GREATER_EQUAL" + BITWISE_AND "cudf::ast::ast_operator::BITWISE_AND" + BITWISE_OR "cudf::ast::ast_operator::BITWISE_OR" + BITWISE_XOR "cudf::ast::ast_operator::BITWISE_XOR" + NULL_LOGICAL_AND "cudf::ast::ast_operator::NULL_LOGICAL_AND" + LOGICAL_AND "cudf::ast::ast_operator::LOGICAL_AND" + NULL_LOGICAL_OR "cudf::ast::ast_operator::NULL_LOGICAL_OR" + LOGICAL_OR "cudf::ast::ast_operator::LOGICAL_OR" + # Unary operators + IDENTITY "cudf::ast::ast_operator::IDENTITY" + SIN "cudf::ast::ast_operator::SIN" + COS "cudf::ast::ast_operator::COS" + TAN "cudf::ast::ast_operator::TAN" + ARCSIN "cudf::ast::ast_operator::ARCSIN" + ARCCOS "cudf::ast::ast_operator::ARCCOS" + ARCTAN "cudf::ast::ast_operator::ARCTAN" + SINH "cudf::ast::ast_operator::SINH" + COSH "cudf::ast::ast_operator::COSH" + TANH "cudf::ast::ast_operator::TANH" + ARCSINH "cudf::ast::ast_operator::ARCSINH" + ARCCOSH "cudf::ast::ast_operator::ARCCOSH" + ARCTANH "cudf::ast::ast_operator::ARCTANH" + EXP "cudf::ast::ast_operator::EXP" + LOG "cudf::ast::ast_operator::LOG" + SQRT "cudf::ast::ast_operator::SQRT" + CBRT "cudf::ast::ast_operator::CBRT" + CEIL "cudf::ast::ast_operator::CEIL" + FLOOR "cudf::ast::ast_operator::FLOOR" + ABS "cudf::ast::ast_operator::ABS" + RINT "cudf::ast::ast_operator::RINT" + BIT_INVERT "cudf::ast::ast_operator::BIT_INVERT" + NOT "cudf::ast::ast_operator::NOT" + + cdef cppclass expression: + pass + + ctypedef enum table_reference: + LEFT "cudf::ast::table_reference::LEFT" + RIGHT "cudf::ast::table_reference::RIGHT" + + cdef cppclass literal(expression): + # Due to https://github.com/cython/cython/issues/3198, we need to + # specify a return type for templated constructors. + literal literal[T](numeric_scalar[T] &) except + + literal literal[T](timestamp_scalar[T] &) except + + literal literal[T](duration_scalar[T] &) except + + + cdef cppclass column_reference(expression): + # Allow for default C++ parameters by declaring multiple constructors + # with the default parameters optionally omitted. 
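+        # (When the table_reference argument is omitted, the C++ default of
+        # table_reference::LEFT applies.)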
+ column_reference(size_type) except + + column_reference(size_type, table_reference) except + + + cdef cppclass operation(expression): + operation(ast_operator, const expression &) + operation(ast_operator, const expression &, const expression&) diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index 590a371ff52..d9de04b676e 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libcpp cimport bool from libcpp.memory cimport unique_ptr @@ -9,6 +9,7 @@ from rmm._lib.device_buffer cimport device_buffer from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.cpp.expressions cimport expression from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport bitmask_type, data_type, size_type @@ -42,3 +43,8 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: column_view input_column, column_view categories ) + + cdef unique_ptr[column] compute_column( + const table_view table, + const expression& expr + ) except + diff --git a/python/cudf/cudf/_lib/expressions.pxd b/python/cudf/cudf/_lib/expressions.pxd new file mode 100644 index 00000000000..85665822174 --- /dev/null +++ b/python/cudf/cudf/_lib/expressions.pxd @@ -0,0 +1,38 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +from libc.stdint cimport int32_t, int64_t +from libcpp.memory cimport unique_ptr + +from cudf._lib.cpp.expressions cimport ( + column_reference, + expression, + literal, + operation, +) +from cudf._lib.cpp.scalar.scalar cimport numeric_scalar + +ctypedef enum scalar_type_t: + INT + DOUBLE + + +ctypedef union int_or_double_scalar_ptr: + unique_ptr[numeric_scalar[int64_t]] int_ptr + unique_ptr[numeric_scalar[double]] double_ptr + + +cdef class Expression: + cdef unique_ptr[expression] c_obj + + +cdef class Literal(Expression): + cdef scalar_type_t c_scalar_type + cdef int_or_double_scalar_ptr c_scalar + + +cdef class ColumnReference(Expression): + pass + + +cdef class Operation(Expression): + pass diff --git a/python/cudf/cudf/_lib/expressions.pyx b/python/cudf/cudf/_lib/expressions.pyx new file mode 100644 index 00000000000..f069bcdbe73 --- /dev/null +++ b/python/cudf/cudf/_lib/expressions.pyx @@ -0,0 +1,130 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. + +from enum import Enum + +from cython.operator cimport dereference +from libc.stdint cimport int64_t +from libcpp.memory cimport make_unique, unique_ptr +from libcpp.utility cimport move + +from cudf._lib.cpp cimport expressions as libcudf_exp +from cudf._lib.cpp.types cimport size_type + +# Necessary for proper casting, see below. 
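+# Cython cannot use C++ scoped enumerations directly, so operator values
+# are passed through this underlying integer type (see the casts in
+# Operation below).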
+ctypedef int32_t underlying_type_ast_operator + + +# Aliases for simplicity +ctypedef unique_ptr[libcudf_exp.expression] expression_ptr + + +class ASTOperator(Enum): + ADD = libcudf_exp.ast_operator.ADD + SUB = libcudf_exp.ast_operator.SUB + MUL = libcudf_exp.ast_operator.MUL + DIV = libcudf_exp.ast_operator.DIV + TRUE_DIV = libcudf_exp.ast_operator.TRUE_DIV + FLOOR_DIV = libcudf_exp.ast_operator.FLOOR_DIV + MOD = libcudf_exp.ast_operator.MOD + PYMOD = libcudf_exp.ast_operator.PYMOD + POW = libcudf_exp.ast_operator.POW + EQUAL = libcudf_exp.ast_operator.EQUAL + NULL_EQUAL = libcudf_exp.ast_operator.NULL_EQUAL + NOT_EQUAL = libcudf_exp.ast_operator.NOT_EQUAL + LESS = libcudf_exp.ast_operator.LESS + GREATER = libcudf_exp.ast_operator.GREATER + LESS_EQUAL = libcudf_exp.ast_operator.LESS_EQUAL + GREATER_EQUAL = libcudf_exp.ast_operator.GREATER_EQUAL + BITWISE_AND = libcudf_exp.ast_operator.BITWISE_AND + BITWISE_OR = libcudf_exp.ast_operator.BITWISE_OR + BITWISE_XOR = libcudf_exp.ast_operator.BITWISE_XOR + LOGICAL_AND = libcudf_exp.ast_operator.LOGICAL_AND + NULL_LOGICAL_AND = libcudf_exp.ast_operator.NULL_LOGICAL_AND + LOGICAL_OR = libcudf_exp.ast_operator.LOGICAL_OR + NULL_LOGICAL_OR = libcudf_exp.ast_operator.NULL_LOGICAL_OR + # Unary operators + IDENTITY = libcudf_exp.ast_operator.IDENTITY + SIN = libcudf_exp.ast_operator.SIN + COS = libcudf_exp.ast_operator.COS + TAN = libcudf_exp.ast_operator.TAN + ARCSIN = libcudf_exp.ast_operator.ARCSIN + ARCCOS = libcudf_exp.ast_operator.ARCCOS + ARCTAN = libcudf_exp.ast_operator.ARCTAN + SINH = libcudf_exp.ast_operator.SINH + COSH = libcudf_exp.ast_operator.COSH + TANH = libcudf_exp.ast_operator.TANH + ARCSINH = libcudf_exp.ast_operator.ARCSINH + ARCCOSH = libcudf_exp.ast_operator.ARCCOSH + ARCTANH = libcudf_exp.ast_operator.ARCTANH + EXP = libcudf_exp.ast_operator.EXP + LOG = libcudf_exp.ast_operator.LOG + SQRT = libcudf_exp.ast_operator.SQRT + CBRT = libcudf_exp.ast_operator.CBRT + CEIL = libcudf_exp.ast_operator.CEIL + FLOOR = libcudf_exp.ast_operator.FLOOR + ABS = libcudf_exp.ast_operator.ABS + RINT = libcudf_exp.ast_operator.RINT + BIT_INVERT = libcudf_exp.ast_operator.BIT_INVERT + NOT = libcudf_exp.ast_operator.NOT + + +class TableReference(Enum): + LEFT = libcudf_exp.table_reference.LEFT + RIGHT = libcudf_exp.table_reference.RIGHT + + +# Note that this function only currently supports numeric literals. libcudf +# expressions don't really support other types yet though, so this isn't +# restrictive at the moment. +cdef class Literal(Expression): + def __cinit__(self, value): + # TODO: Would love to find a better solution than unions for literals. 
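+        # Exactly one union member is populated, chosen by the Python type
+        # of `value`; the scalar it owns must stay alive as long as the
+        # libcudf literal below references it.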
+ cdef int intval + cdef double doubleval + + if isinstance(value, int): + self.c_scalar_type = scalar_type_t.INT + intval = value + self.c_scalar.int_ptr = make_unique[numeric_scalar[int64_t]]( + intval, True + ) + self.c_obj = make_unique[libcudf_exp.literal]( + dereference(self.c_scalar.int_ptr) + ) + elif isinstance(value, float): + self.c_scalar_type = scalar_type_t.DOUBLE + doubleval = value + self.c_scalar.double_ptr = make_unique[numeric_scalar[double]]( + doubleval, True + ) + self.c_obj = make_unique[libcudf_exp.literal]( + dereference(self.c_scalar.double_ptr) + ) + + +cdef class ColumnReference(Expression): + def __cinit__(self, size_type index): + self.c_obj = make_unique[libcudf_exp.column_reference]( + index + ) + + +cdef class Operation(Expression): + def __cinit__(self, op, Expression left, Expression right=None): + # This awkward double casting is the only way to get Cython to generate + # valid C++. Cython doesn't support scoped enumerations, so it assumes + # that enums correspond to their underlying value types and will thus + # attempt operations that are invalid without first explicitly casting + # to the underlying before casting to the desired type. + cdef libcudf_exp.ast_operator op_value = ( + op.value + ) + + if right is None: + self.c_obj = make_unique[libcudf_exp.operation]( + op_value, dereference(left.c_obj) + ) + else: + self.c_obj = make_unique[libcudf_exp.operation]( + op_value, dereference(left.c_obj), dereference(right.c_obj) + ) diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index 175150b6865..2d94ef2cedf 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -5,9 +5,11 @@ from numba.np import numpy_support import cudf from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES +from cudf.core._internals.expressions import parse_expression from cudf.core.buffer import Buffer from cudf.utils import cudautils +from cython.operator cimport dereference from libc.stdint cimport uintptr_t from libcpp.memory cimport unique_ptr from libcpp.pair cimport pair @@ -20,14 +22,18 @@ cimport cudf._lib.cpp.transform as libcudf_transform from cudf._lib.column cimport Column from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.cpp.expressions cimport expression from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport bitmask_type, data_type, size_type, type_id +from cudf._lib.expressions cimport Expression from cudf._lib.types cimport underlying_type_t_type_id from cudf._lib.utils cimport ( columns_from_unique_ptr, data_from_table_view, + data_from_unique_ptr, table_view_from_columns, + table_view_from_table, ) @@ -156,3 +162,34 @@ def one_hot_encode(Column input_column, Column categories): ) return encodings + + +def compute_column(list columns, tuple column_names, expr: str): + """Compute a new column by evaluating an expression on a set of columns. + + Parameters + ---------- + columns : list + The set of columns forming the table to evaluate the expression on. + column_names : tuple[str] + The names associated with each column. These names are necessary to map + column names in the expression to indices in the provided list of + columns, which are what will be used by libcudf to evaluate the + expression on the table. + expr : str + The expression to evaluate. 
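+
+    Returns
+    -------
+    Column
+        The column that results from evaluating ``expr`` against the input
+        columns.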
+    """
+    visitor = parse_expression(expr, column_names)
+
+    # At the end, all the stack contains is the expression to evaluate.
+    cdef Expression cudf_expr = visitor.expression
+    cdef table_view tbl = table_view_from_columns(columns)
+    cdef unique_ptr[column] col
+    with nogil:
+        col = move(
+            libcudf_transform.compute_column(
+                tbl,
+                dereference(cudf_expr.c_obj.get())
+            )
+        )
+    return Column.from_unique_ptr(move(col))
diff --git a/python/cudf/cudf/core/_internals/expressions.py b/python/cudf/cudf/core/_internals/expressions.py
new file mode 100644
index 00000000000..bc587d4e1e2
--- /dev/null
+++ b/python/cudf/cudf/core/_internals/expressions.py
@@ -0,0 +1,222 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+import ast
+import functools
+from typing import List, Tuple
+
+from cudf._lib.expressions import (
+    ASTOperator,
+    ColumnReference,
+    Expression,
+    Literal,
+    Operation,
+)
+
+# This dictionary encodes the mapping from Python AST operators to their cudf
+# counterparts.
+python_cudf_operator_map = {
+    # Binary operators
+    ast.Add: ASTOperator.ADD,
+    ast.Sub: ASTOperator.SUB,
+    ast.Mult: ASTOperator.MUL,
+    ast.Div: ASTOperator.DIV,
+    ast.FloorDiv: ASTOperator.FLOOR_DIV,
+    ast.Mod: ASTOperator.PYMOD,
+    ast.Pow: ASTOperator.POW,
+    ast.Eq: ASTOperator.EQUAL,
+    ast.NotEq: ASTOperator.NOT_EQUAL,
+    ast.Lt: ASTOperator.LESS,
+    ast.Gt: ASTOperator.GREATER,
+    ast.LtE: ASTOperator.LESS_EQUAL,
+    ast.GtE: ASTOperator.GREATER_EQUAL,
+    ast.BitXor: ASTOperator.BITWISE_XOR,
+    # TODO: The mapping of logical/bitwise operators here is inconsistent with
+    # pandas. In pandas, both `BitAnd` and `And` map to
+    # `ASTOperator.LOGICAL_AND` for booleans, while they map to
+    # `ASTOperator.BITWISE_AND` for integers. However, there is no good way to
+    # encode this at present because expressions can be arbitrarily nested so
+    # we won't know the dtype of the input without inserting a much more
+    # complex traversal of the expression tree to determine the output types
+    # at each node. For now, we'll rely on users to use the appropriate
+    # operator.
+    ast.BitAnd: ASTOperator.BITWISE_AND,
+    ast.BitOr: ASTOperator.BITWISE_OR,
+    ast.And: ASTOperator.LOGICAL_AND,
+    ast.Or: ASTOperator.LOGICAL_OR,
+    # Unary operators
+    ast.Invert: ASTOperator.BIT_INVERT,
+    ast.Not: ASTOperator.NOT,
+    # TODO: Missing USub, possibly other unary ops?
+}
+
+
+# Mapping between Python function names encoded in an ast.Call node and the
+# corresponding libcudf C++ AST operators.
+python_cudf_function_map = {
+    # TODO: Operators listed on
+    # https://pandas.pydata.org/pandas-docs/stable/user_guide/enhancingperf.html#expression-evaluation-via-eval  # noqa: E501
+    # that we don't support yet:
+    # expm1, log1p, arctan2 and log10.
+    "sin": ASTOperator.SIN,
+    "cos": ASTOperator.COS,
+    "tan": ASTOperator.TAN,
+    "arcsin": ASTOperator.ARCSIN,
+    "arccos": ASTOperator.ARCCOS,
+    "arctan": ASTOperator.ARCTAN,
+    "sinh": ASTOperator.SINH,
+    "cosh": ASTOperator.COSH,
+    "tanh": ASTOperator.TANH,
+    "arcsinh": ASTOperator.ARCSINH,
+    "arccosh": ASTOperator.ARCCOSH,
+    "arctanh": ASTOperator.ARCTANH,
+    "exp": ASTOperator.EXP,
+    "log": ASTOperator.LOG,
+    "sqrt": ASTOperator.SQRT,
+    "abs": ASTOperator.ABS,
+    "ceil": ASTOperator.CEIL,
+    "floor": ASTOperator.FLOOR,
+    # TODO: Operators supported by libcudf with no Python function analog.
+    # ast.rint: ASTOperator.RINT,
+    # ast.cbrt: ASTOperator.CBRT,
+}
+
+
+class libcudfASTVisitor(ast.NodeVisitor):
+    """A NodeVisitor specialized for constructing a libcudf expression tree.
+
+    This visitor is designed to handle AST nodes that have libcudf
+    equivalents. It constructs column references from names and literals from
+    constants, then builds up operations. The final result can be accessed
+    using the `expression` property. The visitor must be kept in scope for as
+    long as the expression is needed because all of the underlying libcudf
+    expressions will be destroyed when the libcudfASTVisitor is.
+
+    Parameters
+    ----------
+    col_names : Tuple[str]
+        The column names used to map the names in an expression.
+    """
+
+    def __init__(self, col_names: Tuple[str]):
+        self.stack: List[Expression] = []
+        self.nodes: List[Expression] = []
+        self.col_names = col_names
+
+    @property
+    def expression(self):
+        """Expression: The result of parsing an AST."""
+        assert len(self.stack) == 1
+        return self.stack[-1]
+
+    def visit_Name(self, node):
+        try:
+            col_id = self.col_names.index(node.id)
+        except ValueError:
+            raise ValueError(f"Unknown column name {node.id}")
+        self.stack.append(ColumnReference(col_id))
+
+    def visit_Constant(self, node):
+        if not isinstance(node, ast.Num):
+            raise ValueError(
+                f"Unsupported literal {repr(node.value)} of type "
+                f"{type(node.value).__name__}"
+            )
+        self.stack.append(Literal(node.value))
+
+    def visit_UnaryOp(self, node):
+        self.visit(node.operand)
+        self.nodes.append(self.stack.pop())
+        if isinstance(node.op, ast.USub):
+            # TODO: Except for leaf nodes, we won't know the type of the
+            # operand, so there's no way to know whether this should be a
+            # float or an int. We should maybe see what Spark does, and this
+            # will probably require casting.
+            self.nodes.append(Literal(-1))
+            op = ASTOperator.MUL
+            self.stack.append(Operation(op, self.nodes[-1], self.nodes[-2]))
+        elif isinstance(node.op, ast.UAdd):
+            self.stack.append(self.nodes[-1])
+        else:
+            op = python_cudf_operator_map[type(node.op)]
+            self.stack.append(Operation(op, self.nodes[-1]))
+
+    def visit_BinOp(self, node):
+        self.visit(node.left)
+        self.visit(node.right)
+        self.nodes.append(self.stack.pop())
+        self.nodes.append(self.stack.pop())
+
+        op = python_cudf_operator_map[type(node.op)]
+        self.stack.append(Operation(op, self.nodes[-1], self.nodes[-2]))
+
+    def _visit_BoolOp_Compare(self, operators, operands, has_multiple_ops):
+        # Helper function handling the common components of parsing BoolOp
+        # and Compare AST nodes. These two types of nodes both support
+        # chaining (e.g. `a > b > c` is equivalent to `a > b and b > c`), so
+        # this function helps standardize that.
+
+        # TODO: Whether And/Or and BitAnd/BitOr actually correspond to
+        # logical or bitwise operators depends on the data types that they
+        # are applied to. We'll need to add logic to map to that.
+        inner_ops = []
+        for op, (left, right) in zip(operators, operands):
+            # Note that this will lead to duplicate nodes, e.g. if
+            # the comparison is `a < b < c` that will be encoded as
+            # `a < b and b < c`. We could potentially optimize by caching
+            # expressions by name so that we only construct them once.
+            self.visit(left)
+            self.visit(right)
+
+            self.nodes.append(self.stack.pop())
+            self.nodes.append(self.stack.pop())
+
+            op = python_cudf_operator_map[type(op)]
+            inner_ops.append(Operation(op, self.nodes[-1], self.nodes[-2]))
+
+        self.nodes.extend(inner_ops)
+
+        # If we have more than one comparator, we need to link them
+        # together with LOGICAL_AND operators.
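+        # For example, `a < b < c` yields the inner operations `(a < b)` and
+        # `(b < c)`, which are folded below into a single
+        # `(a < b) LOGICAL_AND (b < c)` expression.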
+ if has_multiple_ops: + op = ASTOperator.LOGICAL_AND + + def _combine_compare_ops(left, right): + self.nodes.append(Operation(op, left, right)) + return self.nodes[-1] + + functools.reduce(_combine_compare_ops, inner_ops) + + self.stack.append(self.nodes[-1]) + + def visit_BoolOp(self, node): + operators = [node.op] * (len(node.values) - 1) + operands = zip(node.values[:-1], node.values[1:]) + self._visit_BoolOp_Compare(operators, operands, len(node.values) > 2) + + def visit_Compare(self, node): + operands = (node.left, *node.comparators) + has_multiple_ops = len(operands) > 2 + operands = zip(operands[:-1], operands[1:]) + self._visit_BoolOp_Compare(node.ops, operands, has_multiple_ops) + + def visit_Call(self, node): + try: + op = python_cudf_function_map[node.func.id] + except KeyError: + raise ValueError(f"Unsupported function {node.func}.") + # Assuming only unary functions are supported, which is checked above. + if len(node.args) != 1 or node.keywords: + raise ValueError( + f"Function {node.func} only accepts one positional " + "argument." + ) + self.visit(node.args[0]) + + self.nodes.append(self.stack.pop()) + self.stack.append(Operation(op, self.nodes[-1])) + + +@functools.lru_cache(256) +def parse_expression(expr: str, col_names: Tuple[str]): + visitor = libcudfASTVisitor(col_names) + visitor.visit(ast.parse(expr)) + return visitor diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 7b4b81630bd..0d3b3ee0300 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -7,6 +7,7 @@ import itertools import numbers import pickle +import re import sys import warnings from collections import abc, defaultdict @@ -6253,6 +6254,165 @@ def interleave_columns(self): {None: libcudf.reshape.interleave_columns([*self._columns])} ) + @_cudf_nvtx_annotate + def eval(self, expr: str, inplace: bool = False, **kwargs): + """Evaluate a string describing operations on DataFrame columns. + + Operates on columns only, not specific rows or elements. + + Parameters + ---------- + expr : str + The expression string to evaluate. + inplace : bool, default False + If the expression contains an assignment, whether to perform the + operation inplace and mutate the existing DataFrame. Otherwise, + a new DataFrame is returned. + **kwargs + Not supported. + + Returns + ------- + DataFrame, Series, or None + Series if a single column is returned (the typical use case), + DataFrame if any assignment statements are included in + ``expr``, or None if ``inplace=True``. + + Notes + ----- + Difference from pandas: + * Additional kwargs are not supported. + * Bitwise and logical operators are not dtype-dependent. + Specifically, `&` must be used for bitwise operators on integers, + not `and`, which is specifically for the logical and between + booleans. + * Only numerical types are currently supported. + * Operators generally will not cast automatically. Users are + responsible for casting columns to suitable types before + evaluating a function. + * Multiple assignments to the same name (i.e. a sequence of + assignment statements where later statements are conditioned upon + the output of earlier statements) is not supported. + + Examples + -------- + >>> df = cudf.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)}) + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + >>> df.eval('A + B') + 0 11 + 1 10 + 2 9 + 3 8 + 4 7 + dtype: int64 + + Assignment is allowed though by default the original DataFrame is not + modified. 
+ + >>> df.eval('C = A + B') + A B C + 0 1 10 11 + 1 2 8 10 + 2 3 6 9 + 3 4 4 8 + 4 5 2 7 + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + + Use ``inplace=True`` to modify the original DataFrame. + + >>> df.eval('C = A + B', inplace=True) + >>> df + A B C + 0 1 10 11 + 1 2 8 10 + 2 3 6 9 + 3 4 4 8 + 4 5 2 7 + + Multiple columns can be assigned to using multi-line expressions: + + >>> df.eval( + ... ''' + ... C = A + B + ... D = A - B + ... ''' + ... ) + A B C D + 0 1 10 11 -9 + 1 2 8 10 -6 + 2 3 6 9 -3 + 3 4 4 8 0 + 4 5 2 7 3 + """ + if kwargs: + raise ValueError( + "Keyword arguments other than `inplace` are not supported" + ) + + # Have to use a regex match to avoid capturing "==" + includes_assignment = re.search("[^=]=[^=]", expr) is not None + + # Check if there were multiple statements. Filter out empty lines. + statements = tuple(filter(None, expr.strip().split("\n"))) + if len(statements) > 1 and any( + re.search("[^=]=[^=]", st) is None for st in statements + ): + raise ValueError( + "Multi-line expressions are only valid if all expressions " + "contain an assignment." + ) + + if not includes_assignment: + if inplace: + raise ValueError( + "Cannot operate inplace if there is no assignment" + ) + return Series._from_data( + { + None: libcudf.transform.compute_column( + [*self._columns], self._column_names, statements[0] + ) + } + ) + + targets = [] + exprs = [] + for st in statements: + try: + t, e = re.split("[^=]=[^=]", st) + except ValueError as err: + if "too many values" in str(err): + raise ValueError( + f"Statement {st} contains too many assignments ('=')" + ) + raise + targets.append(t.strip()) + exprs.append(e.strip()) + + cols = ( + libcudf.transform.compute_column( + [*self._columns], self._column_names, e + ) + for e in exprs + ) + ret = self if inplace else self.copy(deep=False) + for name, col in zip(targets, cols): + ret._data[name] = col + if not inplace: + return ret + def from_dataframe(df, allow_copy=False): return df_protocol.from_dataframe(df, allow_copy=allow_copy) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index bf5c4ae319b..d95fe278469 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -9266,3 +9266,90 @@ def test_empty_numeric_only(data): expected = pdf.prod(numeric_only=True) actual = gdf.prod(numeric_only=True) assert_eq(expected, actual) + + +@pytest.fixture +def df_eval(): + N = 10 + int_max = 10 + rng = cupy.random.default_rng(0) + return cudf.DataFrame( + { + "a": rng.integers(N, size=int_max), + "b": rng.integers(N, size=int_max), + "c": rng.integers(N, size=int_max), + "d": rng.integers(N, size=int_max), + } + ) + + +# Note that for now expressions do not automatically handle casting, so inputs +# need to be casted appropriately +@pytest.mark.parametrize( + "expr, dtype", + [ + ("a", int), + ("+a", int), + ("a + b", int), + ("a == b", int), + ("a / b", float), + ("a * b", int), + ("a > b", int), + ("a > b > c", int), + ("a > b < c", int), + ("a & b", int), + ("a & b | c", int), + ("sin(a)", float), + ("exp(sin(abs(a)))", float), + ("sqrt(floor(a))", float), + ("ceil(arctanh(a))", float), + ("(a + b) - (c * d)", int), + ("~a", int), + ("(a > b) and (c > d)", int), + ("(a > b) or (c > d)", int), + ("not (a > b)", int), + ("a + 1", int), + ("a + 1.0", float), + ("-a + 1", int), + ("+a + 1", int), + ("e = a + 1", int), + ( + """ + e = log(cos(a)) + 1.0 + f = abs(c) - exp(d) + """, + float, + ), + ("a_b_are_equal = (a == b)", int), + ], +) 
+def test_dataframe_eval(df_eval, expr, dtype): + df_eval = df_eval.astype(dtype) + expect = df_eval.to_pandas().eval(expr) + got = df_eval.eval(expr) + # In the specific case where the evaluated expression is a unary function + # of a single column with no nesting, pandas will retain the name. This + # level of compatibility is out of scope for now. + assert_eq(expect, got, check_names=False) + + # Test inplace + if re.search("[^=]=[^=]", expr) is not None: + pdf_eval = df_eval.to_pandas() + pdf_eval.eval(expr, inplace=True) + df_eval.eval(expr, inplace=True) + assert_eq(pdf_eval, df_eval) + + +@pytest.mark.parametrize( + "expr", + [ + """ + e = a + b + a == b + """, + "a_b_are_equal = (a == b) = c", + ], +) +def test_dataframe_eval_errors(df_eval, expr): + with pytest.raises(ValueError): + df_eval.eval(expr) From 20569f6cd9e03f1d7536ac49e6e93ffc99941e98 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 28 Apr 2022 11:38:06 -0400 Subject: [PATCH 8/9] Add `detail::hash_join` (#10695) Closes https://github.com/rapidsai/cudf/issues/10587 This PR adds a `detail::hash_join` class which is templated on the hash function. It also cleans up `join` internal functions by moving code around to proper files. The implementation of `detail::hash_join` is mainly taken from `cudf::hash_join::hash_join_impl`. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Jake Hemstad (https://github.com/jrhemstad) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/10695 --- cpp/include/cudf/detail/join.hpp | 185 +++++++++++ .../cudf/detail/utilities/hash_functions.cuh | 3 - cpp/include/cudf/hashing.hpp | 3 + cpp/include/cudf/join.hpp | 17 +- cpp/src/join/hash_join.cu | 267 +++++++++++----- cpp/src/join/hash_join.cuh | 289 ------------------ cpp/src/join/join.cu | 85 ++---- cpp/src/join/join_common_utils.cuh | 82 ++++- cpp/src/join/join_common_utils.hpp | 13 +- cpp/src/join/join_utils.cu | 2 +- cpp/src/join/mixed_join.cu | 9 +- cpp/src/join/mixed_join_kernel.cuh | 7 +- cpp/src/join/mixed_join_semi.cu | 11 +- cpp/src/join/mixed_join_size_kernel.cuh | 7 +- cpp/src/join/semi_join.cu | 4 +- 15 files changed, 516 insertions(+), 468 deletions(-) create mode 100644 cpp/include/cudf/detail/join.hpp delete mode 100644 cpp/src/join/hash_join.cuh diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp new file mode 100644 index 00000000000..12e4aaa03fd --- /dev/null +++ b/cpp/include/cudf/detail/join.hpp @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include + +// Forward declaration +template +class default_allocator; + +namespace cudf { +namespace detail { + +constexpr int DEFAULT_JOIN_CG_SIZE = 2; + +enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN }; + +/** + * @brief Hash join that builds hash table in creation and probes results in subsequent `*_join` + * member functions. + * + * User-defined hash function can be passed via the template parameter `Hasher` + * + * @tparam Hasher Unary callable type + */ +template +struct hash_join { + public: + using map_type = + cuco::static_multimap>, + cuco::double_hashing>; + + hash_join() = delete; + ~hash_join() = default; + hash_join(hash_join const&) = delete; + hash_join(hash_join&&) = delete; + hash_join& operator=(hash_join const&) = delete; + hash_join& operator=(hash_join&&) = delete; + + private: + bool const _is_empty; ///< true if `_hash_table` is empty + cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal + cudf::table_view _build; ///< input table to build the hash map + cudf::structs::detail::flattened_table + _flattened_build_table; ///< flattened data structures for `_build` + map_type _hash_table; ///< hash table built on `_build` + + public: + /** + * @brief Constructor that internally builds the hash table based on the given `build` table. + * + * @throw cudf::logic_error if the number of columns in `build` table is 0. + * @throw cudf::logic_error if the number of rows in `build` table exceeds MAX_JOIN_SIZE. + * + * @param build The build table, from which the hash table is built. + * @param compare_nulls Controls whether null join-key values should match or not. + * @param stream CUDA stream used for device memory operations and kernel launches. + */ + hash_join(cudf::table_view const& build, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream = rmm::cuda_stream_default); + + /** + * @copydoc cudf::hash_join::inner_join + */ + std::pair>, + std::unique_ptr>> + inner_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const; + + /** + * @copydoc cudf::hash_join::left_join + */ + std::pair>, + std::unique_ptr>> + left_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const; + + /** + * @copydoc cudf::hash_join::full_join + */ + std::pair>, + std::unique_ptr>> + full_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const; + + /** + * @copydoc cudf::hash_join::inner_join_size + */ + [[nodiscard]] std::size_t inner_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream) const; + + /** + * @copydoc cudf::hash_join::left_join_size + */ + [[nodiscard]] std::size_t left_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream) const; + + /** + * @copydoc cudf::hash_join::full_join_size + */ + std::size_t full_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const; + + private: + /** + * @brief Probes the `_hash_table` built from `_build` for tuples in `probe_table`, + * and returns the output indices of `build_table` and `probe_table` as a combined table, + * i.e. 
if full join is specified as the join type then left join is called. Behavior + * is undefined if the provided `output_size` is smaller than the actual output size. + * + * @throw cudf::logic_error if build table is empty and `JoinKind == INNER_JOIN`. + * + * @tparam JoinKind The type of join to be performed. + * + * @param probe_table Table of probe side columns to join. + * @param output_size Optional value which allows users to specify the exact output size. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned vectors. + * + * @return Join output indices vector pair. + */ + template + std::pair>, + std::unique_ptr>> + probe_join_indices(cudf::table_view const& probe_table, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const; + + /** + * @copydoc cudf::detail::hash_join::probe_join_indices + * + * @throw cudf::logic_error if probe table is empty. + * @throw cudf::logic_error if the size of probe table exceeds `MAX_JOIN_SIZE`. + * @throw cudf::logic_error if the number of columns in build table and probe table do not match. + * @throw cudf::logic_error if the column data types in build table and probe table do not match. + */ + template + std::pair>, + std::unique_ptr>> + compute_hash_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const; +}; +} // namespace detail +} // namespace cudf diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 9c6f3e9cb13..2c5434b63d2 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -32,9 +32,6 @@ #include namespace cudf { - -using hash_value_type = uint32_t; - namespace detail { /** diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index e973c585410..bbff304e547 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -19,6 +19,9 @@ #include namespace cudf { + +using hash_value_type = uint32_t; + /** * @addtogroup column_hash * @{ diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index d56f8f0e904..f48f8a83e9a 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -29,6 +30,16 @@ #include namespace cudf { + +// forward declaration +namespace detail { +template +class MurmurHash3_32; + +template +class hash_join; +} // namespace detail + /** * @addtogroup column_join * @{ @@ -503,6 +514,9 @@ std::unique_ptr cross_join( */ class hash_join { public: + using impl_type = + typename cudf::detail::hash_join>; + hash_join() = delete; ~hash_join(); hash_join(hash_join const&) = delete; @@ -634,8 +648,7 @@ class hash_join { rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const; private: - struct hash_join_impl; - const std::unique_ptr impl; + const std::unique_ptr _impl; }; /** diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index 8d2888fd761..3e0e76de708 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -13,11 +13,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
 */
-#include
+#include "join_common_utils.cuh"

 #include
 #include
+#include
+#include
 #include
+#include
 #include
 #include

@@ -38,13 +41,67 @@
 namespace cudf {
 namespace detail {
-
-std::pair<std::unique_ptr<table>, std::unique_ptr<table>> get_empty_joined_table(
-  table_view const& probe, table_view const& build)
+namespace {
+/**
+ * @brief Calculates the exact size of the join output produced when
+ * joining two tables together.
+ *
+ * @throw cudf::logic_error if JoinKind is not INNER_JOIN or LEFT_JOIN
+ *
+ * @tparam JoinKind The type of join to be performed
+ *
+ * @param build_table The right hand table
+ * @param probe_table The left hand table
+ * @param hash_table A hash table built on the build table that maps the index
+ * of every row to the hash value of that row.
+ * @param nulls_equal Flag to denote nulls are equal or not.
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ *
+ * @return The exact size of the output of the join operation
+ */
+template <join_kind JoinKind>
+std::size_t compute_join_output_size(table_device_view build_table,
+                                     table_device_view probe_table,
+                                     cudf::detail::multimap_type const& hash_table,
+                                     bool const has_nulls,
+                                     cudf::null_equality const nulls_equal,
+                                     rmm::cuda_stream_view stream)
 {
-  std::unique_ptr<table>
empty_probe = empty_like(probe);
-  std::unique_ptr<table>
empty_build = empty_like(build); - return std::pair(std::move(empty_probe), std::move(empty_build)); + const size_type build_table_num_rows{build_table.num_rows()}; + const size_type probe_table_num_rows{probe_table.num_rows()}; + + // If the build table is empty, we know exactly how large the output + // will be for the different types of joins and can return immediately + if (0 == build_table_num_rows) { + switch (JoinKind) { + // Inner join with an empty table will have no output + case join_kind::INNER_JOIN: return 0; + + // Left join with an empty table will have an output of NULL rows + // equal to the number of rows in the probe table + case join_kind::LEFT_JOIN: return probe_table_num_rows; + + default: CUDF_FAIL("Unsupported join type"); + } + } + + auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; + pair_equality equality{probe_table, build_table, probe_nulls, nulls_equal}; + + row_hash hash_probe{probe_nulls, probe_table}; + auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); + make_pair_function pair_func{hash_probe, empty_key_sentinel}; + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + + std::size_t size; + if constexpr (JoinKind == join_kind::LEFT_JOIN) { + size = hash_table.pair_count_outer(iter, iter + probe_table_num_rows, equality, stream.value()); + } else { + size = hash_table.pair_count(iter, iter + probe_table_num_rows, equality, stream.value()); + } + + return size; } /** @@ -69,7 +126,7 @@ std::pair>, std::unique_ptr>> probe_join_hash_table(cudf::table_device_view build_table, cudf::table_device_view probe_table, - multimap_type const& hash_table, + cudf::detail::multimap_type const& hash_table, bool has_nulls, null_equality compare_nulls, std::optional output_size, @@ -145,7 +202,7 @@ probe_join_hash_table(cudf::table_device_view build_table, */ std::size_t get_full_join_size(cudf::table_device_view build_table, cudf::table_device_view probe_table, - multimap_type const& hash_table, + cudf::detail::multimap_type const& hash_table, bool const has_nulls, null_equality const compare_nulls, rmm::cuda_stream_view stream, @@ -157,8 +214,6 @@ std::size_t get_full_join_size(cudf::table_device_view build_table, // If output size is zero, return immediately if (join_size == 0) { return join_size; } - rmm::device_scalar write_index(0, stream); - auto left_indices = std::make_unique>(join_size, stream, mr); auto right_indices = std::make_unique>(join_size, stream, mr); @@ -221,25 +276,12 @@ std::size_t get_full_join_size(cudf::table_device_view build_table, } return join_size + left_join_complement_size; } +} // namespace -std::unique_ptr combine_table_pair(std::unique_ptr&& left, - std::unique_ptr&& right) -{ - auto joined_cols = left->release(); - auto right_cols = right->release(); - joined_cols.insert(joined_cols.end(), - std::make_move_iterator(right_cols.begin()), - std::make_move_iterator(right_cols.end())); - return std::make_unique(std::move(joined_cols)); -} - -} // namespace detail - -hash_join::hash_join_impl::~hash_join_impl() = default; - -hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& build, - null_equality compare_nulls, - rmm::cuda_stream_view stream) +template +hash_join::hash_join(cudf::table_view const& build, + cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream) : _is_empty{build.num_rows() == 0}, _nulls_equal{compare_nulls}, _hash_table{compute_hash_table_size(build.num_rows()), @@ -263,41 +305,45 @@ hash_join::hash_join_impl::hash_join_impl(cudf::table_view const& 
build, cudf::detail::build_join_hash_table(_build, _hash_table, _nulls_equal, stream); } +template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::inner_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +hash_join::inner_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); return compute_hash_join(probe, output_size, stream, mr); } +template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::left_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +hash_join::left_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); return compute_hash_join(probe, output_size, stream, mr); } +template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::full_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +hash_join::full_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); return compute_hash_join(probe, output_size, stream, mr); } -std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const +template +std::size_t hash_join::inner_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream) const { CUDF_FUNC_RANGE(); @@ -320,8 +366,9 @@ std::size_t hash_join::hash_join_impl::inner_join_size(cudf::table_view const& p stream); } -std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const +template +std::size_t hash_join::left_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream) const { CUDF_FUNC_RANGE(); @@ -344,9 +391,10 @@ std::size_t hash_join::hash_join_impl::left_join_size(cudf::table_view const& pr stream); } -std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +template +std::size_t hash_join::full_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { CUDF_FUNC_RANGE(); @@ -370,13 +418,51 @@ std::size_t hash_join::hash_join_impl::full_join_size(cudf::table_view const& pr mr); } +template +template +std::pair>, + std::unique_ptr>> +hash_join::probe_join_indices(cudf::table_view const& probe_table, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + // Trivial left join case - exit early + if (_is_empty and JoinKind != cudf::detail::join_kind::INNER_JOIN) { + return get_trivial_left_join_indices(probe_table, stream, mr); + } + + CUDF_EXPECTS(!_is_empty, "Hash table of hash join is null."); + + auto build_table_ptr = cudf::table_device_view::create(_build, stream); + auto probe_table_ptr = cudf::table_device_view::create(probe_table, stream); + + auto join_indices = cudf::detail::probe_join_hash_table( + *build_table_ptr, + *probe_table_ptr, + _hash_table, + cudf::has_nulls(probe_table) | cudf::has_nulls(_build), + _nulls_equal, + output_size, + stream, + mr); + + if constexpr (JoinKind == 
cudf::detail::join_kind::FULL_JOIN) { + auto complement_indices = detail::get_left_join_indices_complement( + join_indices.second, probe_table.num_rows(), _build.num_rows(), stream, mr); + join_indices = detail::concatenate_vector_pairs(join_indices, complement_indices, stream); + } + return join_indices; +} + +template template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::compute_hash_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +hash_join::compute_hash_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { CUDF_EXPECTS(0 != probe.num_columns(), "Hash join probe table is empty"); CUDF_EXPECTS(probe.num_rows() < cudf::detail::MAX_JOIN_SIZE, @@ -403,41 +489,64 @@ hash_join::hash_join_impl::compute_hash_join(cudf::table_view const& probe, return probe_join_indices(flattened_probe_table, output_size, stream, mr); } +} // namespace detail + +hash_join::~hash_join() = default; + +hash_join::hash_join(cudf::table_view const& build, + null_equality compare_nulls, + rmm::cuda_stream_view stream) + : _impl{std::make_unique(build, compare_nulls, stream)} +{ +} -template std::pair>, std::unique_ptr>> -hash_join::hash_join_impl::probe_join_indices(cudf::table_view const& probe_table, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const +hash_join::inner_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { - // Trivial left join case - exit early - if (_is_empty and JoinKind != cudf::detail::join_kind::INNER_JOIN) { - return get_trivial_left_join_indices(probe_table, stream, mr); - } + return _impl->inner_join(probe, output_size, stream, mr); +} - CUDF_EXPECTS(!_is_empty, "Hash table of hash join is null."); +std::pair>, + std::unique_ptr>> +hash_join::left_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + return _impl->left_join(probe, output_size, stream, mr); +} - auto build_table_ptr = cudf::table_device_view::create(_build, stream); - auto probe_table_ptr = cudf::table_device_view::create(probe_table, stream); +std::pair>, + std::unique_ptr>> +hash_join::full_join(cudf::table_view const& probe, + std::optional output_size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + return _impl->full_join(probe, output_size, stream, mr); +} - auto join_indices = cudf::detail::probe_join_hash_table( - *build_table_ptr, - *probe_table_ptr, - _hash_table, - cudf::has_nulls(probe_table) | cudf::has_nulls(_build), - _nulls_equal, - output_size, - stream, - mr); +std::size_t hash_join::inner_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream) const +{ + return _impl->inner_join_size(probe, stream); +} - if constexpr (JoinKind == cudf::detail::join_kind::FULL_JOIN) { - auto complement_indices = detail::get_left_join_indices_complement( - join_indices.second, probe_table.num_rows(), _build.num_rows(), stream, mr); - join_indices = detail::concatenate_vector_pairs(join_indices, complement_indices, stream); - } - return join_indices; +std::size_t hash_join::left_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream) const +{ + return _impl->left_join_size(probe, stream); +} + +std::size_t 
hash_join::full_join_size(cudf::table_view const& probe, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const +{ + return _impl->full_join_size(probe, stream, mr); } } // namespace cudf diff --git a/cpp/src/join/hash_join.cuh b/cpp/src/join/hash_join.cuh deleted file mode 100644 index e55de043372..00000000000 --- a/cpp/src/join/hash_join.cuh +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include - -namespace cudf { -namespace detail { - -/** - * @brief Remaps a hash value to a new value if it is equal to the specified sentinel value. - * - * @param hash The hash value to potentially remap - * @param sentinel The reserved value - */ -template -constexpr auto remap_sentinel_hash(H hash, S sentinel) -{ - // Arbitrarily choose hash - 1 - return (hash == sentinel) ? (hash - 1) : hash; -} - -/** - * @brief Device functor to create a pair of hash value and index for a given row. - */ -class make_pair_function { - public: - CUDF_HOST_DEVICE make_pair_function(row_hash const& hash, - hash_value_type const empty_key_sentinel) - : _hash{hash}, _empty_key_sentinel{empty_key_sentinel} - { - } - - __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept - { - // Compute the hash value of row `i` - auto row_hash_value = remap_sentinel_hash(_hash(i), _empty_key_sentinel); - return cuco::make_pair(row_hash_value, i); - } - - private: - row_hash _hash; - hash_value_type const _empty_key_sentinel; -}; - -/** - * @brief Calculates the exact size of the join output produced when - * joining two tables together. - * - * @throw cudf::logic_error if JoinKind is not INNER_JOIN or LEFT_JOIN - * - * @tparam JoinKind The type of join to be performed - * @tparam multimap_type The type of the hash table - * - * @param build_table The right hand table - * @param probe_table The left hand table - * @param hash_table A hash table built on the build table that maps the index - * of every row to the hash value of that row. - * @param nulls_equal Flag to denote nulls are equal or not. 
- * @param stream CUDA stream used for device memory operations and kernel launches - * - * @return The exact size of the output of the join operation - */ -template -std::size_t compute_join_output_size(table_device_view build_table, - table_device_view probe_table, - multimap_type const& hash_table, - bool const has_nulls, - cudf::null_equality const nulls_equal, - rmm::cuda_stream_view stream) -{ - const size_type build_table_num_rows{build_table.num_rows()}; - const size_type probe_table_num_rows{probe_table.num_rows()}; - - // If the build table is empty, we know exactly how large the output - // will be for the different types of joins and can return immediately - if (0 == build_table_num_rows) { - switch (JoinKind) { - // Inner join with an empty table will have no output - case join_kind::INNER_JOIN: return 0; - - // Left join with an empty table will have an output of NULL rows - // equal to the number of rows in the probe table - case join_kind::LEFT_JOIN: return probe_table_num_rows; - - default: CUDF_FAIL("Unsupported join type"); - } - } - - auto const probe_nulls = cudf::nullate::DYNAMIC{has_nulls}; - pair_equality equality{probe_table, build_table, probe_nulls, nulls_equal}; - - row_hash hash_probe{probe_nulls, probe_table}; - auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); - make_pair_function pair_func{hash_probe, empty_key_sentinel}; - - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - - std::size_t size; - if constexpr (JoinKind == join_kind::LEFT_JOIN) { - size = hash_table.pair_count_outer(iter, iter + probe_table_num_rows, equality, stream.value()); - } else { - size = hash_table.pair_count(iter, iter + probe_table_num_rows, equality, stream.value()); - } - - return size; -} - -std::pair, std::unique_ptr
> get_empty_joined_table( - table_view const& probe, table_view const& build); - -std::unique_ptr combine_table_pair(std::unique_ptr&& left, - std::unique_ptr&& right); - -/** - * @brief Builds the hash table based on the given `build_table`. - * - * @tparam MultimapType The type of the hash table - * - * @param build Table of columns used to build join hash. - * @param hash_table Build hash table. - * @param nulls_equal Flag to denote nulls are equal or not. - * @param stream CUDA stream used for device memory operations and kernel launches. - * - */ -template -void build_join_hash_table(cudf::table_view const& build, - MultimapType& hash_table, - null_equality const nulls_equal, - rmm::cuda_stream_view stream) -{ - auto build_table_ptr = cudf::table_device_view::create(build, stream); - - CUDF_EXPECTS(0 != build_table_ptr->num_columns(), "Selected build dataset is empty"); - CUDF_EXPECTS(0 != build_table_ptr->num_rows(), "Build side table has no rows"); - - row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; - auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); - make_pair_function pair_func{hash_build, empty_key_sentinel}; - - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); - - size_type const build_table_num_rows{build_table_ptr->num_rows()}; - if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { - hash_table.insert(iter, iter + build_table_num_rows, stream.value()); - } else { - thrust::counting_iterator stencil(0); - auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; - row_is_valid pred{static_cast(row_bitmask.data())}; - - // insert valid rows - hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); - } -} -} // namespace detail - -struct hash_join::hash_join_impl { - public: - hash_join_impl() = delete; - ~hash_join_impl(); - hash_join_impl(hash_join_impl const&) = delete; - hash_join_impl(hash_join_impl&&) = delete; - hash_join_impl& operator=(hash_join_impl const&) = delete; - hash_join_impl& operator=(hash_join_impl&&) = delete; - - private: - bool const _is_empty; - cudf::null_equality const _nulls_equal; - cudf::table_view _build; - std::vector> _created_null_columns; - cudf::structs::detail::flattened_table _flattened_build_table; - cudf::detail::multimap_type _hash_table; - - public: - /** - * @brief Constructor that internally builds the hash table based on the given `build` table - * - * @throw cudf::logic_error if the number of columns in `build` table is 0. - * @throw cudf::logic_error if the number of rows in `build` table exceeds MAX_JOIN_SIZE. - * - * @param build The build table, from which the hash table is built. - * @param compare_nulls Controls whether null join-key values should match or not. - * @param stream CUDA stream used for device memory operations and kernel launches. 
- */ - hash_join_impl(cudf::table_view const& build, - null_equality compare_nulls, - rmm::cuda_stream_view stream = rmm::cuda_stream_default); - - std::pair>, - std::unique_ptr>> - inner_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - std::pair>, - std::unique_ptr>> - left_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - std::pair>, - std::unique_ptr>> - full_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - [[nodiscard]] std::size_t inner_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const; - - [[nodiscard]] std::size_t left_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const; - - std::size_t full_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - private: - template - std::pair>, - std::unique_ptr>> - compute_hash_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; - - /** - * @brief Probes the `_hash_table` built from `_build` for tuples in `probe_table`, - * and returns the output indices of `build_table` and `probe_table` as a combined table, - * i.e. if full join is specified as the join type then left join is called. Behavior - * is undefined if the provided `output_size` is smaller than the actual output size. - * - * @throw cudf::logic_error if hash table is null. - * - * @tparam JoinKind The type of join to be performed. - * - * @param probe_table Table of probe side columns to join. - * @param output_size Optional value which allows users to specify the exact output size. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned vectors. - * - * @return Join output indices vector pair. - */ - template - std::pair>, - std::unique_ptr>> - probe_join_indices(cudf::table_view const& probe_table, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const; -}; - -} // namespace cudf diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu index 15aed83b641..5c529c88d9d 100644 --- a/cpp/src/join/join.cu +++ b/cpp/src/join/join.cu @@ -13,8 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "join/hash_join.cuh" -#include "join/join_common_utils.hpp" +#include "join_common_utils.hpp" #include #include @@ -26,6 +25,26 @@ namespace cudf { namespace detail { +namespace { +std::pair, std::unique_ptr
<table>> get_empty_joined_table(
+  table_view const& probe, table_view const& build)
+{
+  std::unique_ptr<table> empty_probe = empty_like(probe);
+  std::unique_ptr<table> empty_build = empty_like(build);
+  return std::pair(std::move(empty_probe), std::move(empty_build));
+}
+
+std::unique_ptr<cudf::table> combine_table_pair(std::unique_ptr<cudf::table>&& left,
+                                                std::unique_ptr<cudf::table>&& right)
+{
+  auto joined_cols = left->release();
+  auto right_cols  = right->release();
+  joined_cols.insert(joined_cols.end(),
+                     std::make_move_iterator(right_cols.begin()),
+                     std::make_move_iterator(right_cols.end()));
+  return std::make_unique<cudf::table>(std::move(joined_cols));
+}
+}  // namespace

 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
           std::unique_ptr<rmm::device_uvector<size_type>>>
@@ -222,69 +241,8 @@ std::unique_ptr<table>
full_join(table_view const& left_input, mr); return combine_table_pair(std::move(left_result), std::move(right_result)); } - } // namespace detail -hash_join::~hash_join() = default; - -hash_join::hash_join(cudf::table_view const& build, - null_equality compare_nulls, - rmm::cuda_stream_view stream) - : impl{std::make_unique(build, compare_nulls, stream)} -{ -} - -std::pair>, - std::unique_ptr>> -hash_join::inner_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - return impl->inner_join(probe, output_size, stream, mr); -} - -std::pair>, - std::unique_ptr>> -hash_join::left_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - return impl->left_join(probe, output_size, stream, mr); -} - -std::pair>, - std::unique_ptr>> -hash_join::full_join(cudf::table_view const& probe, - std::optional output_size, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - return impl->full_join(probe, output_size, stream, mr); -} - -std::size_t hash_join::inner_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const -{ - return impl->inner_join_size(probe, stream); -} - -std::size_t hash_join::left_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream) const -{ - return impl->left_join_size(probe, stream); -} - -std::size_t hash_join::full_join_size(cudf::table_view const& probe, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const -{ - return impl->full_join_size(probe, stream, mr); -} - -// external APIs - std::pair>, std::unique_ptr>> inner_join(table_view const& left, @@ -353,5 +311,4 @@ std::unique_ptr
full_join(table_view const& left, return detail::full_join( left, right, left_on, right_on, compare_nulls, rmm::cuda_stream_default, mr); } - } // namespace cudf diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index b778f13b5e1..fdb63419c84 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,10 @@ */ #pragma once -#include +#include "join_common_utils.hpp" +#include +#include #include #include @@ -26,6 +28,41 @@ namespace cudf { namespace detail { +/** + * @brief Remaps a hash value to a new value if it is equal to the specified sentinel value. + * + * @param hash The hash value to potentially remap + * @param sentinel The reserved value + */ +template +constexpr auto remap_sentinel_hash(H hash, S sentinel) +{ + // Arbitrarily choose hash - 1 + return (hash == sentinel) ? (hash - 1) : hash; +} + +/** + * @brief Device functor to create a pair of hash value and index for a given row. + */ +class make_pair_function { + public: + CUDF_HOST_DEVICE make_pair_function(row_hash const& hash, + hash_value_type const empty_key_sentinel) + : _hash{hash}, _empty_key_sentinel{empty_key_sentinel} + { + } + + __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept + { + // Compute the hash value of row `i` + auto row_hash_value = remap_sentinel_hash(_hash(i), _empty_key_sentinel); + return cuco::make_pair(row_hash_value, i); + } + + private: + row_hash _hash; + hash_value_type const _empty_key_sentinel; +}; /** * @brief Device functor to determine if a row is valid. @@ -98,6 +135,47 @@ get_trivial_left_join_indices( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Builds the hash table based on the given `build_table`. + * + * @tparam MultimapType The type of the hash table + * + * @param build Table of columns used to build join hash. + * @param hash_table Build hash table. + * @param nulls_equal Flag to denote nulls are equal or not. + * @param stream CUDA stream used for device memory operations and kernel launches. 
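+ * @note When `nulls_equal` is `null_equality::UNEQUAL` and the build table is nullable, rows
+ * containing nulls in the key columns are skipped via `hash_table.insert_if`, since such rows
+ * can never match a probe row.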
+ * + */ +template +void build_join_hash_table(cudf::table_view const& build, + MultimapType& hash_table, + null_equality const nulls_equal, + rmm::cuda_stream_view stream) +{ + auto build_table_ptr = cudf::table_device_view::create(build, stream); + + CUDF_EXPECTS(0 != build_table_ptr->num_columns(), "Selected build dataset is empty"); + CUDF_EXPECTS(0 != build_table_ptr->num_rows(), "Build side table has no rows"); + + row_hash hash_build{nullate::DYNAMIC{cudf::has_nulls(build)}, *build_table_ptr}; + auto const empty_key_sentinel = hash_table.get_empty_key_sentinel(); + make_pair_function pair_func{hash_build, empty_key_sentinel}; + + auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func); + + size_type const build_table_num_rows{build_table_ptr->num_rows()}; + if (nulls_equal == cudf::null_equality::EQUAL or (not nullable(build))) { + hash_table.insert(iter, iter + build_table_num_rows, stream.value()); + } else { + thrust::counting_iterator stencil(0); + auto const row_bitmask = cudf::detail::bitmask_and(build, stream).first; + row_is_valid pred{static_cast(row_bitmask.data())}; + + // insert valid rows + hash_table.insert_if(iter, iter + build_table_num_rows, stencil, pred, stream.value()); + } +} + // Convenient alias for a pair of unique pointers to device uvectors. using VectorPair = std::pair>, std::unique_ptr>>; diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 526c22d1d5c..060e8bff6f8 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -15,8 +15,10 @@ */ #pragma once +#include #include #include +#include #include #include @@ -34,7 +36,6 @@ namespace cudf { namespace detail { constexpr size_type MAX_JOIN_SIZE{std::numeric_limits::max()}; -constexpr int DEFAULT_JOIN_CG_SIZE = 2; constexpr int DEFAULT_JOIN_BLOCK_SIZE = 128; constexpr int DEFAULT_JOIN_CACHE_SIZE = 128; constexpr size_type JoinNoneValue = std::numeric_limits::min(); @@ -45,12 +46,7 @@ using hash_type = cuco::detail::MurmurHash3_32; using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; -using multimap_type = - cuco::static_multimap>; +using multimap_type = cudf::hash_join::impl_type::map_type; // Multimap type used for mixed joins. TODO: This is a temporary alias used // until the mixed joins are converted to using CGs properly. Right now it's @@ -68,9 +64,6 @@ using row_hash = cudf::row_hasher; using row_equality = cudf::row_equality_comparator; -enum class join_kind { INNER_JOIN, LEFT_JOIN, FULL_JOIN, LEFT_SEMI_JOIN, LEFT_ANTI_JOIN }; - bool is_trivial_join(table_view const& left, table_view const& right, join_kind join_type); - } // namespace detail } // namespace cudf diff --git a/cpp/src/join/join_utils.cu b/cpp/src/join/join_utils.cu index 1eb2d4cf4a7..7fa6642b19f 100644 --- a/cpp/src/join/join_utils.cu +++ b/cpp/src/join/join_utils.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include "join_common_utils.cuh" #include diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index b540c013f47..27ee77e3edd 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -14,6 +14,10 @@ * limitations under the License. 
*/ +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_kernels.cuh" + #include #include #include @@ -23,12 +27,9 @@ #include #include #include -#include -#include -#include -#include #include +#include #include #include diff --git a/cpp/src/join/mixed_join_kernel.cuh b/cpp/src/join/mixed_join_kernel.cuh index f7081cc4d63..38955ef4667 100644 --- a/cpp/src/join/mixed_join_kernel.cuh +++ b/cpp/src/join/mixed_join_kernel.cuh @@ -16,10 +16,9 @@ #pragma once -#include -#include -#include -#include +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_common_utils.cuh" #include #include diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index 60cc74991ef..13a1f1a0ce2 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -14,8 +14,14 @@ * limitations under the License. */ +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_kernels_semi.cuh" + #include #include +#include +#include #include #include #include @@ -23,12 +29,9 @@ #include #include #include -#include -#include -#include -#include #include +#include #include #include diff --git a/cpp/src/join/mixed_join_size_kernel.cuh b/cpp/src/join/mixed_join_size_kernel.cuh index 9eedc1a8015..ce70f7f18ee 100644 --- a/cpp/src/join/mixed_join_size_kernel.cuh +++ b/cpp/src/join/mixed_join_size_kernel.cuh @@ -14,10 +14,9 @@ * limitations under the License. */ -#include -#include -#include -#include +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_common_utils.cuh" #include #include diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index 9e1aa27a4e7..687e553fefd 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -45,7 +45,7 @@ namespace { /** * @brief Device functor to create a pair of hash value and index for a given row. */ -struct make_pair_function { +struct make_pair_fn { __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept { // The value is irrelevant since we only ever use the hash map to check for @@ -101,7 +101,7 @@ std::unique_ptr> left_semi_anti_join( auto const right_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(right_flattened_keys)}; row_hash const hash_build{right_nulls, *right_rows_d}; row_equality equality_build{right_nulls, *right_rows_d, *right_rows_d, compare_nulls}; - make_pair_function pair_func_build{}; + make_pair_fn pair_func_build{}; auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func_build); From 9ac24773d186c22ffbacbe31d92dad60ed2cdb5f Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Thu, 28 Apr 2022 21:52:46 +0530 Subject: [PATCH 9/9] Implement all methods of groupby rank aggregation in libcudf, python (#9569) Addresses part of https://github.com/rapidsai/cudf/issues/3591 - [x] move RANK (min method), DENSE_RANK (dense method) into single RANK aggregation - [x] max method - [x] average method - [x] first method - [x] percentage - [x] order, null order RANK, DENSE_RANK was implemented for spark requirement. Pandas groupby has 3 more methods. `rank(column_view, rank_method)` already has all 5 methods implemented. Current implementation has 2 separate aggregations RANK and DENSE_RANK. This is merged to single RANK with parameters `rank_aggregation(rank_method method, null_policy null_handling, bool percentage)` Groupby.rank support for 3 more methods will be added. 
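As an illustration, here is a minimal sketch of requesting a grouped rank scan through the unified factory (mirroring the benchmark added in this PR; `keys` and `order_by` are assumed placeholder views, and the `groupby_scan_aggregation` template argument reflects the explicit instantiations in `aggregation.cpp`):

```cpp
#include <cudf/aggregation.hpp>
#include <cudf/groupby.hpp>

// keys: cudf::table_view of group keys; order_by: cudf::column_view to rank.
std::vector<cudf::groupby::scan_request> requests(1);
requests[0].values = order_by;
requests[0].aggregations.push_back(
  cudf::make_rank_aggregation<cudf::groupby_scan_aggregation>(
    cudf::rank_method::MIN,         // tie-breaker: ties share the lowest rank
    cudf::order::ASCENDING,         // ranking order for order_by
    cudf::null_policy::EXCLUDE,     // excluded rows receive null ranks
    cudf::null_order::AFTER,
    cudf::rank_percentage::NONE));  // plain size_type ranks, no percentage

cudf::groupby::groupby gb_obj(keys);
auto [group_keys, results] = gb_obj.scan(requests);
```
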
This PR is also pre-requisite for spearman correlation. Additionally - [x] Cython, Python plumbing - [x] benchmark for groupby rank (all methods) - [x] PERCENT_RANK aggregation is replaced with MIN_0_INDEXED rank_method in RANK aggregation Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - MithunR (https://github.com/mythrocks) - Jake Hemstad (https://github.com/jrhemstad) URL: https://github.com/rapidsai/cudf/pull/9569 --- cpp/benchmarks/CMakeLists.txt | 12 +- .../groupby/group_rank_benchmark.cu | 109 +++++++ cpp/include/cudf/aggregation.hpp | 186 ++++-------- .../cudf/detail/aggregation/aggregation.hpp | 92 +++--- cpp/include/cudf/detail/scan.hpp | 9 +- cpp/include/cudf/sorting.hpp | 16 +- cpp/src/aggregation/aggregation.cpp | 79 ++--- cpp/src/groupby/groupby.cu | 13 +- cpp/src/groupby/sort/functors.hpp | 3 +- cpp/src/groupby/sort/group_rank_scan.cu | 287 ++++++++++++++---- cpp/src/groupby/sort/group_scan.hpp | 99 ++++-- cpp/src/groupby/sort/scan.cpp | 113 ++++--- cpp/src/reductions/scan/rank_scan.cu | 9 +- cpp/src/reductions/scan/scan.cpp | 22 +- cpp/tests/groupby/rank_scan_tests.cpp | 176 ++++++----- cpp/tests/reductions/list_rank_test.cpp | 63 ++-- cpp/tests/reductions/rank_tests.cpp | 21 +- cpp/tests/reductions/scan_tests.cpp | 1 + java/src/main/native/src/AggregationJni.cpp | 11 +- python/cudf/cudf/_lib/aggregation.pyx | 41 ++- python/cudf/cudf/_lib/cpp/aggregation.pxd | 24 ++ python/cudf/cudf/_lib/cpp/sorting.pxd | 9 +- python/cudf/cudf/_lib/groupby.pyx | 2 +- python/cudf/cudf/_lib/sort.pxd | 3 - python/cudf/cudf/_lib/sort.pyx | 14 +- python/cudf/cudf/core/groupby/groupby.py | 24 ++ python/cudf/cudf/core/indexed_frame.py | 2 +- python/cudf/cudf/tests/test_groupby.py | 44 +++ 28 files changed, 944 insertions(+), 540 deletions(-) create mode 100644 cpp/benchmarks/groupby/group_rank_benchmark.cu delete mode 100644 python/cudf/cudf/_lib/sort.pxd diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 26bb10da69f..e93b2bf4f25 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -194,10 +194,18 @@ ConfigureBench(FILL_BENCH filling/repeat.cpp) # ################################################################################################## # * groupby benchmark ----------------------------------------------------------------------------- ConfigureBench( - GROUPBY_BENCH groupby/group_sum.cu groupby/group_nth.cu groupby/group_shift.cu - groupby/group_struct.cu groupby/group_no_requests.cu groupby/group_scan.cu + GROUPBY_BENCH + groupby/group_sum.cu + groupby/group_nth.cu + groupby/group_shift.cu + groupby/group_struct.cu + groupby/group_no_requests.cu + groupby/group_scan.cu + groupby/group_rank_benchmark.cu ) +ConfigureNVBench(GROUPBY_NVBENCH groupby/group_rank_benchmark.cu) + # ################################################################################################## # * hashing benchmark ----------------------------------------------------------------------------- ConfigureBench(HASHING_BENCH hashing/hash.cpp hashing/partition.cpp) diff --git a/cpp/benchmarks/groupby/group_rank_benchmark.cu b/cpp/benchmarks/groupby/group_rank_benchmark.cu new file mode 100644 index 00000000000..1eeb15debe9 --- /dev/null +++ b/cpp/benchmarks/groupby/group_rank_benchmark.cu @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include + +#include +#include +#include +#include + +#include + +template +static void nvbench_groupby_rank(nvbench::state& state, + nvbench::type_list>) +{ + using namespace cudf; + using type = int64_t; + constexpr auto dtype = type_to_id(); + cudf::rmm_pool_raii pool_raii; + + bool const is_sorted = state.get_int64("is_sorted"); + cudf::size_type const column_size = state.get_int64("data_size"); + constexpr int num_groups = 100; + + data_profile profile; + profile.set_null_frequency(std::nullopt); + profile.set_cardinality(0); + profile.set_distribution_params(dtype, distribution_id::UNIFORM, 0, num_groups); + + auto source_table = create_random_table({dtype, dtype}, row_count{column_size}, profile); + + // values to be pre-sorted too for groupby rank + if (is_sorted) source_table = cudf::sort(*source_table); + + table_view keys{{source_table->view().column(0)}}; + column_view order_by{source_table->view().column(1)}; + + auto agg = cudf::make_rank_aggregation(method); + std::vector requests; + requests.emplace_back(groupby::scan_request()); + requests[0].values = order_by; + requests[0].aggregations.push_back(std::move(agg)); + + groupby::groupby gb_obj(keys, null_policy::EXCLUDE, is_sorted ? 
sorted::YES : sorted::NO); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + // groupby scan uses sort implementation + auto result = gb_obj.scan(requests); + }); +} + +enum class rank_method : int32_t {}; + +NVBENCH_DECLARE_ENUM_TYPE_STRINGS( + cudf::rank_method, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "FIRST"; + case cudf::rank_method::AVERAGE: return "AVERAGE"; + case cudf::rank_method::MIN: return "MIN"; + case cudf::rank_method::MAX: return "MAX"; + case cudf::rank_method::DENSE: return "DENSE"; + default: return "unknown"; + } + }, + [](cudf::rank_method value) { + switch (value) { + case cudf::rank_method::FIRST: return "cudf::rank_method::FIRST"; + case cudf::rank_method::AVERAGE: return "cudf::rank_method::AVERAGE"; + case cudf::rank_method::MIN: return "cudf::rank_method::MIN"; + case cudf::rank_method::MAX: return "cudf::rank_method::MAX"; + case cudf::rank_method::DENSE: return "cudf::rank_method::DENSE"; + default: return "unknown"; + } + }) + +using methods = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(nvbench_groupby_rank, NVBENCH_TYPE_AXES(methods)) + .set_type_axes_names({"rank_method"}) + .set_name("groupby_rank") + .add_int64_axis("data_size", + { + 1000000, // 1M + 10000000, // 10M + 100000000, // 100M + }) + + .add_int64_axis("is_sorted", {0, 1}); diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 539a7c04106..5c7513a6c99 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -43,6 +43,32 @@ namespace detail { class simple_aggregations_collector; class aggregation_finalizer; } // namespace detail + +/** + * @brief Tie-breaker method to use for ranking the column. + * + * @see cudf::make_rank_aggregation for more details. + * @ingroup column_sort + */ +enum class rank_method : int32_t { + FIRST, ///< stable sort order ranking (no ties) + AVERAGE, ///< mean of first in the group + MIN, ///< min of first in the group + MAX, ///< max of first in the group + DENSE ///< rank always increases by 1 between groups +}; + +/** + * @brief Whether returned rank should be percentage or not and + * mention the type of percentage normalization. + * + */ +enum class rank_percentage : int32_t { + NONE, ///< rank + ZERO_NORMALIZED, ///< rank / count + ONE_NORMALIZED ///< (rank - 1) / (count - 1) +}; + /** * @brief Abstract base class for specifying the desired aggregation in an * `aggregation_request`. @@ -77,9 +103,7 @@ class aggregation { NUNIQUE, ///< count number of unique elements NTH_ELEMENT, ///< get the nth element ROW_NUMBER, ///< get row-number of current index (relative to rolling window) - RANK, ///< get rank of current index - DENSE_RANK, ///< get dense rank of current index - PERCENT_RANK, ///< get percent (i.e. fractional) rank of current index + RANK, ///< get rank of current index COLLECT_LIST, ///< collect values into a list COLLECT_SET, ///< collect values into a list without duplicate entries LEAD, ///< window function, accesses row at specified offset following current row @@ -323,9 +347,11 @@ std::unique_ptr make_row_number_aggregation(); /** * @brief Factory to create a RANK aggregation * - * `RANK` returns a non-nullable column of size_type "ranks": the number of rows preceding or - * equal to the current row plus one. As a result, ranks are not unique and gaps will appear in - * the ranking sequence. 
+ * `RANK` returns a column of size_type or double "ranks" (see note 3 below for how the + * data type is determined) for a given rank method and column order. + * If nulls are excluded, the rank will be null for those rows, otherwise a non-nullable column is + * returned. Double precision column is returned only when percentage!=NONE and when rank method is + * average. * * This aggregation only works with "scan" algorithms. The input column into the group or * ungrouped scan is an orderby column that orders the rows that the aggregate function ranks. @@ -333,10 +359,12 @@ std::unique_ptr make_row_number_aggregation(); * column containing the ordering columns. * * Note: - * 1. This method requires that the rows are presorted by the group keys and order_by columns. - * 2. `RANK` aggregations will return a fully valid column regardless of null_handling policy - * specified in the scan. - * 3. `RANK` aggregations are not compatible with exclusive scans. + * 1. This method could work faster with the rows that are presorted by the group keys and order_by + * columns. Though groupby object does not require order_by column to be sorted, groupby rank + * scan aggregation does require the order_by column to be sorted if the keys are sorted. + * 2. `RANK` aggregations are not compatible with exclusive scans. + * 3. All rank methods except AVERAGE method and percentage!=NONE returns size_type column. + * For AVERAGE method and percentage!=NONE, the return type is double column. * * @code{.pseudo} * Example: Consider a motor-racing statistics dataset, containing the following columns: @@ -362,123 +390,37 @@ std::unique_ptr make_row_number_aggregation(); * A grouped rank aggregation scan with: * groupby column : venue * input orderby column: time - * Produces the following rank column: - * { 1, 2, 3, 3, 5, 1, 2, 2, 4, 5} - * (This corresponds to the following grouping and `driver` rows:) - * { "HAM", "LEC", "BOT", "NOR", "RIC", "RIC", "NOR", "BOT", "LEC", "PER" } - * <----------silverstone----------->|<-------------monza--------------> - * @endcode - */ -template -std::unique_ptr make_rank_aggregation(); - -/** - * @brief Factory to create a DENSE_RANK aggregation - * - * `DENSE_RANK` returns a non-nullable column of size_type "dense ranks": the preceding unique - * value's rank plus one. As a result, ranks are not unique but there are no gaps in the ranking - * sequence (unlike RANK aggregations). - * - * This aggregation only works with "scan" algorithms. The input column into the group or - * ungrouped scan is an orderby column that orders the rows that the aggregate function ranks. - * If rows are ordered by more than one column, the orderby input column should be a struct - * column containing the ordering columns. - * - * Note: - * 1. This method requires that the rows are presorted by the group keys and order_by columns. - * 2. `DENSE_RANK` aggregations will return a fully valid column regardless of null_handling - * policy specified in the scan. - * 3. `DENSE_RANK` aggregations are not compatible with exclusive scans. - * - * @code{.pseudo} - * Example: Consider a motor-racing statistics dataset, containing the following columns: - * 1. venue: (STRING) Location of the race event - * 2. driver: (STRING) Name of the car driver (abbreviated to 3 characters) - * 3. 
time: (INT32) Time taken to complete the circuit - * - * For the following presorted data: + * Produces the following rank column for each methods: + * first: { 1, 2, 3, 4, 5, 1, 2, 3, 4, 5} + * average: { 1, 2, 3.5, 3.5, 5, 1, 2.5, 2.5, 4, 5} + * min: { 1, 2, 3, 3, 5, 1, 2, 2, 4, 5} + * max: { 1, 2, 4, 4, 5, 1, 3, 3, 4, 5} + * dense: { 1, 2, 3, 3, 4, 1, 2, 2, 3, 4} + * This corresponds to the following grouping and `driver` rows: + * { "HAM", "LEC", "BOT", "NOR", "RIC", "RIC", "NOR", "BOT", "LEC", "PER" } + * <----------silverstone----------->|<-------------monza--------------> + * + * min rank for each percentage types: + * NONE: { 1, 2, 3, 3, 5, 1, 2, 2, 4, 5 } + * ZERO_NORMALIZED : { 0.16, 0.33, 0.50, 0.50, 0.83, 0.16, 0.33, 0.33, 0.66, 0.83 } + * ONE_NORMALIZED: { 0.00, 0.25, 0.50, 0.50, 1.00, 0.00, 0.25, 0.25, 0.75, 1.00 } + * where count corresponds to the number of rows in the group. @see cudf::rank_percentage * - * [ // venue, driver, time - * { "silverstone", "HAM" ("hamilton"), 15823}, - * { "silverstone", "LEC" ("leclerc"), 15827}, - * { "silverstone", "BOT" ("bottas"), 15834}, // <-- Tied for 3rd place. - * { "silverstone", "NOR" ("norris"), 15834}, // <-- Tied for 3rd place. - * { "silverstone", "RIC" ("ricciardo"), 15905}, - * { "monza", "RIC" ("ricciardo"), 12154}, - * { "monza", "NOR" ("norris"), 12156}, // <-- Tied for 2nd place. - * { "monza", "BOT" ("bottas"), 12156}, // <-- Tied for 2nd place. - * { "monza", "LEC" ("leclerc"), 12201}, - * { "monza", "PER" ("perez"), 12203} - * ] - * - * A grouped dense rank aggregation scan with: - * groupby column : venue - * input orderby column: time - * Produces the following dense rank column: - * { 1, 2, 3, 3, 4, 1, 2, 2, 3, 4} - * (This corresponds to the following grouping and `driver` rows:) - * { "HAM", "LEC", "BOT", "NOR", "RIC", "RIC", "NOR", "BOT", "LEC", "PER" } - * <----------silverstone----------->|<-------------monza--------------> * @endcode - */ -template -std::unique_ptr make_dense_rank_aggregation(); - -/** - * @brief Factory to create a PERCENT_RANK aggregation * - * `PERCENT_RANK` returns a non-nullable column of double precision "fractional" ranks. - * For row index `i`, the percent rank of row `i` is defined as: - * percent_rank = (rank - 1) / (group_row_count - 1) - * where, - * 1. rank is the `RANK` of the row within the group - * 2. group_row_count is the number of rows in the group - * - * This aggregation only works with "scan" algorithms. The input to the grouped or - * ungrouped scan is an orderby column that orders the rows that the aggregate function ranks. - * If rows are ordered by more than one column, the orderby input column should be a struct - * column containing the ordering columns. - * - * Note: - * 1. This method requires that the rows are presorted by the group keys and order_by columns. - * 2. `PERCENT_RANK` aggregations will return a fully valid column regardless of null_handling - * policy specified in the scan. - * 3. `PERCENT_RANK` aggregations are not compatible with exclusive scans. - * - * @code{.pseudo} - * Example: Consider a motor-racing statistics dataset, containing the following columns: - * 1. venue: (STRING) Location of the race event - * 2. driver: (STRING) Name of the car driver (abbreviated to 3 characters) - * 3. 
time: (INT32) Time taken to complete the circuit - * - * For the following presorted data: - * - * [ // venue, driver, time - * { "silverstone", "HAM" ("hamilton"), 15823}, - * { "silverstone", "LEC" ("leclerc"), 15827}, - * { "silverstone", "BOT" ("bottas"), 15834}, // <-- Tied for 3rd place. - * { "silverstone", "NOR" ("norris"), 15834}, // <-- Tied for 3rd place. - * { "silverstone", "RIC" ("ricciardo"), 15905}, - * { "monza", "RIC" ("ricciardo"), 12154}, - * { "monza", "NOR" ("norris"), 12156}, // <-- Tied for 2nd place. - * { "monza", "BOT" ("bottas"), 12156}, // <-- Tied for 2nd place. - * { "monza", "LEC" ("leclerc"), 12201}, - * { "monza", "PER" ("perez"), 12203} - * ] - * - * A grouped percent rank aggregation scan with: - * groupby column : venue - * input orderby column: time - * Produces the following percent rank column: - * { 0.00, 0.25, 0.50, 0.50, 1.00, 0.00, 0.25, 0.25, 0.75, 1.00 } - * - * (This corresponds to the following grouping and `driver` rows:) - * { "HAM", "LEC", "BOT", "NOR", "RIC", "RIC", "NOR", "BOT", "LEC", "PER" } - * <----------silverstone----------->|<-------------monza--------------> - * @endcode + * @param method The ranking method used for tie breaking (same values). + * @param column_order The desired sort order for ranking + * @param null_handling flag to include nulls during ranking. If nulls are not included, + * the corresponding rank will be null. + * @param null_precedence The desired order of null compared to other elements for column + * @param percentage enum to denote the type of conversion of ranks to percentage in range (0,1] */ template -std::unique_ptr make_percent_rank_aggregation(); +std::unique_ptr make_rank_aggregation(rank_method method, + order column_order = order::ASCENDING, + null_policy null_handling = null_policy::EXCLUDE, + null_order null_precedence = null_order::AFTER, + rank_percentage percentage = rank_percentage::NONE); /** * @brief Factory to create a COLLECT_LIST aggregation diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 886151fb9d6..8ca49dd7d5f 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -75,10 +75,6 @@ class simple_aggregations_collector { // Declares the interface for the simple class row_number_aggregation const& agg); virtual std::vector> visit(data_type col_type, class rank_aggregation const& agg); - virtual std::vector> visit(data_type col_type, - class dense_rank_aggregation const& agg); - virtual std::vector> visit( - data_type col_type, class percent_rank_aggregation const& agg); virtual std::vector> visit( data_type col_type, class collect_list_aggregation const& agg); virtual std::vector> visit(data_type col_type, @@ -127,8 +123,6 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class nth_element_aggregation const& agg); virtual void visit(class row_number_aggregation const& agg); virtual void visit(class rank_aggregation const& agg); - virtual void visit(class dense_rank_aggregation const& agg); - virtual void visit(class percent_rank_aggregation const& agg); virtual void visit(class collect_list_aggregation const& agg); virtual void visit(class collect_set_aggregation const& agg); virtual void visit(class lead_lag_aggregation const& agg); @@ -642,32 +636,42 @@ class rank_aggregation final : public rolling_aggregation, public groupby_scan_aggregation, public scan_aggregation { public: - 
rank_aggregation() : aggregation{RANK} {} - - [[nodiscard]] std::unique_ptr clone() const override + rank_aggregation(rank_method method, + order column_order, + null_policy null_handling, + null_order null_precedence, + rank_percentage percentage) + : aggregation{RANK}, + _method{method}, + _column_order{column_order}, + _null_handling{null_handling}, + _null_precedence{null_precedence}, + _percentage(percentage) { - return std::make_unique(*this); } - std::vector> get_simple_aggregations( - data_type col_type, simple_aggregations_collector& collector) const override + rank_method const _method; ///< rank method + order const _column_order; ///< order of the column to rank + null_policy const _null_handling; ///< include or exclude nulls in ranks + null_order const _null_precedence; ///< order of nulls in ranks + rank_percentage const _percentage; ///< whether to return percentage ranks + + [[nodiscard]] bool is_equal(aggregation const& _other) const override { - return collector.visit(col_type, *this); + if (!this->aggregation::is_equal(_other)) { return false; } + auto const& other = dynamic_cast(_other); + return _method == other._method and _null_handling == other._null_handling and + _column_order == other._column_order and _null_precedence == other._null_precedence and + _percentage == other._percentage; } - void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } -}; -/** - * @brief Derived class for specifying a dense rank aggregation - */ -class dense_rank_aggregation final : public rolling_aggregation, - public groupby_scan_aggregation, - public scan_aggregation { - public: - dense_rank_aggregation() : aggregation{DENSE_RANK} {} + [[nodiscard]] size_t do_hash() const override + { + return this->aggregation::do_hash() ^ hash_impl(); + } [[nodiscard]] std::unique_ptr clone() const override { - return std::make_unique(*this); + return std::make_unique(*this); } std::vector> get_simple_aggregations( data_type col_type, simple_aggregations_collector& collector) const override @@ -675,24 +679,16 @@ class dense_rank_aggregation final : public rolling_aggregation, return collector.visit(col_type, *this); } void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } -}; - -class percent_rank_aggregation final : public rolling_aggregation, - public groupby_scan_aggregation, - public scan_aggregation { - public: - percent_rank_aggregation() : aggregation{PERCENT_RANK} {} - [[nodiscard]] std::unique_ptr clone() const override - { - return std::make_unique(*this); - } - std::vector> get_simple_aggregations( - data_type col_type, simple_aggregations_collector& collector) const override + private: + [[nodiscard]] size_t hash_impl() const { - return collector.visit(col_type, *this); + return std::hash{}(static_cast(_method)) ^ + std::hash{}(static_cast(_column_order)) ^ + std::hash{}(static_cast(_null_handling)) ^ + std::hash{}(static_cast(_null_precedence)) ^ + std::hash{}(static_cast(_percentage)); } - void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; /** @@ -1278,19 +1274,7 @@ struct target_type_impl { // Always use size_type accumulator for RANK template struct target_type_impl { - using type = size_type; -}; - -// Always use size_type accumulator for DENSE_RANK -template -struct target_type_impl { - using type = size_type; -}; - -// Always use double for PERCENT_RANK -template -struct target_type_impl { - using type = double; + using type = size_type; // double for percentage=true. 
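+  // (The groupby/scan paths construct a double column instead when the RANK
+  // aggregation uses rank_method::AVERAGE or rank_percentage != NONE; see
+  // empty_column_constructor in groupby.cu.)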
}; // Always use list for COLLECT_LIST @@ -1453,10 +1437,6 @@ CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind return f.template operator()(std::forward(args)...); case aggregation::RANK: return f.template operator()(std::forward(args)...); - case aggregation::DENSE_RANK: - return f.template operator()(std::forward(args)...); - case aggregation::PERCENT_RANK: - return f.template operator()(std::forward(args)...); case aggregation::COLLECT_LIST: return f.template operator()(std::forward(args)...); case aggregation::COLLECT_SET: diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp index fc829617c2d..13dddd3b0c8 100644 --- a/cpp/include/cudf/detail/scan.hpp +++ b/cpp/include/cudf/detail/scan.hpp @@ -103,16 +103,17 @@ std::unique_ptr inclusive_dense_rank_scan(column_view const& order_by, rmm::mr::device_memory_resource* mr); /** - * @brief Generate row percent ranks for a column. + * @brief Generate row ONE_NORMALIZED percent ranks for a column. + * Also, knowns as ANSI SQL PERCENT RANK. + * Calculated by (rank - 1) / (count - 1). * * @param order_by Input column to generate ranks for. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return rank values. */ -std::unique_ptr inclusive_percent_rank_scan(column_view const& order_by, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr inclusive_one_normalized_percent_rank_scan( + column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index ff334b9ee85..b7e915650dc 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -23,19 +24,6 @@ namespace cudf { -/** - * @brief Tie-breaker method to use for ranking the column. - * - * @ingroup column_sort - */ -enum class rank_method { - FIRST, ///< stable sort order ranking (no ties) - AVERAGE, ///< mean of first in the group - MIN, ///< min of first in the group - MAX, ///< max of first in the group - DENSE ///< rank always increases by 1 between groups -}; - /** * @addtogroup column_sort * @{ @@ -198,7 +186,7 @@ std::unique_ptr
stable_sort_by_key( * included, corresponding rank will be null. * @param null_precedence The desired order of null compared to other elements * for column - * @param percentage flag to convert ranks to percentage in range (0,1} + * @param percentage flag to convert ranks to percentage in range (0,1] * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr A column of containing the rank of the each * element of the column of `input`. The output column type will be `size_type` diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 8fedf641c8f..27732b25401 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -154,18 +154,6 @@ std::vector> simple_aggregations_collector::visit( return visit(col_type, static_cast(agg)); } -std::vector> simple_aggregations_collector::visit( - data_type col_type, dense_rank_aggregation const& agg) -{ - return visit(col_type, static_cast(agg)); -} - -std::vector> simple_aggregations_collector::visit( - data_type col_type, percent_rank_aggregation const& agg) -{ - return visit(col_type, static_cast(agg)); -} - std::vector> simple_aggregations_collector::visit( data_type col_type, collect_list_aggregation const& agg) { @@ -334,16 +322,6 @@ void aggregation_finalizer::visit(rank_aggregation const& agg) visit(static_cast(agg)); } -void aggregation_finalizer::visit(dense_rank_aggregation const& agg) -{ - visit(static_cast(agg)); -} - -void aggregation_finalizer::visit(percent_rank_aggregation const& agg) -{ - visit(static_cast(agg)); -} - void aggregation_finalizer::visit(collect_list_aggregation const& agg) { visit(static_cast(agg)); @@ -644,36 +622,33 @@ template std::unique_ptr make_row_number_aggregation -std::unique_ptr make_rank_aggregation() -{ - return std::make_unique(); -} -template std::unique_ptr make_rank_aggregation(); -template std::unique_ptr -make_rank_aggregation(); -template std::unique_ptr make_rank_aggregation(); - -/// Factory to create a DENSE_RANK aggregation -template -std::unique_ptr make_dense_rank_aggregation() -{ - return std::make_unique(); -} -template std::unique_ptr make_dense_rank_aggregation(); -template std::unique_ptr -make_dense_rank_aggregation(); -template std::unique_ptr make_dense_rank_aggregation(); - -/// Factory to create a PERCENT_RANK aggregation -template -std::unique_ptr make_percent_rank_aggregation() -{ - return std::make_unique(); -} -template std::unique_ptr make_percent_rank_aggregation(); -template std::unique_ptr -make_percent_rank_aggregation(); -template std::unique_ptr make_percent_rank_aggregation(); +std::unique_ptr make_rank_aggregation(rank_method method, + order column_order, + null_policy null_handling, + null_order null_precedence, + rank_percentage percentage) +{ + return std::make_unique( + method, column_order, null_handling, null_precedence, percentage); +} +template std::unique_ptr make_rank_aggregation( + rank_method method, + order column_order, + null_policy null_handling, + null_order null_precedence, + rank_percentage percentage); +template std::unique_ptr make_rank_aggregation( + rank_method method, + order column_order, + null_policy null_handling, + null_order null_precedence, + rank_percentage percentage); +template std::unique_ptr make_rank_aggregation( + rank_method method, + order column_order, + null_policy null_handling, + null_order null_precedence, + rank_percentage percentage); /// Factory to create a COLLECT_LIST aggregation template diff --git 
a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 79882239b38..a002b0bb744 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -101,9 +101,12 @@ namespace { * Adds special handling for COLLECT_LIST/COLLECT_SET, because: * 1. `make_empty_column()` does not support construction of nested columns. * 2. Empty lists need empty child columns, to persist type information. + * Adds special handling for RANK, because it needs to return double type column when rank_method is + * AVERAGE or percentage is true. */ struct empty_column_constructor { column_view values; + aggregation const& agg; template std::unique_ptr operator()() const @@ -116,6 +119,14 @@ struct empty_column_constructor { 0, make_empty_column(type_to_id()), empty_like(values), 0, {}); } + if constexpr (k == aggregation::Kind::RANK) { + auto const& rank_agg = dynamic_cast(agg); + if (rank_agg._method == cudf::rank_method::AVERAGE or + rank_agg._percentage != rank_percentage::NONE) + return make_empty_column(type_to_id()); + return make_empty_column(target_type(values.type(), k)); + } + // If `values` is LIST typed, and the aggregation results match the type, // construct empty results based on `values`. // Most generally, this applies if input type matches output type. @@ -148,7 +159,7 @@ auto empty_results(host_span requests) std::back_inserter(results), [&request](auto const& agg) { return cudf::detail::dispatch_type_and_aggregation( - request.values.type(), agg->kind, empty_column_constructor{request.values}); + request.values.type(), agg->kind, empty_column_constructor{request.values, *agg}); }); return aggregation_result{std::move(results)}; diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp index fa3d19bdcfd..748e34a583d 100644 --- a/cpp/src/groupby/sort/functors.hpp +++ b/cpp/src/groupby/sort/functors.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -87,7 +87,6 @@ struct store_result_functor { */ column_view get_sorted_values() { - if (is_presorted()) { return values; } return sorted_values ? sorted_values->view() : (sorted_values = helper.sorted_values(values, stream))->view(); }; diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 77d68edaa3a..0b25ab9a33d 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,7 @@ #include #include +#include #include #include #include @@ -35,23 +37,59 @@ namespace cudf { namespace groupby { namespace detail { namespace { + +/** + * @brief Functor to compare two rows of a table in given permutation order + * This is useful to identify unique elements in a sorted order table, when the permutation order is + * the sorted order of the table. + * + */ +template +struct permuted_comparator { + /** + * @brief comparator object which compares two rows of the table in given permutation order + * + * @param device_table Device table to compare + * @param permutation The permutation order, integer type column. 
+ * @param has_nulls whether the table has nulls */ + permuted_comparator(table_device_view device_table, Iterator const permutation, bool has_nulls) + : comparator(nullate::DYNAMIC{has_nulls}, device_table, device_table, null_equality::EQUAL), + permutation(permutation) + { + } + __device__ bool operator()(size_type index1, size_type index2) const + { + return comparator(permutation[index1], permutation[index2]); + }; + + private: + row_equality_comparator<nullate::DYNAMIC> comparator; + Iterator const permutation; +}; + /** * @brief Generate grouped row ranks or dense ranks using a row comparison, then scan the results * + * @tparam forward true if the rank scan should use forward iterator traversal (the default), + * false for reverse iterator traversal * @tparam value_resolver flag value resolver function with boolean first and row number arguments * @tparam scan_operator scan function run on the flag values - * @param order_by input column to generate ranks for + * @param grouped_values input column to generate ranks for + * @param value_order column of type INT32 that contains the order of the values in the + * grouped_values column * @param group_labels ID of group that the corresponding value belongs to * @param group_offsets group index offsets with group ID indices * @param resolver flag value resolver * @param scan_op scan operation run on the flag results - * @param has_nulls true if nulls are included in the `order_by` column + * @param has_nulls true if nulls are included in the `grouped_values` column * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return std::unique_ptr<column> rank values */ -template <typename value_resolver, typename scan_operator> -std::unique_ptr<column> rank_generator(column_view const& order_by, +template <bool forward, typename value_resolver, typename scan_operator> +std::unique_ptr<column> rank_generator(column_view const& grouped_values, + column_view const& value_order, device_span<size_type const> group_labels, device_span<size_type const> group_offsets, value_resolver resolver, @@ -61,10 +99,11 @@ std::unique_ptr<column> rank_generator(column_view const& order_by, rmm::mr::device_memory_resource* mr) { auto const flattened = cudf::structs::detail::flatten_nested_columns( - table_view{{order_by}}, {}, {}, structs::detail::column_nullability::MATCH_INCOMING); + table_view{{grouped_values}}, {}, {}, structs::detail::column_nullability::MATCH_INCOMING); auto const d_flat_order = table_device_view::create(flattened, stream); - row_equality_comparator comparator( - nullate::DYNAMIC{has_nulls}, *d_flat_order, *d_flat_order, null_equality::EQUAL); + auto sorted_index_order = value_order.begin<size_type>(); + auto comparator = permuted_comparator(*d_flat_order, sorted_index_order, has_nulls); + auto ranks = make_fixed_width_column(data_type{type_to_id<size_type>()}, flattened.flattened_columns().num_rows(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); - thrust::tabulate( - rmm::exec_policy(stream), - mutable_ranks.begin<size_type>(), - mutable_ranks.end<size_type>(), - [comparator, resolver, labels = group_labels.data(), offsets = group_offsets.data()] __device__( - size_type row_index) { - auto group_start = offsets[labels[row_index]]; + auto unique_identifier = [labels = group_labels.begin(), + offsets = group_offsets.begin(), + comparator, + resolver] __device__(size_type row_index) { + auto const group_start = offsets[labels[row_index]]; + if constexpr (forward) { + // First value of equal values is 1.
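+      // e.g. grouped values {3, 3, 4}: MIN seeds {1, 0, 3} (max-scanned to {1, 1, 3});
+      // DENSE seeds {1, 0, 1} (sum-scanned to {1, 1, 2}).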
return resolver(row_index == group_start || !comparator(row_index, row_index - 1), row_index - group_start); - }); + } else { + auto const group_end = offsets[labels[row_index] + 1]; + // Last value of equal values is 1. + return resolver(row_index + 1 == group_end || !comparator(row_index, row_index + 1), + row_index - group_start); + } + }; + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin(), + mutable_ranks.end(), + unique_identifier); + auto [group_labels_begin, mutable_rank_begin] = [&]() { + if constexpr (forward) { + return thrust::pair{group_labels.begin(), mutable_ranks.begin()}; + } else { + return thrust::pair{thrust::reverse_iterator(group_labels.end()), + thrust::reverse_iterator(mutable_ranks.end())}; + } + }(); thrust::inclusive_scan_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - mutable_ranks.begin(), - mutable_ranks.begin(), + group_labels_begin, + group_labels_begin + group_labels.size(), + mutable_rank_begin, + mutable_rank_begin, thrust::equal_to{}, scan_op); - return ranks; } } // namespace -std::unique_ptr rank_scan(column_view const& order_by, - device_span group_labels, - device_span group_offsets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr min_rank_scan(column_view const& grouped_values, + column_view const& value_order, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - return rank_generator( - order_by, + return rank_generator( + grouped_values, + value_order, group_labels, group_offsets, [] __device__(bool unequal, auto row_index_in_group) { return unequal ? row_index_in_group + 1 : 0; }, DeviceMax{}, - has_nested_nulls(table_view{{order_by}}), + has_nested_nulls(table_view{{grouped_values}}), stream, mr); } -std::unique_ptr dense_rank_scan(column_view const& order_by, - device_span group_labels, - device_span group_offsets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr max_rank_scan(column_view const& grouped_values, + column_view const& value_order, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - return rank_generator( - order_by, + return rank_generator( + grouped_values, + value_order, group_labels, group_offsets, - [] __device__(bool const unequal, size_type const) { return unequal ? 1 : 0; }, - DeviceSum{}, - has_nested_nulls(table_view{{order_by}}), + [] __device__(bool unequal, auto row_index_in_group) { + return unequal ? 
row_index_in_group + 1 : std::numeric_limits<size_type>::max(); + }, + DeviceMin{}, + has_nested_nulls(table_view{{grouped_values}}), + stream, + mr); } -std::unique_ptr<column> percent_rank_scan(column_view const& order_by, +std::unique_ptr<column> first_rank_scan(column_view const& grouped_values, + column_view const&, + device_span<size_type const> group_labels, + device_span<size_type const> group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto ranks = make_fixed_width_column( + data_type{type_to_id<size_type>()}, group_labels.size(), mask_state::UNALLOCATED, stream, mr); + auto mutable_ranks = ranks->mutable_view(); + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin<size_type>(), + mutable_ranks.end<size_type>(), + [labels = group_labels.begin(), + offsets = group_offsets.begin()] __device__(size_type row_index) { + auto group_start = offsets[labels[row_index]]; + return row_index - group_start + 1; + }); + return ranks; +} + +std::unique_ptr<column> average_rank_scan(column_view const& grouped_values, + column_view const& value_order, device_span<size_type const> group_labels, device_span<size_type const> group_offsets, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const rank_column = rank_scan( - order_by, group_labels, group_offsets, stream, rmm::mr::get_current_device_resource()); - auto const rank_view = rank_column->view(); - auto const group_size_iter = cudf::detail::make_counting_transform_iterator( - 0, - [labels = group_labels.begin(), - offsets = group_offsets.begin()] __device__(size_type row_index) { - auto const group_label = labels[row_index]; - auto const group_start = offsets[group_label]; - auto const group_end = offsets[group_label + 1]; - return group_end - group_start; - }); - - // Result type for PERCENT_RANK is independent of input type. - using result_type = cudf::detail::target_type_t<size_type, aggregation::PERCENT_RANK>; - - auto percent_rank_result = cudf::make_fixed_width_column( - data_type{type_to_id<result_type>()}, rank_view.size(), mask_state::UNALLOCATED, stream, mr); - + auto max_rank = max_rank_scan(grouped_values, + value_order, + group_labels, + group_offsets, + stream, + rmm::mr::get_current_device_resource()); + auto min_rank = min_rank_scan(grouped_values, + value_order, + group_labels, + group_offsets, + stream, + rmm::mr::get_current_device_resource()); + auto ranks = make_fixed_width_column( + data_type{type_to_id<double>()}, group_labels.size(), mask_state::UNALLOCATED, stream, mr); + auto mutable_ranks = ranks->mutable_view(); thrust::transform(rmm::exec_policy(stream), - rank_view.begin<size_type>(), - rank_view.end<size_type>(), - group_size_iter, - percent_rank_result->mutable_view().begin<result_type>(), - [] __device__(auto const rank, auto const group_size) { - return group_size == 1 ? 0.0 : ((rank - 1.0) / (group_size - 1)); + max_rank->view().begin<size_type>(), + max_rank->view().end<size_type>(), + min_rank->view().begin<size_type>(), + mutable_ranks.begin<double>(), + [] __device__(auto max_rank, auto min_rank) -> double { + return min_rank + (max_rank - min_rank) / 2.0; }); + return ranks; +} - return percent_rank_result; +std::unique_ptr<column> dense_rank_scan(column_view const& grouped_values, + column_view const& value_order, device_span<size_type const> group_labels, device_span<size_type const> group_offsets, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) +{ + return rank_generator( + grouped_values, + value_order, + group_labels, + group_offsets, + [] __device__(bool const unequal, size_type const) { return unequal ? 
1 : 0; }, + DeviceSum{}, + has_nested_nulls(table_view{{grouped_values}}), + stream, + mr); +} + +std::unique_ptr<column> group_rank_to_percentage(rank_method const method, + rank_percentage const percentage, + column_view const& rank, + column_view const& count, + device_span<size_type const> group_labels, + device_span<size_type const> group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(percentage != rank_percentage::NONE, "Percentage cannot be NONE"); + auto ranks = make_fixed_width_column( + data_type{type_to_id<double>()}, group_labels.size(), mask_state::UNALLOCATED, stream, mr); + ranks->set_null_mask(copy_bitmask(rank, stream, mr)); + auto mutable_ranks = ranks->mutable_view(); + + auto one_normalized = [] __device__(auto const rank, auto const group_size) { + return group_size == 1 ? 0.0 : ((rank - 1.0) / (group_size - 1)); + }; + if (method == rank_method::DENSE) { + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin<double>(), + mutable_ranks.end<double>(), + [percentage, + one_normalized, + is_double = rank.type().id() == type_id::FLOAT64, + dcount = count.begin<size_type>(), + labels = group_labels.begin(), + offsets = group_offsets.begin(), + d_rank = rank.begin<double>(), + s_rank = rank.begin<size_type>()] __device__(size_type row_index) -> double { + double const r = is_double ? d_rank[row_index] : s_rank[row_index]; + auto const count = dcount[labels[row_index]]; + size_type const last_rank_index = offsets[labels[row_index]] + count - 1; + auto const last_rank = s_rank[last_rank_index]; + return percentage == rank_percentage::ZERO_NORMALIZED + ? r / last_rank + : one_normalized(r, last_rank); + }); + } else { + thrust::tabulate(rmm::exec_policy(stream), + mutable_ranks.begin<double>(), + mutable_ranks.end<double>(), + [percentage, + one_normalized, + is_double = rank.type().id() == type_id::FLOAT64, + dcount = count.begin<size_type>(), + labels = group_labels.begin(), + d_rank = rank.begin<double>(), + s_rank = rank.begin<size_type>()] __device__(size_type row_index) -> double { + double const r = is_double ? d_rank[row_index] : s_rank[row_index]; + auto const count = dcount[labels[row_index]]; + return percentage == rank_percentage::ZERO_NORMALIZED + ? 
r / count + : one_normalized(r, count); + }); + } + return ranks; } } // namespace detail diff --git a/cpp/src/groupby/sort/group_scan.hpp b/cpp/src/groupby/sort/group_scan.hpp index 76a7f3f73c7..dc0eb691748 100644 --- a/cpp/src/groupby/sort/group_scan.hpp +++ b/cpp/src/groupby/sort/group_scan.hpp @@ -85,52 +85,115 @@ std::unique_ptr count_scan(device_span group_labels, rmm::mr::device_memory_resource* mr); /** - * @brief Internal API to calculate groupwise rank value + * @brief Internal API to calculate groupwise min rank value * - * @param order_by column or struct column that rows within a group are sorted by + * @param grouped_values column or struct column that rows within a group are sorted by + * @param value_order column of type INT32 that contains the order of the values in the + * grouped_values column * @param group_labels ID of group that the corresponding value belongs to * @param group_offsets group index offsets with group ID indices * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of type size_type of rank values */ -std::unique_ptr rank_scan(column_view const& order_by, - device_span group_labels, - device_span group_offsets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr min_rank_scan(column_view const& grouped_values, + column_view const& value_order, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief Internal API to calculate groupwise max rank value + * + * @details @copydetails min_rank_scan(column_view const& grouped_values, + * column_view const& value_order, + * device_span group_labels, + * device_span group_offsets, + * rmm::cuda_stream_view stream, + * rmm::mr::device_memory_resource* mr) + */ +std::unique_ptr max_rank_scan(column_view const& grouped_values, + column_view const& value_order, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief Internal API to calculate groupwise first rank value + * + * @details @copydetails min_rank_scan(column_view const& grouped_values, + * column_view const& value_order, + * device_span group_labels, + * device_span group_offsets, + * rmm::cuda_stream_view stream, + * rmm::mr::device_memory_resource* mr) + */ +std::unique_ptr first_rank_scan(column_view const& grouped_values, + column_view const& value_order, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief Internal API to calculate groupwise average rank value + * + * @details @copydetails min_rank_scan(column_view const& grouped_values, + * column_view const& value_order, + * device_span group_labels, + * device_span group_offsets, + * rmm::cuda_stream_view stream, + * rmm::mr::device_memory_resource* mr) + */ +std::unique_ptr average_rank_scan(column_view const& grouped_values, + column_view const& value_order, + device_span group_labels, + device_span group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Internal API to calculate groupwise dense rank value * - * @param order_by column or struct column that rows within a group are sorted by + * @param grouped_values column or struct column that rows within a group are sorted by * @param group_labels ID of group that 
the corresponding value belongs to * @param group_offsets group index offsets with group ID indices * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return Column of type size_type of dense rank values */ -std::unique_ptr<column> dense_rank_scan(column_view const& order_by, +std::unique_ptr<column> dense_rank_scan(column_view const& grouped_values, + column_view const& value_order, device_span<size_type const> group_labels, device_span<size_type const> group_offsets, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** - * @brief Internal API to calculate groupwise percent rank value + * @brief Convert groupwise rank to groupwise percentage rank * - * @param order_by column or struct column by which the rows within a group are sorted - * @param group_labels ID of group to which the row belongs + * @param method rank method + * @param percentage enum denoting the type of conversion of ranks to percentage, in range (0,1] + * @param rank Groupwise rank column + * @param count Groupwise count column + * @param group_labels ID of group that the corresponding value belongs to * @param group_offsets group index offsets with group ID indices * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return Column of type `double` of percent rank values + * @return Column of type double of percentage rank values + */ -std::unique_ptr<column> percent_rank_scan(column_view const& order_by, - device_span<size_type const> group_labels, - device_span<size_type const> group_offsets, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::unique_ptr<column> group_rank_to_percentage(rank_method const method, + rank_percentage const percentage, + column_view const& rank, + column_view const& count, + device_span<size_type const> group_labels, + device_span<size_type const> group_offsets, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + } // namespace detail } // namespace groupby } // namespace cudf diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 20edc1b3f50..5d345273782 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -16,14 +16,20 @@ #include #include +#include #include #include #include #include #include +#include +#include +#include +#include #include #include +#include #include #include #include @@ -115,51 +121,70 @@ template <> void scan_result_functor::operator()<aggregation::RANK>(aggregation const& agg) { if (cache.has_result(values, agg)) return; - CUDF_EXPECTS(helper.is_presorted(), - "Rank aggregate in groupby scan requires the keys to be presorted"); - auto const order_by = get_grouped_values(); - CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(order_by), - "Unsupported list type in grouped rank scan."); - - cache.add_result( - values, - agg, - detail::rank_scan( - order_by, helper.group_labels(stream), helper.group_offsets(stream), stream, mr)); -} - -template <> -void scan_result_functor::operator()<aggregation::DENSE_RANK>(aggregation const& agg) -{ - if (cache.has_result(values, agg)) return; - CUDF_EXPECTS(helper.is_presorted(), - "Dense rank aggregate in groupby scan requires the keys to be presorted"); - auto const order_by = get_grouped_values(); - CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(order_by), - "Unsupported list type in grouped dense_rank scan."); - cache.add_result( - values, - agg, - detail::dense_rank_scan( - order_by, helper.group_labels(stream), 
helper.group_offsets(stream), stream, mr)); -} - -template <> -void scan_result_functor::operator()(aggregation const& agg) -{ - if (cache.has_result(values, agg)) return; - CUDF_EXPECTS(helper.is_presorted(), - "Percent rank aggregate in groupby scan requires the keys to be presorted"); - auto const order_by = get_grouped_values(); - CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(order_by), - "Unsupported list type in grouped percent_rank scan."); - - cache.add_result( - values, - agg, - detail::percent_rank_scan( - order_by, helper.group_labels(stream), helper.group_offsets(stream), stream, mr)); + CUDF_EXPECTS(!cudf::structs::detail::is_or_has_nested_lists(values), + "Unsupported list type in grouped rank scan."); + auto const& rank_agg = dynamic_cast(agg); + auto const& group_labels = helper.group_labels(stream); + auto const group_labels_view = column_view(cudf::device_span(group_labels)); + auto const gather_map = [&]() { + if (is_presorted()) { // assumes both keys and values are sorted, Spark does this. + return cudf::detail::sequence( + group_labels.size(), *cudf::make_fixed_width_scalar(size_type{0}, stream), stream); + } else { + auto sort_order = (rank_agg._method == rank_method::FIRST ? cudf::detail::stable_sorted_order + : cudf::detail::sorted_order); + return sort_order(table_view({group_labels_view, get_grouped_values()}), + {order::ASCENDING, rank_agg._column_order}, + {null_order::AFTER, rank_agg._null_precedence}, + stream, + rmm::mr::get_current_device_resource()); + } + }(); + + auto rank_scan = [&]() { + switch (rank_agg._method) { + case rank_method::FIRST: return detail::first_rank_scan; + case rank_method::AVERAGE: return detail::average_rank_scan; + case rank_method::DENSE: return detail::dense_rank_scan; + case rank_method::MIN: return detail::min_rank_scan; + case rank_method::MAX: return detail::max_rank_scan; + default: CUDF_FAIL("Unsupported rank method in groupby scan"); + } + }(); + auto result = rank_scan(get_grouped_values(), + *gather_map, + helper.group_labels(stream), + helper.group_offsets(stream), + stream, + rmm::mr::get_current_device_resource()); + if (rank_agg._percentage != rank_percentage::NONE) { + auto count = get_grouped_values().nullable() and rank_agg._null_handling == null_policy::EXCLUDE + ? 
detail::group_count_valid(get_grouped_values(), + helper.group_labels(stream), + helper.num_groups(stream), + stream, + rmm::mr::get_current_device_resource()) + : detail::group_count_all(helper.group_offsets(stream), + helper.num_groups(stream), + stream, + rmm::mr::get_current_device_resource()); + result = detail::group_rank_to_percentage(rank_agg._method, + rank_agg._percentage, + *result, + *count, + helper.group_labels(stream), + helper.group_offsets(stream), + stream, + mr); + } + result = std::move(cudf::detail::scatter( + table_view{{*result}}, *gather_map, table_view{{*result}}, false, stream, mr) + ->release()[0]); + if (rank_agg._null_handling == null_policy::EXCLUDE) { + result->set_null_mask(cudf::detail::copy_bitmask(get_grouped_values(), stream, mr)); + } + cache.add_result(values, agg, std::move(result)); } } // namespace detail diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index 521f8e2d06f..0ababbf0a3d 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -102,16 +102,15 @@ std::unique_ptr inclusive_rank_scan(column_view const& order_by, mr); } -std::unique_ptr inclusive_percent_rank_scan(column_view const& order_by, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr inclusive_one_normalized_percent_rank_scan( + column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { auto const rank_column = inclusive_rank_scan(order_by, stream, rmm::mr::get_current_device_resource()); auto const rank_view = rank_column->view(); - // Result type for PERCENT_RANK is independent of input type. - using result_type = cudf::detail::target_type_t; + // Result type for min 0-index percent rank is independent of input type. 
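+  // ONE_NORMALIZED maps min ranks {1..n} onto [0, 1] via (rank - 1) / (n - 1);
+  // a single-element column maps to 0.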
+ using result_type = double; auto percent_rank_result = cudf::make_fixed_width_column( data_type{type_to_id<result_type>()}, rank_view.size(), mask_state::UNALLOCATED, stream, mr); diff --git a/cpp/src/reductions/scan/scan.cpp b/cpp/src/reductions/scan/scan.cpp index 52aaad5ddcf..b678b9441a5 100644 --- a/cpp/src/reductions/scan/scan.cpp +++ b/cpp/src/reductions/scan/scan.cpp @@ -35,17 +35,17 @@ std::unique_ptr<column> scan(column_view const& input, if (agg->kind == aggregation::RANK) { CUDF_EXPECTS(inclusive == scan_type::INCLUSIVE, "Rank aggregation operator requires an inclusive scan"); - return inclusive_rank_scan(input, rmm::cuda_stream_default, mr); - } - if (agg->kind == aggregation::DENSE_RANK) { - CUDF_EXPECTS(inclusive == scan_type::INCLUSIVE, - "Dense rank aggregation operator requires an inclusive scan"); - return inclusive_dense_rank_scan(input, rmm::cuda_stream_default, mr); - } - if (agg->kind == aggregation::PERCENT_RANK) { - CUDF_EXPECTS(inclusive == scan_type::INCLUSIVE, - "Percent rank aggregation operator requires an inclusive scan"); - return inclusive_percent_rank_scan(input, rmm::cuda_stream_default, mr); + auto const& rank_agg = dynamic_cast<cudf::detail::rank_aggregation const&>(*agg); + if (rank_agg._method == rank_method::MIN) { + if (rank_agg._percentage == rank_percentage::NONE) { + return inclusive_rank_scan(input, rmm::cuda_stream_default, mr); + } else if (rank_agg._percentage == rank_percentage::ONE_NORMALIZED) { + return inclusive_one_normalized_percent_rank_scan(input, rmm::cuda_stream_default, mr); + } + } else if (rank_agg._method == rank_method::DENSE) { + return inclusive_dense_rank_scan(input, rmm::cuda_stream_default, mr); + } + CUDF_FAIL("Unsupported rank aggregation method for inclusive scan"); } return inclusive == scan_type::EXCLUSIVE diff --git a/cpp/tests/groupby/rank_scan_tests.cpp b/cpp/tests/groupby/rank_scan_tests.cpp index 81369beb2ec..d4e8b4cbf0f 100644 --- a/cpp/tests/groupby/rank_scan_tests.cpp +++ b/cpp/tests/groupby/rank_scan_tests.cpp @@ -29,11 +29,9 @@ namespace test { using namespace iterators; template <typename T> -using input = fixed_width_column_wrapper<T>; -using rank_result_col = fixed_width_column_wrapper<size_type>; -using percent_result_t = - cudf::detail::target_type_t<size_type, aggregation::PERCENT_RANK>; -using percent_result_col = fixed_width_column_wrapper<percent_result_t>; +using input = fixed_width_column_wrapper<T>; +using rank_result_col = fixed_width_column_wrapper<size_type>; +using percent_result_col = fixed_width_column_wrapper<double>; using null_iter_t = decltype(nulls_at({})); auto constexpr X = int32_t{0}; // Placeholder for NULL rows. 
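For reference, the three dedicated factories collapse into one parameterized factory. A minimal sketch of equivalent calls, mirroring the updated call sites below (the groupby_scan_aggregation template argument and the brace-defaulted order/null_order arguments are assumptions here):

  auto dense_rank = cudf::make_rank_aggregation<cudf::groupby_scan_aggregation>(
    cudf::rank_method::DENSE, {}, cudf::null_policy::INCLUDE);
  auto min_rank = cudf::make_rank_aggregation<cudf::groupby_scan_aggregation>(
    cudf::rank_method::MIN, {}, cudf::null_policy::INCLUDE);
  auto percent_rank = cudf::make_rank_aggregation<cudf::groupby_scan_aggregation>(
    cudf::rank_method::MIN, {}, cudf::null_policy::INCLUDE, {}, cudf::rank_percentage::ONE_NORMALIZED);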
@@ -45,27 +43,31 @@ inline void test_rank_scans(column_view const& keys, column_view const& expected_rank, column_view const& expected_percent_rank) { - test_single_scan(keys, - order, - keys, - expected_dense, - make_dense_rank_aggregation(), - null_policy::INCLUDE, - sorted::YES); - test_single_scan(keys, - order, - keys, - expected_rank, - make_rank_aggregation(), - null_policy::INCLUDE, - sorted::YES); - test_single_scan(keys, - order, - keys, - expected_percent_rank, - make_percent_rank_aggregation(), - null_policy::INCLUDE, - sorted::YES); + test_single_scan( + keys, + order, + keys, + expected_dense, + make_rank_aggregation(rank_method::DENSE, {}, null_policy::INCLUDE), + null_policy::INCLUDE, + sorted::YES); + test_single_scan( + keys, + order, + keys, + expected_rank, + make_rank_aggregation(rank_method::MIN, {}, null_policy::INCLUDE), + null_policy::INCLUDE, + sorted::YES); + test_single_scan( + keys, + order, + keys, + expected_percent_rank, + make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED), + null_policy::INCLUDE, + sorted::YES); } struct groupby_rank_scan_test : public BaseFixture { @@ -148,7 +150,7 @@ TYPED_TEST(typed_groupby_rank_scan_test, basic) { using T = TypeParam; - auto const keys = input{0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1}; + auto const keys = /* */ input{0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1}; auto const make_order_by = [&] { return input{5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 1, 1}; }; auto const order_by = make_order_by(); auto const order_by_struct = [&] { @@ -244,9 +246,12 @@ TYPED_TEST(typed_groupby_rank_scan_test, mixedStructs) std::vector requests; requests.emplace_back(groupby::scan_request()); requests[0].values = *struct_col; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); - requests[0].aggregations.push_back(make_percent_rank_aggregation()); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::DENSE, {}, null_policy::INCLUDE)); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::MIN, {}, null_policy::INCLUDE)); + requests[0].aggregations.push_back(make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED)); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto [result_keys, agg_results] = gb_obj.scan(requests); @@ -288,13 +293,19 @@ TYPED_TEST(typed_groupby_rank_scan_test, nestedStructs) requests.emplace_back(groupby::scan_request()); requests.emplace_back(groupby::scan_request()); requests[0].values = *nested_structs; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); - requests[0].aggregations.push_back(make_percent_rank_aggregation()); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::DENSE)); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::MIN)); + requests[0].aggregations.push_back(make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED)); requests[1].values = *flat_struct; - requests[1].aggregations.push_back(make_dense_rank_aggregation()); - requests[1].aggregations.push_back(make_rank_aggregation()); - requests[1].aggregations.push_back(make_percent_rank_aggregation()); + requests[1].aggregations.push_back( + make_rank_aggregation(rank_method::DENSE)); + requests[1].aggregations.push_back( + 
make_rank_aggregation(rank_method::MIN)); + requests[1].aggregations.push_back(make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED)); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto [result_keys, agg_results] = gb_obj.scan(requests); @@ -339,13 +350,19 @@ TYPED_TEST(typed_groupby_rank_scan_test, structsWithNullPushdown) requests.emplace_back(groupby::scan_request()); requests.emplace_back(groupby::scan_request()); requests[0].values = *possibly_null_structs; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); - requests[0].aggregations.push_back(make_percent_rank_aggregation()); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::DENSE, {}, null_policy::INCLUDE)); + requests[0].aggregations.push_back( + make_rank_aggregation(rank_method::MIN, {}, null_policy::INCLUDE)); + requests[0].aggregations.push_back(make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED)); requests[1].values = *definitely_null_structs; - requests[1].aggregations.push_back(make_dense_rank_aggregation()); - requests[1].aggregations.push_back(make_rank_aggregation()); - requests[1].aggregations.push_back(make_percent_rank_aggregation()); + requests[1].aggregations.push_back( + make_rank_aggregation(rank_method::DENSE, {}, null_policy::INCLUDE)); + requests[1].aggregations.push_back( + make_rank_aggregation(rank_method::MIN, {}, null_policy::INCLUDE)); + requests[1].aggregations.push_back(make_rank_aggregation( + rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED)); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto [result_keys, agg_results] = gb_obj.scan(requests); @@ -405,11 +422,11 @@ TYPED_TEST(list_groupby_rank_scan_test, lists) requests.emplace_back(groupby::aggregation_request()); requests.emplace_back(groupby::aggregation_request()); requests[0].values = list_col; - requests[0].aggregations.push_back(make_dense_rank_aggregation()); - requests[0].aggregations.push_back(make_rank_aggregation()); + requests[0].aggregations.push_back(make_rank_aggregation(rank_method::DENSE)); + requests[0].aggregations.push_back(make_rank_aggregation(rank_method::MIN)); requests[1].values = struct_col; - requests[1].aggregations.push_back(make_dense_rank_aggregation()); - requests[1].aggregations.push_back(make_rank_aggregation()); + requests[1].aggregations.push_back(make_rank_aggregation(rank_method::DENSE)); + requests[1].aggregations.push_back(make_rank_aggregation(rank_method::MIN)); groupby::groupby gb_obj(table_view({keys}), null_policy::INCLUDE, sorted::YES); auto result = gb_obj.scan(requests); @@ -484,7 +501,7 @@ TEST(groupby_rank_scan_test, strings) keys, order_by_structs_with_nulls, expected_dense, expected_rank, expected_percent); } -TEST_F(groupby_rank_scan_test_failures, test_exception_triggers) +TEST_F(groupby_rank_scan_test_failures, DISABLED_test_exception_triggers) { using T = uint32_t; @@ -496,57 +513,60 @@ TEST_F(groupby_rank_scan_test_failures, test_exception_triggers) col, keys, col, - make_dense_rank_aggregation(), + make_rank_aggregation(rank_method::DENSE), null_policy::INCLUDE, sorted::NO), - "Dense rank aggregate in groupby scan requires the keys to be presorted"); + "Rank aggregate in groupby scan requires the keys to be presorted"); - CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, - col, - keys, - 
col, - make_rank_aggregation(), - null_policy::INCLUDE, - sorted::NO), - "Rank aggregate in groupby scan requires the keys to be presorted"); + CUDF_EXPECT_THROW_MESSAGE( + test_single_scan(keys, + col, + keys, + col, + make_rank_aggregation(rank_method::MIN), + null_policy::INCLUDE, + sorted::NO), + "Rank aggregate in groupby scan requires the keys to be presorted"); + + CUDF_EXPECT_THROW_MESSAGE( + test_single_scan(keys, + col, + keys, + col, + make_rank_aggregation(rank_method::DENSE), + null_policy::EXCLUDE, + sorted::YES), + "Rank aggregate in groupby scan requires the keys to be presorted"); CUDF_EXPECT_THROW_MESSAGE( test_single_scan(keys, col, keys, col, - make_dense_rank_aggregation(), + make_rank_aggregation(rank_method::MIN), null_policy::EXCLUDE, sorted::YES), - "Dense rank aggregate in groupby scan requires the keys to be presorted"); + "Rank aggregate in groupby scan requires the keys to be presorted"); - CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, - col, - keys, - col, - make_rank_aggregation(), - null_policy::EXCLUDE, - sorted::YES), - "Rank aggregate in groupby scan requires the keys to be presorted"); + CUDF_EXPECT_THROW_MESSAGE( + test_single_scan(keys, + col, + keys, + col, + make_rank_aggregation(rank_method::DENSE), + null_policy::EXCLUDE, + sorted::NO), + "Rank aggregate in groupby scan requires the keys to be presorted"); CUDF_EXPECT_THROW_MESSAGE( test_single_scan(keys, col, keys, col, - make_dense_rank_aggregation(), + make_rank_aggregation(rank_method::MIN), null_policy::EXCLUDE, sorted::NO), - "Dense rank aggregate in groupby scan requires the keys to be presorted"); - - CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, - col, - keys, - col, - make_rank_aggregation(), - null_policy::EXCLUDE, - sorted::NO), - "Rank aggregate in groupby scan requires the keys to be presorted"); + "Rank aggregate in groupby scan requires the keys to be presorted"); } } // namespace test diff --git a/cpp/tests/reductions/list_rank_test.cpp b/cpp/tests/reductions/list_rank_test.cpp index d263677f23b..b3a8e7e0c28 100644 --- a/cpp/tests/reductions/list_rank_test.cpp +++ b/cpp/tests/reductions/list_rank_test.cpp @@ -42,10 +42,11 @@ TEST_F(ListRankScanTest, BasicList) auto const expected_dense_vals = cudf::test::fixed_width_column_wrapper{1, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 9}; - this->test_ungrouped_rank_scan(col, - expected_dense_vals, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + col, + expected_dense_vals, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } TEST_F(ListRankScanTest, DeepList) @@ -73,20 +74,22 @@ TEST_F(ListRankScanTest, DeepList) { // Non-sliced auto const expected_dense_vals = cudf::test::fixed_width_column_wrapper{ 1, 1, 2, 3, 4, 5, 5, 5, 6, 6, 7, 7, 8, 9, 10, 11}; - this->test_ungrouped_rank_scan(col, - expected_dense_vals, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + col, + expected_dense_vals, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } { // sliced auto sliced_col = cudf::slice(col, {3, 12})[0]; auto const expected_dense_vals = cudf::test::fixed_width_column_wrapper{1, 2, 3, 3, 3, 4, 4, 5, 5}; - this->test_ungrouped_rank_scan(sliced_col, - expected_dense_vals, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + sliced_col, + expected_dense_vals, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + 
cudf::null_policy::INCLUDE); } } @@ -138,10 +141,11 @@ TEST_F(ListRankScanTest, ListOfStruct) auto expect = cudf::test::fixed_width_column_wrapper<size_type>{ 1, 1, 2, 2, 3, 4, 4, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10}; - this->test_ungrouped_rank_scan(list_column, - expect, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + list_column, + expect, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } { // Sliced auto sliced_col = cudf::slice(list_column, {3, 15})[0]; auto expect = cudf::test::fixed_width_column_wrapper<size_type>{1, 2, 3, 3, 3, 4, 5, 6, 7, 7, 8, 8}; - this->test_ungrouped_rank_scan(sliced_col, - expect, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + sliced_col, + expect, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } } @@ -192,10 +197,11 @@ TEST_F(ListRankScanTest, ListOfEmptyStruct) auto expect = cudf::test::fixed_width_column_wrapper<size_type>{1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6}; - this->test_ungrouped_rank_scan(*list_column, - expect, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + *list_column, + expect, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } TEST_F(ListRankScanTest, EmptyDeepList) { @@ -221,8 +227,9 @@ auto expect = cudf::test::fixed_width_column_wrapper<size_type>{1, 1, 2, 2}; - this->test_ungrouped_rank_scan(*list_column, - expect, - cudf::make_dense_rank_aggregation(), - cudf::null_policy::INCLUDE); + this->test_ungrouped_rank_scan( + *list_column, + expect, + cudf::make_rank_aggregation(cudf::rank_method::DENSE), + cudf::null_policy::INCLUDE); } diff --git a/cpp/tests/reductions/rank_tests.cpp b/cpp/tests/reductions/rank_tests.cpp index fb2cd17fe30..3bf2899ce2f 100644 --- a/cpp/tests/reductions/rank_tests.cpp +++ b/cpp/tests/reductions/rank_tests.cpp @@ -36,15 +36,14 @@ namespace cudf::test { using namespace iterators; template <typename T> -using input = fixed_width_column_wrapper<T>; -using rank_result_col = fixed_width_column_wrapper<size_type>; -using percent_result_t = - cudf::detail::target_type_t<size_type, aggregation::PERCENT_RANK>; -using percent_result_col = fixed_width_column_wrapper<percent_result_t>; +using input = fixed_width_column_wrapper<T>; +using rank_result_col = fixed_width_column_wrapper<size_type>; +using percent_result_col = fixed_width_column_wrapper<double>; -auto const rank = cudf::make_rank_aggregation(); -auto const dense_rank = cudf::make_dense_rank_aggregation(); -auto const percent_rank = cudf::make_percent_rank_aggregation(); +auto const rank = cudf::make_rank_aggregation(cudf::rank_method::MIN); +auto const dense_rank = cudf::make_rank_aggregation(cudf::rank_method::DENSE); +auto const percent_rank = cudf::make_rank_aggregation( + cudf::rank_method::MIN, {}, null_policy::INCLUDE, {}, rank_percentage::ONE_NORMALIZED); auto constexpr INCLUSIVE_SCAN = cudf::scan_type::INCLUSIVE; auto constexpr INCLUDE_NULLS = cudf::null_policy::INCLUDE; @@ -318,11 +319,11 @@ TEST(RankScanTest, ExclusiveScan) auto const vals = input<uint32_t>{3, 4, 5}; 
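+  // Every rank variant requires an inclusive scan, so each call below must throw.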
CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, dense_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS), - "Dense rank aggregation operator requires an inclusive scan"); + "Rank aggregation operator requires an inclusive scan"); CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, rank, scan_type::EXCLUSIVE, INCLUDE_NULLS), "Rank aggregation operator requires an inclusive scan"); CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, percent_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS), - "Percent rank aggregation operator requires an inclusive scan"); + "Rank aggregation operator requires an inclusive scan"); } } // namespace cudf::test diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index d533a91f4d0..68b4d85db2a 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -84,6 +84,7 @@ struct ScanTest : public BaseScanTest { case aggregation::PRODUCT: return std::is_invocable_v; case aggregation::MIN: return std::is_invocable_v; case aggregation::MAX: return std::is_invocable_v; + case aggregation::RANK: return std::is_invocable_v; // comparable default: return false; } return false; diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index f8c448566c8..6ac73282615 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -82,11 +82,14 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createNoParamAgg(JNIEnv case 27: // MERGE_M2 return cudf::make_merge_m2_aggregation(); case 28: // RANK - return cudf::make_rank_aggregation(); + return cudf::make_rank_aggregation(cudf::rank_method::MIN, {}, + cudf::null_policy::INCLUDE); case 29: // DENSE_RANK - return cudf::make_dense_rank_aggregation(); - case 30: // PERCENT_RANK - return cudf::make_percent_rank_aggregation(); + return cudf::make_rank_aggregation(cudf::rank_method::DENSE, {}, + cudf::null_policy::INCLUDE); + case 30: // ANSI SQL PERCENT_RANK + return cudf::make_rank_aggregation(cudf::rank_method::MIN, {}, cudf::null_policy::INCLUDE, + {}, cudf::rank_percentage::ONE_NORMALIZED); default: throw std::logic_error("Unsupported No Parameter Aggregation Operation"); } }(); diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx index 4dc91268d57..84dd9c3a576 100644 --- a/python/cudf/cudf/_lib/aggregation.pyx +++ b/python/cudf/cudf/_lib/aggregation.pyx @@ -30,7 +30,10 @@ from cudf._lib.types import Interpolation cimport cudf._lib.cpp.aggregation as libcudf_aggregation cimport cudf._lib.cpp.types as libcudf_types -from cudf._lib.cpp.aggregation cimport underlying_type_t_correlation_type +from cudf._lib.cpp.aggregation cimport ( + underlying_type_t_correlation_type, + underlying_type_t_rank_method, +) import cudf @@ -54,6 +57,7 @@ class AggregationKind(Enum): ARGMIN = libcudf_aggregation.aggregation.Kind.ARGMIN NUNIQUE = libcudf_aggregation.aggregation.Kind.NUNIQUE NTH = libcudf_aggregation.aggregation.Kind.NTH_ELEMENT + RANK = libcudf_aggregation.aggregation.Kind.RANK COLLECT = libcudf_aggregation.aggregation.Kind.COLLECT UNIQUE = libcudf_aggregation.aggregation.Kind.COLLECT_SET PTX = libcudf_aggregation.aggregation.Kind.PTX @@ -77,6 +81,14 @@ class CorrelationType(IntEnum): ) +class RankMethod(IntEnum): + FIRST = libcudf_aggregation.rank_method.FIRST + AVERAGE = libcudf_aggregation.rank_method.AVERAGE + MIN = libcudf_aggregation.rank_method.MIN + MAX = libcudf_aggregation.rank_method.MAX + DENSE = libcudf_aggregation.rank_method.DENSE + + cdef class 
RollingAggregation: """A Cython wrapper for rolling window aggregations. @@ -564,6 +576,33 @@ cdef class GroupbyScanAggregation: cummin = min cummax = max + @classmethod + def rank(cls, method, ascending, na_option, pct): + cdef GroupbyScanAggregation agg = cls() + cdef libcudf_aggregation.rank_method c_method = ( + ( + ( + RankMethod[method.upper()] + ) + ) + ) + agg.c_obj = move( + libcudf_aggregation. + make_rank_aggregation[groupby_scan_aggregation]( + c_method, + (libcudf_types.order.ASCENDING if ascending else + libcudf_types.order.DESCENDING), + (libcudf_types.null_policy.EXCLUDE if na_option == "keep" else + libcudf_types.null_policy.INCLUDE), + (libcudf_types.null_order.BEFORE + if (na_option == "top") == ascending else + libcudf_types.null_order.AFTER), + (libcudf_aggregation.rank_percentage.ZERO_NORMALIZED + if pct else + libcudf_aggregation.rank_percentage.NONE) + )) + return agg + cdef class ReduceAggregation: """A Cython wrapper for reduce aggregations. diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd index 399deb74c9c..a1d1485e1e8 100644 --- a/python/cudf/cudf/_lib/cpp/aggregation.pxd +++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. from libc.stdint cimport int32_t +from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector @@ -7,11 +8,14 @@ from libcpp.vector cimport vector from cudf._lib.cpp.types cimport ( data_type, interpolation, + null_order, null_policy, + order, size_type, ) ctypedef int32_t underlying_type_t_correlation_type +ctypedef int32_t underlying_type_t_rank_method cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: @@ -35,6 +39,7 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: ARGMIN 'cudf::aggregation::ARGMIN' NUNIQUE 'cudf::aggregation::NUNIQUE' NTH_ELEMENT 'cudf::aggregation::NTH_ELEMENT' + RANK 'cudf::aggregation::RANK' COLLECT 'cudf::aggregation::COLLECT_LIST' COLLECT_SET 'cudf::aggregation::COLLECT_SET' PTX 'cudf::aggregation::PTX' @@ -68,6 +73,18 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: KENDALL 'cudf::correlation_type::KENDALL' SPEARMAN 'cudf::correlation_type::SPEARMAN' + ctypedef enum rank_method: + FIRST "cudf::rank_method::FIRST" + AVERAGE "cudf::rank_method::AVERAGE" + MIN "cudf::rank_method::MIN" + MAX "cudf::rank_method::MAX" + DENSE "cudf::rank_method::DENSE" + + ctypedef enum rank_percentage: + NONE "cudf::rank_percentage::NONE" + ZERO_NORMALIZED "cudf::rank_percentage::ZERO_NORMALIZED" + ONE_NORMALIZED "cudf::rank_percentage::ONE_NORMALIZED" + cdef unique_ptr[T] make_sum_aggregation[T]() except + cdef unique_ptr[T] make_product_aggregation[T]() except + @@ -127,3 +144,10 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: cdef unique_ptr[T] make_covariance_aggregation[T]( size_type min_periods, size_type ddof) except + + + cdef unique_ptr[T] make_rank_aggregation[T]( + rank_method method, + order column_order, + null_policy null_handling, + null_order null_precedence, + rank_percentage percentage) except + diff --git a/python/cudf/cudf/_lib/cpp/sorting.pxd b/python/cudf/cudf/_lib/cpp/sorting.pxd index 243b841ce4b..c6c42c327ac 100644 --- a/python/cudf/cudf/_lib/cpp/sorting.pxd +++ b/python/cudf/cudf/_lib/cpp/sorting.pxd @@ -7,20 +7,13 @@ from libcpp.vector cimport vector from cudf._lib.types import cudf_to_np_types, np_to_cudf_types cimport cudf._lib.cpp.types as libcudf_types +from 
cudf._lib.cpp.aggregation cimport rank_method from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view -cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: - ctypedef enum rank_method: - FIRST "cudf::rank_method::FIRST" - AVERAGE "cudf::rank_method::AVERAGE" - MIN "cudf::rank_method::MIN" - MAX "cudf::rank_method::MAX" - DENSE "cudf::rank_method::DENSE" - cdef extern from "cudf/sorting.hpp" namespace "cudf" nogil: cdef unique_ptr[column] sorted_order( table_view source_table, diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 48f566b846d..be5bb2741b4 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -341,7 +341,7 @@ cdef class GroupBy: return columns_from_unique_ptr(move(c_result.second)) -_GROUPBY_SCANS = {"cumcount", "cumsum", "cummin", "cummax"} +_GROUPBY_SCANS = {"cumcount", "cumsum", "cummin", "cummax", "rank"} def _is_all_scan_aggregate(all_aggs): diff --git a/python/cudf/cudf/_lib/sort.pxd b/python/cudf/cudf/_lib/sort.pxd deleted file mode 100644 index d7488889555..00000000000 --- a/python/cudf/cudf/_lib/sort.pxd +++ /dev/null @@ -1,3 +0,0 @@ -from libc.stdint cimport int32_t - -ctypedef int32_t underlying_type_t_rank_method diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx index faa4279c1ca..1d7204a0a39 100644 --- a/python/cudf/cudf/_lib/sort.pyx +++ b/python/cudf/cudf/_lib/sort.pyx @@ -8,19 +8,21 @@ from libcpp.vector cimport vector from enum import IntEnum from cudf._lib.column cimport Column +from cudf._lib.cpp.aggregation cimport ( + rank_method, + underlying_type_t_rank_method, +) from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.column.column_view cimport column_view from cudf._lib.cpp.search cimport lower_bound, upper_bound from cudf._lib.cpp.sorting cimport ( is_sorted as cpp_is_sorted, rank, - rank_method, sorted_order, ) from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport null_order, null_policy, order -from cudf._lib.sort cimport underlying_type_t_rank_method from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns @@ -190,14 +192,6 @@ def digitize(list source_columns, list bins, bool right=False): return Column.from_unique_ptr(move(c_result)) -class RankMethod(IntEnum): - FIRST = < underlying_type_t_rank_method > rank_method.FIRST - AVERAGE = < underlying_type_t_rank_method > rank_method.AVERAGE - MIN = < underlying_type_t_rank_method > rank_method.MIN - MAX = < underlying_type_t_rank_method > rank_method.MAX - DENSE = < underlying_type_t_rank_method > rank_method.DENSE - - def rank_columns(list source_columns, object method, str na_option, bool ascending, bool pct ): diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 1af84920057..013ae7ad033 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -207,6 +207,30 @@ def cumcount(self): .reset_index(drop=True) ) + def rank( + self, + method="average", + ascending=True, + na_option="keep", + pct=False, + axis=0, + ): + """ + Return the rank of values within each group. 
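+
+        Parameters match ``DataFrame.rank``: ``method`` is one of
+        {"average", "min", "max", "first", "dense"}, ``na_option`` is one of
+        {"keep", "top", "bottom"}, and ``pct`` converts ranks to percentages;
+        only ``axis=0`` is supported.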
+ """ + if not axis == 0: + raise NotImplementedError("Only axis=0 is supported.") + + def rank(x): + return getattr(x, "rank")( + method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + + return self.agg(rank) + @cached_property def _groupby(self): return libgroupby.GroupBy( diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index a77fca098bc..1361fc56fa0 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3632,7 +3632,7 @@ def rank( if method not in {"average", "min", "max", "first", "dense"}: raise KeyError(method) - method_enum = libcudf.sort.RankMethod[method.upper()] + method_enum = libcudf.aggregation.RankMethod[method.upper()] if na_option not in {"keep", "top", "bottom"}: raise ValueError( "na_option must be one of 'keep', 'top', or 'bottom'" diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 1411d7ba64c..9e87fdbd3be 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1811,6 +1811,50 @@ def test_groupby_2keys_scan(nelem, func): assert_groupby_results_equal(got_df, expect_df, check_dtype=check_dtype) +@pytest.mark.parametrize("nelem", [100, 1000]) +@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) +@pytest.mark.parametrize("pct", [False, True]) +def test_groupby_2keys_rank(nelem, method, ascending, na_option, pct): + t = rand_dataframe( + dtypes_meta=[ + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0, "cardinality": 10}, + {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10}, + ], + rows=nelem, + use_threads=False, + ) + pdf = t.to_pandas() + pdf.columns = ["x", "y", "z"] + gdf = cudf.from_pandas(pdf) + expect_df = pdf.groupby(["x", "y"], sort=True).rank( + method=method, ascending=ascending, na_option=na_option, pct=pct + ) + got_df = gdf.groupby(["x", "y"], sort=True).rank( + method=method, ascending=ascending, na_option=na_option, pct=pct + ) + + assert_groupby_results_equal(got_df, expect_df, check_dtype=False) + + +def test_groupby_rank_fails(): + gdf = cudf.DataFrame( + {"x": [1, 2, 3, 4], "y": [1, 2, 3, 4], "z": [1, 2, 3, 4]} + ) + with pytest.raises(NotImplementedError): + gdf.groupby(["x", "y"]).rank(method="min", axis=1) + gdf = cudf.DataFrame( + { + "a": [1, 1, 1, 2, 2, 2], + "b": [[1, 2], [3, None, 5], None, [], [7, 8], [9]], + } + ) + with pytest.raises(NotImplementedError): + gdf.groupby(["a"]).rank(method="min", axis=1) + + def test_groupby_mix_agg_scan(): err_msg = "Cannot perform both aggregation and scan in one operation" func = ["cumsum", "sum"]