diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 3e875b71ca6..b663d4f4b6d 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -194,7 +194,7 @@ add_library(cudf
     src/groupby/hash/groupby.cu
     src/groupby/sort/group_argmax.cu
     src/groupby/sort/group_argmin.cu
-    src/groupby/sort/groupby.cu
+    src/groupby/sort/aggregate.cpp
     src/groupby/sort/group_collect.cu
     src/groupby/sort/group_count.cu
     src/groupby/sort/group_max.cu
@@ -204,6 +204,11 @@ add_library(cudf
     src/groupby/sort/group_quantiles.cu
     src/groupby/sort/group_std.cu
     src/groupby/sort/group_sum.cu
+    src/groupby/sort/scan.cpp
+    src/groupby/sort/group_count_scan.cu
+    src/groupby/sort/group_max_scan.cu
+    src/groupby/sort/group_min_scan.cu
+    src/groupby/sort/group_sum_scan.cu
     src/groupby/sort/sort_helper.cu
     src/hash/hashing.cu
     src/interop/dlpack.cpp
diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp
index cadcb1265c4..a68d649b8c8 100644
--- a/cpp/include/cudf/detail/groupby/sort_helper.hpp
+++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp
@@ -63,8 +63,8 @@ struct sort_groupby_helper {
                       sorted keys_pre_sorted        = sorted::NO)
     : _keys(keys),
       _num_keys(-1),
-      _include_null_keys(include_null_keys),
-      _keys_pre_sorted(keys_pre_sorted)
+      _keys_pre_sorted(keys_pre_sorted),
+      _include_null_keys(include_null_keys)
   {
     if (keys_pre_sorted == sorted::YES and include_null_keys == null_policy::EXCLUDE and
         has_nulls(keys)) {
diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh
index 93f54cff588..08dae998944 100644
--- a/cpp/include/cudf/detail/null_mask.cuh
+++ b/cpp/include/cudf/detail/null_mask.cuh
@@ -23,8 +23,6 @@
 
 #include <rmm/cuda_stream_view.hpp>
 
-using cudf::device_span;
-
 namespace cudf {
 namespace detail {
 /**
diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp
index f7f7f51479d..1dfacd53e0d 100644
--- a/cpp/include/cudf/groupby.hpp
+++ b/cpp/include/cudf/groupby.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -166,6 +166,61 @@ class groupby {
     std::vector<aggregation_request> const& requests,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+  /**
+   * @brief Performs grouped scans on the specified values.
+   *
+   * The values to aggregate and the aggregations to perform are specified in an
+   * `aggregation_request`. Each request contains a `column_view` of values to
+   * aggregate and a set of `aggregation`s to perform on those elements.
+   *
+   * For each `aggregation` in a request, `values[i]` is scan aggregated with
+   * all previous `values[j]` where rows `i` and `j` in `keys` are equivalent.
+   *
+   * The `size()` of the request column must equal `keys.num_rows()`.
+   *
+   * For every `aggregation_request` an `aggregation_result` will be returned.
+   * The `aggregation_result` holds the resulting column(s) for each requested
+   * aggregation on the `request`s values. The order of the columns in each
+   * result is the same order as was specified in the request.
+   *
+   * The returned `table` contains the group labels for each row, i.e., the
+   * `keys` given to the groupby object. Element `i` across all aggregation results
+   * belongs to the group at row `i` in the group labels table.
+   *
+   * The order of the rows in the group labels is arbitrary. Furthermore,
+   * successive `groupby::scan` calls may return results in different orders.
+   *
+   * @throws cudf::logic_error If `requests[i].values.size() !=
+   * keys.num_rows()`.
+   *
+   * Example:
+   * ```
+   * Input:
+   * keys:     {1 2 1 3 1}
+   *           {1 2 1 4 1}
+   * request:
+   *   values: {3 1 4 9 2}
+   *   aggregations: {{SUM}, {MIN}}
+   *
+   * result:
+   *
+   * keys:  {3 1 1 1 2}
+   *        {4 1 1 1 2}
+   * values:
+   *   SUM: {9 3 7 9 1}
+   *   MIN: {9 3 3 2 1}
+   * ```
+   *
+   * @param requests The set of columns to scan and the scans to perform
+   * @param mr Device memory resource used to allocate the returned table and columns' device memory
+   * @return Pair containing the table with the group key of each row and
+   * a vector of aggregation_results for each request in the same order as
+   * specified in `requests`.
+   */
+  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> scan(
+    std::vector<aggregation_request> const& requests,
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
   /**
    * @brief The grouped data corresponding to a groupby operation on a set of values.
    *
@@ -231,6 +286,11 @@ class groupby {
     std::vector<aggregation_request> const& requests,
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr);
+
+  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> sort_scan(
+    std::vector<aggregation_request> const& requests,
+    rmm::cuda_stream_view stream,
+    rmm::mr::device_memory_resource* mr);
 };
 /** @} */
 }  // namespace groupby
diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu
index 60167d77507..845a5512c27 100644
--- a/cpp/src/bitmask/null_mask.cu
+++ b/cpp/src/bitmask/null_mask.cu
@@ -44,8 +44,6 @@
 #include <numeric>
 #include <type_traits>
 
-using cudf::device_span;
-
 namespace cudf {
 size_type state_null_count(mask_state state, size_type size)
 {
diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu
index 487aed4b411..cdd8ceb0a6c 100644
--- a/cpp/src/groupby/groupby.cu
+++ b/cpp/src/groupby/groupby.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -159,6 +159,24 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::aggr
   return dispatch_aggregation(requests, 0, mr);
 }
 
+// Validate scan requests, short-circuit on empty keys, then run the sort-based scan
+std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::scan(
+  std::vector<aggregation_request> const& requests, rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  CUDF_EXPECTS(
+    std::all_of(requests.begin(),
+                requests.end(),
+                [this](auto const& request) { return request.values.size() == _keys.num_rows(); }),
+    "Size mismatch between request values and groupby keys.");
+
+  verify_valid_requests(requests);
+
+  if (_keys.num_rows() == 0) { return std::make_pair(empty_like(_keys), empty_results(requests)); }
+
+  return sort_scan(requests, rmm::cuda_stream_default, mr);
+}
+
 groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
diff --git a/cpp/src/groupby/sort/groupby.cu b/cpp/src/groupby/sort/aggregate.cpp
similarity index 79%
rename from cpp/src/groupby/sort/groupby.cu
rename to cpp/src/groupby/sort/aggregate.cpp
index 5c54dd3cb4c..ace25820ac9 100644
--- a/cpp/src/groupby/sort/groupby.cu
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,7 +15,8 @@
  */
 
 #include <groupby/common/utils.hpp>
-#include "group_reductions.hpp"
+#include <groupby/sort/functors.hpp>
+#include <groupby/sort/group_reductions.hpp>
 
 #include <cudf/aggregation.hpp>
 #include <cudf/column/column.hpp>
@@ -51,71 +52,16 @@ namespace detail {
  * memoised sorted and/or grouped values and re-using will save on computation
  * of these values.
  */
-struct store_result_functor {
-  store_result_functor(size_type col_idx,
-                       column_view const& values,
-                       sort::sort_groupby_helper& helper,
-                       cudf::detail::result_cache& cache,
-                       rmm::cuda_stream_view stream,
-                       rmm::mr::device_memory_resource* mr)
-    : col_idx(col_idx), helper(helper), cache(cache), values(values), stream(stream), mr(mr)
-  {
-  }
-
+struct aggregrate_result_functor final : store_result_functor {
+  using store_result_functor::store_result_functor;
   template <aggregation::Kind k>
   void operator()(aggregation const& agg)
   {
   }
-
- private:
-  /**
-   * @brief Get the grouped values
-   *
-   * Computes the grouped values from @p values on first invocation and returns
-   * the stored result on subsequent invocation
-   */
-  column_view get_grouped_values()
-  {
-    // TODO (dm): After implementing single pass multi-agg, explore making a
-    //            cache of all grouped value columns rather than one at a time
-    if (grouped_values)
-      return grouped_values->view();
-    else if (sorted_values)
-      // TODO (dm): When we implement scan, it wouldn't be ok to return sorted
-      //            values when asked for grouped values. Change this then.
-      return sorted_values->view();
-    else
-      grouped_values = helper.grouped_values(values);
-    return grouped_values->view();
-  };
-
-  /**
-   * @brief Get the grouped and sorted values
-   *
-   * Computes the grouped and sorted (within each group) values from @p values
-   * on first invocation and returns the stored result on subsequent invocation
-   */
-  column_view get_sorted_values()
-  {
-    if (not sorted_values) sorted_values = helper.sorted_values(values);
-    return sorted_values->view();
-  };
-
- private:
-  size_type col_idx;                  ///< Index of column in requests being operated on
-  sort::sort_groupby_helper& helper;  ///< Sort helper
-  cudf::detail::result_cache& cache;  ///< cache of results to store into
-  column_view const& values;          ///< Column of values to group and aggregate
-
-  rmm::cuda_stream_view stream;         ///< CUDA stream on which to execute kernels
-  rmm::mr::device_memory_resource* mr;  ///< Memory resource to allocate space for results
-
-  std::unique_ptr<column> sorted_values;   ///< Memoised grouped and sorted values
-  std::unique_ptr<column> grouped_values;  ///< Memoised grouped values
 };
 
 template <>
-void store_result_functor::operator()<aggregation::COUNT_VALID>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::COUNT_VALID>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -129,7 +75,7 @@ void store_result_functor::operator()<aggregation::COUNT_VALID>(aggregation cons
 }
 
 template <>
-void store_result_functor::operator()<aggregation::COUNT_ALL>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::COUNT_ALL>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -138,7 +84,7 @@ void store_result_functor::operator()<aggregation::COUNT_ALL>(aggregation const&
 }
 
 template <>
-void store_result_functor::operator()<aggregation::SUM>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::SUM>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -149,7 +95,7 @@ void store_result_functor::operator()<aggregation::SUM>(aggregation const& agg)
 };
 
 template <>
-void store_result_functor::operator()<aggregation::ARGMAX>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::ARGMAX>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -164,7 +110,7 @@ void store_result_functor::operator()<aggregation::ARGMAX>(aggregation const& ag
 };
 
 template <>
-void store_result_functor::operator()<aggregation::ARGMIN>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::ARGMIN>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -179,7 +125,7 @@ void store_result_functor::operator()<aggregation::ARGMIN>(aggregation const& ag
 };
 
 template <>
-void store_result_functor::operator()<aggregation::MIN>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::MIN>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -216,7 +162,7 @@ void store_result_functor::operator()<aggregation::MIN>(aggregation const& agg)
 };
 
 template <>
-void store_result_functor::operator()<aggregation::MAX>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::MAX>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -253,7 +199,7 @@ void store_result_functor::operator()<aggregation::MAX>(aggregation const& agg)
 };
 
 template <>
-void store_result_functor::operator()<aggregation::MEAN>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::MEAN>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -277,7 +223,7 @@ void store_result_functor::operator()<aggregation::MEAN>(aggregation const& agg)
 };
 
 template <>
-void store_result_functor::operator()<aggregation::VARIANCE>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::VARIANCE>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -300,7 +246,7 @@ void store_result_functor::operator()<aggregation::VARIANCE>(aggregation const&
 };
 
 template <>
-void store_result_functor::operator()<aggregation::STD>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::STD>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -314,7 +260,7 @@ void store_result_functor::operator()<aggregation::STD>(aggregation const& agg)
 };
 
 template <>
-void store_result_functor::operator()<aggregation::QUANTILE>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::QUANTILE>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -335,7 +281,7 @@ void store_result_functor::operator()<aggregation::QUANTILE>(aggregation const&
 };
 
 template <>
-void store_result_functor::operator()<aggregation::MEDIAN>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::MEDIAN>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -355,7 +301,7 @@ void store_result_functor::operator()<aggregation::MEDIAN>(aggregation const& ag
 };
 
 template <>
-void store_result_functor::operator()<aggregation::NUNIQUE>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::NUNIQUE>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -372,7 +318,7 @@ void store_result_functor::operator()<aggregation::NUNIQUE>(aggregation const& a
 };
 
 template <>
-void store_result_functor::operator()<aggregation::NTH_ELEMENT>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::NTH_ELEMENT>(aggregation const& agg)
 {
   if (cache.has_result(col_idx, agg)) return;
 
@@ -401,7 +347,7 @@ void store_result_functor::operator()<aggregation::NTH_ELEMENT>(aggregation cons
 }
 
 template <>
-void store_result_functor::operator()<aggregation::COLLECT>(aggregation const& agg)
+void aggregrate_result_functor::operator()<aggregation::COLLECT>(aggregation const& agg)
 {
   auto null_handling =
     static_cast<cudf::detail::collect_list_aggregation const&>(agg)._null_handling;
@@ -431,7 +377,7 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::sort
 
   for (size_t i = 0; i < requests.size(); i++) {
     auto store_functor =
-      detail::store_result_functor(i, requests[i].values, helper(), cache, stream, mr);
+      detail::aggregrate_result_functor(i, requests[i].values, helper(), cache, stream, mr);
     for (size_t j = 0; j < requests[i].aggregations.size(); j++) {
       // TODO (dm): single pass compute all supported reductions
       cudf::detail::aggregation_dispatcher(
diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp
new file mode 100644
index 00000000000..565320fbe80
--- /dev/null
+++ b/cpp/src/groupby/sort/functors.hpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/column/column.hpp>
+#include <cudf/column/column_view.hpp>
+#include <cudf/detail/aggregation/result_cache.hpp>
+#include <cudf/detail/groupby/sort_helper.hpp>
+#include <cudf/types.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <memory>
+
+namespace cudf {
+namespace groupby {
+namespace detail {
+/**
+ * @brief Common state and helpers for functors dispatched per aggregation kind
+ *
+ * Types deriving from this struct are meant to be used with
+ * `aggregation_dispatcher` to compute the appropriate aggregation. If the values
+ * on which to run the aggregation are unchanged, then the functor should be
+ * re-used. This is because it stores memoised sorted and/or grouped values and
+ * re-using will save on computation of these values.
+ */
+struct store_result_functor {
+  store_result_functor(size_type col_idx,
+                       column_view const& values,
+                       sort::sort_groupby_helper& helper,
+                       cudf::detail::result_cache& cache,
+                       rmm::cuda_stream_view stream,
+                       rmm::mr::device_memory_resource* mr)
+    : col_idx(col_idx), helper(helper), cache(cache), values(values), stream(stream), mr(mr)
+  {
+  }
+
+ protected:
+  /**
+   * @brief Get the grouped values
+   *
+   * Returns the memoised grouped values, or else the memoised sorted values, if
+   * either exists; otherwise computes and memoises the grouped values of @p values
+   */
+  column_view get_grouped_values()
+  {
+    // TODO (dm): After implementing single pass multi-agg, explore making a
+    //            cache of all grouped value columns rather than one at a time
+    if (grouped_values)
+      return grouped_values->view();
+    else if (sorted_values)
+      // Reusing sorted values as grouped values is not ok for scan, which is said to
+      // override this. NOTE(review): non-virtual here, so that would hide, not override.
+      return sorted_values->view();
+    else
+      return (grouped_values = helper.grouped_values(values))->view();
+  };
+
+  /**
+   * @brief Get the grouped and sorted values
+   *
+   * Computes the grouped and sorted (within each group) values from @p values
+   * on first invocation and returns the memoised result on later invocations
+   */
+  column_view get_sorted_values()
+  {
+    return sorted_values ? sorted_values->view()
+                         : (sorted_values = helper.sorted_values(values))->view();
+  };
+
+ protected:
+  size_type col_idx;                  ///< Index of column in requests being operated on
+  sort::sort_groupby_helper& helper;  ///< Sort helper
+  cudf::detail::result_cache& cache;  ///< cache of results to store into
+  column_view const& values;          ///< Column of values to group and aggregate
+
+  rmm::cuda_stream_view stream;         ///< CUDA stream on which to execute kernels
+  rmm::mr::device_memory_resource* mr;  ///< Memory resource to allocate space for results
+
+  std::unique_ptr<column> sorted_values;   ///< Memoised grouped and sorted values
+  std::unique_ptr<column> grouped_values;  ///< Memoised grouped values
+};
+}  // namespace detail
+}  // namespace groupby
+}  // namespace cudf
diff --git a/cpp/src/groupby/sort/group_count.cu b/cpp/src/groupby/sort/group_count.cu
index 60e0ce31db1..121e4bb889d 100644
--- a/cpp/src/groupby/sort/group_count.cu
+++ b/cpp/src/groupby/sort/group_count.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/cpp/src/groupby/sort/group_count_scan.cu b/cpp/src/groupby/sort/group_count_scan.cu
new file mode 100644
index 00000000000..4ad533aebdc
--- /dev/null
+++ b/cpp/src/groupby/sort/group_count_scan.cu
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/column/column.hpp>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/types.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <thrust/iterator/constant_iterator.h>
+#include <thrust/scan.h>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_vector.hpp>
+#include <rmm/exec_policy.hpp>
+
+namespace cudf {
+namespace groupby {
+namespace detail {
+std::unique_ptr<column> count_scan(cudf::device_span<size_type const> group_labels,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr)
+{
+  std::unique_ptr<column> result = make_fixed_width_column(
+    data_type{type_id::INT32}, group_labels.size(), mask_state::UNALLOCATED, stream, mr);
+
+  if (group_labels.empty()) { return result; }
+
+  auto resultview = result->mutable_view();
+  // aggregation::COUNT_ALL: exclusive scan of 1s keyed on group label (0 at each run start)
+  thrust::exclusive_scan_by_key(rmm::exec_policy(stream),
+                                group_labels.begin(),
+                                group_labels.end(),
+                                thrust::make_constant_iterator<size_type>(1),
+                                resultview.begin<size_type>());
+  return result;
+}
+
+}  // namespace detail
+}  // namespace groupby
+}  // namespace cudf
diff --git a/cpp/src/groupby/sort/group_max.cu b/cpp/src/groupby/sort/group_max.cu
index bd4e676b83d..3f5592186df 100644
--- a/cpp/src/groupby/sort/group_max.cu
+++ b/cpp/src/groupby/sort/group_max.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/cpp/src/groupby/sort/group_max_scan.cu b/cpp/src/groupby/sort/group_max_scan.cu
new file mode 100644
index 00000000000..303d606be9d
--- /dev/null
+++ b/cpp/src/groupby/sort/group_max_scan.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <groupby/sort/group_scan_util.cuh>
+
+#include <rmm/cuda_stream_view.hpp>
+
+namespace cudf {
+namespace groupby {
+namespace detail {
+std::unique_ptr<column> max_scan(column_view const& values,
+                                 size_type num_groups,
+                                 cudf::device_span<size_type const> group_labels,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
+{
+  return type_dispatcher(
+    values.type(), scan_functor<aggregation::MAX>{}, values, num_groups, group_labels, stream, mr);
+}
+
+}  // namespace detail
+}  // namespace groupby
+}  // namespace cudf
diff --git a/cpp/src/groupby/sort/group_min_scan.cu b/cpp/src/groupby/sort/group_min_scan.cu
new file mode 100644
index 00000000000..4a692cdf0bd
--- /dev/null
+++ b/cpp/src/groupby/sort/group_min_scan.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <groupby/sort/group_scan_util.cuh>
+
+#include <rmm/cuda_stream_view.hpp>
+
+namespace cudf {
+namespace groupby {
+namespace detail {
+std::unique_ptr<column> min_scan(column_view const& values,
+                                 size_type num_groups,
+                                 cudf::device_span<size_type const> group_labels,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
+{
+  return type_dispatcher(
+    values.type(), scan_functor<aggregation::MIN>{}, values, num_groups, group_labels, stream, mr);
+}
+
+}  // namespace detail
+}  // namespace groupby
+}  // namespace cudf
diff --git a/cpp/src/groupby/sort/group_scan.hpp b/cpp/src/groupby/sort/group_scan.hpp
new file mode 100644
index 00000000000..efb39068d2e
--- /dev/null
+++ b/cpp/src/groupby/sort/group_scan.hpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/aggregation.hpp>
+#include <cudf/column/column.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <memory>
+
+namespace cudf {
+namespace groupby {
+namespace detail {
+/**
+ * @brief Internal API to calculate groupwise cumulative sum
+ *
+ * @param values Grouped values to get sum of
+ * @param num_groups Number of groups
+ * @param group_labels ID of group that the corresponding value belongs to
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<column> sum_scan(column_view const& values,
+                                 size_type num_groups,
+                                 cudf::device_span<size_type const> group_labels,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr);
+
+/**
+ * @brief Internal API to calculate groupwise cumulative minimum value
+ *
+ * @param values Grouped values to get minimum from
+ * @param num_groups Number of groups
+ * @param group_labels ID of group that the corresponding value belongs to
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<column> min_scan(column_view const& values,
+                                 size_type num_groups,
+                                 cudf::device_span<size_type const> group_labels,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr);
+
+/**
+ * @brief Internal API to calculate groupwise cumulative maximum value
+ *
+ * @param values Grouped values to get maximum from
+ * @param num_groups Number of groups
+ * @param group_labels ID of group that the corresponding value belongs to
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<column> max_scan(column_view const& values,
+                                 size_type num_groups,
+                                 cudf::device_span<size_type const> group_labels,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr);
+
+/**
+ * @brief Internal API to calculate cumulative number of values in each group
+ *
+ * @param group_labels ID of group that the corresponding value belongs to
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return Column of type INT32 holding the running (exclusive) count for each row
+ */
+std::unique_ptr<column> count_scan(cudf::device_span<size_type const> group_labels,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr);
+}  // namespace detail
+}  // namespace groupby
+}  // namespace cudf
diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh
new file mode 100644
index 00000000000..9f8614a61b4
--- /dev/null
+++ b/cpp/src/groupby/sort/group_scan_util.cuh
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/column/column.hpp>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/column/column_view.hpp>
+#include <cudf/detail/aggregation/aggregation.cuh>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/detail/null_mask.hpp>
+#include <cudf/table/table_device_view.cuh>
+#include <cudf/types.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_vector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/functional.h>
+#include <thrust/scan.h>
+
+namespace cudf {
+namespace groupby {
+namespace detail {
+// Groupwise inclusive scan for aggregation K; unsupported K/type combinations CUDF_FAIL.
+template <aggregation::Kind K>
+struct scan_functor {
+  template <typename T>
+  static constexpr bool is_supported()
+  {
+    if (K == aggregation::SUM)
+      return cudf::is_numeric<T>() || cudf::is_duration<T>() || cudf::is_fixed_point<T>();
+    if (K == aggregation::MIN or K == aggregation::MAX)
+      return cudf::is_fixed_width<T>() and is_relationally_comparable<T, T>();
+    return false;
+  }
+  // Inclusive scan of `values` over each run of equal `group_labels`; nulls stay null.
+  template <typename T>
+  std::enable_if_t<is_supported<T>(), std::unique_ptr<column>> operator()(
+    column_view const& values,
+    size_type num_groups,  // not referenced by this implementation
+    cudf::device_span<cudf::size_type const> group_labels,
+    rmm::cuda_stream_view stream,
+    rmm::mr::device_memory_resource* mr)
+  {
+    using DeviceType       = device_storage_type_t<T>;
+    using OpType           = cudf::detail::corresponding_operator_t<K>;
+    using ResultType       = cudf::detail::target_type_t<T, K>;
+    using ResultDeviceType = device_storage_type_t<ResultType>;
+    // Fixed-point results carry the scale of the input column.
+    auto result_type = is_fixed_point<T>()
+                         ? data_type{type_to_id<ResultType>(), values.type().scale()}
+                         : data_type{type_to_id<ResultType>()};
+
+    std::unique_ptr<column> result =
+      make_fixed_width_column(result_type, values.size(), mask_state::UNALLOCATED, stream, mr);
+
+    if (values.is_empty()) { return result; }
+
+    auto result_table = mutable_table_view({*result});
+    cudf::detail::initialize_with_identity(result_table, {K}, stream);
+
+    auto result_view = mutable_column_device_view::create(result->mutable_view(), stream);
+    auto values_view = column_device_view::create(values, stream);
+    // Null inputs are replaced by OpType's identity so they never change the running value.
+    if (values.has_nulls()) {
+      auto input = thrust::make_transform_iterator(
+        make_null_replacement_iterator(*values_view, OpType::template identity<DeviceType>()),
+        thrust::identity<ResultDeviceType>{});
+      thrust::inclusive_scan_by_key(rmm::exec_policy(stream),
+                                    group_labels.begin(),
+                                    group_labels.end(),
+                                    input,
+                                    result_view->begin<ResultDeviceType>(),
+                                    thrust::equal_to<size_type>{},
+                                    OpType{});
+      result->set_null_mask(cudf::detail::copy_bitmask(values, stream, mr));  // mask uses mr too
+    } else {
+      auto input = thrust::make_transform_iterator(values_view->begin<DeviceType>(),
+                                                   thrust::identity<ResultDeviceType>{});
+      thrust::inclusive_scan_by_key(rmm::exec_policy(stream),
+                                    group_labels.begin(),
+                                    group_labels.end(),
+                                    input,
+                                    result_view->begin<ResultDeviceType>(),
+                                    thrust::equal_to<size_type>{},
+                                    OpType{});
+    }
+    return result;
+  }
+
+  template <typename T, typename... Args>
+  std::enable_if_t<not is_supported<T>(), std::unique_ptr<column>> operator()(Args&&... args)
+  {
+    CUDF_FAIL("Unsupported groupby scan type-agg combination");
+  }
+};
+
+}  // namespace detail
+}  // namespace groupby
+}  // namespace cudf
diff --git a/cpp/src/groupby/sort/group_sum_scan.cu b/cpp/src/groupby/sort/group_sum_scan.cu
new file mode 100644
index 00000000000..ae9b1c321d4
--- /dev/null
+++ b/cpp/src/groupby/sort/group_sum_scan.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <groupby/sort/group_scan_util.cuh>
+
+#include <rmm/cuda_stream_view.hpp>
+
+namespace cudf {
+namespace groupby {
+namespace detail {
+std::unique_ptr<column> sum_scan(column_view const& values,
+                                 size_type num_groups,
+                                 cudf::device_span<size_type const> group_labels,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
+{
+  scan_functor<aggregation::SUM> dispatch{};  // fails for value types SUM cannot scan
+  return type_dispatcher(values.type(), dispatch, values, num_groups, group_labels, stream, mr);
+}
+
+}  // namespace detail
+}  // namespace groupby
+}  // namespace cudf
diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp
new file mode 100644
index 00000000000..63de4ea8684
--- /dev/null
+++ b/cpp/src/groupby/sort/scan.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <groupby/common/utils.hpp>
+#include <groupby/sort/functors.hpp>
+#include <groupby/sort/group_scan.hpp>
+
+#include <cudf/aggregation.hpp>
+#include <cudf/column/column_view.hpp>
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/detail/aggregation/result_cache.hpp>
+#include <cudf/groupby.hpp>
+#include <cudf/table/table.hpp>
+#include <cudf/types.hpp>
+#include <cudf/utilities/error.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <memory>
+
+namespace cudf {
+namespace groupby {
+namespace detail {
+/**
+ * @brief Functor to dispatch a groupby scan aggregation and store its result
+ *
+ * This functor is to be used with `aggregation_dispatcher` to compute the
+ * appropriate scan. If the values on which to run the scan are unchanged,
+ * then this functor should be re-used. This is because it stores the
+ * memoised grouped values and re-using will save on computation of these
+ * values. Kinds without a specialization fail with `CUDF_FAIL`.
+ */
+struct scan_result_functor final : store_result_functor {
+  using store_result_functor::store_result_functor;  // inherit the base constructors
+  template <aggregation::Kind k>
+  void operator()(aggregation const& agg)
+  {
+    CUDF_FAIL("Unsupported groupby scan aggregation");
+  }
+
+ private:
+  column_view get_grouped_values()
+  {
+    // TODO (dm): After implementing single pass multi-agg, explore making a
+    //            cache of all grouped value columns rather than one at a time
+    if (grouped_values)
+      return grouped_values->view();
+    else
+      return (grouped_values = helper.grouped_values(values))->view();  // compute once, then reuse
+  };
+};
+
+// Groupwise cumulative sum of the grouped values; a result already in the cache is reused.
+template <>
+void scan_result_functor::operator()<aggregation::SUM>(aggregation const& agg)
+{
+  if (not cache.has_result(col_idx, agg)) {
+    auto scanned = detail::sum_scan(
+      get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr);
+    cache.add_result(col_idx, agg, std::move(scanned));
+  }
+}
+
+// Groupwise cumulative minimum of the grouped values; a result already in the cache is reused.
+template <>
+void scan_result_functor::operator()<aggregation::MIN>(aggregation const& agg)
+{
+  if (not cache.has_result(col_idx, agg)) {
+    auto scanned = detail::min_scan(
+      get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr);
+    cache.add_result(col_idx, agg, std::move(scanned));
+  }
+}
+
+// Groupwise cumulative maximum of the grouped values; a result already in the cache is reused.
+template <>
+void scan_result_functor::operator()<aggregation::MAX>(aggregation const& agg)
+{
+  if (not cache.has_result(col_idx, agg)) {
+    auto scanned = detail::max_scan(
+      get_grouped_values(), helper.num_groups(), helper.group_labels(), stream, mr);
+    cache.add_result(col_idx, agg, std::move(scanned));
+  }
+}
+
+// Cumulative row count per group; computed from the group labels only, not from the values.
+template <>
+void scan_result_functor::operator()<aggregation::COUNT_ALL>(aggregation const& agg)
+{
+  if (cache.has_result(col_idx, agg)) { return; }
+  cache.add_result(col_idx, agg, detail::count_scan(helper.group_labels(), stream, mr));
+}
+}  // namespace detail
+
+// Sort-based groupby scan
+std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::sort_scan(
+  std::vector<aggregation_request> const& requests,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr)
+{
+  // Every scan result is stored in this cache under its request index and
+  // aggregation, so an aggregation that shows up more than once for a request
+  // is only computed the first time it is dispatched.
+  cudf::detail::result_cache cache(requests.size());
+
+  for (size_t req_idx = 0; req_idx < requests.size(); ++req_idx) {
+    auto const& request = requests[req_idx];
+    detail::scan_result_functor run_scan(req_idx, request.values, helper(), cache, stream, mr);
+    for (auto const& agg : request.aggregations) {
+      // TODO (dm): single pass compute all supported reductions
+      cudf::detail::aggregation_dispatcher(agg->kind, run_scan, *agg);
+    }
+  }
+
+  auto scan_results = detail::extract_results(requests, cache);
+
+  return std::make_pair(helper().sorted_keys(stream, mr), std::move(scan_results));
+}
+}  // namespace groupby
+}  // namespace cudf
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index e95aab16098..4b2d1e04ac5 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -70,7 +70,11 @@ ConfigureTest(GROUPBY_TEST
     groupby/group_quantile_test.cpp
     groupby/group_nunique_test.cpp
     groupby/group_nth_element_test.cpp
-    groupby/group_collect_test.cpp)
+    groupby/group_collect_test.cpp
+    groupby/group_sum_scan_test.cpp
+    groupby/group_min_scan_test.cpp
+    groupby/group_max_scan_test.cpp
+    groupby/group_count_scan_test.cpp)
 
 ###################################################################################################
 # - join tests ------------------------------------------------------------------------------------
diff --git a/cpp/tests/groupby/group_count_scan_test.cpp b/cpp/tests/groupby/group_count_scan_test.cpp
new file mode 100644
index 00000000000..b7b18982f51
--- /dev/null
+++ b/cpp/tests/groupby/group_count_scan_test.cpp
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tests/groupby/groupby_test_util.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <cudf/detail/aggregation/aggregation.hpp>
+
+namespace cudf {
+namespace test {
+using K           = int32_t;
+using key_wrapper = fixed_width_column_wrapper<K>;
+
+template <typename T>
+struct groupby_count_scan_test : public cudf::test::BaseFixture {
+  using V              = T;  // element type of the values column
+  using R              = cudf::detail::target_type_t<V, aggregation::COUNT_ALL>;  // result type
+  using value_wrapper  = fixed_width_column_wrapper<V, int32_t>;
+  using result_wrapper = fixed_width_column_wrapper<R, int32_t>;
+};
+
+TYPED_TEST_CASE(groupby_count_scan_test, cudf::test::AllTypes);
+
+TYPED_TEST(groupby_count_scan_test, basic)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys  {1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+  value_wrapper vals{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  // Expected: rows ordered by key, numbered from 0 within each key group.
+  key_wrapper expect_keys   {1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+  result_wrapper expect_vals{0, 1, 2, 0, 1, 2, 3, 0, 1, 2};
+  // clang-format on
+
+  CUDF_EXPECT_THROW_MESSAGE(
+    test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation()),
+    "Unsupported groupby scan aggregation");
+
+  test_single_scan(
+    keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation(null_policy::INCLUDE));
+}
+
+TYPED_TEST(groupby_count_scan_test, empty_cols)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys;
+  value_wrapper vals;
+  // Empty input is expected to give empty output for both count aggregations used below.
+  key_wrapper expect_keys;
+  result_wrapper expect_vals;
+  // clang-format on
+
+  EXPECT_NO_THROW(
+    test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation()));
+
+  test_single_scan(
+    keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation(null_policy::INCLUDE));
+}
+
+TYPED_TEST(groupby_count_scan_test, zero_valid_keys)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys( {1, 2, 3}, all_null());
+  value_wrapper vals{3, 4, 5};
+  // Every key is null, so the expected output has no rows at all.
+  key_wrapper expect_keys{};
+  result_wrapper expect_vals{};
+  // clang-format on
+
+  test_single_scan(
+    keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation(null_policy::INCLUDE));
+}
+
+TYPED_TEST(groupby_count_scan_test, zero_valid_values)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys   {1, 1, 1};
+  value_wrapper vals({3, 4, 5}, all_null());
+  // Null values are still counted, so the single group is numbered 0, 1, 2.
+  key_wrapper expect_keys{1, 1, 1};
+  result_wrapper expect_vals{0, 1, 2};
+  // clang-format on
+
+  test_single_scan(
+    keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation(null_policy::INCLUDE));
+}
+
+TYPED_TEST(groupby_count_scan_test, null_keys_and_values)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys(  {1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4}, {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  value_wrapper vals({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4}, {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0});
+  // The row with the null key is dropped; rows with null values are still counted.
+  //                        {1, 1, 1, 2, 2, 2, 2, 3, _, 3, 4}
+  key_wrapper expect_keys(  {1, 1, 1, 2, 2, 2, 2, 3,    3, 4}, all_valid());
+  //                        {0, 3, 6, 1, 4, _, 9, 2, 7, 8, -}
+  result_wrapper expect_vals{0, 1, 2, 0, 1,    2, 3, 0, 1, 0};
+  // clang-format on
+
+  test_single_scan(
+    keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation(null_policy::INCLUDE));
+}
+
+struct groupby_count_scan_string_test : public cudf::test::BaseFixture {
+};
+
+TEST_F(groupby_count_scan_string_test, basic)
+{
+  using V              = cudf::string_view;
+  using R              = cudf::detail::target_type_t<V, aggregation::COUNT_ALL>;
+  using result_wrapper = fixed_width_column_wrapper<R, int32_t>;
+
+  // clang-format off
+  key_wrapper keys           {  1,   3,   3,   5,   5,   0};
+  strings_column_wrapper vals{"1", "1", "1", "1", "1", "1"};
+  // String values are only counted: rows are numbered from 0 within each key group.
+  key_wrapper expect_keys   {0, 1, 3, 3, 5, 5};
+  result_wrapper expect_vals{0, 0, 0, 1, 0, 1};
+  // clang-format on
+
+  test_single_scan(
+    keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation(null_policy::INCLUDE));
+}
+
+template <typename T>
+struct FixedPointTestBothReps : public cudf::test::BaseFixture {
+};
+
+TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes);
+
+TYPED_TEST(FixedPointTestBothReps, GroupByCountScan)
+{
+  using namespace numeric;
+  using decimalXX  = TypeParam;
+  using RepType    = cudf::device_storage_type_t<decimalXX>;
+  using fp_wrapper = fixed_point_column_wrapper<RepType>;
+
+  using V              = decimalXX;
+  using R              = cudf::detail::target_type_t<V, aggregation::COUNT_ALL>;
+  using result_wrapper = fixed_width_column_wrapper<R, int32_t>;
+
+  auto const scale = scale_type{-1};
+  // clang-format off
+  auto const keys = key_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+  auto const vals = fp_wrapper{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, scale};
+  // Expected counts are plain integers: the fixed-point scale plays no part in them.
+  auto const expect_keys = key_wrapper{1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+  auto const expect_vals = result_wrapper{0, 1, 2, 0, 1, 2, 3, 0, 1, 2};
+  // clang-format on
+
+  CUDF_EXPECT_THROW_MESSAGE(
+    test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation()),
+    "Unsupported groupby scan aggregation");
+
+  test_single_scan(
+    keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation(null_policy::INCLUDE));
+}
+
+struct groupby_dictionary_count_scan_test : public cudf::test::BaseFixture {
+};
+// COUNT_ALL scan over a dictionary values column grouped by string keys.
+TEST_F(groupby_dictionary_count_scan_test, basic)
+{
+  using V              = std::string;
+  using R              = cudf::detail::target_type_t<V, aggregation::COUNT_ALL>;
+  using result_wrapper = fixed_width_column_wrapper<R, int32_t>;
+
+  // clang-format off
+  strings_column_wrapper keys{"1", "3", "3", "5", "5", "0"};
+  dictionary_column_wrapper<K> vals{1, 1, 1, 1, 1, 1};
+  strings_column_wrapper expect_keys{"0", "1", "3", "3", "5", "5"};
+  result_wrapper expect_vals{0, 0, 0, 1, 0, 1};
+  // clang-format on
+
+  CUDF_EXPECT_THROW_MESSAGE(
+    test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation()),
+    "Unsupported groupby scan aggregation");
+  test_single_scan(
+    keys, vals, expect_keys, expect_vals, cudf::make_count_aggregation(null_policy::INCLUDE));
+}
+
+}  // namespace test
+}  // namespace cudf
diff --git a/cpp/tests/groupby/group_max_scan_test.cpp b/cpp/tests/groupby/group_max_scan_test.cpp
new file mode 100644
index 00000000000..c1fc48ca698
--- /dev/null
+++ b/cpp/tests/groupby/group_max_scan_test.cpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tests/groupby/groupby_test_util.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/dictionary/update_keys.hpp>
+
+namespace cudf {
+namespace test {
+using K           = int32_t;
+using key_wrapper = fixed_width_column_wrapper<K>;
+
+template <typename T>
+struct groupby_max_scan_test : public cudf::test::BaseFixture {
+  using V              = T;  // element type of the values column
+  using R              = cudf::detail::target_type_t<V, aggregation::MAX>;  // result type
+  using value_wrapper  = fixed_width_column_wrapper<V, int32_t>;
+  using result_wrapper = fixed_width_column_wrapper<R, int32_t>;
+};
+
+TYPED_TEST_CASE(groupby_max_scan_test, cudf::test::FixedWidthTypesWithoutFixedPoint);
+
+// Running maximum within each key group; output rows are expected ordered by key.
+TYPED_TEST(groupby_max_scan_test, basic)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys   {1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+  value_wrapper vals({5, 6, 7, 8, 9, 0, 1, 2, 3, 4});
+
+  key_wrapper expect_keys    {1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+                          // {5, 8, 1, 6, 9, 0, 4, 7, 2, 3}
+  result_wrapper expect_vals({5, 8, 8, 6, 9, 9, 9, 7, 7, 7});
+  // clang-format on
+
+  test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_max_aggregation());
+}
+
+// Empty keys and values are expected to produce empty scan output.
+TYPED_TEST(groupby_max_scan_test, empty_cols)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  key_wrapper keys{};
+  value_wrapper vals{};
+
+  key_wrapper expect_keys{};
+  result_wrapper expect_vals{};
+
+  test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_max_aggregation());
+}
+
+// Every key is null, so the expected output has no rows.
+TYPED_TEST(groupby_max_scan_test, zero_valid_keys)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys(  {1, 2, 3}, all_null());
+  value_wrapper vals({3, 4, 5});
+
+  key_wrapper expect_keys{};
+  result_wrapper expect_vals{};
+  // clang-format on
+
+  test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_max_aggregation());
+}
+
+// Every value is null, so every output row is expected to be null as well.
+TYPED_TEST(groupby_max_scan_test, zero_valid_values)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys   {1, 1, 1};
+  value_wrapper vals({3, 4, 5}, all_null());
+
+  key_wrapper expect_keys    {1, 1, 1};
+  result_wrapper expect_vals({-1, -1, -1}, all_null());
+  // clang-format on
+
+  test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_max_aggregation());
+}
+
+// Null-key rows are dropped; a null value stays null and does not affect the running maximum.
+TYPED_TEST(groupby_max_scan_test, null_keys_and_values)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys(  {1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4}, {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  value_wrapper vals({5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 4}, {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0});
+
+                         //  {1, 1, 1, 2, 2, 2, 2, 3,   _, 3, 4}
+  key_wrapper expect_keys(   {1, 1, 1, 2, 2, 2, 2, 3,      3, 4}, all_valid());
+                         //  { -, 8, 1, 6, 9,  -, 4, 7, _, 3, -}
+  result_wrapper expect_vals({-1, 8, 8, 6, 9, -1, 9, 7,    7, -1},
+                             { 0, 1, 1, 1, 1,  0, 1, 1,    1, 0});
+  // clang-format on
+
+  test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_max_aggregation());
+}
+
+template <typename T>
+struct FixedPointTestBothReps : public cudf::test::BaseFixture {
+};
+
+TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes);
+
+// Running maximum over fixed-point values, repeated for several scales; results keep the scale.
+TYPED_TEST(FixedPointTestBothReps, GroupBySortMaxScanDecimalAsValue)
+{
+  using namespace numeric;
+  using decimalXX  = TypeParam;
+  using RepType    = cudf::device_storage_type_t<decimalXX>;
+  using fp_wrapper = fixed_point_column_wrapper<RepType>;
+
+  for (auto const i : {2, 1, 0, -1, -2}) {
+    auto const scale = scale_type{i};
+    // clang-format off
+    auto const keys = key_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+    auto const vals = fp_wrapper{{5, 6, 7, 8, 9, 0, 1, 2, 3, 4}, scale};
+
+    auto const expect_keys     = key_wrapper{1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+    //                                      {5, 8, 1, 6, 9, 0, 4, 7, 2, 3}
+    auto const expect_vals_max = fp_wrapper{{5, 8, 8, 6, 9, 9, 9, 7, 7, 7}, scale};
+    // clang-format on
+
+    test_single_scan(keys, vals, expect_keys, expect_vals_max, cudf::make_max_aggregation());
+  }
+}
+
+}  // namespace test
+}  // namespace cudf
diff --git a/cpp/tests/groupby/group_min_scan_test.cpp b/cpp/tests/groupby/group_min_scan_test.cpp
new file mode 100644
index 00000000000..d3186d880cc
--- /dev/null
+++ b/cpp/tests/groupby/group_min_scan_test.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tests/groupby/groupby_test_util.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <cudf/detail/aggregation/aggregation.hpp>
+
+namespace cudf {
+namespace test {
+using K           = int32_t;
+using key_wrapper = fixed_width_column_wrapper<K>;
+
+template <typename T>
+struct groupby_min_scan_test : public cudf::test::BaseFixture {
+  using V              = T;  // element type of the values column
+  using R              = cudf::detail::target_type_t<V, aggregation::MIN>;  // result type
+  using value_wrapper  = fixed_width_column_wrapper<V, int32_t>;
+  using result_wrapper = fixed_width_column_wrapper<R, int32_t>;
+};
+
+TYPED_TEST_CASE(groupby_min_scan_test, cudf::test::FixedWidthTypesWithoutFixedPoint);
+
+// Running minimum within each key group; output rows are expected ordered by key.
+TYPED_TEST(groupby_min_scan_test, basic)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys   {1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+  value_wrapper vals({5, 6, 7, 8, 9, 0, 1, 2, 3, 4});
+
+  key_wrapper expect_keys    {1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+  result_wrapper expect_vals({5, 5, 1, 6, 6, 0, 0, 7, 2, 2});
+  // clang-format on
+
+  test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_min_aggregation());
+}
+
+// Empty keys and values are expected to produce empty scan output.
+TYPED_TEST(groupby_min_scan_test, empty_cols)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  key_wrapper keys{};
+  value_wrapper vals{};
+
+  key_wrapper expect_keys{};
+  result_wrapper expect_vals{};
+
+  test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_min_aggregation());
+}
+
+// Every key is null, so the expected output has no rows.
+TYPED_TEST(groupby_min_scan_test, zero_valid_keys)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys({1, 2, 3}, all_null());
+  value_wrapper vals({3, 4, 5});
+
+  key_wrapper expect_keys{};
+  result_wrapper expect_vals{};
+  // clang-format on
+
+  test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_min_aggregation());
+}
+
+// Every value is null, so every output row is expected to be null as well.
+TYPED_TEST(groupby_min_scan_test, zero_valid_values)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys   {1, 1, 1};
+  value_wrapper vals({3, 4, 5}, all_null());
+
+  key_wrapper expect_keys    {1, 1, 1};
+  result_wrapper expect_vals({-1, -1, -1}, all_null());
+  // clang-format on
+
+  test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_min_aggregation());
+}
+
+// Null-key rows are dropped; a null value stays null and does not affect the running minimum.
+TYPED_TEST(groupby_min_scan_test, null_keys_and_values)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper keys(  {1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4}, {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  value_wrapper vals({5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 4}, {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0});
+
+                         //  { 1, 1, 1, 2, 2,  2, 2, 3, _, 3, 4}
+  key_wrapper expect_keys(   { 1, 1, 1, 2, 2,  2, 2, 3,    3, 4}, all_valid());
+                         //  { _, 8, 1, 6, 9,  _, 4, 7, 2, 3, _}
+  result_wrapper expect_vals({-1, 8, 1, 6, 6, -1, 4, 7,    3, -1},
+                             { 0, 1, 1, 1, 1,  0, 1, 1,    1, 0});
+  // clang-format on
+
+  test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_min_aggregation());
+}
+
+struct groupby_min_scan_string_test : public cudf::test::BaseFixture {
+};
+// A MIN scan over string values is expected to fail as an unsupported type-agg combination.
+TEST_F(groupby_min_scan_string_test, basic)
+{
+  key_wrapper keys{1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+  strings_column_wrapper vals{"año", "bit", "₹1", "aaa", "zit", "bat", "aaa", "$1", "₹1", "wut"};
+
+  key_wrapper expect_keys{1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+  strings_column_wrapper expect_vals;
+
+  CUDF_EXPECT_THROW_MESSAGE(
+    test_single_scan(keys, vals, expect_keys, expect_vals, cudf::make_min_aggregation()),
+    "Unsupported groupby scan type-agg combination");
+}
+
+template <typename T>
+struct FixedPointTestBothReps : public cudf::test::BaseFixture {
+};
+
+TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes);
+
+// Running minimum over fixed-point values, repeated for several scales; results keep the scale.
+TYPED_TEST(FixedPointTestBothReps, GroupBySortMinScanDecimalAsValue)
+{
+  using namespace numeric;
+  using decimalXX  = TypeParam;
+  using RepType    = cudf::device_storage_type_t<decimalXX>;
+  using fp_wrapper = fixed_point_column_wrapper<RepType>;
+
+  for (auto const i : {2, 1, 0, -1, -2}) {
+    auto const scale = scale_type{i};
+
+    // clang-format off
+    auto const keys = key_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+    auto const vals = fp_wrapper{{5, 6, 7, 8, 9, 0, 1, 2, 3, 4}, scale};
+
+    auto const expect_keys     = key_wrapper{1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+    //                                      {5, 8, 1, 6, 9, 0, 4, 7, 2, 3}
+    auto const expect_vals_min = fp_wrapper{{5, 5, 1, 6, 6, 0, 0, 7, 2, 2}, scale};
+    // clang-format on
+
+    test_single_scan(keys, vals, expect_keys, expect_vals_min, cudf::make_min_aggregation());
+  }
+}
+
+}  // namespace test
+}  // namespace cudf
diff --git a/cpp/tests/groupby/group_sum_scan_test.cpp b/cpp/tests/groupby/group_sum_scan_test.cpp
new file mode 100644
index 00000000000..9f6c21462b3
--- /dev/null
+++ b/cpp/tests/groupby/group_sum_scan_test.cpp
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <tests/groupby/groupby_test_util.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <cudf/detail/aggregation/aggregation.hpp>
+
+namespace cudf {
+namespace test {
+using K           = int32_t;
+using key_wrapper = fixed_width_column_wrapper<K>;
+// Typed fixture: V is the value type under test, R the SUM result type derived from V.
+template <typename T>
+struct groupby_sum_scan_test : public cudf::test::BaseFixture {
+  using V              = T;
+  using R              = cudf::detail::target_type_t<V, aggregation::SUM>;
+  using value_wrapper  = fixed_width_column_wrapper<V, int32_t>;
+  using result_wrapper = fixed_width_column_wrapper<R, int32_t>;
+};
+// Value types the typed tests are instantiated with: signed integers, floats and durations.
+using supported_types =
+  cudf::test::Concat<cudf::test::Types<int8_t, int16_t, int32_t, int64_t, float, double>,
+                     cudf::test::DurationTypes>;
+
+TYPED_TEST_CASE(groupby_sum_scan_test, supported_types);
+
+TYPED_TEST(groupby_sum_scan_test, basic)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+  // The running sum restarts for every key; expected rows are listed in sorted-key order.
+  // clang-format off
+  key_wrapper keys  {1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+  value_wrapper vals{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+  key_wrapper expect_keys   {1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+  //                        {0, 3, 6, 1, 4, 5, 9, 2, 7, 8}
+  result_wrapper expect_vals{0, 3, 9, 1, 5, 10, 19, 2, 9, 17};
+  // clang-format on
+  auto agg = cudf::make_sum_aggregation();
+  test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg));
+}
+
+TYPED_TEST(groupby_sum_scan_test, empty_cols)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+  // Empty keys and values are expected to produce empty result columns.
+  // clang-format off
+  key_wrapper keys{};
+  value_wrapper vals{};
+
+  key_wrapper expect_keys{};
+  result_wrapper expect_vals{};
+  // clang-format on
+
+  auto agg = cudf::make_sum_aggregation();
+  test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg));
+}
+
+TYPED_TEST(groupby_sum_scan_test, zero_valid_keys)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+  // Every key is null; with the helper's default null policy (EXCLUDE) the result is empty.
+  // clang-format off
+  key_wrapper keys({1, 2, 3}, all_null());
+  value_wrapper vals{3, 4, 5};
+
+  key_wrapper expect_keys{};
+  result_wrapper expect_vals{};
+  // clang-format on
+
+  auto agg = cudf::make_sum_aggregation();
+  test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg));
+}
+
+TYPED_TEST(groupby_sum_scan_test, zero_valid_values)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+  // All values are null: one output row is expected per input row, each of them null.
+  // clang-format off
+  key_wrapper keys   {1, 1, 1};
+  value_wrapper vals({3, 4, 5}, all_null());
+
+  key_wrapper expect_keys    {1, 1, 1};
+  result_wrapper expect_vals({3, 4, 5}, all_null());
+  // clang-format on
+
+  auto agg = cudf::make_sum_aggregation();
+  test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg));
+}
+
+TYPED_TEST(groupby_sum_scan_test, null_keys_and_values)
+{
+  using value_wrapper  = typename TestFixture::value_wrapper;
+  using result_wrapper = typename TestFixture::result_wrapper;
+  // The null-key row is dropped; null values stay null and do not add to the running sum.
+  // clang-format off
+  key_wrapper keys(  {1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4}, {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  value_wrapper vals({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4}, {0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0});
+
+  //                         { 1, 1, 1, 2, 2,  2,  2, 3, *, 3, 4};
+  key_wrapper expect_keys(   { 1, 1, 1, 2, 2,  2,  2, 3,    3, 4}, all_valid());
+                          // { -, 3, 6, 1, 4,  -,  9, 2, _, 8, -}
+  result_wrapper expect_vals({-1, 3, 9, 1, 5, -1, 14, 2,   10, -1},
+                             { 0, 1, 1, 1, 1,  0,  1, 1,    1, 0});
+  // clang-format on
+
+  auto agg = cudf::make_sum_aggregation();
+  test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg));
+}
+
+template <typename T>
+struct FixedPointTestBothReps : public cudf::test::BaseFixture {
+};
+
+TYPED_TEST_CASE(FixedPointTestBothReps, cudf::test::FixedPointTypes);
+// Groupby SUM scan with fixed-point values, repeated at several scales.
+TYPED_TEST(FixedPointTestBothReps, GroupBySortSumScanDecimalAsValue)
+{
+  using namespace numeric;
+  using decimalXX      = TypeParam;
+  using RepType        = cudf::device_storage_type_t<decimalXX>;
+  using fp_wrapper     = fixed_point_column_wrapper<RepType>;
+  using out_fp_wrapper = fixed_point_column_wrapper<int64_t>;
+  // Expected sums are built with an int64_t representation whatever the input representation.
+  for (auto const i : {2, 1, 0, -1, -2}) {
+    auto const scale = scale_type{i};
+    // clang-format off
+    auto const keys = key_wrapper{1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+    auto const vals = fp_wrapper{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, scale};
+    // vals in sorted-key order:                {0, 3, 6, 1, 4, 5, 9, 2, 7, 8}
+    auto const expect_keys     = key_wrapper    {1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+    auto const expect_vals_sum = out_fp_wrapper{{0, 3, 9, 1, 5, 10, 19, 2, 9, 17}, scale};
+    // clang-format on
+
+    auto agg = cudf::make_sum_aggregation();
+    test_single_scan(keys, vals, expect_keys, expect_vals_sum, std::move(agg));
+  }
+}
+
+}  // namespace test
+}  // namespace cudf
diff --git a/cpp/tests/groupby/groupby_keys_test.cpp b/cpp/tests/groupby/groupby_keys_test.cpp
index 06ec9eb8968..78299e1a18c 100644
--- a/cpp/tests/groupby/groupby_keys_test.cpp
+++ b/cpp/tests/groupby/groupby_keys_test.cpp
@@ -33,166 +33,229 @@ using supported_types = cudf::test::
 
 TYPED_TEST_CASE(groupby_keys_test, supported_types);
 
-// clang-format off
 TYPED_TEST(groupby_keys_test, basic)
 {
-    using K = TypeParam;
-    using V = int32_t;
-    using R = cudf::detail::target_type_t<V, aggregation::COUNT_VALID>;
+  using K = TypeParam;
+  using V = int32_t;
+  using R = cudf::detail::target_type_t<V, aggregation::COUNT_VALID>;
 
-    fixed_width_column_wrapper<K> keys        { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
-    fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  // clang-format off
+  fixed_width_column_wrapper<K> keys        { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
+  fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
 
-    fixed_width_column_wrapper<K> expect_keys { 1, 2, 3 };
-    fixed_width_column_wrapper<R> expect_vals { 3, 4, 3 };
+  fixed_width_column_wrapper<K> expect_keys { 1, 2, 3 };
+  fixed_width_column_wrapper<R> expect_vals { 3, 4, 3 };
+  // clang-format on
 
-    auto agg = cudf::make_count_aggregation();
-    test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
+  auto agg = cudf::make_count_aggregation();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
 }
 
 TYPED_TEST(groupby_keys_test, zero_valid_keys)
 {
-    using K = TypeParam;
-    using V = int32_t;
-    using R = cudf::detail::target_type_t<V, aggregation::COUNT_VALID>;
+  using K = TypeParam;
+  using V = int32_t;
+  using R = cudf::detail::target_type_t<V, aggregation::COUNT_VALID>;
 
-    fixed_width_column_wrapper<K> keys      ( { 1, 2, 3}, all_null() );
-    fixed_width_column_wrapper<V> vals        { 3, 4, 5};
+  // clang-format off
+  fixed_width_column_wrapper<K> keys      ( { 1, 2, 3}, all_null() );
+  fixed_width_column_wrapper<V> vals        { 3, 4, 5};
 
-    fixed_width_column_wrapper<K> expect_keys { };
-    fixed_width_column_wrapper<R> expect_vals { };
+  fixed_width_column_wrapper<K> expect_keys { };
+  fixed_width_column_wrapper<R> expect_vals { };
+  // clang-format on
 
-    auto agg = cudf::make_count_aggregation();
-    test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
+  auto agg = cudf::make_count_aggregation();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
 }
 
 TYPED_TEST(groupby_keys_test, some_null_keys)
 {
-    using K = TypeParam;
-    using V = int32_t;
-    using R = cudf::detail::target_type_t<V, aggregation::COUNT_VALID>;
-
-    fixed_width_column_wrapper<K> keys(       { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
-                                              { 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
-    fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
+  using K = TypeParam;
+  using V = int32_t;
+  using R = cudf::detail::target_type_t<V, aggregation::COUNT_VALID>;
 
-                                          //  { 1, 1, 1,  2, 2, 2, 2,  3, 3,  4}
-    fixed_width_column_wrapper<K> expect_keys({ 1,        2,           3,     4}, all_valid());
-                                          //  { 0, 3, 6,  1, 4, 5, 9,  2, 8,  -}
-    fixed_width_column_wrapper<R> expect_vals { 3,        4,           2,     1};
+  // clang-format off
+  fixed_width_column_wrapper<K> keys(       { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
+                                            { 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
+
+                                        //  { 1, 1, 1,  2, 2, 2, 2,  3, 3,  4}
+  fixed_width_column_wrapper<K> expect_keys({ 1,        2,           3,     4}, all_valid());
+                                        //  { 0, 3, 6,  1, 4, 5, 9,  2, 8,  -}
+  fixed_width_column_wrapper<R> expect_vals { 3,        4,           2,     1};
+  // clang-format on
 
-    auto agg = cudf::make_count_aggregation();
-    test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
+  auto agg = cudf::make_count_aggregation();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
 }
 
 TYPED_TEST(groupby_keys_test, include_null_keys)
 {
-    using K = TypeParam;
-    using V = int32_t;
-    using R = cudf::detail::target_type_t<V, aggregation::SUM>;
-
-    fixed_width_column_wrapper<K> keys(       { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
-                                              { 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
-    fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
-
-                                          //  { 1, 1, 1,  2, 2, 2, 2,  3, 3,  4,  -}
-    fixed_width_column_wrapper<K> expect_keys({ 1,        2,           3,     4,  3},
-                                              { 1,        1,           1,     1,  0});
-                                          //  { 0, 3, 6,  1, 4, 5, 9,  2, 8,  -,  -}
-    fixed_width_column_wrapper<R> expect_vals { 9,        19,          10,    4,  7};
-
-    auto agg = cudf::make_sum_aggregation();
-    test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg),
-        force_use_sort_impl::NO, null_policy::INCLUDE);
+  using K = TypeParam;
+  using V = int32_t;
+  using R = cudf::detail::target_type_t<V, aggregation::SUM>;
+
+  // clang-format off
+  fixed_width_column_wrapper<K> keys(       { 1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4},
+                                            { 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1});
+  fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
+
+                                        //  { 1, 1, 1,  2, 2, 2, 2,  3, 3,  4,  -}
+  fixed_width_column_wrapper<K> expect_keys({ 1,        2,           3,     4,  3},
+                                            { 1,        1,           1,     1,  0});
+                                        //  { 0, 3, 6,  1, 4, 5, 9,  2, 8,  -,  -}
+  fixed_width_column_wrapper<R> expect_vals { 9,        19,          10,    4,  7};
+  // clang-format on
+
+  auto agg = cudf::make_sum_aggregation();
+  test_single_agg(keys,
+                  vals,
+                  expect_keys,
+                  expect_vals,
+                  std::move(agg),
+                  force_use_sort_impl::NO,
+                  null_policy::INCLUDE);
 }
 
 TYPED_TEST(groupby_keys_test, pre_sorted_keys)
 {
-    using K = TypeParam;
-    using V = int32_t;
-    using R = cudf::detail::target_type_t<V, aggregation::SUM>;
+  using K = TypeParam;
+  using V = int32_t;
+  using R = cudf::detail::target_type_t<V, aggregation::SUM>;
 
-    fixed_width_column_wrapper<K> keys        { 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4};
-    fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
+  // clang-format off
+  fixed_width_column_wrapper<K> keys        { 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4};
+  fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
 
-    fixed_width_column_wrapper<K> expect_keys { 1,       2,          3,       4};
-    fixed_width_column_wrapper<R> expect_vals { 3,       18,         24,      4};
+  fixed_width_column_wrapper<K> expect_keys { 1,       2,          3,       4};
+  fixed_width_column_wrapper<R> expect_vals { 3,       18,         24,      4};
+  // clang-format on
 
-    auto agg = cudf::make_sum_aggregation();
-    test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg), 
-        force_use_sort_impl::YES, null_policy::EXCLUDE, sorted::YES);
+  auto agg = cudf::make_sum_aggregation();
+  test_single_agg(keys,
+                  vals,
+                  expect_keys,
+                  expect_vals,
+                  std::move(agg),
+                  force_use_sort_impl::YES,
+                  null_policy::EXCLUDE,
+                  sorted::YES);
 }
 
 TYPED_TEST(groupby_keys_test, pre_sorted_keys_descending)
 {
-    using K = TypeParam;
-    using V = int32_t;
-    using R = cudf::detail::target_type_t<V, aggregation::SUM>;
+  using K = TypeParam;
+  using V = int32_t;
+  using R = cudf::detail::target_type_t<V, aggregation::SUM>;
 
-    fixed_width_column_wrapper<K> keys        { 4, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1};
-    fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
+  // clang-format off
+  fixed_width_column_wrapper<K> keys        { 4, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1};
+  fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
 
-    fixed_width_column_wrapper<K> expect_keys { 4, 3,       2,          1      };
-    fixed_width_column_wrapper<R> expect_vals { 0, 6,       22,        21      };
+  fixed_width_column_wrapper<K> expect_keys { 4, 3,       2,          1      };
+  fixed_width_column_wrapper<R> expect_vals { 0, 6,       22,        21      };
+  // clang-format on
 
-    auto agg = cudf::make_sum_aggregation();
-    test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg),
-        force_use_sort_impl::YES, null_policy::EXCLUDE, sorted::YES, {order::DESCENDING});
+  auto agg = cudf::make_sum_aggregation();
+  test_single_agg(keys,
+                  vals,
+                  expect_keys,
+                  expect_vals,
+                  std::move(agg),
+                  force_use_sort_impl::YES,
+                  null_policy::EXCLUDE,
+                  sorted::YES,
+                  {order::DESCENDING});
 }
 
 TYPED_TEST(groupby_keys_test, pre_sorted_keys_nullable)
 {
-    using K = TypeParam;
-    using V = int32_t;
-    using R = cudf::detail::target_type_t<V, aggregation::SUM>;
+  using K = TypeParam;
+  using V = int32_t;
+  using R = cudf::detail::target_type_t<V, aggregation::SUM>;
 
-    fixed_width_column_wrapper<K> keys(       { 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4},
-                                              { 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1});
-    fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
+  // clang-format off
+  fixed_width_column_wrapper<K> keys(       { 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4},
+                                            { 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1});
+  fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
 
-    fixed_width_column_wrapper<K> expect_keys({ 1,       2,          3,       4}, all_valid());
-    fixed_width_column_wrapper<R> expect_vals { 3,       15,         17,      4};
+  fixed_width_column_wrapper<K> expect_keys({ 1,       2,          3,       4}, all_valid());
+  fixed_width_column_wrapper<R> expect_vals { 3,       15,         17,      4};
+  // clang-format on
 
-    auto agg = cudf::make_sum_aggregation();
-    test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg),
-        force_use_sort_impl::YES, null_policy::EXCLUDE, sorted::YES); 
+  auto agg = cudf::make_sum_aggregation();
+  test_single_agg(keys,
+                  vals,
+                  expect_keys,
+                  expect_vals,
+                  std::move(agg),
+                  force_use_sort_impl::YES,
+                  null_policy::EXCLUDE,
+                  sorted::YES);
 }
 
 TYPED_TEST(groupby_keys_test, pre_sorted_keys_nulls_before_include_nulls)
 {
-    using K = TypeParam;
-    using V = int32_t;
-    using R = cudf::detail::target_type_t<V, aggregation::SUM>;
-
-    fixed_width_column_wrapper<K> keys(       { 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4},
-                                              { 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1});
-    fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
-
-                                          //  { 1, 1, 1,  -, -,  2, 2,  -,  3, 3,  4}
-    fixed_width_column_wrapper<K> expect_keys({ 1,        2,     2,     3,  3,     4},
-                                              { 1,        0,     1,     0,  1,     1});
-    fixed_width_column_wrapper<R> expect_vals { 3,        7,     11,    7,  17,    4};
-
-    auto agg = cudf::make_sum_aggregation();
-    test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg),
-        force_use_sort_impl::YES, null_policy::INCLUDE, sorted::YES); 
+  using K = TypeParam;
+  using V = int32_t;
+  using R = cudf::detail::target_type_t<V, aggregation::SUM>;
+
+  // clang-format off
+  fixed_width_column_wrapper<K> keys(       { 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4},
+                                            { 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1});
+  fixed_width_column_wrapper<V> vals        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 4};
+
+                                        //  { 1, 1, 1,  -, -,  2, 2,  -,  3, 3,  4}
+  fixed_width_column_wrapper<K> expect_keys({ 1,        2,     2,     3,  3,     4},
+                                            { 1,        0,     1,     0,  1,     1});
+  fixed_width_column_wrapper<R> expect_vals { 3,        7,     11,    7,  17,    4};
+  // clang-format on
+
+  auto agg = cudf::make_sum_aggregation();
+  test_single_agg(keys,
+                  vals,
+                  expect_keys,
+                  expect_vals,
+                  std::move(agg),
+                  force_use_sort_impl::YES,
+                  null_policy::INCLUDE,
+                  sorted::YES);
+}
+
+TYPED_TEST(groupby_keys_test, mismatch_num_rows)
+{
+  using K = TypeParam;
+  using V = int32_t;
+  fixed_width_column_wrapper<K> keys{1, 2, 3};
+  fixed_width_column_wrapper<V> vals{0, 1, 2, 3, 4};
+  // One aggregation per check: an object already handed over via std::move must not be reused.
+  auto agg      = cudf::make_count_aggregation();
+  auto scan_agg = cudf::make_count_aggregation();
+  CUDF_EXPECT_THROW_MESSAGE(test_single_agg(keys, vals, keys, vals, std::move(agg)),
+                            "Size mismatch between request values and groupby keys.");
+  CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, vals, keys, vals, std::move(scan_agg)),
+                            "Size mismatch between request values and groupby keys.");
 }
 
-struct groupby_string_keys_test : public cudf::test::BaseFixture {};
+struct groupby_string_keys_test : public cudf::test::BaseFixture {
+};
 
 TEST_F(groupby_string_keys_test, basic)
 {
-    using V = int32_t;
-    using R = cudf::detail::target_type_t<V, aggregation::SUM>;
+  using V = int32_t;
+  using R = cudf::detail::target_type_t<V, aggregation::SUM>;
 
-    strings_column_wrapper        keys        { "aaa", "año", "₹1", "aaa", "año", "año", "aaa", "₹1", "₹1", "año"};
-    fixed_width_column_wrapper<V> vals        {     0,     1,    2,     3,     4,     5,     6,    7,    8,     9};
+  // clang-format off
+  strings_column_wrapper        keys        { "aaa", "año", "₹1", "aaa", "año", "año", "aaa", "₹1", "₹1", "año"};
+  fixed_width_column_wrapper<V> vals        {     0,     1,    2,     3,     4,     5,     6,    7,    8,     9};
 
-    strings_column_wrapper        expect_keys({ "aaa", "año", "₹1" });
-    fixed_width_column_wrapper<R> expect_vals {     9,    19,   17 };
+  strings_column_wrapper        expect_keys({ "aaa", "año", "₹1" });
+  fixed_width_column_wrapper<R> expect_vals {     9,    19,   17 };
+  // clang-format on
 
-    auto agg = cudf::make_sum_aggregation();
-    test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
+  auto agg = cudf::make_sum_aggregation();
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
 }
 // clang-format on
 
diff --git a/cpp/tests/groupby/groupby_test_util.hpp b/cpp/tests/groupby/groupby_test_util.hpp
index 0b68b7bbfb4..c7e27cd6367 100644
--- a/cpp/tests/groupby/groupby_test_util.hpp
+++ b/cpp/tests/groupby/groupby_test_util.hpp
@@ -99,6 +99,32 @@ inline void test_single_agg(column_view const& keys,
   }
 }
 
+inline void test_single_scan(column_view const& keys,
+                             column_view const& values,
+                             column_view const& expect_keys,
+                             column_view const& expect_vals,
+                             std::unique_ptr<aggregation>&& agg,
+                             null_policy include_null_keys                  = null_policy::EXCLUDE,
+                             sorted keys_are_sorted                         = sorted::NO,
+                             std::vector<order> const& column_order         = {},
+                             std::vector<null_order> const& null_precedence = {})
+{
+  // Scans `values` grouped by `keys` with `agg`, then checks the returned keys and column.
+  groupby::aggregation_request request;
+  request.values = values;
+  request.aggregations.push_back(std::move(agg));
+  std::vector<groupby::aggregation_request> requests;
+  requests.push_back(std::move(request));
+
+  groupby::groupby grouper(
+    table_view({keys}), include_null_keys, keys_are_sorted, column_order, null_precedence);
+  // groupby scan uses sort implementation
+  auto scanned = grouper.scan(requests);
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(table_view({expect_keys}), scanned.first->view());
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expect_vals, *scanned.second[0].results[0], true);
+}
+
 inline auto all_valid()
 {
   auto all_valid = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });