diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 63f6857ee08..370f84fc14a 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -177,6 +177,7 @@ ConfigureBench(
   groupby/group_shift_benchmark.cu
   groupby/group_struct_benchmark.cu
   groupby/group_no_requests_benchmark.cu
+  groupby/group_scan_benchmark.cu
 )
 
 # ##################################################################################################
diff --git a/cpp/benchmarks/groupby/group_benchmark_common.hpp b/cpp/benchmarks/groupby/group_benchmark_common.hpp
new file mode 100644
index 00000000000..fba5bc28822
--- /dev/null
+++ b/cpp/benchmarks/groupby/group_benchmark_common.hpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <random>
+
+/**
+ * @brief Draw a pseudo-random integer uniformly from the closed range `[min, max]`.
+ *
+ * All calls for a given `T` share one fixed-seed `std::mt19937` engine, so the sequence of
+ * draws is reproducible from run to run as long as the calls happen in the same order.
+ *
+ * The distribution is built on every call: a function-local `static` distribution would be
+ * initialised by the first call only and would silently ignore the bounds of later calls.
+ *
+ * @tparam T Integer type accepted by `std::uniform_int_distribution`
+ * @param min Smallest value that may be returned
+ * @param max Largest value that may be returned; must not be less than @p min
+ * @return A value in `[min, max]`
+ */
+template <typename T>
+T random_int(T min, T max)
+{
+  constexpr unsigned seed = 13377331;
+  static std::mt19937 engine{seed};
+
+  std::uniform_int_distribution<T> uniform{min, max};
+  return uniform(engine);
+}
diff --git a/cpp/benchmarks/groupby/group_no_requests_benchmark.cu b/cpp/benchmarks/groupby/group_no_requests_benchmark.cu
index 7dbe1888cee..209155862bd 100644
--- a/cpp/benchmarks/groupby/group_no_requests_benchmark.cu
+++ b/cpp/benchmarks/groupby/group_no_requests_benchmark.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,32 +14,23 @@
  * limitations under the License.
  */
 
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/groupby/group_benchmark_common.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
 #include <cudf/copying.hpp>
 #include <cudf/detail/aggregation/aggregation.hpp>
 #include <cudf/groupby.hpp>
 #include <cudf/sorting.hpp>
 #include <cudf/table/table.hpp>
+
 #include <cudf_test/column_wrapper.hpp>
-#include <fixture/benchmark_fixture.hpp>
-#include <synchronization/synchronization.hpp>
 
 #include <memory>
-#include <random>
 
 class Groupby : public cudf::benchmark {
 };
 
-// TODO: put it in a struct so `uniform` can be remade with different min, max
-template <typename T>
-T random_int(T min, T max)
-{
-  static unsigned seed = 13377331;
-  static std::mt19937 engine{seed};
-  static std::uniform_int_distribution<T> uniform{min, max};
-
-  return uniform(engine);
-}
-
 void BM_basic_no_requests(benchmark::State& state)
 {
   using wrapper = cudf::test::fixed_width_column_wrapper<int64_t>;
diff --git a/cpp/benchmarks/groupby/group_nth_benchmark.cu b/cpp/benchmarks/groupby/group_nth_benchmark.cu
index 8d1de36db95..107b3839c4c 100644
--- a/cpp/benchmarks/groupby/group_nth_benchmark.cu
+++ b/cpp/benchmarks/groupby/group_nth_benchmark.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,32 +14,23 @@
  * limitations under the License.
  */
 
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/groupby/group_benchmark_common.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
 #include <cudf/copying.hpp>
 #include <cudf/detail/aggregation/aggregation.hpp>
 #include <cudf/groupby.hpp>
 #include <cudf/sorting.hpp>
 #include <cudf/table/table.hpp>
+
 #include <cudf_test/column_wrapper.hpp>
-#include <fixture/benchmark_fixture.hpp>
-#include <synchronization/synchronization.hpp>
 
 #include <memory>
-#include <random>
 
 class Groupby : public cudf::benchmark {
 };
 
-// TODO: put it in a struct so `uniform` can be remade with different min, max
-template <typename T>
-T random_int(T min, T max)
-{
-  static unsigned seed = 13377331;
-  static std::mt19937 engine{seed};
-  static std::uniform_int_distribution<T> uniform{min, max};
-
-  return uniform(engine);
-}
-
 void BM_pre_sorted_nth(benchmark::State& state)
 {
   using wrapper = cudf::test::fixed_width_column_wrapper<int64_t>;
diff --git a/cpp/benchmarks/groupby/group_scan_benchmark.cu b/cpp/benchmarks/groupby/group_scan_benchmark.cu
new file mode 100644
index 00000000000..d9849e53498
--- /dev/null
+++ b/cpp/benchmarks/groupby/group_scan_benchmark.cu
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/groupby/group_benchmark_common.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
+#include <cudf/copying.hpp>
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/groupby.hpp>
+#include <cudf/sorting.hpp>
+#include <cudf/table/table.hpp>
+
+#include <cudf_test/column_wrapper.hpp>
+
+class Groupby : public cudf::benchmark {  // fixture type for the BENCHMARK_*_F uses in this file
+};
+
+// Times a SUM groupby scan on random keys (three identical key columns) and random values.
+void BM_basic_sum_scan(benchmark::State& state)
+{
+  using column_t = cudf::test::fixed_width_column_wrapper<int64_t>;
+  auto const num_rows = static_cast<cudf::size_type>(state.range(0));
+
+  // Every dereference draws a new value in [0, 100]; the row index is ignored.
+  auto random_it = cudf::detail::make_counting_transform_iterator(
+    0, [](cudf::size_type) { return random_int(0, 100); });
+
+  // Keys are built before values; both pull from the same random stream.
+  column_t keys(random_it, random_it + num_rows);
+  column_t vals(random_it, random_it + num_rows);
+  cudf::groupby::groupby grouper(cudf::table_view({keys, keys, keys}));
+
+  std::vector<cudf::groupby::scan_request> scan_requests(1);
+  scan_requests.front().values = vals;
+  scan_requests.front().aggregations.push_back(
+    cudf::make_sum_aggregation<cudf::groupby_scan_aggregation>());
+
+  for (auto _ : state) {
+    cuda_event_timer event_timer(state, true);
+    auto scan_output = grouper.scan(scan_requests);
+  }
+}
+
+BENCHMARK_DEFINE_F(Groupby, BasicSumScan)(::benchmark::State& state) { BM_basic_sum_scan(state); }
+
+BENCHMARK_REGISTER_F(Groupby, BasicSumScan)
+  ->UseManualTime()  // NOTE(review): time presumably reported by cuda_event_timer; confirm
+  ->Unit(benchmark::kMillisecond)
+  ->Arg(1000000)     // 1M rows
+  ->Arg(10000000)    // 10M rows
+  ->Arg(100000000);  // 100M rows
+
+// Times a SUM groupby scan on keys sorted up front and passed with `sorted::YES`.
+void BM_pre_sorted_sum_scan(benchmark::State& state)
+{
+  using column_t = cudf::test::fixed_width_column_wrapper<int64_t>;
+  auto const num_rows = static_cast<cudf::size_type>(state.range(0));
+
+  // Every dereference draws a new value in [0, 100]; the row index is ignored.
+  auto random_it = cudf::detail::make_counting_transform_iterator(
+    0, [](cudf::size_type) { return random_int(0, 100); });
+  // Validity flag for a value row: true when a separate draw from [0, 100] is below 90.
+  auto validity_it = cudf::detail::make_counting_transform_iterator(
+    0, [](cudf::size_type) { return random_int(0, 100) < 90; });
+
+  column_t keys(random_it, random_it + num_rows);
+  column_t vals(random_it, random_it + num_rows, validity_it);
+
+  // Sort the single key column; the values are random already, so they are left in place.
+  auto const unsorted_keys = cudf::table_view({keys});
+  auto const key_order     = cudf::sorted_order(unsorted_keys);
+  auto const presorted     = cudf::gather(unsorted_keys, *key_order);
+  cudf::groupby::groupby grouper(*presorted, cudf::null_policy::EXCLUDE, cudf::sorted::YES);
+
+  std::vector<cudf::groupby::scan_request> scan_requests(1);
+  scan_requests.front().values = vals;
+  scan_requests.front().aggregations.push_back(
+    cudf::make_sum_aggregation<cudf::groupby_scan_aggregation>());
+
+  for (auto _ : state) {
+    cuda_event_timer event_timer(state, true);
+    auto scan_output = grouper.scan(scan_requests);
+  }
+}
+
+BENCHMARK_DEFINE_F(Groupby, PreSortedSumScan)(::benchmark::State& state)
+{
+  BM_pre_sorted_sum_scan(state);
+}
+
+BENCHMARK_REGISTER_F(Groupby, PreSortedSumScan)
+  ->UseManualTime()  // NOTE(review): time presumably reported by cuda_event_timer; confirm
+  ->Unit(benchmark::kMillisecond)
+  ->Arg(1000000)     // 1M rows
+  ->Arg(10000000)    // 10M rows
+  ->Arg(100000000);  // 100M rows
diff --git a/cpp/benchmarks/groupby/group_shift_benchmark.cu b/cpp/benchmarks/groupby/group_shift_benchmark.cu
index 81afcdd80e1..6b0710f4044 100644
--- a/cpp/benchmarks/groupby/group_shift_benchmark.cu
+++ b/cpp/benchmarks/groupby/group_shift_benchmark.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,7 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/groupby/group_benchmark_common.hpp>
 #include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/detail/iterator.cuh>
@@ -24,24 +26,9 @@
 
 #include <cudf_test/column_wrapper.hpp>
 
-#include <benchmark/benchmark.h>
-
-#include <random>
-
 class Groupby : public cudf::benchmark {
 };
 
-// TODO: put it in a struct so `uniform` can be remade with different min, max
-template <typename T>
-T random_int(T min, T max)
-{
-  static unsigned seed = 13377331;
-  static std::mt19937 engine{seed};
-  static std::uniform_int_distribution<T> uniform{min, max};
-
-  return uniform(engine);
-}
-
 void BM_group_shift(benchmark::State& state)
 {
   using wrapper = cudf::test::fixed_width_column_wrapper<int64_t>;
diff --git a/cpp/benchmarks/groupby/group_sum_benchmark.cu b/cpp/benchmarks/groupby/group_sum_benchmark.cu
index 0e9f5061a1a..63f9aa02070 100644
--- a/cpp/benchmarks/groupby/group_sum_benchmark.cu
+++ b/cpp/benchmarks/groupby/group_sum_benchmark.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,32 +14,23 @@
  * limitations under the License.
  */
 
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/groupby/group_benchmark_common.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
 #include <cudf/copying.hpp>
 #include <cudf/detail/aggregation/aggregation.hpp>
 #include <cudf/groupby.hpp>
 #include <cudf/sorting.hpp>
 #include <cudf/table/table.hpp>
+
 #include <cudf_test/column_wrapper.hpp>
-#include <fixture/benchmark_fixture.hpp>
-#include <synchronization/synchronization.hpp>
 
 #include <memory>
-#include <random>
 
 class Groupby : public cudf::benchmark {
 };
 
-// TODO: put it in a struct so `uniform` can be remade with different min, max
-template <typename T>
-T random_int(T min, T max)
-{
-  static unsigned seed = 13377331;
-  static std::mt19937 engine{seed};
-  static std::uniform_int_distribution<T> uniform{min, max};
-
-  return uniform(engine);
-}
-
 void BM_basic_sum(benchmark::State& state)
 {
   using wrapper = cudf::test::fixed_width_column_wrapper<int64_t>;
diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp
index cbe5f08639a..05330a7c492 100644
--- a/cpp/src/groupby/sort/functors.hpp
+++ b/cpp/src/groupby/sort/functors.hpp
@@ -40,12 +40,23 @@ struct store_result_functor {
                        sort::sort_groupby_helper& helper,
                        cudf::detail::result_cache& cache,
                        rmm::cuda_stream_view stream,
-                       rmm::mr::device_memory_resource* mr)
-    : helper(helper), cache(cache), values(values), stream(stream), mr(mr)
+                       rmm::mr::device_memory_resource* mr,
+                       sorted keys_are_sorted = sorted::NO)
+    : helper(helper),
+      cache(cache),
+      values(values),
+      stream(stream),
+      mr(mr),
+      keys_are_sorted(keys_are_sorted)
   {
   }
 
  protected:
+  /**
+   * @brief Check whether this functor was constructed with keys declared as presorted
+   */
+  bool is_presorted() const { return keys_are_sorted == sorted::YES; }
+
   /**
    * @brief Get the grouped values
    *
@@ -54,6 +65,8 @@ struct store_result_functor {
    */
   column_view get_grouped_values()
   {
+    if (is_presorted()) { return values; }
+
     // TODO (dm): After implementing single pass multi-agg, explore making a
     //            cache of all grouped value columns rather than one at a time
     if (grouped_values)
@@ -74,6 +87,7 @@ struct store_result_functor {
    */
   column_view get_sorted_values()
   {
+    // No presorted early exit: sorted keys do not imply values are sorted within each group.
     return sorted_values ? sorted_values->view()
                          : (sorted_values = helper.sorted_values(values, stream))->view();
   };
@@ -86,6 +100,7 @@ struct store_result_functor {
   rmm::cuda_stream_view stream;         ///< CUDA stream on which to execute kernels
   rmm::mr::device_memory_resource* mr;  ///< Memory resource to allocate space for results
 
+  sorted keys_are_sorted;                  ///< Whether the keys are sorted
   std::unique_ptr<column> sorted_values;   ///< Memoised grouped and sorted values
   std::unique_ptr<column> grouped_values;  ///< Memoised grouped values
 };
diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp
index 6ac416c1a30..402ff8c47ed 100644
--- a/cpp/src/groupby/sort/scan.cpp
+++ b/cpp/src/groupby/sort/scan.cpp
@@ -55,6 +55,9 @@ struct scan_result_functor final : store_result_functor {
  private:
   column_view get_grouped_values()
   {
+    // early exit if presorted
+    if (is_presorted()) { return values; }
+
     // TODO (dm): After implementing single pass multi-agg, explore making a
     //            cache of all grouped value columns rather than one at a time
     if (grouped_values)
@@ -155,7 +158,8 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::sort
   cudf::detail::result_cache cache(requests.size());
 
   for (auto const& request : requests) {
-    auto store_functor = detail::scan_result_functor(request.values, helper(), cache, stream, mr);
+    auto store_functor =
+      detail::scan_result_functor(request.values, helper(), cache, stream, mr, _keys_are_sorted);
     for (auto const& aggregation : request.aggregations) {
       // TODO (dm): single pass compute all supported reductions
       cudf::detail::aggregation_dispatcher(aggregation->kind, store_functor, *aggregation);
diff --git a/cpp/tests/groupby/max_scan_tests.cpp b/cpp/tests/groupby/max_scan_tests.cpp
index 196aeed0430..19935dd4c91 100644
--- a/cpp/tests/groupby/max_scan_tests.cpp
+++ b/cpp/tests/groupby/max_scan_tests.cpp
@@ -59,6 +59,24 @@ TYPED_TEST(groupby_max_scan_test, basic)
   test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg));
 }
 
+// Running max per key; keys are given in ascending order and flagged as sorted::YES.
+TYPED_TEST(groupby_max_scan_test, pre_sorted)
+{
+  using values_t  = typename TestFixture::value_wrapper;
+  using results_t = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper in_keys  {1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+  values_t    in_vals ({5, 8, 1, 6, 9, 0, 4, 7, 2, 3});
+  key_wrapper out_keys {1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+  results_t   out_vals({5, 8, 8, 6, 9, 9, 9, 7, 7, 7});
+  // clang-format on
+
+  auto max_agg = cudf::make_max_aggregation<groupby_scan_aggregation>();
+  test_single_scan(
+    in_keys, in_vals, out_keys, out_vals, std::move(max_agg), null_policy::EXCLUDE, sorted::YES);
+}
+
 TYPED_TEST(groupby_max_scan_test, empty_cols)
 {
   using value_wrapper  = typename TestFixture::value_wrapper;
diff --git a/cpp/tests/groupby/min_scan_tests.cpp b/cpp/tests/groupby/min_scan_tests.cpp
index e4c018a9ce1..c672209c7b0 100644
--- a/cpp/tests/groupby/min_scan_tests.cpp
+++ b/cpp/tests/groupby/min_scan_tests.cpp
@@ -57,6 +57,24 @@ TYPED_TEST(groupby_min_scan_test, basic)
   test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg));
 }
 
+// Running min per key; keys are given in ascending order and flagged as sorted::YES.
+TYPED_TEST(groupby_min_scan_test, pre_sorted)
+{
+  using values_t  = typename TestFixture::value_wrapper;
+  using results_t = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper in_keys  {1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+  values_t    in_vals ({5, 8, 1, 6, 9, 0, 4, 7, 2, 3});
+  key_wrapper out_keys {1, 1, 1, 2, 2, 2, 2, 3, 3, 3};
+  results_t   out_vals({5, 5, 1, 6, 6, 0, 0, 7, 2, 2});
+  // clang-format on
+
+  auto min_agg = cudf::make_min_aggregation<groupby_scan_aggregation>();
+  test_single_scan(
+    in_keys, in_vals, out_keys, out_vals, std::move(min_agg), null_policy::EXCLUDE, sorted::YES);
+}
+
 TYPED_TEST(groupby_min_scan_test, empty_cols)
 {
   using value_wrapper  = typename TestFixture::value_wrapper;
diff --git a/cpp/tests/groupby/sum_scan_tests.cpp b/cpp/tests/groupby/sum_scan_tests.cpp
index 3117f8b1557..f4ac3a94d19 100644
--- a/cpp/tests/groupby/sum_scan_tests.cpp
+++ b/cpp/tests/groupby/sum_scan_tests.cpp
@@ -61,6 +61,24 @@ TYPED_TEST(groupby_sum_scan_test, basic)
   test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg));
 }
 
+// Running sum per key; keys are given in ascending order and flagged as sorted::YES.
+TYPED_TEST(groupby_sum_scan_test, pre_sorted)
+{
+  using values_t  = typename TestFixture::value_wrapper;
+  using results_t = typename TestFixture::result_wrapper;
+
+  // clang-format off
+  key_wrapper in_keys {1, 1, 1, 2, 2,  2,  2, 3, 3,  3};
+  values_t    in_vals {0, 3, 6, 1, 4,  5,  9, 2, 7,  8};
+  key_wrapper out_keys{1, 1, 1, 2, 2,  2,  2, 3, 3,  3};
+  results_t   out_vals{0, 3, 9, 1, 5, 10, 19, 2, 9, 17};
+  // clang-format on
+
+  auto sum_agg = cudf::make_sum_aggregation<groupby_scan_aggregation>();
+  test_single_scan(
+    in_keys, in_vals, out_keys, out_vals, std::move(sum_agg), null_policy::EXCLUDE, sorted::YES);
+}
+
 TYPED_TEST(groupby_sum_scan_test, empty_cols)
 {
   using value_wrapper  = typename TestFixture::value_wrapper;