diff --git a/cpp/benchmarks/groupby/group_shift_benchmark.cu b/cpp/benchmarks/groupby/group_shift_benchmark.cu
index 6e48fab7220..81afcdd80e1 100644
--- a/cpp/benchmarks/groupby/group_shift_benchmark.cu
+++ b/cpp/benchmarks/groupby/group_shift_benchmark.cu
@@ -57,8 +57,8 @@ void BM_group_shift(benchmark::State& state)
 
   cudf::groupby::groupby gb_obj(cudf::table_view({keys}));
 
-  cudf::size_type offset =
-    static_cast<cudf::size_type>(column_size / float(num_groups) * 0.5);  // forward shift half way
+  std::vector<cudf::size_type> offsets{
+    static_cast<cudf::size_type>(column_size / float(num_groups) * 0.5)};  // forward shift half way
   // null fill value
   auto fill_value = cudf::make_default_constructed_scalar(cudf::data_type(cudf::type_id::INT64));
   // non null fill value
@@ -66,7 +66,7 @@ void BM_group_shift(benchmark::State& state)
 
   for (auto _ : state) {
     cuda_event_timer timer(state, true);
-    auto result = gb_obj.shift(vals, offset, *fill_value);
+    auto result = gb_obj.shift(cudf::table_view{{vals}}, offsets, {*fill_value});
   }
 }
 
diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp
index 0a08c978715..85c469f58f8 100644
--- a/cpp/include/cudf/groupby.hpp
+++ b/cpp/include/cudf/groupby.hpp
@@ -228,37 +228,57 @@ class groupby {
   /**
    * @brief Performs grouped shifts for specified values.
    *
-   * For each group, `i`th element is determined by the `i - offset`th element
-   * of the group. If `i - offset < 0 or >= group_size`, the value is determined by
-   * @p fill_value.
+   * In `j`th column, for each group, `i`th element is determined by the `i - offsets[j]`th
+   * element of the group. If `i - offsets[j] < 0 or >= group_size`, the value is determined by
+   * @p fill_values[j].
+   *
+   * @note The first returned table stores the keys passed to the groupby object. Row `i` of the key
+   * table corresponds to the group labels of row `i` in the shifted columns. The key order in
+   * each group matches the input order. The order of each group is arbitrary. The group order
+   * in successive calls to `groupby::shift` may be different.
    *
    * Example:
    * @code{.pseudo}
-   * keys:   {1 1 1 1 2 2 2}
-   * values: {3 1 4 7 9 2 5}
-   * offset: 2
-   * fill_value: @
-   * result: {@ @ 3 1 @ @ 9}
+   * keys:    {1 4 1 3 4 4 1}
+   *          {1 2 1 3 2 2 1}
+   * values:  {3 9 1 4 2 5 7}
+   *          {"a" "c" "bb" "ee" "z" "x" "d"}
+   * offsets: {2, -1}
+   * fill_values: {@, @}
+   * result (group order may be different):
+   *    keys:   {3 1 1 1 4 4 4}
+   *            {3 1 1 1 2 2 2}
+   *    values: {@ @ @ 3 @ @ 9}
+   *            {@ "bb" "d" @ "z" "x" @}
+   *
    * -------------------------------------------------
-   * keys:   {1 1 1 1 2 2 2}
-   * values: {3 1 4 7 9 2 5}
-   * offset: -2
-   * fill_value: -1
-   * result: {4 7 -1 -1 5 -1 -1}
+   * keys:    {1 4 1 3 4 4 1}
+   *          {1 2 1 3 2 2 1}
+   * values:  {3 9 1 4 2 5 7}
+   *          {"a" "c" "bb" "ee" "z" "x" "d"}
+   * offsets: {-2, 1}
+   * fill_values: {-1, "42"}
+   * result (group order may be different):
+   *    keys:   {3 1 1 1 4 4 4}
+   *            {3 1 1 1 2 2 2}
+   *    values: {-1 7 -1 -1 5 -1 -1}
+   *            {"42" "42" "a" "bb" "42" "c" "z"}
+   *
    * @endcode
    *
-   * @param values Column to be shifted
-   * @param offset The off set by which to shift the input
-   * @param fill_value Fill value for indeterminable outputs
+   * @param values Table whose columns are to be shifted
+   * @param offsets The offsets by which to shift the input
+   * @param fill_values Fill values for indeterminable outputs
    * @param mr Device memory resource used to allocate the returned table and columns' device memory
-   * @return Pair containing the table with each group's key and the column shifted
+   * @return Pair containing the tables with each group's key and the columns shifted
    *
-   * @throws cudf::logic_error if @p fill_value dtype does not match @p input dtype
+   * @throws cudf::logic_error if @p fill_values[i] dtype does not match @p values[i] dtype for
+   * `i`th column
    */
-  std::pair<std::unique_ptr<table>, std::unique_ptr<column>> shift(
-    column_view const& values,
-    size_type offset,
-    scalar const& fill_value,
+  std::pair<std::unique_ptr<table>, std::unique_ptr<table>> shift(
+    table_view const& values,
+    host_span<size_type const> offsets,
+    std::vector<std::reference_wrapper<const scalar>> const& fill_values,
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu
index 72fbabe100c..b265e1c3112 100644
--- a/cpp/src/groupby/groupby.cu
+++ b/cpp/src/groupby/groupby.cu
@@ -35,6 +35,7 @@
 #include <rmm/cuda_stream_view.hpp>
 
 #include <thrust/copy.h>
+#include <thrust/iterator/counting_iterator.h>
 
 #include <memory>
 #include <utility>
@@ -262,23 +263,45 @@ detail::sort::sort_groupby_helper& groupby::helper()
   return *_helper;
 };
 
-std::pair<std::unique_ptr<table>, std::unique_ptr<column>> groupby::shift(
-  column_view const& values,
-  size_type offset,
-  scalar const& fill_value,
+// Shifts each column of `values` within every group. Column `i` is shifted by
+// `offsets[i]`, and positions with no source row take `fill_values[i]`.
+// Returns the sorted keys together with a table holding the shifted columns.
+std::pair<std::unique_ptr<table>, std::unique_ptr<table>> groupby::shift(
+  table_view const& values,
+  host_span<size_type const> offsets,
+  std::vector<std::reference_wrapper<const scalar>> const& fill_values,
   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  CUDF_EXPECTS(values.type() == fill_value.type(),
-               "values and fill_value should have the same type.");
+  // `offsets` and `fill_values` are both indexed by column below, so each must
+  // provide exactly one entry per column of `values`.
+  CUDF_EXPECTS(values.num_columns() == static_cast<size_type>(offsets.size()),
+               "Mismatch number of offsets and columns.");
+  CUDF_EXPECTS(values.num_columns() == static_cast<size_type>(fill_values.size()),
+               "Mismatch number of fill_values and columns.");
+  CUDF_EXPECTS(
+    std::all_of(thrust::make_counting_iterator(0),
+                thrust::make_counting_iterator(values.num_columns()),
+                [&](auto i) { return values.column(i).type() == fill_values[i].get().type(); }),
+    "values and fill_value should have the same type.");
 
-  auto stream         = rmm::cuda_stream_default;
-  auto grouped_values = helper().grouped_values(values, stream);
+  auto stream = rmm::cuda_stream_default;
+  std::vector<std::unique_ptr<column>> results;
+  results.reserve(values.num_columns());
+  auto const& group_offsets = helper().group_offsets(stream);
+  std::transform(
+    thrust::make_counting_iterator(0),
+    thrust::make_counting_iterator(values.num_columns()),
+    std::back_inserter(results),
+    [&](size_type i) {
+      // Column `i` in grouped form, as produced by the sort helper.
+      auto grouped_values = helper().grouped_values(values.column(i), stream);
+      return cudf::detail::segmented_shift(
+        grouped_values->view(), group_offsets, offsets[i], fill_values[i].get(), stream, mr);
+    });
 
-  return std::make_pair(
-    helper().sorted_keys(stream, mr),
-    std::move(cudf::detail::segmented_shift(
-      grouped_values->view(), helper().group_offsets(stream), offset, fill_value, stream, mr)));
+  return std::make_pair(helper().sorted_keys(stream, mr),
+                        std::make_unique<cudf::table>(std::move(results)));
 }
 
 }  // namespace groupby
diff --git a/cpp/tests/groupby/shift_tests.cpp b/cpp/tests/groupby/shift_tests.cpp
index 5974a9408b2..3a934071427 100644
--- a/cpp/tests/groupby/shift_tests.cpp
+++ b/cpp/tests/groupby/shift_tests.cpp
@@ -16,8 +16,10 @@
 
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/table_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
+#include <cudf/fixed_point/fixed_point.hpp>
 #include <cudf/groupby.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/scalar/scalar_factories.hpp>
@@ -33,15 +35,16 @@ struct groupby_shift_fixed_width_test : public BaseFixture {
 TYPED_TEST_CASE(groupby_shift_fixed_width_test, FixedWidthTypes);
 
 template <typename V>
-void test_groupby_shift_fixed_width(fixed_width_column_wrapper<K> const& key,
-                                    fixed_width_column_wrapper<V> const& value,
-                                    size_type offset,
-                                    scalar const& fill_value,
-                                    fixed_width_column_wrapper<V> const& expected)
+void test_groupby_shift_fixed_width_single(fixed_width_column_wrapper<K> const& key,
+                                           fixed_width_column_wrapper<V> const& value,
+                                           size_type offset,
+                                           scalar const& fill_value,
+                                           fixed_width_column_wrapper<V> const& expected)
 {
   groupby::groupby gb_obj(table_view({key}));
-  auto got = gb_obj.shift(value, offset, fill_value);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*got.second, expected);
+  std::vector<size_type> offsets{offset};
+  auto got = gb_obj.shift(table_view{{value}}, offsets, {fill_value});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL((*got.second).view().column(0), expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, ForwardShiftWithoutNull_NullScalar)
@@ -54,7 +57,7 @@ TYPED_TEST(groupby_shift_fixed_width_test, ForwardShiftWithoutNull_NullScalar)
   size_type offset = 2;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, *slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, *slr, expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, ForwardShiftWithNull_NullScalar)
@@ -67,7 +70,7 @@ TYPED_TEST(groupby_shift_fixed_width_test, ForwardShiftWithNull_NullScalar)
   size_type offset = 2;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, *slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, *slr, expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, ForwardShiftWithoutNull_ValidScalar)
@@ -80,7 +83,7 @@ TYPED_TEST(groupby_shift_fixed_width_test, ForwardShiftWithoutNull_ValidScalar)
   size_type offset = 3;
   auto slr         = cudf::scalar_type_t<TypeParam>(make_type_param_scalar<TypeParam>(42), true);
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, slr, expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, ForwardShiftWithNull_ValidScalar)
@@ -95,7 +98,7 @@ TYPED_TEST(groupby_shift_fixed_width_test, ForwardShiftWithNull_ValidScalar)
   size_type offset = 3;
   auto slr         = cudf::scalar_type_t<TypeParam>(make_type_param_scalar<TypeParam>(42), true);
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, slr, expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, BackwardShiftWithoutNull_NullScalar)
@@ -108,7 +111,7 @@ TYPED_TEST(groupby_shift_fixed_width_test, BackwardShiftWithoutNull_NullScalar)
   size_type offset = -1;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, *slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, *slr, expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, BackwardShiftWithNull_NullScalar)
@@ -121,7 +124,7 @@ TYPED_TEST(groupby_shift_fixed_width_test, BackwardShiftWithNull_NullScalar)
   size_type offset = -1;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, *slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, *slr, expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, BackwardShiftWithoutNull_ValidScalar)
@@ -134,7 +137,7 @@ TYPED_TEST(groupby_shift_fixed_width_test, BackwardShiftWithoutNull_ValidScalar)
   size_type offset = -5;
   auto slr         = cudf::scalar_type_t<TypeParam>(make_type_param_scalar<TypeParam>(42), true);
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, slr, expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, BackwardShiftWithNull_ValidScalar)
@@ -149,7 +152,7 @@ TYPED_TEST(groupby_shift_fixed_width_test, BackwardShiftWithNull_ValidScalar)
   size_type offset = -1;
   auto slr         = cudf::scalar_type_t<TypeParam>(make_type_param_scalar<TypeParam>(42), true);
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, slr, expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, ZeroShiftNullScalar)
@@ -162,7 +165,7 @@ TYPED_TEST(groupby_shift_fixed_width_test, ZeroShiftNullScalar)
   size_type offset = 0;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, *slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, *slr, expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, ZeroShiftValidScalar)
@@ -175,7 +178,7 @@ TYPED_TEST(groupby_shift_fixed_width_test, ZeroShiftValidScalar)
   size_type offset = 0;
   auto slr         = cudf::scalar_type_t<TypeParam>(make_type_param_scalar<TypeParam>(42), true);
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, slr, expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, VeryLargeForwardOffset)
@@ -189,7 +192,7 @@ TYPED_TEST(groupby_shift_fixed_width_test, VeryLargeForwardOffset)
   size_type offset = 1024;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, *slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, *slr, expected);
 }
 
 TYPED_TEST(groupby_shift_fixed_width_test, VeryLargeBackwardOffset)
@@ -203,21 +206,22 @@ TYPED_TEST(groupby_shift_fixed_width_test, VeryLargeBackwardOffset)
   size_type offset = -1024;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_fixed_width<V>(key, val, offset, *slr, expected);
+  test_groupby_shift_fixed_width_single<V>(key, val, offset, *slr, expected);
 }
 
 struct groupby_shift_string_test : public BaseFixture {
 };
 
-void test_groupby_shift_string(fixed_width_column_wrapper<K> const& key,
-                               strings_column_wrapper const& value,
-                               size_type offset,
-                               scalar const& fill_value,
-                               strings_column_wrapper const& expected)
+void test_groupby_shift_string_single(fixed_width_column_wrapper<K> const& key,
+                                      strings_column_wrapper const& value,
+                                      size_type offset,
+                                      scalar const& fill_value,
+                                      strings_column_wrapper const& expected)
 {
   groupby::groupby gb_obj(table_view({key}));
-  auto got = gb_obj.shift(value, offset, fill_value);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*got.second, expected);
+  std::vector<size_type> offsets{offset};
+  auto got = gb_obj.shift(table_view{{value}}, offsets, {fill_value});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL((*got.second).view().column(0), expected);
 }
 
 TEST_F(groupby_shift_string_test, ForwardShiftWithoutNull_NullScalar)
@@ -228,7 +232,7 @@ TEST_F(groupby_shift_string_test, ForwardShiftWithoutNull_NullScalar)
   size_type offset = 1;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
 }
 
 TEST_F(groupby_shift_string_test, ForwardShiftWithNull_NullScalar)
@@ -239,7 +243,7 @@ TEST_F(groupby_shift_string_test, ForwardShiftWithNull_NullScalar)
   size_type offset = 2;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
 }
 
 TEST_F(groupby_shift_string_test, ForwardShiftWithoutNull_ValidScalar)
@@ -251,7 +255,7 @@ TEST_F(groupby_shift_string_test, ForwardShiftWithoutNull_ValidScalar)
   size_type offset = 2;
   auto slr         = cudf::make_string_scalar("42");
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
 }
 
 TEST_F(groupby_shift_string_test, ForwardShiftWithNull_ValidScalar)
@@ -263,7 +267,7 @@ TEST_F(groupby_shift_string_test, ForwardShiftWithNull_ValidScalar)
   size_type offset = 1;
   auto slr         = cudf::make_string_scalar("42");
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
 }
 
 TEST_F(groupby_shift_string_test, BackwardShiftWithoutNull_NullScalar)
@@ -275,7 +279,7 @@ TEST_F(groupby_shift_string_test, BackwardShiftWithoutNull_NullScalar)
   size_type offset = -3;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
 }
 
 TEST_F(groupby_shift_string_test, BackwardShiftWithNull_NullScalar)
@@ -287,7 +291,7 @@ TEST_F(groupby_shift_string_test, BackwardShiftWithNull_NullScalar)
   size_type offset = -1;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
 }
 
 TEST_F(groupby_shift_string_test, BackwardShiftWithoutNull_ValidScalar)
@@ -299,7 +303,7 @@ TEST_F(groupby_shift_string_test, BackwardShiftWithoutNull_ValidScalar)
   size_type offset = -4;
   auto slr         = cudf::make_string_scalar("42");
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
 }
 
 TEST_F(groupby_shift_string_test, BackwardShiftWithNull_ValidScalar)
@@ -311,7 +315,7 @@ TEST_F(groupby_shift_string_test, BackwardShiftWithNull_ValidScalar)
   size_type offset = -2;
   auto slr         = cudf::make_string_scalar("42");
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
 }
 
 TEST_F(groupby_shift_string_test, ZeroShiftNullScalar)
@@ -323,7 +327,7 @@ TEST_F(groupby_shift_string_test, ZeroShiftNullScalar)
   size_type offset = 0;
   auto slr         = cudf::make_default_constructed_scalar(column_view(val).type());
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
 }
 
 TEST_F(groupby_shift_string_test, ZeroShiftValidScalar)
@@ -335,7 +339,7 @@ TEST_F(groupby_shift_string_test, ZeroShiftValidScalar)
   size_type offset = 0;
   auto slr         = cudf::make_string_scalar("42");
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
 }
 
 TEST_F(groupby_shift_string_test, VeryLargeForwardOffset)
@@ -347,7 +351,7 @@ TEST_F(groupby_shift_string_test, VeryLargeForwardOffset)
   size_type offset = 1024;
   auto slr         = cudf::make_string_scalar("42");
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
 }
 
 TEST_F(groupby_shift_string_test, VeryLargeBackwardOffset)
@@ -359,7 +363,116 @@ TEST_F(groupby_shift_string_test, VeryLargeBackwardOffset)
   size_type offset = -1024;
   auto slr         = cudf::make_string_scalar("42");
 
-  test_groupby_shift_string(key, val, offset, *slr, expected);
+  test_groupby_shift_string_single(key, val, offset, *slr, expected);
+}
+
+template <typename T>
+struct groupby_shift_mixed_test : public BaseFixture {
+};
+
+TYPED_TEST_CASE(groupby_shift_mixed_test, FixedWidthTypes);
+
+void test_groupby_shift_multi(fixed_width_column_wrapper<K> const& key,
+                              table_view const& value,
+                              std::vector<size_type> offsets,
+                              std::vector<std::reference_wrapper<const scalar>> fill_values,
+                              table_view const& expected)
+{
+  groupby::groupby gb_obj(table_view({key}));
+  auto got = gb_obj.shift(value, offsets, fill_values);
+  CUDF_TEST_EXPECT_TABLES_EQUAL((*got.second).view(), expected);
+}
+
+TYPED_TEST(groupby_shift_mixed_test, NoFill)
+{
+  fixed_width_column_wrapper<K> key{1, 2, 1, 2, 2, 1, 1};
+  strings_column_wrapper v1{"a", "bb", "cc", "d", "eee", "f", "gg"};
+  fixed_width_column_wrapper<TypeParam> v2{1, 2, 3, 4, 5, 6, 7};
+  table_view value{{v1, v2}};
+
+  strings_column_wrapper e1({"", "", "a", "cc", "", "", "bb"}, {0, 0, 1, 1, 0, 0, 1});
+  fixed_width_column_wrapper<TypeParam> e2({-1, 1, 3, 6, -1, 2, 4}, {0, 1, 1, 1, 0, 1, 1});
+  table_view expected{{e1, e2}};
+
+  std::vector<size_type> offset{2, 1};
+  auto slr1 = cudf::make_default_constructed_scalar(column_view(v1).type());
+  auto slr2 = cudf::make_default_constructed_scalar(column_view(v2).type());
+  std::vector<std::reference_wrapper<const scalar>> fill_values{*slr1, *slr2};
+
+  test_groupby_shift_multi(key, value, offset, fill_values, expected);
+}
+
+TYPED_TEST(groupby_shift_mixed_test, Fill)
+{
+  fixed_width_column_wrapper<K> key{1, 2, 1, 2, 2, 1, 1};
+  strings_column_wrapper v1{"a", "bb", "cc", "d", "eee", "f", "gg"};
+  fixed_width_column_wrapper<TypeParam> v2{1, 2, 3, 4, 5, 6, 7};
+  table_view value{{v1, v2}};
+
+  strings_column_wrapper e1({"cc", "f", "gg", "42", "d", "eee", "42"});
+  fixed_width_column_wrapper<TypeParam> e2({6, 7, 42, 42, 5, 42, 42});
+  table_view expected{{e1, e2}};
+
+  std::vector<size_type> offset{-1, -2};
+
+  auto slr1 = cudf::make_string_scalar("42");
+  auto slr2 = cudf::scalar_type_t<TypeParam>(make_type_param_scalar<TypeParam>(42), true);
+  std::vector<std::reference_wrapper<const scalar>> fill_values{*slr1, slr2};
+
+  test_groupby_shift_multi(key, value, offset, fill_values, expected);
+}
+
+struct groupby_shift_fixed_point_type_test : public BaseFixture {
+};
+
+TEST_F(groupby_shift_fixed_point_type_test, Matching)
+{
+  fixed_width_column_wrapper<K> key{2, 3, 4, 4, 3, 2, 2, 4};
+  fixed_point_column_wrapper<int32_t> v1{{10, 10, 40, 40, 20, 20, 30, 40}, numeric::scale_type{-1}};
+  fixed_point_column_wrapper<int64_t> v2{{5, 5, 8, 8, 6, 7, 9, 7}, numeric::scale_type{3}};
+  table_view value{{v1, v2}};
+
+  std::vector<size_type> offset{-3, 1};
+  auto slr1 = make_fixed_point_scalar<numeric::decimal32>(-42, numeric::scale_type{-1});
+  auto slr2 = make_fixed_point_scalar<numeric::decimal64>(42, numeric::scale_type{3});
+  std::vector<std::reference_wrapper<const scalar>> fill_values{*slr1, *slr2};
+
+  fixed_point_column_wrapper<int32_t> e1{{-42, -42, -42, -42, -42, -42, -42, -42},
+                                         numeric::scale_type{-1}};
+  fixed_point_column_wrapper<int64_t> e2{{42, 5, 7, 42, 5, 42, 8, 8}, numeric::scale_type{3}};
+  table_view expected{{e1, e2}};
+
+  test_groupby_shift_multi(key, value, offset, fill_values, expected);
+}
+
+TEST_F(groupby_shift_fixed_point_type_test, MismatchScaleType)
+{
+  fixed_width_column_wrapper<K> key{2, 3, 4, 4, 3, 2, 2, 4};
+  fixed_point_column_wrapper<int32_t> v1{{10, 10, 40, 40, 20, 20, 30, 40}, numeric::scale_type{-1}};
+
+  std::vector<size_type> offset{-3};
+  auto slr1 = make_fixed_point_scalar<numeric::decimal32>(-42, numeric::scale_type{-4});
+
+  fixed_point_column_wrapper<int32_t> stub{{-42, -42, -42, -42, -42, -42, -42, -42},
+                                           numeric::scale_type{-1}};
+
+  EXPECT_THROW(test_groupby_shift_multi(key, table_view{{v1}}, offset, {*slr1}, table_view{{stub}}),
+               logic_error);
+}
+
+TEST_F(groupby_shift_fixed_point_type_test, MismatchRepType)
+{
+  fixed_width_column_wrapper<K> key{2, 3, 4, 4, 3, 2, 2, 4};
+  fixed_point_column_wrapper<int64_t> v1{{10, 10, 40, 40, 20, 20, 30, 40}, numeric::scale_type{-1}};
+
+  std::vector<size_type> offset{-3};
+  auto slr1 = make_fixed_point_scalar<numeric::decimal32>(-42, numeric::scale_type{-1});
+
+  fixed_point_column_wrapper<int32_t> stub{{-42, -42, -42, -42, -42, -42, -42, -42},
+                                           numeric::scale_type{-1}};
+
+  EXPECT_THROW(test_groupby_shift_multi(key, table_view{{v1}}, offset, {*slr1}, table_view{{stub}}),
+               logic_error);
 }
 
 }  // namespace test
diff --git a/python/cudf/cudf/_lib/cpp/groupby.pxd b/python/cudf/cudf/_lib/cpp/groupby.pxd
index f3bad855725..af09b27d916 100644
--- a/python/cudf/cudf/_lib/cpp/groupby.pxd
+++ b/python/cudf/cudf/_lib/cpp/groupby.pxd
@@ -5,15 +5,20 @@ from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
 from libcpp cimport bool
 
+from cudf._lib.cpp.libcpp.functional cimport reference_wrapper
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.aggregation cimport aggregation
+from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.types cimport size_type, order, null_order, null_policy
 from cudf._lib.cpp.replace cimport replace_policy
 from cudf._lib.cpp.utilities.host_span cimport host_span
 
+# workaround for https://github.com/cython/cython/issues/3885
+ctypedef const scalar constscalar
+
 
 cdef extern from "cudf/groupby.hpp" \
         namespace "cudf::groupby" nogil:
@@ -74,6 +79,15 @@ cdef extern from "cudf/groupby.hpp" \
             const vector[aggregation_request]& requests,
         ) except +
 
+        pair[
+            unique_ptr[table],
+            unique_ptr[table]
+        ] shift(
+            const table_view values,
+            const vector[size_type] offset,
+            const vector[reference_wrapper[constscalar]] fill_values
+        ) except +
+
         groups get_groups() except +
         groups get_groups(table_view values) except +
 
diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index 1979ddf8f0c..17f6991c25d 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -22,8 +22,13 @@ from libcpp cimport bool
 
 from cudf._lib.column cimport Column
 from cudf._lib.table cimport Table
+from cudf._lib.scalar cimport DeviceScalar
+from cudf._lib.scalar import as_device_scalar
 from cudf._lib.aggregation cimport Aggregation, make_aggregation
 
+from cudf._lib.cpp.types cimport size_type
+from cudf._lib.cpp.scalar.scalar cimport scalar
+from cudf._lib.cpp.libcpp.functional cimport reference_wrapper
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.table.table cimport table, table_view
@@ -45,6 +50,8 @@ _INTERVAL_AGGS = set()
 _DECIMAL_AGGS = {"COUNT", "SUM", "ARGMIN", "ARGMAX", "MIN", "MAX", "NUNIQUE",
                  "NTH", "COLLECT"}
 
+# workaround for https://github.com/cython/cython/issues/3885
+ctypedef const scalar constscalar
 
 cdef class GroupBy:
     cdef unique_ptr[libcudf_groupby.groupby] c_obj
@@ -207,6 +214,40 @@ cdef class GroupBy:
 
         return Table(data=result_data, index=grouped_keys)
 
+    def shift(self, Table values, int periods, list fill_values):
+        cdef table_view view = values.view()
+        cdef size_type num_col = view.num_columns()
+        cdef vector[size_type] offsets = vector[size_type](num_col, periods)
+
+        cdef vector[reference_wrapper[constscalar]] c_fill_values
+        cdef DeviceScalar d_slr
+        d_slrs = []
+        c_fill_values.reserve(num_col)
+        for val, col in zip(fill_values, values._columns):
+            d_slr = as_device_scalar(val, dtype=col.dtype)
+            d_slrs.append(d_slr)
+            c_fill_values.push_back(
+                reference_wrapper[constscalar](d_slr.get_raw_ptr()[0])
+            )
+
+        cdef pair[unique_ptr[table], unique_ptr[table]] c_result
+
+        with nogil:
+            c_result = move(
+                self.c_obj.get()[0].shift(view, offsets, c_fill_values)
+            )
+
+        grouped_keys = Table.from_unique_ptr(
+            move(c_result.first),
+            column_names=self.keys._column_names
+        )
+
+        shifted = Table.from_unique_ptr(
+            move(c_result.second), column_names=values._column_names
+        )
+
+        return Table(data=shifted._data, index=grouped_keys)
+
     def replace_nulls(self, Table values, object method):
         cdef table_view val_view = values.view()
         cdef pair[unique_ptr[table], unique_ptr[table]] c_result
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index c1060d5f505..6a298df32d6 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -8,7 +8,9 @@
 
 import cudf
 from cudf._lib import groupby as libgroupby
+from cudf._lib.table import Table
 from cudf.core.abc import Serializable
+from cudf.utils.dtypes import is_list_like
 from cudf.utils.utils import GetAttrGetItemMixin, cached_property
 
 
@@ -703,6 +705,61 @@ def cummax(self):
         """Get the column-wise cumulative maximum value in each group."""
         return self.agg("cummax")
 
+    def shift(self, periods=1, freq=None, axis=0, fill_value=None):
+        """
+        Move the rows of every group by ``periods`` positions.
+
+        Parameters
+        ----------
+        periods : int, default 1
+            How many positions each group is shifted by.
+        freq : str, unsupported
+        axis : 0, axis to shift
+            Only row-wise shifting (``axis=0``) is supported.
+        fill_value : scalar or list of scalars, optional
+            Value(s) written to the slots a shift leaves without data.
+            Accepts `None`, one value, or one value per column:
+
+            - `None` (default): the indeterminable slots become null.
+            - Single value: used for every shifted column; it should
+              match the data type of all columns.
+            - List of values: entry ``k`` fills shifted column ``k``.
+              The list length should match the number of columns
+              shifted, and each value should match the data type of
+              the column it fills.
+
+        Returns
+        -------
+        Series or DataFrame
+            Object shifted within each group.
+
+        Notes
+        -----
+        Passing ``freq`` raises ``NotImplementedError``.
+        """
+
+        if freq is not None:
+            raise NotImplementedError("Parameter freq is unsupported.")
+        if axis != 0:
+            raise NotImplementedError("Only axis=0 is supported.")
+
+        labels = []
+        for name in self.obj._column_names:
+            if name not in self.grouping.names:
+                labels.append(name)
+        n_shifted = len(labels)
+
+        if not is_list_like(fill_value):
+            fill_value = [fill_value] * n_shifted
+        elif len(fill_value) != n_shifted:
+            raise ValueError(
+                "Mismatched number of columns and values to fill."
+            )
+
+        to_shift = Table(self.obj._data.select_by_label(labels))
+        shifted = self._groupby.shift(to_shift, periods, fill_value)
+        return self.obj.__class__._from_table(shifted)
+
 
 class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
     _PROTECTED_KEYS = frozenset(("obj",))
diff --git a/python/cudf/cudf/tests/dataset_generator.py b/python/cudf/cudf/tests/dataset_generator.py
index d7adf175f3f..5e03068f818 100644
--- a/python/cudf/cudf/tests/dataset_generator.py
+++ b/python/cudf/cudf/tests/dataset_generator.py
@@ -282,7 +282,9 @@ def get_dataframe(parameters, use_threads):
     return tbl
 
 
-def rand_dataframe(dtypes_meta, rows, seed=random.randint(0, 2 ** 32 - 1)):
+def rand_dataframe(
+    dtypes_meta, rows, seed=random.randint(0, 2 ** 32 - 1), use_threads=True
+):
     """
     Generates a random table.
 
@@ -300,6 +302,8 @@ def rand_dataframe(dtypes_meta, rows, seed=random.randint(0, 2 ** 32 - 1)):
     seed : int
         Specifies the `seed` value to be utilized by all downstream
         random data generation APIs.
+    use_threads : bool
+        Indicates whether to use thread pools to build the columns
 
     Returns
     -------
@@ -457,7 +461,7 @@ def rand_dataframe(dtypes_meta, rows, seed=random.randint(0, 2 ** 32 - 1)):
 
     df = get_dataframe(
         Parameters(num_rows=rows, column_parameters=column_params, seed=seed,),
-        use_threads=True,
+        use_threads=use_threads,
     )
 
     return df
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index 2430b0da5ef..e774bda4914 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -15,6 +15,7 @@
 import cudf
 from cudf.core import DataFrame, Series
 from cudf.core._compat import PANDAS_GE_110
+from cudf.tests.dataset_generator import rand_dataframe
 from cudf.tests.utils import (
     DATETIME_TYPES,
     SIGNED_TYPES,
@@ -1696,3 +1697,207 @@ def test_groupby_mix_agg_scan():
     gb.agg(func[1:])
     with pytest.raises(NotImplementedError, match=err_msg):
         gb.agg(func)
+
+
+@pytest.mark.parametrize("nelem", [2, 3, 100, 1000])
+@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5])
+@pytest.mark.parametrize("direction", [1, -1])
+@pytest.mark.parametrize("fill_value", [None, np.nan, 42])
+def test_groupby_shift_row(nelem, shift_perc, direction, fill_value):
+    pdf = make_frame(pd.DataFrame, nelem=nelem, extra_vals=["val2"])
+    gdf = cudf.from_pandas(pdf)
+    n_shift = int(nelem * shift_perc) * direction
+
+    expected = pdf.groupby(["x", "y"]).shift(
+        periods=n_shift, fill_value=fill_value
+    )
+    got = gdf.groupby(["x", "y"]).shift(periods=n_shift, fill_value=fill_value)
+
+    # Pandas returns shifted column in original row order. We set its index
+    # to be the key columns, so that `assert_groupby_results_equal` can sort
+    # rows by key columns to make sure cudf and pandas results match.
+    expected.index = pd.MultiIndex.from_frame(gdf[["x", "y"]].to_pandas())
+    assert_groupby_results_equal(
+        expected[["val", "val2"]], got[["val", "val2"]]
+    )
+
+
+@pytest.mark.parametrize("nelem", [10, 50, 100, 1000])
+@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5])
+@pytest.mark.parametrize("direction", [1, -1])
+@pytest.mark.parametrize("fill_value", [None, 0, 42])
+def test_groupby_shift_row_mixed_numerics(
+    nelem, shift_perc, direction, fill_value
+):
+    t = rand_dataframe(
+        dtypes_meta=[
+            {"dtype": "int64", "null_frequency": 0, "cardinality": 10},
+            {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10},
+            {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10},
+            {
+                "dtype": "datetime64[ns]",
+                "null_frequency": 0.4,
+                "cardinality": 10,
+            },
+            {
+                "dtype": "timedelta64[ns]",
+                "null_frequency": 0.4,
+                "cardinality": 10,
+            },
+        ],
+        rows=nelem,
+        use_threads=False,
+    )
+    pdf = t.to_pandas()
+    gdf = cudf.from_pandas(pdf)
+    n_shift = int(nelem * shift_perc) * direction
+
+    expected = pdf.groupby(["0"]).shift(periods=n_shift, fill_value=fill_value)
+    got = gdf.groupby(["0"]).shift(periods=n_shift, fill_value=fill_value)
+
+    # Pandas returns shifted column in original row order. We set its index
+    # to be the key columns, so that `assert_groupby_results_equal` can sort
+    # rows by key columns to make sure cudf and pandas results match.
+    expected.index = gdf["0"].to_pandas()
+    assert_groupby_results_equal(
+        expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]]
+    )
+
+
+# TODO: Shifting list columns is currently unsupported because we cannot
+# construct a null list scalar in python. Support once it is added.
+@pytest.mark.parametrize("nelem", [10, 50, 100, 1000])
+@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5])
+@pytest.mark.parametrize("direction", [1, -1])
+def test_groupby_shift_row_mixed(nelem, shift_perc, direction):
+    t = rand_dataframe(
+        dtypes_meta=[
+            {"dtype": "int64", "null_frequency": 0, "cardinality": 10},
+            {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10},
+            {"dtype": "str", "null_frequency": 0.4, "cardinality": 10},
+            {
+                "dtype": "datetime64[ns]",
+                "null_frequency": 0.4,
+                "cardinality": 10,
+            },
+            {
+                "dtype": "timedelta64[ns]",
+                "null_frequency": 0.4,
+                "cardinality": 10,
+            },
+        ],
+        rows=nelem,
+        use_threads=False,
+    )
+    pdf = t.to_pandas()
+    gdf = cudf.from_pandas(pdf)
+    n_shift = int(nelem * shift_perc) * direction
+
+    expected = pdf.groupby(["0"]).shift(periods=n_shift)
+    got = gdf.groupby(["0"]).shift(periods=n_shift)
+
+    # Pandas returns shifted column in original row order. We set its index
+    # to be the key columns, so that `assert_groupby_results_equal` can sort
+    # rows by key columns to make sure cudf and pandas results match.
+    expected.index = gdf["0"].to_pandas()
+    assert_groupby_results_equal(
+        expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]]
+    )
+
+
+@pytest.mark.parametrize("nelem", [10, 50, 100, 1000])
+@pytest.mark.parametrize("shift_perc", [0.5, 1.0, 1.5])
+@pytest.mark.parametrize("direction", [1, -1])
+@pytest.mark.parametrize(
+    "fill_value",
+    [
+        [
+            42,
+            "fill",
+            np.datetime64(123, "ns"),
+            cudf.Scalar(456, dtype="timedelta64[ns]"),
+        ]
+    ],
+)
+def test_groupby_shift_row_mixed_fill(
+    nelem, shift_perc, direction, fill_value
+):
+    t = rand_dataframe(
+        dtypes_meta=[
+            {"dtype": "int64", "null_frequency": 0, "cardinality": 10},
+            {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10},
+            {"dtype": "str", "null_frequency": 0.4, "cardinality": 10},
+            {
+                "dtype": "datetime64[ns]",
+                "null_frequency": 0.4,
+                "cardinality": 10,
+            },
+            {
+                "dtype": "timedelta64[ns]",
+                "null_frequency": 0.4,
+                "cardinality": 10,
+            },
+        ],
+        rows=nelem,
+        use_threads=False,
+    )
+    pdf = t.to_pandas()
+    gdf = cudf.from_pandas(pdf)
+    n_shift = int(nelem * shift_perc) * direction
+
+    # Pandas does not support specifying different fill_value by column, so we
+    # simulate it column by column
+    expected = pdf.copy()
+    for col, single_fill in zip(pdf.iloc[:, 1:], fill_value):
+        if isinstance(single_fill, cudf.Scalar):
+            single_fill = single_fill._host_value
+        expected[col] = (
+            pdf[col]
+            .groupby(pdf["0"])
+            .shift(periods=n_shift, fill_value=single_fill)
+        )
+
+    got = gdf.groupby(["0"]).shift(periods=n_shift, fill_value=fill_value)
+
+    # Pandas returns shifted column in original row order. We set its index
+    # to be the key columns, so that `assert_groupby_results_equal` can sort
+    # rows by key columns to make sure cudf and pandas results match.
+    expected.index = gdf["0"].to_pandas()
+    assert_groupby_results_equal(
+        expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]]
+    )
+
+
+@pytest.mark.parametrize("nelem", [10, 50, 100, 1000])
+@pytest.mark.parametrize("fill_value", [None, 0, 42])
+def test_groupby_shift_row_zero_shift(nelem, fill_value):
+    t = rand_dataframe(
+        dtypes_meta=[
+            {"dtype": "int64", "null_frequency": 0, "cardinality": 10},
+            {"dtype": "int64", "null_frequency": 0.4, "cardinality": 10},
+            {"dtype": "float32", "null_frequency": 0.4, "cardinality": 10},
+            {
+                "dtype": "datetime64[ns]",
+                "null_frequency": 0.4,
+                "cardinality": 10,
+            },
+            {
+                "dtype": "timedelta64[ns]",
+                "null_frequency": 0.4,
+                "cardinality": 10,
+            },
+        ],
+        rows=nelem,
+        use_threads=False,
+    )
+    gdf = cudf.from_pandas(t.to_pandas())
+
+    expected = gdf
+    got = gdf.groupby(["0"]).shift(periods=0, fill_value=fill_value)
+
+    # Here, the result should be the same as input due to 0-shift; only the
+    # key order is different.
+    expected = expected.set_index("0")
+    assert_groupby_results_equal(
+        expected[["1", "2", "3", "4"]], got[["1", "2", "3", "4"]]
+    )