From 21ed2519e20f95bad680bbd8d8fc6d379e320cee Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Fri, 18 Mar 2022 14:42:48 -0700
Subject: [PATCH] Use list of columns for methods in `Groupby.pyx` (#10419)

Part of #10153

This PR changes the APIs in `groupby.pyx` to accept a list of columns as input, not a Frame. This change affects both keys and values. The `Groupby` object now only stores a list of columns in the `keys` attribute, and the other APIs (`groups`, `aggregate`, `shift`, `replace_nulls`) now only accept a list of columns as their value columns. The `aggregation` communication protocol has changed from a dictionary mapping column names to lists of agg names, to a list of lists of agg names (one inner list per value column). See the changes in `_normalize_aggs` for details.
This PR also tries to simplify the post-processing of the `result` frame in the `agg` method, now that we have finer control in pure Python.

I attempted to rewrite `aggregate_internal` and `scan_internal`, but the attempt was futile because the unified aggregation object is a cdef type, which precludes moving the aggregation filtering step out of its current place. I also tried unifying aggregation and scan with a Cython fused type, but that did not work because of limitations in using fused types with C++ templated types in Cython.

Overall, the performance of the `agg` call is on par with the main branch, with a difference ranging from -3% to +13% (about 3% slower to 13% faster) depending on the agg type.

<details>
<summary>Raw Benchmark</summary>

```
========================================================================== 36 passed in 33.48s ==========================================================================
(rapids) rapids@compose:~/scratch/cudf_benchmarks$ ./compare.sh bench_groupby.py

--------------------------------------------------------------- benchmark 'False-False-agg1-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                               Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-False-agg1-100] (afte)     2.5090 (1.0)      2.8418 (1.0)      2.5280 (1.0)      0.0290 (2.40)     2.5229 (1.0)      0.0103 (1.05)        15;19     273
groupby_agg[False-False-agg1-100] (befo)     2.7681 (1.10)     2.8441 (1.00)     2.7877 (1.10)     0.0121 (1.0)      2.7849 (1.10)     0.0098 (1.0)         60;26     252
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-False-agg1-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                                 Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-False-agg1-10000] (afte)     2.7803 (1.0)      3.4156 (1.05)     2.8131 (1.0)      0.0548 (1.57)     2.8007 (1.0)      0.0253 (1.0)         10;12     252
groupby_agg[False-False-agg1-10000] (befo)     3.0402 (1.09)     3.2407 (1.0)      3.1571 (1.12)     0.0348 (1.0)      3.1535 (1.13)     0.0509 (2.01)         39;6     236
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------- benchmark 'False-False-agg1-1000000': 2 tests -----------------------------------------------------------------
Name (time in ms)                                    Min                Max               Mean            StdDev             Median               IQR            Outliers  Rounds
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-False-agg1-1000000] (afte)     13.2601 (1.0)      14.0128 (1.01)     13.4242 (1.0)      0.1056 (1.28)     13.4004 (1.0)      0.0286 (1.0)           5;8      68
groupby_agg[False-False-agg1-1000000] (befo)     13.5150 (1.02)     13.9165 (1.0)      13.6015 (1.01)     0.0826 (1.0)      13.5944 (1.01)     0.0696 (2.43)          8;5      66
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-False-agg2-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                               Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-False-agg2-100] (afte)     2.5342 (1.0)      2.8621 (1.0)      2.5591 (1.0)      0.0431 (3.18)     2.5509 (1.0)      0.0106 (1.01)        13;18     273
groupby_agg[False-False-agg2-100] (befo)     2.8797 (1.14)     2.9507 (1.03)     2.8997 (1.13)     0.0136 (1.0)      2.8965 (1.14)     0.0105 (1.0)         52;28     227
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-False-agg2-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                                 Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-False-agg2-10000] (afte)     2.7922 (1.0)      3.2884 (1.0)      2.8205 (1.0)      0.0473 (1.40)     2.8118 (1.0)      0.0096 (1.0)         10;18     251
groupby_agg[False-False-agg2-10000] (befo)     3.1491 (1.13)     3.4791 (1.06)     3.1752 (1.13)     0.0338 (1.0)      3.1687 (1.13)     0.0108 (1.12)         6;17     172
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------- benchmark 'False-False-agg2-1000000': 2 tests -----------------------------------------------------------------
Name (time in ms)                                    Min                Max               Mean            StdDev             Median               IQR            Outliers  Rounds
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-False-agg2-1000000] (afte)     13.4699 (1.0)      14.6287 (1.0)      13.6020 (1.0)      0.1359 (1.0)      13.5769 (1.0)      0.0270 (1.0)           3;8      69
groupby_agg[False-False-agg2-1000000] (befo)     13.6079 (1.01)     29.8318 (2.04)     14.0777 (1.03)     1.9806 (14.57)    13.7795 (1.01)     0.0567 (2.10)          2;6      68
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-False-sum-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                              Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-False-sum-100] (afte)     2.1667 (1.0)      2.2855 (1.0)      2.1831 (1.0)      0.0146 (1.49)     2.1802 (1.0)      0.0111 (1.14)        25;16     301
groupby_agg[False-False-sum-100] (befo)     2.4142 (1.11)     2.4782 (1.08)     2.4319 (1.11)     0.0098 (1.0)      2.4309 (1.11)     0.0097 (1.0)         65;15     278
------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-False-sum-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                                Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-False-sum-10000] (afte)     2.4293 (1.0)      2.6593 (1.0)      2.4493 (1.0)      0.0206 (1.66)     2.4455 (1.0)      0.0115 (1.10)        17;19     278
groupby_agg[False-False-sum-10000] (befo)     2.6646 (1.10)     2.7706 (1.04)     2.6832 (1.10)     0.0124 (1.0)      2.6811 (1.10)     0.0105 (1.0)         49;14     257
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------

---------------------------------------------------------------- benchmark 'False-False-sum-1000000': 2 tests ---------------------------------------------------------------
Name (time in ms)                                  Min                Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-False-sum-1000000] (afte)     9.3678 (1.0)      21.0480 (2.07)     9.6817 (1.0)      1.2252 (16.49)    9.5286 (1.0)      0.0342 (1.28)          1;9      89
groupby_agg[False-False-sum-1000000] (befo)     9.6830 (1.03)     10.1832 (1.0)      9.7434 (1.01)     0.0743 (1.0)      9.7238 (1.02)     0.0266 (1.0)           6;6      86
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-True-agg1-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                              Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-True-agg1-100] (afte)     2.4392 (1.0)      2.7474 (1.06)     2.4598 (1.0)      0.0287 (2.07)     2.4545 (1.0)      0.0103 (1.0)         10;17     278
groupby_agg[False-True-agg1-100] (befo)     2.5183 (1.03)     2.6017 (1.0)      2.5354 (1.03)     0.0139 (1.0)      2.5332 (1.03)     0.0134 (1.30)        51;18     268
------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-True-agg1-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                                Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-True-agg1-10000] (afte)     2.7196 (1.0)      3.2290 (1.06)     2.7446 (1.0)      0.0462 (2.17)     2.7359 (1.0)      0.0106 (1.00)        11;17     257
groupby_agg[False-True-agg1-10000] (befo)     2.7807 (1.02)     3.0590 (1.0)      2.8039 (1.02)     0.0213 (1.0)      2.8004 (1.02)     0.0106 (1.0)         16;18     251
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------- benchmark 'False-True-agg1-1000000': 2 tests -----------------------------------------------------------------
Name (time in ms)                                   Min                Max               Mean            StdDev             Median               IQR            Outliers  Rounds
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-True-agg1-1000000] (afte)     13.2259 (1.01)     13.7344 (1.0)      13.3449 (1.00)     0.0797 (1.0)      13.3288 (1.00)     0.0322 (1.41)          5;8      69
groupby_agg[False-True-agg1-1000000] (befo)     13.0875 (1.0)      14.1552 (1.03)     13.3135 (1.0)      0.1325 (1.66)     13.2901 (1.0)      0.0229 (1.0)           4;7      68
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-True-agg2-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                              Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-True-agg2-100] (afte)     2.4580 (1.0)      2.5791 (1.0)      2.4792 (1.0)      0.0174 (1.92)     2.4756 (1.0)      0.0121 (1.37)        21;14     277
groupby_agg[False-True-agg2-100] (befo)     2.6094 (1.06)     2.6686 (1.03)     2.6260 (1.06)     0.0091 (1.0)      2.6255 (1.06)     0.0088 (1.0)         66;21     264
------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-True-agg2-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                                Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-True-agg2-10000] (afte)     2.7218 (1.0)      2.8843 (1.0)      2.7415 (1.0)      0.0180 (1.0)      2.7383 (1.0)      0.0116 (1.12)        21;16     257
groupby_agg[False-True-agg2-10000] (befo)     2.8771 (1.06)     3.1227 (1.08)     2.8956 (1.06)     0.0185 (1.03)     2.8922 (1.06)     0.0104 (1.0)         16;16     244
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------- benchmark 'False-True-agg2-1000000': 2 tests -----------------------------------------------------------------
Name (time in ms)                                   Min                Max               Mean            StdDev             Median               IQR            Outliers  Rounds
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-True-agg2-1000000] (afte)     13.4555 (1.01)     13.7924 (1.0)      13.5244 (1.00)     0.0601 (1.0)      13.5099 (1.00)     0.0362 (1.0)           7;6      70
groupby_agg[False-True-agg2-1000000] (befo)     13.3841 (1.0)      13.9437 (1.01)     13.4948 (1.0)      0.0773 (1.29)     13.4768 (1.0)      0.0443 (1.22)          5;5      68
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-True-sum-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                             Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-True-sum-100] (afte)     2.1270 (1.0)      2.2397 (1.0)      2.1435 (1.0)      0.0158 (1.01)     2.1407 (1.0)      0.0105 (1.0)         27;22     302
groupby_agg[False-True-sum-100] (befo)     2.1881 (1.03)     2.3309 (1.04)     2.2048 (1.03)     0.0156 (1.0)      2.2014 (1.03)     0.0111 (1.06)        35;30     297
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-True-sum-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                               Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-True-sum-10000] (afte)     2.4018 (1.0)      2.6107 (1.0)      2.4183 (1.0)      0.0198 (1.16)     2.4149 (1.0)      0.0108 (1.12)        14;14     277
groupby_agg[False-True-sum-10000] (befo)     2.4406 (1.02)     2.6840 (1.03)     2.4606 (1.02)     0.0170 (1.0)      2.4585 (1.02)     0.0097 (1.0)         15;14     274
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'False-True-sum-1000000': 2 tests ----------------------------------------------------------------
Name (time in ms)                                 Min                Max              Mean            StdDev            Median               IQR            Outliers  Rounds
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[False-True-sum-1000000] (afte)     9.4459 (1.01)     10.0397 (1.0)      9.4983 (1.0)      0.0706 (1.0)      9.4846 (1.0)      0.0216 (1.0)           4;6      89
groupby_agg[False-True-sum-1000000] (befo)     9.3064 (1.0)      10.2732 (1.02)     9.5150 (1.00)     0.1107 (1.57)     9.4933 (1.00)     0.0239 (1.10)         6;10      88
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------

---------------------------------------------------------------- benchmark 'True-False-agg1-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                              Min                Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-False-agg1-100] (afte)     4.3327 (1.0)       4.4800 (1.0)      4.3504 (1.0)      0.0202 (1.0)      4.3457 (1.0)      0.0103 (1.0)         10;16     181
groupby_agg[True-False-agg1-100] (befo)     4.6486 (1.07)     12.4651 (2.78)     4.8006 (1.10)     0.7100 (35.18)    4.6664 (1.07)     0.0191 (1.86)        10;19     170
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'True-False-agg1-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                                Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-False-agg1-10000] (afte)     4.9246 (1.0)      5.1165 (1.0)      4.9491 (1.0)      0.0269 (1.0)      4.9407 (1.0)      0.0133 (1.06)        16;19     164
groupby_agg[True-False-agg1-10000] (befo)     5.2464 (1.07)     5.6002 (1.09)     5.2700 (1.06)     0.0370 (1.38)     5.2623 (1.07)     0.0126 (1.0)         10;17     154
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------- benchmark 'True-False-agg1-1000000': 2 tests -----------------------------------------------------------------
Name (time in ms)                                   Min                Max               Mean            StdDev             Median               IQR            Outliers  Rounds
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-False-agg1-1000000] (afte)     36.5089 (1.00)     37.2874 (1.0)      36.8305 (1.0)      0.2321 (1.0)      36.7404 (1.0)      0.2208 (1.0)           7;5      28
groupby_agg[True-False-agg1-1000000] (befo)     36.3558 (1.0)      47.0329 (1.26)     37.7670 (1.03)     2.7313 (11.77)    36.8183 (1.00)     0.8527 (3.86)          2;3      26
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'True-False-agg2-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                              Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-False-agg2-100] (afte)     4.6287 (1.0)      5.2921 (1.02)     4.6918 (1.0)      0.1017 (4.64)     4.6526 (1.0)      0.0496 (3.27)        21;23     167
groupby_agg[True-False-agg2-100] (befo)     4.9776 (1.08)     5.1737 (1.0)      5.0060 (1.07)     0.0219 (1.0)      4.9995 (1.07)     0.0152 (1.0)         18;10     161
------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'True-False-agg2-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                                Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-False-agg2-10000] (afte)     5.2022 (1.0)      6.7622 (1.16)     5.2405 (1.0)      0.1267 (2.98)     5.2219 (1.0)      0.0157 (1.0)          2;16     155
groupby_agg[True-False-agg2-10000] (befo)     5.5802 (1.07)     5.8531 (1.0)      5.6166 (1.07)     0.0424 (1.0)      5.6041 (1.07)     0.0206 (1.31)        11;14     147
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------- benchmark 'True-False-agg2-1000000': 2 tests -----------------------------------------------------------------
Name (time in ms)                                   Min                Max               Mean            StdDev             Median               IQR            Outliers  Rounds
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-False-agg2-1000000] (afte)     37.9639 (1.0)      38.7598 (1.0)      38.2381 (1.0)      0.1221 (1.0)      38.2346 (1.00)     0.0583 (1.0)           2;2      27
groupby_agg[True-False-agg2-1000000] (befo)     38.0569 (1.00)     41.5735 (1.07)     38.7983 (1.01)     1.1968 (9.80)     38.1696 (1.0)      0.6344 (10.88)         5;5      26
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'True-False-sum-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                             Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-False-sum-100] (afte)     3.6893 (1.0)      4.2792 (1.03)     3.7130 (1.0)      0.0580 (4.15)     3.7022 (1.0)      0.0079 (1.0)         10;16     206
groupby_agg[True-False-sum-100] (befo)     4.0016 (1.08)     4.1370 (1.0)      4.0218 (1.08)     0.0140 (1.0)      4.0180 (1.09)     0.0097 (1.23)        27;17     188
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'True-False-sum-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                               Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-False-sum-10000] (afte)     4.2660 (1.0)      4.6651 (1.0)      4.2913 (1.0)      0.0493 (2.97)     4.2799 (1.0)      0.0097 (1.0)         10;21     185
groupby_agg[True-False-sum-10000] (befo)     4.5702 (1.07)     4.7321 (1.01)     4.5904 (1.07)     0.0166 (1.0)      4.5858 (1.07)     0.0134 (1.37)         24;8     172
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------- benchmark 'True-False-sum-1000000': 2 tests -----------------------------------------------------------------
Name (time in ms)                                  Min                Max               Mean            StdDev             Median               IQR            Outliers  Rounds
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-False-sum-1000000] (afte)     30.5871 (1.00)     30.9527 (1.0)      30.6797 (1.00)     0.0628 (1.0)      30.6720 (1.00)     0.0421 (1.0)           4;3      32
groupby_agg[True-False-sum-1000000] (befo)     30.5386 (1.0)      31.8930 (1.03)     30.6654 (1.0)      0.2383 (3.80)     30.6013 (1.0)      0.0573 (1.36)          1;4      31
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'True-True-agg1-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                             Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-True-agg1-100] (afte)     4.2812 (1.0)      4.5815 (1.0)      4.3304 (1.0)      0.0495 (1.43)     4.3134 (1.0)      0.0647 (4.80)         22;4     173
groupby_agg[True-True-agg1-100] (befo)     4.4126 (1.03)     4.7356 (1.03)     4.4357 (1.02)     0.0348 (1.0)      4.4253 (1.03)     0.0135 (1.0)         14;18     158
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'True-True-agg1-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                               Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-True-agg1-10000] (afte)     4.8505 (1.0)      5.3411 (1.0)      4.8882 (1.0)      0.0596 (1.49)     4.8693 (1.0)      0.0240 (1.41)        12;15     166
groupby_agg[True-True-agg1-10000] (befo)     4.9857 (1.03)     5.3869 (1.01)     5.0191 (1.03)     0.0399 (1.0)      5.0089 (1.03)     0.0170 (1.0)          9;15     160
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------- benchmark 'True-True-agg1-1000000': 2 tests -----------------------------------------------------------------
Name (time in ms)                                  Min                Max               Mean            StdDev             Median               IQR            Outliers  Rounds
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-True-agg1-1000000] (afte)     36.5387 (1.01)     55.8017 (1.52)     37.3622 (1.03)     3.6965 (48.22)    36.5756 (1.00)     0.0882 (2.75)          1;3      27
groupby_agg[True-True-agg1-1000000] (befo)     36.3456 (1.0)      36.7584 (1.0)      36.4209 (1.0)      0.0767 (1.0)      36.4014 (1.0)      0.0320 (1.0)           1;4      27
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'True-True-agg2-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                             Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-True-agg2-100] (afte)     4.5713 (1.0)      5.1548 (1.06)     4.6064 (1.0)      0.0621 (4.49)     4.5886 (1.0)      0.0203 (1.51)        13;22     170
groupby_agg[True-True-agg2-100] (befo)     4.7628 (1.04)     4.8752 (1.0)      4.7832 (1.04)     0.0138 (1.0)      4.7795 (1.04)     0.0134 (1.0)          29;9     167
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'True-True-agg2-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                               Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-True-agg2-10000] (afte)     5.1343 (1.0)      5.4159 (1.0)      5.1769 (1.0)      0.0517 (1.36)     5.1590 (1.0)      0.0179 (1.21)        16;22     157
groupby_agg[True-True-agg2-10000] (befo)     5.3567 (1.04)     5.6432 (1.04)     5.3858 (1.04)     0.0379 (1.0)      5.3785 (1.04)     0.0147 (1.0)          7;12     152
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------- benchmark 'True-True-agg2-1000000': 2 tests -----------------------------------------------------------------
Name (time in ms)                                  Min                Max               Mean            StdDev             Median               IQR            Outliers  Rounds
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-True-agg2-1000000] (afte)     38.0357 (1.00)     38.2935 (1.00)     38.1159 (1.00)     0.0597 (1.0)      38.1014 (1.00)     0.0846 (1.0)           6;1      27
groupby_agg[True-True-agg2-1000000] (befo)     37.9134 (1.0)      38.2851 (1.0)      38.0201 (1.0)      0.0929 (1.55)     37.9944 (1.0)      0.1066 (1.26)          7;1      26
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------- benchmark 'True-True-sum-100': 2 tests ---------------------------------------------------------------
Name (time in ms)                            Min               Max              Mean            StdDev            Median               IQR            Outliers  Rounds
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-True-sum-100] (afte)     3.7452 (1.0)      4.0287 (1.0)      3.8009 (1.0)      0.0408 (1.0)      3.7968 (1.0)      0.0503 (1.0)          29;3     131
groupby_agg[True-True-sum-100] (befo)     3.8752 (1.03)     4.4384 (1.10)     3.9316 (1.03)     0.0608 (1.49)     3.9265 (1.03)     0.0504 (1.00)          4;3     148
----------------------------------------------------------------------------------------------------------------------------------------------------------------------

---------------------------------------------------------------- benchmark 'True-True-sum-10000': 2 tests ---------------------------------------------------------------
Name (time in ms)                              Min                Max              Mean            StdDev            Median               IQR            Outliers  Rounds
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-True-sum-10000] (afte)     4.4442 (1.0)      11.3511 (2.35)     4.5582 (1.0)      0.5829 (24.78)    4.4741 (1.0)      0.0323 (2.85)         3;19     171
groupby_agg[True-True-sum-10000] (befo)     4.5676 (1.03)      4.8264 (1.0)      4.5913 (1.01)     0.0235 (1.0)      4.5871 (1.03)     0.0114 (1.0)         15;16     168
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------- benchmark 'True-True-sum-1000000': 2 tests -----------------------------------------------------------------
Name (time in ms)                                 Min                Max               Mean            StdDev             Median               IQR            Outliers  Rounds
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
groupby_agg[True-True-sum-1000000] (afte)     30.5326 (1.00)     33.6395 (1.02)     31.2355 (1.0)      0.9563 (1.20)     30.6933 (1.0)      0.9663 (1.0)           5;3      30
groupby_agg[True-True-sum-1000000] (befo)     30.4080 (1.0)      33.0341 (1.0)      31.2527 (1.00)     0.7946 (1.0)      30.9808 (1.01)     1.2781 (1.32)         11;0      30
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
```

</details>

[Benchmark code](https://github.com/isVoid/cudf_benchmarks/blob/9d9644eaa5301df7894c2fe4b1ba317396240518/bench_groupby.py#L23-L42)

Authors:
  - Michael Wang (https://github.com/isVoid)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/10419
---
 python/cudf/cudf/_lib/groupby.pyx        | 222 +++++++++++------------
 python/cudf/cudf/_typing.py              |  10 +-
 python/cudf/cudf/core/frame.py           |   5 +-
 python/cudf/cudf/core/groupby/groupby.py | 211 +++++++++++----------
 4 files changed, 221 insertions(+), 227 deletions(-)
diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index 49a924c9104..48f566b846d 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -26,7 +26,12 @@ import cudf
 
 from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
-from cudf._lib.utils cimport table_view_from_table
+from cudf._lib.utils cimport (
+    columns_from_unique_ptr,
+    data_from_unique_ptr,
+    table_view_from_columns,
+    table_view_from_table,
+)
 
 from cudf._lib.scalar import as_device_scalar
 
@@ -46,7 +51,6 @@ from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.table.table cimport table, table_view
 from cudf._lib.cpp.types cimport size_type
 from cudf._lib.cpp.utilities.host_span cimport host_span
-from cudf._lib.utils cimport data_from_unique_ptr
 
 # The sets below define the possible aggregations that can be performed on
 # different dtypes. These strings must be elements of the AggregationKind enum.
@@ -62,11 +66,39 @@ _DECIMAL_AGGS = {"COUNT", "SUM", "ARGMIN", "ARGMAX", "MIN", "MAX", "NUNIQUE",
 # workaround for https://github.com/cython/cython/issues/3885
 ctypedef const scalar constscalar
 
+
+cdef _agg_result_from_columns(
+    vector[libcudf_groupby.aggregation_result]& c_result_columns,
+    set column_included,
+    int n_input_columns
+):
+    """Construct the list of result columns from libcudf result. The result
+    contains the same number of lists as the number of input columns. Result
+    for an input column that has no applicable aggregations is an empty list.
+    """
+    cdef:
+        int i
+        int j
+        int result_index = 0
+        vector[unique_ptr[column]]* c_result
+    result_columns = []
+    for i in range(n_input_columns):
+        if i in column_included:
+            c_result = &c_result_columns[result_index].results
+            result_columns.append([
+                Column.from_unique_ptr(move(c_result[0][j]))
+                for j in range(c_result[0].size())
+            ])
+            result_index += 1
+        else:
+            result_columns.append([])
+    return result_columns
+
 cdef class GroupBy:
     cdef unique_ptr[libcudf_groupby.groupby] c_obj
     cdef dict __dict__
 
-    def __cinit__(self, keys, bool dropna=True, *args, **kwargs):
+    def __cinit__(self, list keys, bool dropna=True, *args, **kwargs):
         cdef libcudf_types.null_policy c_null_handling
 
         if dropna:
@@ -74,7 +106,7 @@ cdef class GroupBy:
         else:
             c_null_handling = libcudf_types.null_policy.INCLUDE
 
-        cdef table_view keys_view = table_view_from_table(keys)
+        cdef table_view keys_view = table_view_from_columns(keys)
 
         with nogil:
             self.c_obj.reset(
@@ -84,46 +116,42 @@ cdef class GroupBy:
                 )
             )
 
-    def __init__(self, keys, bool dropna=True):
+    def __init__(self, list keys, bool dropna=True):
         self.keys = keys
         self.dropna = dropna
 
-    def groups(self, values):
-
-        cdef table_view values_view = table_view_from_table(values)
+    def groups(self, list values):
+        cdef table_view values_view = table_view_from_columns(values)
 
         with nogil:
             c_groups = move(self.c_obj.get()[0].get_groups(values_view))
 
-        c_grouped_keys = move(c_groups.keys)
-        c_grouped_values = move(c_groups.values)
-        c_group_offsets = c_groups.offsets
-
-        grouped_keys = cudf.core.index._index_from_data(
-            *data_from_unique_ptr(
-                move(c_grouped_keys),
-                column_names=range(c_grouped_keys.get()[0].num_columns())
-            )
-        )
-        grouped_values = data_from_unique_ptr(
-            move(c_grouped_values),
-            index_names=values._index_names,
-            column_names=values._column_names
-        )
-        return grouped_keys, grouped_values, c_group_offsets
+        grouped_key_cols = columns_from_unique_ptr(move(c_groups.keys))
+        grouped_value_cols = columns_from_unique_ptr(move(c_groups.values))
+        return grouped_key_cols, grouped_value_cols, c_groups.offsets
 
     def aggregate_internal(self, values, aggregations):
-        from cudf.core.column_accessor import ColumnAccessor
+        """`values` is a list of columns and `aggregations` is a list of
+        lists of aggregations; `aggregations[i]` holds the aggregations for
+        `values[i]`. Returns a tuple containing 1) a list of lists of
+        result columns, 2) a list of grouped key columns, and 3) a list of
+        lists of the aggregations actually performed.
+        """
         cdef vector[libcudf_groupby.aggregation_request] c_agg_requests
         cdef libcudf_groupby.aggregation_request c_agg_request
         cdef Column col
         cdef GroupbyAggregation agg_obj
 
-        allow_empty = all(len(v) == 0 for v in aggregations.values())
+        cdef pair[
+            unique_ptr[table],
+            vector[libcudf_groupby.aggregation_result]
+        ] c_result
 
-        included_aggregations = defaultdict(list)
-        for i, (col_name, aggs) in enumerate(aggregations.items()):
-            col = values._data[col_name]
+        allow_empty = all(len(v) == 0 for v in aggregations)
+
+        included_aggregations = []
+        column_included = set()
+        for i, (col, aggs) in enumerate(zip(values, aggregations)):
             dtype = col.dtype
 
             valid_aggregations = (
@@ -135,36 +163,27 @@ cdef class GroupBy:
                 else _DECIMAL_AGGS if is_decimal_dtype(dtype)
                 else "ALL"
             )
-            if (valid_aggregations is _DECIMAL_AGGS
-                    and rmm._cuda.gpu.runtimeGetVersion() < 11000):
-                raise RuntimeError(
-                    "Decimal aggregations are only supported on CUDA >= 11 "
-                    "due to an nvcc compiler bug."
-                )
+            included_aggregations_i = []
 
             c_agg_request = move(libcudf_groupby.aggregation_request())
             for agg in aggs:
                 agg_obj = make_groupby_aggregation(agg)
                 if (valid_aggregations == "ALL"
                         or agg_obj.kind in valid_aggregations):
-                    included_aggregations[col_name].append(agg)
+                    included_aggregations_i.append(agg)
                     c_agg_request.aggregations.push_back(
                         move(agg_obj.c_obj)
                     )
+            included_aggregations.append(included_aggregations_i)
             if not c_agg_request.aggregations.empty():
                 c_agg_request.values = col.view()
                 c_agg_requests.push_back(
                     move(c_agg_request)
                 )
-
+                column_included.add(i)
         if c_agg_requests.empty() and not allow_empty:
             raise DataError("All requested aggregations are unsupported.")
 
-        cdef pair[
-            unique_ptr[table],
-            vector[libcudf_groupby.aggregation_result]
-        ] c_result
-
         with nogil:
             c_result = move(
                 self.c_obj.get()[0].aggregate(
@@ -172,37 +191,38 @@ cdef class GroupBy:
                 )
             )
 
-        grouped_keys, _ = data_from_unique_ptr(
-            move(c_result.first),
-            column_names=self.keys._column_names
+        grouped_keys = columns_from_unique_ptr(
+            move(c_result.first)
         )
 
-        result_data = ColumnAccessor(multiindex=True)
-        # Note: This loop relies on the included_aggregations dict being
-        # insertion ordered to map results to requested aggregations by index.
-        for i, col_name in enumerate(included_aggregations):
-            for j, agg_name in enumerate(included_aggregations[col_name]):
-                if callable(agg_name):
-                    agg_name = agg_name.__name__
-                result_data[(col_name, agg_name)] = (
-                    Column.from_unique_ptr(move(c_result.second[i].results[j]))
-                )
+        result_columns = _agg_result_from_columns(
+            c_result.second, column_included, len(values)
+        )
 
-        return result_data, cudf.core.index._index_from_data(
-            grouped_keys)
+        return result_columns, grouped_keys, included_aggregations
 
     def scan_internal(self, values, aggregations):
-        from cudf.core.column_accessor import ColumnAccessor
+        """`values` is a list of columns and `aggregations` is a list of
+        lists of aggregations; `aggregations[i]` holds the aggregations for
+        `values[i]`. Returns a tuple containing 1) a list of lists of
+        result columns, 2) a list of grouped key columns, and 3) a list of
+        lists of the aggregations actually performed.
+        """
         cdef vector[libcudf_groupby.scan_request] c_agg_requests
         cdef libcudf_groupby.scan_request c_agg_request
         cdef Column col
         cdef GroupbyScanAggregation agg_obj
 
-        allow_empty = all(len(v) == 0 for v in aggregations.values())
+        cdef pair[
+            unique_ptr[table],
+            vector[libcudf_groupby.aggregation_result]
+        ] c_result
+
+        allow_empty = all(len(v) == 0 for v in aggregations)
 
-        included_aggregations = defaultdict(list)
-        for i, (col_name, aggs) in enumerate(aggregations.items()):
-            col = values._data[col_name]
+        included_aggregations = []
+        column_included = set()
+        for i, (col, aggs) in enumerate(zip(values, aggregations)):
             dtype = col.dtype
 
             valid_aggregations = (
@@ -214,36 +234,27 @@ cdef class GroupBy:
                 else _DECIMAL_AGGS if is_decimal_dtype(dtype)
                 else "ALL"
             )
-            if (valid_aggregations is _DECIMAL_AGGS
-                    and rmm._cuda.gpu.runtimeGetVersion() < 11000):
-                raise RuntimeError(
-                    "Decimal aggregations are only supported on CUDA >= 11 "
-                    "due to an nvcc compiler bug."
-                )
+            included_aggregations_i = []
 
             c_agg_request = move(libcudf_groupby.scan_request())
             for agg in aggs:
                 agg_obj = make_groupby_scan_aggregation(agg)
                 if (valid_aggregations == "ALL"
                         or agg_obj.kind in valid_aggregations):
-                    included_aggregations[col_name].append(agg)
+                    included_aggregations_i.append(agg)
                     c_agg_request.aggregations.push_back(
                         move(agg_obj.c_obj)
                     )
+            included_aggregations.append(included_aggregations_i)
             if not c_agg_request.aggregations.empty():
                 c_agg_request.values = col.view()
                 c_agg_requests.push_back(
                     move(c_agg_request)
                 )
-
+                column_included.add(i)
         if c_agg_requests.empty() and not allow_empty:
             raise DataError("All requested aggregations are unsupported.")
 
-        cdef pair[
-            unique_ptr[table],
-            vector[libcudf_groupby.aggregation_result]
-        ] c_result
-
         with nogil:
             c_result = move(
                 self.c_obj.get()[0].scan(
@@ -251,24 +262,15 @@ cdef class GroupBy:
                 )
             )
 
-        grouped_keys, _ = data_from_unique_ptr(
-            move(c_result.first),
-            column_names=self.keys._column_names
+        grouped_keys = columns_from_unique_ptr(
+            move(c_result.first)
         )
 
-        result_data = ColumnAccessor(multiindex=True)
-        # Note: This loop relies on the included_aggregations dict being
-        # insertion ordered to map results to requested aggregations by index.
-        for i, col_name in enumerate(included_aggregations):
-            for j, agg_name in enumerate(included_aggregations[col_name]):
-                if callable(agg_name):
-                    agg_name = agg_name.__name__
-                result_data[(col_name, agg_name)] = (
-                    Column.from_unique_ptr(move(c_result.second[i].results[j]))
-                )
+        result_columns = _agg_result_from_columns(
+            c_result.second, column_included, len(values)
+        )
 
-        return result_data, cudf.core.index._index_from_data(
-            grouped_keys)
+        return result_columns, grouped_keys, included_aggregations
 
     def aggregate(self, values, aggregations):
         """
@@ -292,8 +294,8 @@ cdef class GroupBy:
 
         return self.aggregate_internal(values, aggregations)
 
-    def shift(self, values, int periods, list fill_values):
-        cdef table_view view = table_view_from_table(values)
+    def shift(self, list values, int periods, list fill_values):
+        cdef table_view view = table_view_from_columns(values)
         cdef size_type num_col = view.num_columns()
         cdef vector[size_type] offsets = vector[size_type](num_col, periods)
 
@@ -301,7 +303,7 @@ cdef class GroupBy:
         cdef DeviceScalar d_slr
         d_slrs = []
         c_fill_values.reserve(num_col)
-        for val, col in zip(fill_values, values._columns):
+        for val, col in zip(fill_values, values):
             d_slr = as_device_scalar(val, dtype=col.dtype)
             d_slrs.append(d_slr)
             c_fill_values.push_back(
@@ -315,21 +317,13 @@ cdef class GroupBy:
                 self.c_obj.get()[0].shift(view, offsets, c_fill_values)
             )
 
-        grouped_keys = cudf.core.index._index_from_data(
-            *data_from_unique_ptr(
-                move(c_result.first),
-                column_names=self.keys._column_names
-            )
-        )
-
-        shifted, _ = data_from_unique_ptr(
-            move(c_result.second), column_names=values._column_names
-        )
+        grouped_keys = columns_from_unique_ptr(move(c_result.first))
+        shifted = columns_from_unique_ptr(move(c_result.second))
 
         return shifted, grouped_keys
 
-    def replace_nulls(self, values, object method):
-        cdef table_view val_view = table_view_from_table(values)
+    def replace_nulls(self, list values, object method):
+        cdef table_view val_view = table_view_from_columns(values)
         cdef pair[unique_ptr[table], unique_ptr[table]] c_result
         cdef replace_policy policy = (
             replace_policy.PRECEDING
@@ -344,15 +338,13 @@ cdef class GroupBy:
                 self.c_obj.get()[0].replace_nulls(val_view, policies)
             )
 
-        return data_from_unique_ptr(
-            move(c_result.second), column_names=values._column_names
-        )[0]
+        return columns_from_unique_ptr(move(c_result.second))
 
 
 _GROUPBY_SCANS = {"cumcount", "cumsum", "cummin", "cummax"}
 
 
-def _is_all_scan_aggregate(aggs):
+def _is_all_scan_aggregate(all_aggs):
     """
     Returns true if all are scan aggregations.
     Raises
@@ -365,16 +357,12 @@ def _is_all_scan_aggregate(aggs):
         return agg.__name__ if callable(agg) else agg
 
     all_scan = all(
-        all(
-            get_name(agg_name) in _GROUPBY_SCANS for agg_name in aggs[col_name]
-        )
-        for col_name in aggs
+        get_name(agg_name) in _GROUPBY_SCANS for aggs in all_aggs
+        for agg_name in aggs
     )
     any_scan = any(
-        any(
-            get_name(agg_name) in _GROUPBY_SCANS for agg_name in aggs[col_name]
-        )
-        for col_name in aggs
+        get_name(agg_name) in _GROUPBY_SCANS for aggs in all_aggs
+        for agg_name in aggs
     )
 
     if not all_scan and any_scan:
diff --git a/python/cudf/cudf/_typing.py b/python/cudf/cudf/_typing.py
index 793a5d1d9e8..ca2024929f3 100644
--- a/python/cudf/cudf/_typing.py
+++ b/python/cudf/cudf/_typing.py
@@ -1,6 +1,6 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
-from typing import TYPE_CHECKING, Any, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, TypeVar, Union
 
 import numpy as np
 from pandas import Period, Timedelta, Timestamp
@@ -32,3 +32,9 @@
 SeriesOrSingleColumnIndex = Union[
     "cudf.Series", "cudf.core.index.GenericIndex"
 ]
+
+# Groupby aggregation
+AggType = Union[str, Callable]
+MultiColumnAggType = Union[
+    AggType, Iterable[AggType], Dict[Any, Iterable[AggType]]
+]
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index a847c0b5d3b..2802009b848 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -229,14 +229,17 @@ def _from_columns(
     def _from_columns_like_self(
         self,
         columns: List[ColumnBase],
-        column_names: abc.Iterable[str],
+        column_names: Optional[abc.Iterable[str]] = None,
         index_names: Optional[List[str]] = None,
     ):
         """Construct a `Frame` from a list of columns with metadata from self.
 
+        If `column_names` is None, use column names from self.
         If `index_names` is set, the first `len(index_names)` columns are
         used to construct the index of the frame.
         """
+        if column_names is None:
+            column_names = self._column_names
         frame = self.__class__._from_columns(
             columns, column_names, index_names
         )
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index a1a4596ba45..0c274911f3d 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -5,16 +5,18 @@
 import pickle
 import warnings
 from functools import cached_property
+from typing import Any, Iterable, List, Tuple, Union
 
 import numpy as np
 
 import cudf
 from cudf._lib import groupby as libgroupby
 from cudf._lib.reshape import interleave_columns
-from cudf._typing import DataFrameOrSeries
+from cudf._typing import AggType, DataFrameOrSeries, MultiColumnAggType
 from cudf.api.types import is_list_like
 from cudf.core.abc import Serializable
-from cudf.core.column.column import arange, as_column
+from cudf.core.column.column import ColumnBase, arange, as_column
+from cudf.core.column_accessor import ColumnAccessor
 from cudf.core.mixins import Reducible, Scannable
 from cudf.core.multiindex import MultiIndex
 from cudf.utils.utils import GetAttrGetItemMixin, _cudf_nvtx_annotate
@@ -37,6 +39,8 @@ def _quantile_75(x):
 
 class GroupBy(Serializable, Reducible, Scannable):
 
+    obj: "cudf.core.indexed_frame.IndexedFrame"
+
     _VALID_REDUCTIONS = {
         "sum",
         "prod",
@@ -107,6 +111,7 @@ def __init__(
         self._dropna = dropna
 
         if isinstance(by, _Grouping):
+            by._obj = self.obj
             self.grouping = by
         else:
             self.grouping = _Grouping(obj, by, level)
@@ -204,7 +209,9 @@ def cumcount(self):
 
     @cached_property
     def _groupby(self):
-        return libgroupby.GroupBy(self.grouping.keys, dropna=self._dropna)
+        return libgroupby.GroupBy(
+            [*self.grouping.keys._columns], dropna=self._dropna
+        )
 
     @_cudf_nvtx_annotate
     def agg(self, func):
@@ -274,55 +281,48 @@ def agg(self, func):
         1  1.5  1.75  2.0   2.0
         2  3.0  3.00  1.0   1.0
         """
-        normalized_aggs = self._normalize_aggs(func)
+        column_names, columns, normalized_aggs = self._normalize_aggs(func)
 
         # Note: When there are no key columns, the below produces
         # a Float64Index, while Pandas returns an Int64Index
         # (GH: 6945)
-        result = cudf.DataFrame._from_data(
-            *self._groupby.aggregate(self.obj, normalized_aggs)
+        (
+            result_columns,
+            grouped_key_cols,
+            included_aggregations,
+        ) = self._groupby.aggregate(columns, normalized_aggs)
+
+        result_index = self.grouping.keys._from_columns_like_self(
+            grouped_key_cols,
         )
 
+        multilevel = _is_multi_agg(func)
+        data = {}
+        for col_name, aggs, cols in zip(
+            column_names, included_aggregations, result_columns
+        ):
+            for agg, col in zip(aggs, cols):
+                if multilevel:
+                    agg_name = agg.__name__ if callable(agg) else agg
+                    key = (col_name, agg_name)
+                else:
+                    key = col_name
+                data[key] = col
+        data = ColumnAccessor(data, multiindex=multilevel)
+        if not multilevel:
+            data = data.rename_levels({np.nan: None}, level=0)
+        result = cudf.DataFrame._from_data(data, index=result_index)
+
         if self._sort:
             result = result.sort_index()
 
-        if not _is_multi_agg(func):
-            if result._data.nlevels <= 1:  # 0 or 1 levels
-                # make sure it's a flat index:
-                result._data.multiindex = False
-
-            if result._data.nlevels > 1:
-                result._data.droplevel(-1)
-
-                # if, after dropping the last level, the only
-                # remaining key is `NaN`, we need to convert to `None`
-                # for Pandas compat:
-                if result._data.names == (np.nan,):
-                    result._data = result._data.rename_levels(
-                        {np.nan: None}, level=0
-                    )
+        if not self._as_index:
+            result = result.reset_index()
 
         if libgroupby._is_all_scan_aggregate(normalized_aggs):
             # Scan aggregations return rows in original index order
             return self._mimic_pandas_order(result)
 
-        # set index names to be group key names
-        if len(result):
-            result.index.names = self.grouping.names
-
-        # copy categorical information from keys to the result index:
-        result.index._copy_type_metadata(self.grouping.keys)
-        result._index = cudf.Index(result._index)
-
-        if not self._as_index:
-            for col_name in reversed(self.grouping._named_columns):
-                result._insert(
-                    0,
-                    col_name,
-                    result.index.get_level_values(col_name)._values,
-                )
-            result.index = cudf.core.index.RangeIndex(len(result))
-
         return result
 
     def _reduce(
@@ -417,43 +417,50 @@ def deserialize(cls, header, frames):
         return cls(obj, grouping, **kwargs)
 
     def _grouped(self):
-        grouped_keys, grouped_values, offsets = self._groupby.groups(self.obj)
-        grouped_values = self.obj.__class__._from_data(*grouped_values)
-        grouped_values._copy_type_metadata(self.obj)
+        grouped_key_cols, grouped_value_cols, offsets = self._groupby.groups(
+            [*self.obj._index._columns, *self.obj._columns]
+        )
+        grouped_keys = cudf.core.index._index_from_columns(grouped_key_cols)
+        grouped_values = self.obj._from_columns_like_self(
+            grouped_value_cols,
+            column_names=self.obj._column_names,
+            index_names=self.obj._index_names,
+        )
         group_names = grouped_keys.unique()
         return (group_names, offsets, grouped_keys, grouped_values)
 
-    def _normalize_aggs(self, aggs):
+    def _normalize_aggs(
+        self, aggs: MultiColumnAggType
+    ) -> Tuple[Iterable[Any], Tuple[ColumnBase, ...], List[List[AggType]]]:
         """
-        Normalize aggs to a dict mapping column names
-        to a list of aggregations.
+        Normalize aggs to a list of lists of aggregations, where `out[i]`
+        is the list of aggregations for the i-th returned column. We support
+        three different forms of `aggs` input here:
+        - A single agg, such as "sum". This agg is applied to all value
+        columns.
+        - A list of aggs, such as ["sum", "mean"]. All aggs are applied to all
+        value columns.
+        - A mapping of column name to aggs, such as
+        {"a": ["sum"], "b": ["mean"]}. The aggs are applied to the specified
+        columns.
+        Each agg can be a string or a callable.
         """
-        if not isinstance(aggs, collections.abc.Mapping):
-            # Make col_name->aggs mapping from aggs.
-            # Do not include named key columns
-
-            # Can't do set arithmetic here as sets are
-            # not ordered
-            if isinstance(self, SeriesGroupBy):
-                columns = [self.obj.name]
-            else:
-                columns = [
-                    col_name
-                    for col_name in self.obj._data
-                    if col_name not in self.grouping._named_columns
-                ]
-            out = dict.fromkeys(columns, aggs)
-        else:
-            out = aggs.copy()
 
-        # Convert all values to list-like:
-        for col, agg in out.items():
-            if not is_list_like(agg):
-                out[col] = [agg]
-            else:
-                out[col] = list(agg)
-
-        return out
+        aggs_per_column: Iterable[Union[AggType, Iterable[AggType]]]
+        if isinstance(aggs, dict):
+            column_names, aggs_per_column = aggs.keys(), aggs.values()
+            columns = tuple(self.obj._data[col] for col in column_names)
+        else:
+            values = self.grouping.values
+            column_names = values._column_names
+            columns = values._columns
+            aggs_per_column = (aggs,) * len(columns)
+
+        normalized_aggs = [
+            list(agg) if is_list_like(agg) else [agg]
+            for agg in aggs_per_column
+        ]
+        return column_names, columns, normalized_aggs
 
     def pipe(self, func, *args, **kwargs):
         """
@@ -1201,29 +1208,20 @@ def diff(self, periods=1, axis=0):
         if not axis == 0:
             raise NotImplementedError("Only axis=0 is supported.")
 
-        # grouped values
-        value_columns = self.grouping.values
-        _, (data, index), _ = self._groupby.groups(
-            cudf.core.frame.Frame(value_columns._data)
-        )
-        grouped = self.obj.__class__._from_data(data, index)
-        grouped = self._mimic_pandas_order(grouped)._copy_type_metadata(
-            value_columns
+        values = self.obj.__class__._from_data(
+            self.grouping.values._data, self.obj.index
         )
-
-        result = grouped - self.shift(periods=periods)
-        return result._copy_type_metadata(value_columns)
+        return values - self.shift(periods=periods)
 
     def _scan_fill(self, method: str, limit: int) -> DataFrameOrSeries:
         """Internal implementation for `ffill` and `bfill`"""
-        value_columns = self.grouping.values
-        result = self.obj.__class__._from_data(
-            self._groupby.replace_nulls(
-                cudf.core.frame.Frame(value_columns._data), method
-            )
+        values = self.grouping.values
+        result = self.obj._from_columns(
+            self._groupby.replace_nulls([*values._columns], method),
+            values._column_names,
         )
         result = self._mimic_pandas_order(result)
-        return result._copy_type_metadata(value_columns)
+        return result._copy_type_metadata(values)
 
     def pad(self, limit=None):
         """Forward fill NA values.
@@ -1334,17 +1332,12 @@ def fillna(
                 )
             return getattr(self, method, limit)()
 
-        value_columns = self.grouping.values
-        _, (data, index), _ = self._groupby.groups(
-            cudf.core.frame.Frame(value_columns._data)
+        values = self.obj.__class__._from_data(
+            self.grouping.values._data, self.obj.index
         )
-
-        grouped = self.obj.__class__._from_data(data, index)
-        result = grouped.fillna(
+        return values.fillna(
             value=value, inplace=inplace, axis=axis, limit=limit
         )
-        result = self._mimic_pandas_order(result)
-        return result._copy_type_metadata(value_columns)
 
     def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         """
@@ -1385,22 +1378,21 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         if not axis == 0:
             raise NotImplementedError("Only axis=0 is supported.")
 
-        value_columns = self.grouping.values
+        values = self.grouping.values
         if is_list_like(fill_value):
-            if not len(fill_value) == len(value_columns._data):
+            if len(fill_value) != len(values._data):
                 raise ValueError(
                     "Mismatched number of columns and values to fill."
                 )
         else:
-            fill_value = [fill_value] * len(value_columns._data)
+            fill_value = [fill_value] * len(values._data)
 
-        result = self.obj.__class__._from_data(
-            *self._groupby.shift(
-                cudf.core.frame.Frame(value_columns._data), periods, fill_value
-            )
+        result = self.obj.__class__._from_columns(
+            self._groupby.shift([*values._columns], periods, fill_value)[0],
+            values._column_names,
         )
         result = self._mimic_pandas_order(result)
-        return result._copy_type_metadata(value_columns)
+        return result._copy_type_metadata(values)
 
     def _mimic_pandas_order(
         self, result: DataFrameOrSeries
@@ -1408,11 +1400,12 @@ def _mimic_pandas_order(
         """Given a groupby result from libcudf, reconstruct the row orders
         matching that of pandas. This also adds appropriate indices.
         """
-        sorted_order_column = arange(0, result._data.nrows)
-        _, (order, _), _ = self._groupby.groups(
-            cudf.core.frame.Frame({"sorted_order_column": sorted_order_column})
+        # TODO: copying metadata after calling this method is a common
+        # pattern; consider merging that step into this method.
+        _, order_cols, _ = self._groupby.groups(
+            [arange(0, result._data.nrows)]
         )
-        gather_map = order["sorted_order_column"].argsort()
+        gather_map = order_cols[0].argsort()
         result = result.take(gather_map)
         result.index = self.obj.index
         return result
@@ -1502,6 +1495,8 @@ class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
     Captive      210.0
     """
 
+    obj: "cudf.core.dataframe.DataFrame"
+
     _PROTECTED_KEYS = frozenset(("obj",))
 
     def __getitem__(self, key):
@@ -1570,6 +1565,8 @@ class SeriesGroupBy(GroupBy):
     Name: Max Speed, dtype: float64
     """
 
+    obj: "cudf.core.series.Series"
+
     def agg(self, func):
         result = super().agg(func)
 
@@ -1667,7 +1664,7 @@ def keys(self):
             )
 
     @property
-    def values(self):
+    def values(self) -> cudf.core.frame.Frame:
         """Return value columns as a frame.
 
         Note that in aggregation, value columns can be arbitrarily