diff --git a/src/dataframerow/utils.jl b/src/dataframerow/utils.jl
index ea62c8bf66..4d16e36e72 100644
--- a/src/dataframerow/utils.jl
+++ b/src/dataframerow/utils.jl
@@ -45,7 +45,7 @@ function hashrows_col!(h::Vector{UInt},
     # which is always zero
     # also when the number of values in the pool is more than half the length
     # of the vector avoid using this path. 50% is roughly based on benchmarks
-    if firstcol && 2 * length(rp) < length(v)
+    if firstcol && Int64(2) * length(rp) < length(v)
         hashes = Vector{UInt}(undef, length(rp))
         @inbounds for (i, v) in zip(eachindex(hashes), rp)
             hashes[i] = hash(v)
@@ -94,6 +94,85 @@ isequal_row(cols1::Tuple{Vararg{AbstractVector}}, r1::Int,
     isequal(cols1[1][r1], cols2[1][r2]) &&
         isequal_row(Base.tail(cols1), r1, Base.tail(cols2), r2)
 
+# IntegerRefarray and IntegerRefpool are two complementary view types that allow
+# wrapping arrays with Union{Real, Missing} eltype to satisfy the DataAPI.refpool
+# and DataAPI.refarray API when calling row_group_slots.
+# IntegerRefarray converts values to Int, subtracting an offset (set by the caller
+# to the minimum value - 1), and replaces missing with an integer (set by the
+# caller to the number of non-missing codes + 1).
+# IntegerRefpool maps that last code back to missing. This ensures all codes are in
+# 1:length(refpool), so that row_group_slots knows the number of (potential) groups
+# via length(refpool) and is able to skip missing values when skipmissing=true
+
+struct IntegerRefarray{T<:AbstractArray} <: AbstractVector{Int}
+    x::T              # wrapped array
+    offset::Int       # subtracted from each non-missing value by getindex
+    replacement::Int  # code returned by getindex for missing entries
+end
+
+Base.size(x::IntegerRefarray) = size(x.x) # same size as the wrapped array
+Base.axes(x::IntegerRefarray) = axes(x.x) # same axes as the wrapped array
+Base.IndexStyle(::Type{<:IntegerRefarray{T}}) where {T} = Base.IndexStyle(T) # as wrapped
+# Return the integer code of element `i`: `x.replacement` for missing, else value - offset
+@inline function Base.getindex(x::IntegerRefarray, i)
+    @boundscheck checkbounds(x.x, i)
+    @inbounds v = x.x[i]
+    eltype(x.x) >: Missing && v === missing && return x.replacement
+    # Convert before subtracting: `v - x.offset` is rounded for large floating-point `v`,
+    # while `Int(v)` is exact for the integer-valued, Int-range inputs callers guarantee.
+    # The difference cannot overflow as the offset is chosen from the data minimum
+    return Int(v) - x.offset
+end
+
+struct IntegerRefpool{T<:Union{Int, Missing}} <: AbstractVector{T}
+    max::Int # highest non-missing code; missing (if T allows it) is stored at max + 1
+    function IntegerRefpool{T}(max::Integer) where T<:Union{Int, Missing}
+        @assert max < typemax(Int) # to store missing values as max + 1
+        new{T}(max)
+    end
+end
+
+Base.size(x::IntegerRefpool{T}) where {T} = (x.max + (T >: Missing),) # +1 for missing
+Base.axes(x::IntegerRefpool{T}) where {T} = (Base.OneTo(x.max + (T >: Missing)),)
+Base.IndexStyle(::Type{<:IntegerRefpool}) = Base.IndexLinear() # one-based linear codes
+@inline function Base.getindex(x::IntegerRefpool{T}, i::Real) where T
+    @boundscheck checkbounds(x, i)
+    # The extra trailing code (max + 1) stands for missing, when T allows it
+    ismissingcode = T >: Missing && i == x.max + 1
+    # Every other code maps to itself
+    val = ismissingcode ? missing : Int(i)
+    return val
+end
+Base.allunique(::IntegerRefpool) = true # each code appears once in the pool
+Base.issorted(::IntegerRefpool) = true # codes are increasing, with missing last
+
+# Return `(refpool, refarray)` for `x`, or `(nothing, nothing)` if no refpool can be used
+function refpool_and_array(x::AbstractArray)
+    refpool = DataAPI.refpool(x)
+    refarray = DataAPI.refarray(x)
+    if refpool !== nothing
+        return refpool, refarray
+    elseif x isa AbstractArray{<:Union{Real, Missing}} &&
+        # -0.0 is rejected: it is not isequal to 0.0 but would get the same integer code
+        all(v -> ismissing(v) | (isinteger(v) & !isequal(v, -0.0)), x) &&
+        !isempty(skipmissing(x))
+        minval, maxval = extrema(skipmissing(x))
+        ngroups = big(maxval) - big(minval) + 1
+        # Threshold chosen with the same rationale as the row_group_slots refpool method:
+        # refpool approach is faster but we should not allocate too much memory either.
+        # We also have to avoid overflow, including with ngroups + 1 for missing values
+        # (a type parameter for minval in IntegerRefarray could lift the Int range limit)
+        if typemin(Int) < minval <= maxval < typemax(Int) &&
+            ngroups + 1 <= Int64(2) * length(x) <= typemax(Int)
+            T = eltype(x) >: Missing ? Union{Int, Missing} : Int
+            refpool′ = IntegerRefpool{T}(Int(ngroups))
+            refarray′ = IntegerRefarray(x, Int(minval) - 1, Int(ngroups) + 1)
+            return refpool′, refarray′
+        end
+    end
+    return nothing, nothing
+end
+
 # Helper function for RowGroupDict.
 # Returns a tuple:
 # 1) the highest group index in the `groups` vector
@@ -103,16 +182,21 @@ isequal_row(cols1::Tuple{Vararg{AbstractVector}}, r1::Int,
 # 4) whether groups are already sorted
 # Optional `groups` vector is set to the group indices of each row (starting at 1)
 # With skipmissing=true, rows with missing values are attributed index 0.
-row_group_slots(cols::Tuple{Vararg{AbstractVector}},
-                hash::Val = Val(true),
-                groups::Union{Vector{Int}, Nothing} = nothing,
-                skipmissing::Bool = false,
-                sort::Bool = false)::Tuple{Int, Vector{UInt}, Vector{Int}, Bool} =
-    row_group_slots(cols, DataAPI.refpool.(cols), hash, groups, skipmissing, sort)
+function row_group_slots(cols::Tuple{Vararg{AbstractVector}},
+                         hash::Val = Val(true),
+                         groups::Union{Vector{Int}, Nothing} = nothing,
+                         skipmissing::Bool = false,
+                         sort::Bool = false)::Tuple{Int, Vector{UInt}, Vector{Int}, Bool}
+    # One (refpool, refarray) pair per column, as computed by refpool_and_array
+    pooled = map(refpool_and_array, cols)
+    pools, arrays = map(first, pooled), map(last, pooled)
+    return row_group_slots(cols, pools, arrays, hash, groups, skipmissing, sort)
+end
 
 # Generic fallback method based on open adressing hash table
 function row_group_slots(cols::Tuple{Vararg{AbstractVector}},
-                         refpools::Any,
+                         refpools::Any,  # Ignored
+                         refarrays::Any, # Ignored
                          hash::Val = Val(true),
                          groups::Union{Vector{Int}, Nothing} = nothing,
                          skipmissing::Bool = false,
@@ -163,8 +247,12 @@ function row_group_slots(cols::Tuple{Vararg{AbstractVector}},
 end
 
 # Optimized method for arrays for which DataAPI.refpool is defined and returns an AbstractVector
-function row_group_slots(cols::NTuple{N, <:AbstractVector},
-                         refpools::NTuple{N, <:AbstractVector},
+function row_group_slots(cols::NTuple{N, AbstractVector},
+                         refpools::NTuple{N, AbstractVector},
+                         refarrays::NTuple{N,
+                             Union{AbstractVector{<:Real},
+                                   Missings.EachReplaceMissing{
+                                       <:AbstractVector{<:Union{Real, Missing}}}}},
                          hash::Val{false},
                          groups::Union{Vector{Int}, Nothing} = nothing,
                          skipmissing::Bool = false,
@@ -173,7 +261,6 @@ function row_group_slots(cols::NTuple{N, <:AbstractVector},
     # and this method needs to allocate a groups vector anyway
     @assert groups !== nothing && all(col -> length(col) == length(groups), cols)
 
-    refs = map(DataAPI.refarray, cols)
     missinginds = map(refpools) do refpool
         eltype(refpool) >: Missing ?
             something(findfirst(ismissing, refpool), lastindex(refpool)+1) : lastindex(refpool)+1
@@ -201,16 +288,17 @@ function row_group_slots(cols::NTuple{N, <:AbstractVector},
     # but it needs to remain reasonable compared with the size of the data frame.
     anydups = !all(allunique, refpools)
     if prod(big.(ngroupstup)) > typemax(Int) ||
-       ngroups > 2 * length(groups) ||
+       ngroups > Int64(2) * length(groups) ||
        anydups
         # In the simplest case, we can work directly with the reference codes
         newcols = (skipmissing && any(refpool -> eltype(refpool) >: Missing, refpools)) ||
+                  !(refarrays isa NTuple{<:Any, AbstractVector}) ||
                   sort ||
-                  anydups ? cols : refs
+                  anydups ? cols : refarrays
         return invoke(row_group_slots,
-                      Tuple{Tuple{Vararg{AbstractVector}}, Any, Val,
+                      Tuple{Tuple{Vararg{AbstractVector}}, Any, Any, Val,
                             Union{Vector{Int}, Nothing}, Bool, Bool},
-                      newcols, refpools, hash, groups, skipmissing, sort)
+                      newcols, refpools, refarrays, hash, groups, skipmissing, sort)
     end
 
     seen = fill(false, ngroups)
@@ -253,7 +341,7 @@ function row_group_slots(cols::NTuple{N, <:AbstractVector},
         @inbounds for i in eachindex(groups)
             local refs_i
             let i=i # Workaround for julia#15276
-                refs_i = map(c -> c[i], refs)
+                refs_i = map(c -> c[i], refarrays)
             end
             vals = map((m, r, s, fi) -> m[r-fi+1] * s, refmaps, refs_i, strides, firstinds)
             j = sum(vals) + 1
@@ -269,7 +357,7 @@ function row_group_slots(cols::NTuple{N, <:AbstractVector},
         @inbounds for i in eachindex(groups)
             local refs_i
             let i=i # Workaround for julia#15276
-                refs_i = map(refs, missinginds) do ref, missingind
+                refs_i = map(refarrays, missinginds) do ref, missingind
                     r = Int(ref[i])
                     if skipmissing
                         return r == missingind ? -1 : (r > missingind ? r-1 : r)
@@ -322,7 +410,7 @@ function compute_indices(groups::AbstractVector{<:Integer}, ngroups::Integer)
 
     # group start positions in a sorted table
     starts = Vector{Int}(undef, ngroups+1)
-    if length(starts) > 1
+    if length(starts) > 0
         starts[1] = 1
         @inbounds for i in 1:ngroups
             starts[i+1] = starts[i] + stops[i]
diff --git a/test/data.jl b/test/data.jl
index b78d2c8ef8..8cf8023b2f 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -76,15 +76,14 @@ const ≅ = isequal
     d3 = randn(N)
     d4 = randn(N)
     df7 = DataFrame([d1, d2, d3], [:d1, :d2, :d3])
-    ref_d1 = unique(d1)
 
     #test_group("groupby")
     gd = groupby(df7, :d1)
     @test length(gd) == 2
-    @test gd[1][:, :d2] ≅ d2[d1 .== ref_d1[1]]
-    @test gd[2][:, :d2] ≅ d2[d1 .== ref_d1[2]]
-    @test gd[1][:, :d3] == d3[d1 .== ref_d1[1]]
-    @test gd[2][:, :d3] == d3[d1 .== ref_d1[2]]
+    @test gd[1][:, :d2] ≅ d2[d1 .== 1]
+    @test gd[2][:, :d2] ≅ d2[d1 .== 2]
+    @test gd[1][:, :d3] == d3[d1 .== 1]
+    @test gd[2][:, :d3] == d3[d1 .== 2]
 
     g1 = groupby(df7, [:d1, :d2])
     g2 = groupby(df7, [:d2, :d1])
diff --git a/test/grouping.jl b/test/grouping.jl
index 3e1f6c33df..26d88864a7 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -166,12 +166,12 @@ end
         res4 = df[:, cols]
         res4.x2 = df.x.^2
         shcatdf = sort(hcatdf, colssym)
-        sres = sort(res, colssym)
-        sres2 = sort(res2, colssym)
-        sres3 = sort(res3, colssym)
-        sres4 = sort(res4, colssym)
 
         # groupby_checked() without groups sorting
+        sres = sort(res)
+        sres2 = sort(res2)
+        sres3 = sort(res3)
+        sres4 = sort(res4)
         gd = groupby_checked(df, cols)
         @test names(parent(gd), gd.cols) == string.(colssym)
         df_comb = combine(identity, gd)
@@ -181,16 +181,20 @@ end
         df_ref = DataFrame(gd)
         @test sort(hcat(df_ref[!, cols], df_ref[!, Not(cols)]), colssym) == shcatdf
         @test df_ref.x == df_comb.x
-        @test combine(f1, gd) == res
-        @test combine(f2, gd) == res
-        @test rename(combine(f3, gd), :x1 => :xmax) == res
-        @test combine(f4, gd) == res2
-        @test combine(f5, gd) == res2
-        @test combine(f6, gd) == res3
-        @test sort(combine(f7, gd), colssym) == sort(res4, colssym)
-        @test sort(combine(f8, gd), colssym) == sort(res4, colssym)
+        @test sort(combine(f1, gd)) == sres
+        @test sort(combine(f2, gd)) == sres
+        @test sort(rename(combine(f3, gd), :x1 => :xmax)) == sres
+        @test sort(combine(f4, gd)) == sres2
+        @test sort(combine(f5, gd)) == sres2
+        @test sort(combine(f6, gd)) == sres3
+        @test sort(combine(f7, gd)) == sres4
+        @test sort(combine(f8, gd)) == sres4
 
         # groupby_checked() with groups sorting
+        sres = sort(res, colssym)
+        sres2 = sort(res2, colssym)
+        sres3 = sort(res3, colssym)
+        sres4 = sort(res4, colssym)
         gd = groupby_checked(df, cols, sort=true)
         @test names(parent(gd), gd.cols) == string.(colssym)
         for i in 1:length(gd)
@@ -599,6 +603,17 @@ end
             @test issorted(vcat(gd...), [:Key1, :Key2])
         end
     end
+
+    @test groupby_checked(DataFrame(x=[missing]), :x).groups ==
+        groupby_checked(DataFrame(x=Union{Int, Missing}[missing]), :x).groups ==
+        groupby_checked(DataFrame(x=Union{String, Missing}[missing]), :x).groups ==
+        groupby_checked(DataFrame(x=Any[missing]), :x).groups == [1]
+    @test isempty(groupby_checked(DataFrame(x=[missing]), :x, skipmissing=true))
+    @test isempty(groupby_checked(DataFrame(x=Union{Int, Missing}[missing]),
+                                  :x, skipmissing=true))
+    @test isempty(groupby_checked(DataFrame(x=Union{String, Missing}[missing]),
+                                  :x, skipmissing=true))
+    @test isempty(groupby_checked(DataFrame(x=Any[missing]), :x, skipmissing=true))
 end
 
 @testset "grouping arrays that allow missing without missings" begin
@@ -717,6 +732,119 @@ end
     end
 end
 
+@testset "grouping on integer columns" begin
+    Random.seed!(6)
+
+    # Check optimized approach based on refpool method (narrow range of integer values)
+    for sm in (false, true),
+        S in (Int, Float64),
+        T in (Int, Float64),
+        df in (DataFrame(x=rand(1:10, 1000),
+                         y=rand(-3:10, 1000), z=rand(1000)),
+               DataFrame(x=rand([1:10; missing], 1000),
+                         y=rand([1:10; missing], 1000), z=rand(1000)),
+               DataFrame(x=rand([1:10; missing], 1000),
+                         y=rand(-3:10, 1000), z=rand(1000)))
+        df.x = convert.(Union{S, Missing}, df.x)
+        df.y = convert.(Union{T, Missing}, df.y)
+        df.x2 = passmissing(string).(df.x)
+        df.y2 = passmissing(string).(df.y)
+        gd = groupby_checked(df, :x, skipmissing=sm)
+        @test issorted(combine(gd, :x)) # Test that optimized method is used
+        @test isequal_unordered(gd, [groupby_checked(df, :x2, skipmissing=sm)...])
+        gd = groupby_checked(df, [:x, :y], skipmissing=sm)
+        @test issorted(combine(gd, :x, :y)) # Test that optimized method is used
+        @test isequal_unordered(gd, [groupby_checked(df, [:x2, :y2], skipmissing=sm)...])
+    end
+    for sm in (false, true),
+        v in (typemin(Int), typemax(Int) - 11),
+        df in (DataFrame(x=rand((1:10) .+ v, 1000),
+                         y=rand(-3:10, 1000), z=rand(1000)),
+               DataFrame(x=rand([1:10; missing] .+ v, 1000),
+                         y=rand([1:10; missing], 1000), z=rand(1000)),
+               DataFrame(x=rand([1:10; missing] .+ v, 1000),
+                         y=rand(-3:10, 1000), z=rand(1000)))
+        df.x = allowmissing(df.x)
+        df.y = allowmissing(df.y)
+        df.x2 = passmissing(string).(df.x)
+        df.y2 = passmissing(string).(df.y)
+        gd = groupby_checked(df, :x, skipmissing=sm)
+        @test issorted(combine(gd, :x)) # Test that optimized method is used
+        @test isequal_unordered(gd, [groupby_checked(df, :x2, skipmissing=sm)...])
+        gd = groupby_checked(df, [:x, :y], skipmissing=sm)
+        @test issorted(combine(gd, :x, :y)) # Test that optimized method is used
+        @test isequal_unordered(gd, [groupby_checked(df, [:x2, :y2], skipmissing=sm)...])
+    end
+
+    # Check fallback to hash table method when range is too wide for the number of rows
+    for sm in (false, true),
+        S in (Int, Float64),
+        T in (Int, Float64),
+        df in (DataFrame(x=rand(1:100_000, 100),
+                         y=rand(-50:110_000, 100), z=rand(100)),
+               DataFrame(x=rand([1:100_000; missing], 100),
+                         y=rand([-50:110_000; missing], 100), z=rand(100)),
+               DataFrame(x=rand([1:100_000; missing], 100),
+                         y=rand(-50:110_000, 100), z=rand(100)))
+        df.x = convert.(Union{S, Missing}, df.x)
+        df.y = convert.(Union{T, Missing}, df.y)
+        df.x2 = passmissing(string).(df.x)
+        df.y2 = passmissing(string).(df.y)
+        gd = groupby_checked(df, :x, skipmissing=sm)
+        @test !issorted(combine(gd, :x)) # Test that optimized method is not used
+        @test isequal_unordered(gd, [groupby_checked(df, :x2, skipmissing=sm)...])
+        gd = groupby_checked(df, [:x, :y], skipmissing=sm)
+        @test !issorted(combine(gd, :x, :y)) # Test that optimized method is not used
+        @test isequal_unordered(gd, [groupby_checked(df, [:x2, :y2], skipmissing=sm)...])
+    end
+
+    @test isempty(groupby_checked(DataFrame(x=Int[]), :x))
+    @test isempty(groupby_checked(DataFrame(x=Union{}[]), :x))
+    @test isempty(groupby_checked(DataFrame(x=Union{Int, Missing}[]), :x))
+    @test groupby_checked(DataFrame(x=Union{Int, Missing}[missing]), :x) ≅
+        groupby_checked(DataFrame(x=Union{String, Missing}[missing]), :x) ≅
+        groupby_checked(DataFrame(x=[missing]), :x)
+    @test isempty(groupby_checked(DataFrame(x=Union{Int, Missing}[missing]),
+                                  skipmissing=true, :x))
+    @test isempty(groupby_checked(DataFrame(x=[missing]), skipmissing=true, :x))
+
+    # Check that values at or beyond the limits of Int do not trigger overflow
+    groups = rand(1:3, 100)
+    for i in (0, 1, 2, 10), j in (0, 1, 2, 10),
+        v in (big(0), missing)
+        @test groupby_checked(DataFrame(x=[big(typemax(Int)) + i, v,
+                                           big(typemin(Int)) - j][groups]), :x) ≅
+            groupby_checked(DataFrame(x=Any[big(typemax(Int)) + i, v,
+                                            big(typemin(Int)) - j][groups]), :x)
+    end
+    # Corner cases where overflow could happen due to additional missing values group
+    for i in (0, 1, 2), j in (0, 1, 2),
+        v in (0, missing)
+        @test groupby_checked(DataFrame(x=[typemax(Int) - i, v,
+                                           typemin(Int) + j][groups]), :x) ≅
+            groupby_checked(DataFrame(x=Any[typemax(Int) - i, v,
+                                            typemin(Int) + j][groups]), :x)
+        @test groupby_checked(DataFrame(x=[typemax(Int) ÷ 2 - i, v,
+                                           typemin(Int) ÷ 2 - j][groups]), :x) ≅
+            groupby_checked(DataFrame(x=Any[typemax(Int) ÷ 2 - i, v,
+                                            typemin(Int) ÷ 2 - j][groups]), :x)
+    end
+    for i in (0, 1, -1, 2, -2, 10, -10)
+        @test groupby_checked(DataFrame(x=fill(big(typemax(Int)) + i, 100)), :x).groups ==
+            fill(1, 100)
+    end
+
+    # Check special case of Bool
+    for sm in (false, true),
+        df in (DataFrame(x=rand(Bool, 1000), y=rand(1000)),
+               DataFrame(x=rand([true, false, missing], 1000), y=rand(1000)))
+        df.x2 = passmissing(string).(df.x)
+        gd = groupby_checked(df, :x, skipmissing=sm)
+        @test issorted(combine(gd, :x)) # Test that optimized method is used
+        @test isequal_unordered(gd, [groupby_checked(df, :x2, skipmissing=sm)...])
+    end
+end
+
 @testset "grouping with three keys" begin
     # We need many rows so that optimized CategoricalArray method is used
     xv = rand(["A", "B", missing], 100)
@@ -2066,13 +2194,12 @@ end
 end
 
 @testset "correct dropping of groups" begin
-    df = DataFrame(g = 10:-1:1)
+    df = DataFrame(g = 1:10)
     gdf = groupby_checked(df, :g)
     sgdf = groupby_checked(df, :g, sort=true)
     for keep in [[3, 2, 1], [5, 3, 1], [9], Int[]]
-        @test combine(gdf, :g => first => :keep, :g => x -> x[1] in keep ? x : Int[]) ==
-              DataFrame(g=keep, keep=keep, g_function=keep)
-        @test combine(sgdf, :g => first => :keep, :g => x -> x[1] in keep ? x : Int[]) ==
+        @test sort(combine(gdf, :g => first => :keep, :g => x -> x[1] in keep ? x : Int[])) ==
+              combine(sgdf, :g => first => :keep, :g => x -> x[1] in keep ? x : Int[]) ==
               sort(DataFrame(g=keep, keep=keep, g_function=keep))
     end
 end
@@ -2131,69 +2258,63 @@ end
         if !(df.g isa CategoricalVector)
             gdf = groupby_checked(df, :g, sort=false, skipmissing=false)
 
+            @test sort(combine(gdf, :x => sum, keepkeys=true, ungroup=true)) ≅
+                  DataFrame(g = [1, 3, missing], x_sum = [5, 1, 4])
             @test combine(gdf, :x => sum, keepkeys=false, ungroup=true) ==
-                  DataFrame(x_sum = [1, 5, 4])
+                select(combine(gdf, :x => sum, keepkeys=true, ungroup=true), :x_sum)
             @test_throws ArgumentError combine(gdf, :x => sum, keepkeys=false, ungroup=false)
-            @test combine(gdf, :x => sum, keepkeys=true, ungroup=true) ≅
-                  DataFrame(g = [3, 1, missing], x_sum = [1, 5, 4])
             gdf2 = validate_gdf(combine(gdf, :x => sum, keepkeys=true, ungroup=false))
             @test gdf2 isa GroupedDataFrame{DataFrame}
-            @test gdf2.groups == 1:3
-            @test DataFrame(gdf2) ≅ DataFrame(g = [3, 1, missing], x_sum = [1, 5, 4])
-            @test DataFrame(gdf2, keepkeys=false) == DataFrame(x_sum = [1, 5, 4])
-
-            @test combine(gdf, :x => sum, :g, keepkeys=false, ungroup=true) ≅
-                  DataFrame(x_sum = [1, 5, 5, 4], g = [3, 1, 1, missing])
-            @test combine(gdf, :x => sum, :g, keepkeys=true, ungroup=true) ≅
-                  DataFrame(g = [3, 1, 1, missing], x_sum = [1, 5, 5, 4])
+            @test sort(DataFrame(gdf2)) ≅ DataFrame(g = [1, 3, missing], x_sum = [5, 1, 4])
+            @test DataFrame(gdf2, keepkeys=false) == select(DataFrame(gdf2), :x_sum)
+
+            @test sort(combine(gdf, :x => sum, :g, keepkeys=false, ungroup=true)) ≅
+                  DataFrame(x_sum = [1, 4, 5, 5], g = [3, missing, 1, 1])
+            @test sort(combine(gdf, :x => sum, :g, keepkeys=true, ungroup=true)) ≅
+                  DataFrame(g = [1, 1, 3, missing], x_sum = [5, 5, 1, 4])
             gdf2 = validate_gdf(combine(gdf, :x => sum, :g, keepkeys=true, ungroup=false))
             @test gdf2 isa GroupedDataFrame{DataFrame}
-            @test gdf2.groups == [1, 2, 2, 3]
-            @test DataFrame(gdf2) ≅ DataFrame(g = [3, 1, 1, missing], x_sum = [1, 5, 5, 4])
-            @test DataFrame(gdf2, keepkeys=false) ≅ DataFrame(x_sum = [1, 5, 5, 4])
+            @test sort(DataFrame(gdf2)) ≅ DataFrame(g = [1, 1, 3, missing], x_sum = [5, 5, 1, 4])
+            @test DataFrame(gdf2, keepkeys=false) ≅ select(DataFrame(gdf2), :x_sum)
 
+            @test sort(combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=true, ungroup=true)) ≅
+                  DataFrame(g = [1, 3, missing], x_sum = [5, 1, 4])
             @test combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=false, ungroup=true) ==
-                  DataFrame(x_sum = [1, 5, 4])
-            @test combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=true, ungroup=true) ≅
-                  DataFrame(g = [3, 1, missing], x_sum = [1, 5, 4])
+                select(combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=true, ungroup=true), :x_sum)
             gdf2 = validate_gdf(combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=true, ungroup=false))
             @test gdf2 isa GroupedDataFrame{DataFrame}
-            @test gdf2.groups == 1:3
-            @test DataFrame(gdf2) ≅ DataFrame(g = [3, 1, missing], x_sum = [1, 5, 4])
-            @test DataFrame(gdf2, keepkeys=false) ≅ DataFrame(x_sum = [1, 5, 4])
+            @test sort(DataFrame(gdf2)) ≅ DataFrame(g = [1, 3, missing], x_sum = [5, 1, 4])
+            @test DataFrame(gdf2, keepkeys=false) ≅ select(DataFrame(gdf2), :x_sum)
 
             gdf = groupby_checked(df, :g, sort=false, skipmissing=true)
 
-            @test combine(gdf, :x => sum, keepkeys=false, ungroup=true) ==
+            @test sort(combine(gdf, :x => sum, keepkeys=false, ungroup=true)) ≅
                   DataFrame(x_sum = [1, 5])
             @test_throws ArgumentError combine(gdf, :x => sum, keepkeys=false, ungroup=false)
-            @test combine(gdf, :x => sum, keepkeys=true, ungroup=true) ≅
-                  DataFrame(g = [3, 1], x_sum = [1, 5])
+            @test sort(combine(gdf, :x => sum, keepkeys=true, ungroup=true)) ≅
+                  DataFrame(g = [1, 3], x_sum = [5, 1])
             gdf2 = validate_gdf(combine(gdf, :x => sum, keepkeys=true, ungroup=false))
             @test gdf2 isa GroupedDataFrame{DataFrame}
-            @test gdf2.groups == 1:2
-            @test DataFrame(gdf2) ≅ DataFrame(g = [3, 1], x_sum = [1, 5])
-            @test DataFrame(gdf2, keepkeys=false) ≅ DataFrame(x_sum = [1, 5])
+            @test sort(DataFrame(gdf2)) ≅ DataFrame(g = [1, 3], x_sum = [5, 1])
+            @test DataFrame(gdf2, keepkeys=false) ≅ select(DataFrame(gdf2), :x_sum)
 
-            @test combine(gdf, :x => sum, :g, keepkeys=false, ungroup=true) ≅
+            @test sort(combine(gdf, :x => sum, :g, keepkeys=false, ungroup=true)) ≅
                   DataFrame(x_sum = [1, 5, 5], g = [3, 1, 1])
-            @test combine(gdf, :x => sum, :g, keepkeys=true, ungroup=true) ≅
-                  DataFrame(g = [3, 1, 1], x_sum = [1, 5, 5])
+            @test sort(combine(gdf, :x => sum, :g, keepkeys=true, ungroup=true)) ≅
+                  DataFrame(g = [1, 1, 3], x_sum = [5, 5, 1])
             gdf2 = validate_gdf(combine(gdf, :x => sum, :g, keepkeys=true, ungroup=false))
             @test gdf2 isa GroupedDataFrame{DataFrame}
-            @test gdf2.groups == [1, 2, 2]
-            @test DataFrame(gdf2) ≅ DataFrame(g = [3, 1, 1], x_sum = [1, 5, 5])
-            @test DataFrame(gdf2, keepkeys=false) ≅ DataFrame(x_sum = [1, 5, 5])
+            @test sort(DataFrame(gdf2)) ≅ DataFrame(g = [1, 1, 3], x_sum = [5, 5, 1])
+            @test DataFrame(gdf2, keepkeys=false) ≅ select(DataFrame(gdf2), :x_sum)
 
-            @test combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=false, ungroup=true) ==
-                  DataFrame(x_sum = [1, 5])
-            @test combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=true, ungroup=true) ≅
-                  DataFrame(g = [3, 1], x_sum = [1, 5])
+            @test sort(combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=true, ungroup=true)) ≅
+                  DataFrame(g = [1, 3], x_sum = [5, 1])
+            @test combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=false, ungroup=true) ≅
+                select(combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=true, ungroup=true), :x_sum)
             gdf2 = validate_gdf(combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=true, ungroup=false))
             @test gdf2 isa GroupedDataFrame{DataFrame}
-            @test gdf2.groups == 1:2
-            @test DataFrame(gdf2) ≅ DataFrame(g = [3, 1], x_sum = [1, 5])
-            @test DataFrame(gdf2, keepkeys=false) ≅ DataFrame(x_sum = [1, 5])
+            @test sort(DataFrame(gdf2)) ≅ DataFrame(g = [1, 3], x_sum = [5, 1])
+            @test DataFrame(gdf2, keepkeys=false) ≅ select(DataFrame(gdf2), :x_sum)
         end
 
         gdf = groupby_checked(df, :g, sort=true, skipmissing=false)
@@ -2205,7 +2326,6 @@ end
               DataFrame(g = [1, 3, missing], x_sum = [5, 1, 4])
         gdf2 = validate_gdf(combine(gdf, :x => sum, keepkeys=true, ungroup=false))
         @test gdf2 isa GroupedDataFrame{DataFrame}
-        @test gdf2.groups == 1:3
         @test DataFrame(gdf2) ≅ DataFrame(g = [1, 3, missing], x_sum = [5, 1, 4])
         @test DataFrame(gdf2, keepkeys=false) ≅ DataFrame(x_sum = [5, 1, 4])
 
@@ -2215,7 +2335,6 @@ end
               DataFrame(g = [1, 1, 3, missing], x_sum = [5, 5, 1, 4])
         gdf2 = validate_gdf(combine(gdf, :x => sum, :g, keepkeys=true, ungroup=false))
         @test gdf2 isa GroupedDataFrame{DataFrame}
-        @test gdf2.groups == [1, 1, 2, 3]
         @test DataFrame(gdf2) ≅ DataFrame(g = [1, 1, 3, missing], x_sum = [5, 5, 1, 4])
         @test DataFrame(gdf2, keepkeys=false) ≅ DataFrame(x_sum = [5, 5, 1, 4])
 
@@ -2225,7 +2344,6 @@ end
               DataFrame(g = [1, 3, missing], x_sum = [5, 1, 4])
         gdf2 = validate_gdf(combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=true, ungroup=false))
         @test gdf2 isa GroupedDataFrame{DataFrame}
-        @test gdf2.groups == 1:3
         @test DataFrame(gdf2) ≅ DataFrame(g = [1, 3, missing], x_sum = [5, 1, 4])
         @test DataFrame(gdf2, keepkeys=false) ≅ DataFrame(x_sum = [5, 1, 4])
 
@@ -2238,7 +2356,6 @@ end
               DataFrame(g = [1, 3], x_sum = [5, 1])
         gdf2 = validate_gdf(combine(gdf, :x => sum, keepkeys=true, ungroup=false))
         @test gdf2 isa GroupedDataFrame{DataFrame}
-        @test gdf2.groups == 1:2
         @test DataFrame(gdf2) ≅ DataFrame(g = [1, 3], x_sum = [5, 1])
         @test DataFrame(gdf2, keepkeys=false) ≅ DataFrame(x_sum = [5, 1])
 
@@ -2248,7 +2365,6 @@ end
               DataFrame(g = [1, 1, 3], x_sum = [5, 5, 1])
         gdf2 = validate_gdf(combine(gdf, :x => sum, :g, keepkeys=true, ungroup=false))
         @test gdf2 isa GroupedDataFrame{DataFrame}
-        @test gdf2.groups == [1, 1, 2]
         @test DataFrame(gdf2) ≅ DataFrame(g = [1, 1, 3], x_sum = [5, 5, 1])
         @test DataFrame(gdf2, keepkeys=false) ≅ DataFrame(x_sum = [5, 5, 1])
 
@@ -2258,7 +2374,6 @@ end
               DataFrame(g = [1, 3], x_sum = [5, 1])
         gdf2 = validate_gdf(combine(x -> (x_sum = sum(x.x),), gdf, keepkeys=true, ungroup=false))
         @test gdf2 isa GroupedDataFrame{DataFrame}
-        @test gdf2.groups == 1:2
         @test DataFrame(gdf2) ≅ DataFrame(g = [1, 3], x_sum = [5, 1])
         @test DataFrame(gdf2, keepkeys=false) ≅ DataFrame(x_sum = [5, 1])
     end
@@ -2356,8 +2471,7 @@ end
         @test res1.x_mean + res1.x_function ≈ df.x
 
         res2 = combine(gdf, :x => mean, :x => x -> x .- mean(x), :id)
-        @test unique(res2.g) ==
-              (dosort || df.g isa CategoricalVector ? sort! : identity)(unique(df.g))
+        @test unique(res2.g) == sort(unique(df.g))
         for i in unique(res2.g)
             @test issorted(filter(:g => x -> x == i, res2).id)
         end
@@ -3084,11 +3198,12 @@ end
     @test transform(df, :x => x -> 2x) == transform(gdf, :x => x -> 2x)
     @test transform(df, identity) == transform(gdf, identity)
     @test transform(df, x -> (a=x.x, b=x.x)) == transform(gdf, x -> (a=x.x, b=x.x))
-    @test combine(gdf, :x => x -> 2x) ==
-          DataFrame(id=[1, 1, 3, 3, 2, 2], x_function=[6, 10, 2, 8, 4, 12])
-    @test combine(gdf, identity) == DataFrame(id=[1, 1, 3, 3, 2, 2], x=[3, 5, 1, 4, 2, 6])
-    @test combine(gdf, x -> (a=x.x, b=x.x)) ==
-          DataFrame(id=[1, 1, 3, 3, 2, 2], a=[3, 5, 1, 4, 2, 6], b=[3, 5, 1, 4, 2, 6])
+    @test sort(combine(gdf, :x => x -> 2x)) ==
+          DataFrame(id=[1, 1, 2, 2, 3, 3], x_function=[6, 10, 4, 12, 2, 8])
+    @test sort(combine(gdf, identity)) ==
+          DataFrame(id=[1, 1, 2, 2, 3, 3], x=[3, 5, 2, 6, 1, 4])
+    @test sort(combine(gdf, x -> (a=x.x, b=x.x))) ==
+          DataFrame(id=[1, 1, 2, 2, 3, 3], a=[3, 5, 2, 6, 1, 4], b=[3, 5, 2, 6, 1, 4])
 end
 
 @testset "basic tests of advanced rules with multicolumn output" begin