From 98c74800c772eacb4b301544edd97e90f01e7257 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Thu, 26 Dec 2019 15:54:52 +0100
Subject: [PATCH 1/8] allow containers with no columns in combine

---
 src/groupeddataframe/splitapplycombine.jl | 51 +++++++++++++++++------
 test/grouping.jl                          | 47 +++++++++++++++++----
 2 files changed, 78 insertions(+), 20 deletions(-)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 72aaa348b6..d03b1cadd3 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -201,6 +201,7 @@ which is determined using the following rules:
   for each group as the length of the returned vector for that group.
 - A data frame, a named tuple of vectors or a matrix gives the same additional columns
   and as many rows for each group as the rows returned for that group.
+  In this case returning a table with no columns drops group.
 
 `f` must always return the same kind of object (as defined in the above list) for
 all groups, and if a named tuple or data frame, with the same fields or columns.
@@ -340,6 +341,7 @@ which is determined using the following rules:
   for each group as the length of the returned vector for that group.
 - A data frame, a named tuple of vectors or a matrix gives a `DataFrame` with the same
   additional columns and as many rows for each group as the rows returned for that group.
+  In this case returning a table with no columns drops group.
 
 `f` must always return the same kind of object (as defined in the above list) for
 all groups, and if a named tuple or data frame, with the same fields or columns.
@@ -790,7 +792,7 @@ function _combine(f::Any, gd::GroupedDataFrame)
          !isa(firstres, Union{AbstractDataFrame, NamedTuple, DataFrameRow, AbstractMatrix}))
          nms = [Symbol(names(gd.parent)[index(gd.parent)[first(f)]], '_', funname(fun))]
     end
-    valscat = DataFrame(collect(AbstractVector, outcols), collect(Symbol, nms))
+    valscat = DataFrame(collect(AbstractVector, outcols), nms)
     return idx, valscat
 end
 
@@ -818,14 +820,17 @@ function _combine_with_first(first::Union{NamedTuple, DataFrameRow, AbstractData
     let eltys=eltys, n=n # Workaround for julia#15276
         initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], n), _ncol(first))
     end
-    outcols = _combine_with_first!(first, initialcols, idx, 1, 1, f, gd, incols,
-                                   tuple(propertynames(first)...))
-    idx, outcols, propertynames(first)
+    targetcolnames = collect(Symbol, propertynames(first))
+    outcols = first isa Union{AbstractDataFrame,
+                              NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}} ?
+              _combine_with_first!(first, initialcols, idx, 1, 1, f, gd, incols, targetcolnames) :
+              _combine_with_first_row!(first, initialcols, idx, 1, 1, f, gd, incols, targetcolnames)
+    idx, outcols, targetcolnames
 end
 
 function fill_row!(row, outcols::NTuple{N, AbstractVector},
                    i::Integer, colstart::Integer,
-                   colnames::NTuple{N, Symbol}) where N
+                   colnames::Vector{Symbol}) where N
     if !isa(row, Union{NamedTuple, DataFrameRow})
         throw(ArgumentError("return value must not change its kind " *
                             "(single row or variable number of rows) across groups"))
@@ -856,12 +861,12 @@ function fill_row!(row, outcols::NTuple{N, AbstractVector},
     return nothing
 end
 
-function _combine_with_first!(first::Union{NamedTuple, DataFrameRow},
+function _combine_with_first_row!(first::Union{NamedTuple, DataFrameRow},
                               outcols::NTuple{N, AbstractVector},
                               idx::Vector{Int}, rowstart::Integer, colstart::Integer,
                               f::Any, gd::GroupedDataFrame,
                               incols::Union{Nothing, AbstractVector, NamedTuple},
-                              colnames::NTuple{N, Symbol}) where N
+                              colnames::Vector{Symbol}) where N
     len = length(gd)
     gdidx = gd.idx
     starts = gd.starts
@@ -889,7 +894,7 @@ function _combine_with_first!(first::Union{NamedTuple, DataFrameRow},
                     end
                 end
             end
-            return _combine_with_first!(row, newcols, idx, i, j, f, gd, incols, colnames)
+            return _combine_with_first_row!(row, newcols, idx, i, j, f, gd, incols, colnames)
         end
         idx[i] = gdidx[starts[i]]
     end
@@ -911,7 +916,7 @@ else
 end
 
 function append_rows!(rows, outcols::NTuple{N, AbstractVector},
-                      colstart::Integer, colnames::NTuple{N, Symbol}) where N
+                      colstart::Integer, colnames::Vector{Symbol}) where N
     if !isa(rows, Union{AbstractDataFrame, NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}})
         throw(ArgumentError("return value must not change its kind " *
                             "(single row or variable number of rows) across groups"))
@@ -944,18 +949,37 @@ function _combine_with_first!(first::Union{AbstractDataFrame,
                               idx::Vector{Int}, rowstart::Integer, colstart::Integer,
                               f::Any, gd::GroupedDataFrame,
                               incols::Union{Nothing, AbstractVector, NamedTuple},
-                              colnames::NTuple{N, Symbol}) where N
+                              colnames::Vector{Symbol}) where N
     len = length(gd)
     gdidx = gd.idx
     starts = gd.starts
     ends = gd.ends
     # Handle first group
-    j = append_rows!(first, outcols, colstart, colnames)
-    @assert j === nothing # eltype is guaranteed to match
-    append!(idx, Iterators.repeated(gdidx[starts[rowstart]], _nrow(first)))
+
+    @assert isempty(colnames) == (_ncol(first) == 0) == isempty(outcols)
+    if !isempty(colnames)
+        j = append_rows!(first, outcols, colstart, colnames)
+        @assert j === nothing # eltype is guaranteed to match
+        append!(idx, Iterators.repeated(gdidx[starts[rowstart]], _nrow(first)))
+    end
     # Handle remaining groups
     @inbounds for i in rowstart+1:len
         rows = wrap(do_call(f, gdidx, starts, ends, gd, incols, i))
+        if !(rows isa Union{AbstractDataFrame, NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}})
+            throw(ArgumentError("return value must not change its kind " *
+                                "(single row or variable number of rows) across groups"))
+        end
+        _ncol(rows) == 0 && continue
+        if isempty(colnames)
+            append!(colnames, collect(Symbol, propertynames(rows)))
+            if rows isa AbstractDataFrame
+                eltys = eltype.(eachcol(rows))
+            else
+                eltys = map(eltype, rows)
+            end
+            initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], 0), _ncol(rows))
+            return _combine_with_first!(rows, initialcols, idx, i, 1, f, gd, incols, colnames)
+        end
         j = append_rows!(rows, outcols, 1, colnames)
         if j !== nothing # Need to widen column type
             local newcols
@@ -1021,6 +1045,7 @@ which is determined using the following rules:
   for each group as the length of the returned vector for that group.
 - A data frame, a named tuple of vectors or a matrix gives a `DataFrame` with the same
   additional columns and as many rows for each group as the rows returned for that group.
+  In this case returning a table with no columns drops group.
 
 `f` must always return the same kind of object (as defined in the above list) for
 all groups, and if a named tuple or data frame, with the same fields or columns.
diff --git a/test/grouping.jl b/test/grouping.jl
index 4e7a597883..6ed6686fae 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -421,8 +421,6 @@ end
     @test_throws ArgumentError by(d -> d.x == [1] ? (x1=1) : NamedTuple(), df, :x)
     @test_throws ArgumentError by(d -> d.x == [1] ? 1 : DataFrame(x1=1), df, :x)
     @test_throws ArgumentError by(d -> d.x == [1] ? DataFrame(x1=1) : 1, df, :x)
-    @test_throws ArgumentError by(d -> d.x == [1] ? DataFrame() : DataFrame(x1=1), df, :x)
-    @test_throws ArgumentError by(d -> d.x == [1] ? DataFrame(x1=1) : DataFrame(), df, :x)
     @test_throws ArgumentError by(d -> d.x == [1] ? (x1=1) : (x1=[1]), df, :x)
     @test_throws ArgumentError by(d -> d.x == [1] ? (x1=[1]) : (x1=1), df, :x)
     @test_throws ArgumentError by(d -> d.x == [1] ? 1 : [1], df, :x)
@@ -643,7 +641,7 @@ end
         by(df, :a, (:c => sum,)) ==
         by(df, :a, [:c => sum]) ==
         by(df, :a, c_sum = :c => sum) ==
-        by(d -> (c_sum=sum(d.c),), df, :a)
+        by(d -> (c_sum=sum(d.c),), df, :a) ==
         by(df, :a, d -> (c_sum=sum(d.c),))
 
     @test by(:c => vexp, df, :a) ==
@@ -651,21 +649,21 @@ end
         by(df, :a, (:c => vexp,)) ==
         by(df, :a, [:c => vexp]) ==
         by(df, :a, c_function = :c => vexp) ==
-        by(d -> (c_function=vexp(d.c),), df, :a)
+        by(d -> (c_function=vexp(d.c),), df, :a) ==
         by(df, :a, d -> (c_function=vexp(d.c),))
 
     @test by(df, :a, :b => sum, :c => sum) ==
         by(df, :a, (:b => sum, :c => sum,)) ==
         by(df, :a, [:b => sum, :c => sum]) ==
         by(df, :a, b_sum = :b => sum, c_sum = :c => sum) ==
-        by(d -> (b_sum=sum(d.b), c_sum=sum(d.c)), df, :a)
+        by(d -> (b_sum=sum(d.b), c_sum=sum(d.c)), df, :a) ==
         by(df, :a, d -> (b_sum=sum(d.b), c_sum=sum(d.c)))
 
     @test by(df, :a, :b => vexp, :c => identity) ==
         by(df, :a, (:b => vexp, :c => identity,)) ==
         by(df, :a, [:b => vexp, :c => identity]) ==
         by(df, :a, b_function = :b => vexp, c_identity = :c => identity) ==
-        by(d -> (b_function=vexp(d.b), c_identity=identity(d.c)), df, :a)
+        by(d -> (b_function=vexp(d.b), c_identity=identity(d.c)), df, :a) ==
         by(df, :a, d -> (b_function=vexp(d.b), c_identity=identity(d.c)))
 
     gd = groupby(df, :a)
@@ -689,7 +687,7 @@ end
         combine(gd, c_function = :c => vexp) ==
         combine(:c => x -> (c_function=exp.(x),), gd) ==
         combine(gd, :c => x -> (c_function=exp.(x),)) ==
-        combine(d -> (c_function=exp.(d.c),), gd)
+        combine(d -> (c_function=exp.(d.c),), gd) ==
         combine(gd, d -> (c_function=exp.(d.c),))
 
     @test combine(gd, :b => sum, :c => sum) ==
@@ -1458,4 +1456,39 @@ end
     @test_throws KeyError gd[(a=:A, b=:X)]
 end
 
+@testset "Allow returning DataFrame() or NamedTuple() to drop group" begin
+    for i in typemin(UInt8):typemax(UInt8), er in (DataFrame(), NamedTuple(), rand(0,0), rand(5,0))
+        df = DataFrame(a = 1:8, x1 = Bool.(collect(bitstring(UInt8(i))) .- '0'))
+        res = by(sdf -> sdf.x1[1] ? DataFrame(x1=[true]) : er, df, :a)
+        @test res == DataFrame(map(sdf -> sdf.x1[1] ? DataFrame(x1=[true]) : er, groupby(df, :a)))
+        if nrow(res) == 0
+            @test res == DataFrame(a=Int[])
+        else
+            @test res == df[df.x1, :]
+        end
+
+        res = by(sdf -> sdf.x1[1] ? (x1=[true],) : er, df, :a)
+        if typemin(UInt8) < i < typemax(UInt8)
+            @test_throws ArgumentError by(sdf -> sdf.x1[1] ? (x1=true,) : er, df, :a)
+        end
+        @test res == DataFrame(map(sdf -> sdf.x1[1] ? (x1=[true],) : er, groupby(df, :a)))
+        if nrow(res) == 0
+            @test res == DataFrame(a=Int[])
+        else
+            @test res == df[df.x1, :]
+        end
+
+        res = by(sdf -> sdf.x1[1] ? hcat(true) : er, df, :a)
+        if typemin(UInt8) < i < typemax(UInt8)
+            @test_throws ArgumentError by(sdf -> sdf.x1[1] ? true : er, df, :a)
+        end
+        @test res == DataFrame(map(sdf -> sdf.x1[1] ? hcat(true) : er, groupby(df, :a)))
+        if nrow(res) == 0
+            @test res == DataFrame(a=Int[])
+        else
+            @test res == df[df.x1, :]
+        end
+    end
+end
+
 end # module

From a2bcaa56335e83fb62e043ffd3beab9467952e0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Thu, 26 Dec 2019 21:24:30 +0100
Subject: [PATCH 2/8] add SubDataFrame to tests

---
 test/grouping.jl | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/grouping.jl b/test/grouping.jl
index 6ed6686fae..f6b28416a1 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -1457,7 +1457,10 @@ end
 end
 
 @testset "Allow returning DataFrame() or NamedTuple() to drop group" begin
-    for i in typemin(UInt8):typemax(UInt8), er in (DataFrame(), NamedTuple(), rand(0,0), rand(5,0))
+    for i in typemin(UInt8):typemax(UInt8),
+        er in (DataFrame(), view(DataFrame(ones(2,2)), 2:1, 2:1),
+               view(DataFrame(ones(2,2)), 1:2, 2:1),
+               NamedTuple(), rand(0,0), rand(5,0))
         df = DataFrame(a = 1:8, x1 = Bool.(collect(bitstring(UInt8(i))) .- '0'))
         res = by(sdf -> sdf.x1[1] ? DataFrame(x1=[true]) : er, df, :a)
         @test res == DataFrame(map(sdf -> sdf.x1[1] ? DataFrame(x1=[true]) : er, groupby(df, :a)))

From 964520ad7c7d40667b0b0a6aedb72a8c687becdd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Thu, 26 Dec 2019 22:05:28 +0100
Subject: [PATCH 3/8] retain colnames as Tuple

---
 src/groupeddataframe/splitapplycombine.jl | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index d03b1cadd3..e4b1349169 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -820,17 +820,17 @@ function _combine_with_first(first::Union{NamedTuple, DataFrameRow, AbstractData
     let eltys=eltys, n=n # Workaround for julia#15276
         initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], n), _ncol(first))
     end
-    targetcolnames = collect(Symbol, propertynames(first))
-    outcols = first isa Union{AbstractDataFrame,
+    targetcolnames = tuple(propertynames(first)...)
+    outcols, finalcolnames = first isa Union{AbstractDataFrame,
                               NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}} ?
               _combine_with_first!(first, initialcols, idx, 1, 1, f, gd, incols, targetcolnames) :
               _combine_with_first_row!(first, initialcols, idx, 1, 1, f, gd, incols, targetcolnames)
-    idx, outcols, targetcolnames
+    idx, outcols, collect(Symbol, finalcolnames)
 end
 
 function fill_row!(row, outcols::NTuple{N, AbstractVector},
                    i::Integer, colstart::Integer,
-                   colnames::Vector{Symbol}) where N
+                   colnames::NTuple{N, Symbol}) where N
     if !isa(row, Union{NamedTuple, DataFrameRow})
         throw(ArgumentError("return value must not change its kind " *
                             "(single row or variable number of rows) across groups"))
@@ -866,7 +866,7 @@ function _combine_with_first_row!(first::Union{NamedTuple, DataFrameRow},
                               idx::Vector{Int}, rowstart::Integer, colstart::Integer,
                               f::Any, gd::GroupedDataFrame,
                               incols::Union{Nothing, AbstractVector, NamedTuple},
-                              colnames::Vector{Symbol}) where N
+                              colnames::NTuple{N, Symbol}) where N
     len = length(gd)
     gdidx = gd.idx
     starts = gd.starts
@@ -898,7 +898,7 @@ function _combine_with_first_row!(first::Union{NamedTuple, DataFrameRow},
         end
         idx[i] = gdidx[starts[i]]
     end
-    outcols
+    return outcols, colnames
 end
 
 # This needs to be in a separate function
@@ -916,7 +916,7 @@ else
 end
 
 function append_rows!(rows, outcols::NTuple{N, AbstractVector},
-                      colstart::Integer, colnames::Vector{Symbol}) where N
+                      colstart::Integer, colnames::NTuple{N, Symbol}) where N
     if !isa(rows, Union{AbstractDataFrame, NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}})
         throw(ArgumentError("return value must not change its kind " *
                             "(single row or variable number of rows) across groups"))
@@ -949,7 +949,7 @@ function _combine_with_first!(first::Union{AbstractDataFrame,
                               idx::Vector{Int}, rowstart::Integer, colstart::Integer,
                               f::Any, gd::GroupedDataFrame,
                               incols::Union{Nothing, AbstractVector, NamedTuple},
-                              colnames::Vector{Symbol}) where N
+                              colnames::NTuple{N, Symbol}) where N
     len = length(gd)
     gdidx = gd.idx
     starts = gd.starts
@@ -971,14 +971,14 @@ function _combine_with_first!(first::Union{AbstractDataFrame,
         end
         _ncol(rows) == 0 && continue
         if isempty(colnames)
-            append!(colnames, collect(Symbol, propertynames(rows)))
+            newcolnames = tuple(propertynames(rows)...)
             if rows isa AbstractDataFrame
                 eltys = eltype.(eachcol(rows))
             else
                 eltys = map(eltype, rows)
             end
             initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], 0), _ncol(rows))
-            return _combine_with_first!(rows, initialcols, idx, i, 1, f, gd, incols, colnames)
+            return _combine_with_first!(rows, initialcols, idx, i, 1, f, gd, incols, newcolnames)
         end
         j = append_rows!(rows, outcols, 1, colnames)
         if j !== nothing # Need to widen column type
@@ -999,7 +999,7 @@ function _combine_with_first!(first::Union{AbstractDataFrame,
         end
         append!(idx, Iterators.repeated(gdidx[starts[i]], _nrow(rows)))
     end
-    outcols
+    return outcols, colnames
 end
 
 """

From 14ed8f2bc74f7cfcab2bf06eee9a5efe29f2eb61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Fri, 27 Dec 2019 23:50:27 +0100
Subject: [PATCH 4/8] Apply suggestions from code review

Co-Authored-By: Milan Bouchet-Valat <nalimilan@club.fr>
---
 test/grouping.jl | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/test/grouping.jl b/test/grouping.jl
index 840ed08ca1..789bd96cad 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -1485,9 +1485,10 @@ end
                NamedTuple(), rand(0,0), rand(5,0))
         df = DataFrame(a = 1:8, x1 = Bool.(collect(bitstring(UInt8(i))) .- '0'))
         res = by(sdf -> sdf.x1[1] ? DataFrame(x1=[true]) : er, df, :a)
-        @test res == DataFrame(map(sdf -> sdf.x1[1] ? DataFrame(x1=[true]) : er, groupby(df, :a)))
+        @test res == DataFrame(map(sdf -> sdf.x1[1] ? DataFrame(x1=[true]) : er, groupby_checked(df, :a)))
         if nrow(res) == 0
-            @test res == DataFrame(a=Int[])
+            @test res == DataFrame(a=[])
+            @test typeof(res.a) == Vector{Int}
         else
             @test res == df[df.x1, :]
         end
@@ -1496,9 +1497,10 @@ end
         if typemin(UInt8) < i < typemax(UInt8)
             @test_throws ArgumentError by(sdf -> sdf.x1[1] ? (x1=true,) : er, df, :a)
         end
-        @test res == DataFrame(map(sdf -> sdf.x1[1] ? (x1=[true],) : er, groupby(df, :a)))
+        @test res == DataFrame(map(sdf -> sdf.x1[1] ? (x1=[true],) : er, groupby_checked(df, :a)))
         if nrow(res) == 0
-            @test res == DataFrame(a=Int[])
+            @test res == DataFrame(a=[])
+            @test typeof(res.a) == Vector{Int}
         else
             @test res == df[df.x1, :]
         end
@@ -1507,9 +1509,10 @@ end
         if typemin(UInt8) < i < typemax(UInt8)
             @test_throws ArgumentError by(sdf -> sdf.x1[1] ? true : er, df, :a)
         end
-        @test res == DataFrame(map(sdf -> sdf.x1[1] ? hcat(true) : er, groupby(df, :a)))
+        @test res == DataFrame(map(sdf -> sdf.x1[1] ? hcat(true) : er, groupby_checked(df, :a)))
         if nrow(res) == 0
-            @test res == DataFrame(a=Int[])
+            @test res == DataFrame(a=[])
+            @test typeof(res.a) == Vector{Int}
         else
             @test res == df[df.x1, :]
         end

From 4c0a4943770e445e65e75353ec433f80fbe5136e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sat, 28 Dec 2019 00:57:52 +0100
Subject: [PATCH 5/8] fixes after the code review

---
 src/groupeddataframe/splitapplycombine.jl | 52 ++++++++++++-----------
 test/grouping.jl                          | 41 ++++++------------
 2 files changed, 41 insertions(+), 52 deletions(-)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 1b19796602..826654fb0a 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -201,7 +201,7 @@ which is determined using the following rules:
   for each group as the length of the returned vector for that group.
 - A data frame, a named tuple of vectors or a matrix gives the same additional columns
   and as many rows for each group as the rows returned for that group.
-  In this case returning a table with no columns drops group.
+  (a table with zero columns drops the group)
 
 `f` must always return the same kind of object (as defined in the above list) for
 all groups, and if a named tuple or data frame, with the same fields or columns.
@@ -341,7 +341,7 @@ which is determined using the following rules:
   for each group as the length of the returned vector for that group.
 - A data frame, a named tuple of vectors or a matrix gives a `DataFrame` with the same
   additional columns and as many rows for each group as the rows returned for that group.
-  In this case returning a table with no columns drops group.
+  (a table with zero columns drops the group)
 
 `f` must always return the same kind of object (as defined in the above list) for
 all groups, and if a named tuple or data frame, with the same fields or columns.
@@ -829,10 +829,12 @@ function _combine_with_first(first::Union{NamedTuple, DataFrameRow, AbstractData
         initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], n), _ncol(first))
     end
     targetcolnames = tuple(propertynames(first)...)
-    outcols, finalcolnames = first isa Union{AbstractDataFrame,
-                              NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}} ?
-              _combine_with_first!(first, initialcols, idx, 1, 1, f, gd, incols, targetcolnames) :
-              _combine_with_first_row!(first, initialcols, idx, 1, 1, f, gd, incols, targetcolnames)
+    outcols, finalcolnames = if first isa Union{AbstractDataFrame,
+                                                NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}}
+        _combine_tables_with_first!(first, initialcols, idx, 1, 1, f, gd, incols, targetcolnames)
+    else
+        _combine_rows_with_first!(first, initialcols, idx, 1, 1, f, gd, incols, targetcolnames)
+    end
     idx, outcols, collect(Symbol, finalcolnames)
 end
 
@@ -869,12 +871,12 @@ function fill_row!(row, outcols::NTuple{N, AbstractVector},
     return nothing
 end
 
-function _combine_with_first_row!(first::Union{NamedTuple, DataFrameRow},
-                              outcols::NTuple{N, AbstractVector},
-                              idx::Vector{Int}, rowstart::Integer, colstart::Integer,
-                              f::Any, gd::GroupedDataFrame,
-                              incols::Union{Nothing, AbstractVector, NamedTuple},
-                              colnames::NTuple{N, Symbol}) where N
+function _combine_rows_with_first!(first::Union{NamedTuple, DataFrameRow},
+                                   outcols::NTuple{N, AbstractVector},
+                                   idx::Vector{Int}, rowstart::Integer, colstart::Integer,
+                                   f::Any, gd::GroupedDataFrame,
+                                   incols::Union{Nothing, AbstractVector, NamedTuple},
+                                   colnames::NTuple{N, Symbol}) where N
     len = length(gd)
     gdidx = gd.idx
     starts = gd.starts
@@ -902,7 +904,7 @@ function _combine_with_first_row!(first::Union{NamedTuple, DataFrameRow},
                     end
                 end
             end
-            return _combine_with_first_row!(row, newcols, idx, i, j, f, gd, incols, colnames)
+            return _combine_rows_with_first!(row, newcols, idx, i, j, f, gd, incols, colnames)
         end
         idx[i] = gdidx[starts[i]]
     end
@@ -940,7 +942,7 @@ function append_rows!(rows, outcols::NTuple{N, AbstractVector},
             vals = getproperty(rows, cn)
         catch
             throw(ArgumentError("return value must have the same column names " *
-                                "for all groups (got $(Tuple(colnames)) and $(Tuple(names(rows))))"))
+                                "for all groups (got $colnames and $(propertynames(rows)))"))
         end
         S = eltype(vals)
         T = eltype(col)
@@ -951,20 +953,20 @@ function append_rows!(rows, outcols::NTuple{N, AbstractVector},
     return nothing
 end
 
-function _combine_with_first!(first::Union{AbstractDataFrame,
-                                           NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}},
-                              outcols::NTuple{N, AbstractVector},
-                              idx::Vector{Int}, rowstart::Integer, colstart::Integer,
-                              f::Any, gd::GroupedDataFrame,
-                              incols::Union{Nothing, AbstractVector, NamedTuple},
-                              colnames::NTuple{N, Symbol}) where N
+function _combine_tables_with_first!(first::Union{AbstractDataFrame,
+                                     NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}},
+                                     outcols::NTuple{N, AbstractVector},
+                                     idx::Vector{Int}, rowstart::Integer, colstart::Integer,
+                                     f::Any, gd::GroupedDataFrame,
+                                     incols::Union{Nothing, AbstractVector, NamedTuple},
+                                     colnames::NTuple{N, Symbol}) where N
     len = length(gd)
     gdidx = gd.idx
     starts = gd.starts
     ends = gd.ends
     # Handle first group
 
-    @assert isempty(colnames) == (_ncol(first) == 0) == isempty(outcols)
+    @assert _ncol(first) == N
     if !isempty(colnames)
         j = append_rows!(first, outcols, colstart, colnames)
         @assert j === nothing # eltype is guaranteed to match
@@ -986,7 +988,7 @@ function _combine_with_first!(first::Union{AbstractDataFrame,
                 eltys = map(eltype, rows)
             end
             initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], 0), _ncol(rows))
-            return _combine_with_first!(rows, initialcols, idx, i, 1, f, gd, incols, newcolnames)
+            return _combine_tables_with_first!(rows, initialcols, idx, i, 1, f, gd, incols, newcolnames)
         end
         j = append_rows!(rows, outcols, 1, colnames)
         if j !== nothing # Need to widen column type
@@ -1003,7 +1005,7 @@ function _combine_with_first!(first::Union{AbstractDataFrame,
                     end
                 end
             end
-            return _combine_with_first!(rows, newcols, idx, i, j, f, gd, incols, colnames)
+            return _combine_tables_with_first!(rows, newcols, idx, i, j, f, gd, incols, colnames)
         end
         append!(idx, Iterators.repeated(gdidx[starts[i]], _nrow(rows)))
     end
@@ -1053,7 +1055,7 @@ which is determined using the following rules:
   for each group as the length of the returned vector for that group.
 - A data frame, a named tuple of vectors or a matrix gives a `DataFrame` with the same
   additional columns and as many rows for each group as the rows returned for that group.
-  In this case returning a table with no columns drops group.
+  (a table with zero columns drops the group)
 
 `f` must always return the same kind of object (as defined in the above list) for
 all groups, and if a named tuple or data frame, with the same fields or columns.
diff --git a/test/grouping.jl b/test/grouping.jl
index 789bd96cad..2182257922 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -1479,43 +1479,30 @@ end
 end
 
 @testset "Allow returning DataFrame() or NamedTuple() to drop group" begin
-    for i in typemin(UInt8):typemax(UInt8),
+    N = 4
+    for (i, x1) in enumerate(collect.(Iterators.product(repeat([[true, false]], N)...))),
         er in (DataFrame(), view(DataFrame(ones(2,2)), 2:1, 2:1),
                view(DataFrame(ones(2,2)), 1:2, 2:1),
-               NamedTuple(), rand(0,0), rand(5,0))
-        df = DataFrame(a = 1:8, x1 = Bool.(collect(bitstring(UInt8(i))) .- '0'))
-        res = by(sdf -> sdf.x1[1] ? DataFrame(x1=[true]) : er, df, :a)
-        @test res == DataFrame(map(sdf -> sdf.x1[1] ? DataFrame(x1=[true]) : er, groupby_checked(df, :a)))
-        if nrow(res) == 0
+               NamedTuple(), rand(0,0), rand(5,0),
+               DataFrame(x1=Int[]), DataFrame(x1=Any[]),
+               (x1=Int[],), (x1=Any[],), rand(0,1)),
+        fr in (DataFrame(x1=[true]), (x1=[true],), hcat(true))
+        df = DataFrame(a = 1:N, x1 = x1)
+        res = by(sdf -> sdf.x1[1] ? fr : er, df, :a)
+        @test res == DataFrame(map(sdf -> sdf.x1[1] ? fr : er, groupby_checked(df, :a)))
+        @test res == by(df, :a, :x1 => x -> x[1] ? fr : er)
+        @test res == by(df, :a, (:a, :x1) => x -> x.x1[1] ? fr : er)
+        if nrow(res) == 0 && length(propertynames(er)) == 0 && er != rand(0, 1)
             @test res == DataFrame(a=[])
             @test typeof(res.a) == Vector{Int}
         else
             @test res == df[df.x1, :]
         end
-
-        res = by(sdf -> sdf.x1[1] ? (x1=[true],) : er, df, :a)
-        if typemin(UInt8) < i < typemax(UInt8)
+        if 1 < i < 2^N
             @test_throws ArgumentError by(sdf -> sdf.x1[1] ? (x1=true,) : er, df, :a)
-        end
-        @test res == DataFrame(map(sdf -> sdf.x1[1] ? (x1=[true],) : er, groupby_checked(df, :a)))
-        if nrow(res) == 0
-            @test res == DataFrame(a=[])
-            @test typeof(res.a) == Vector{Int}
-        else
-            @test res == df[df.x1, :]
-        end
-
-        res = by(sdf -> sdf.x1[1] ? hcat(true) : er, df, :a)
-        if typemin(UInt8) < i < typemax(UInt8)
+            @test_throws ArgumentError by(sdf -> sdf.x1[1] ? fr : (x2=[true],), df, :a)
             @test_throws ArgumentError by(sdf -> sdf.x1[1] ? true : er, df, :a)
         end
-        @test res == DataFrame(map(sdf -> sdf.x1[1] ? hcat(true) : er, groupby_checked(df, :a)))
-        if nrow(res) == 0
-            @test res == DataFrame(a=[])
-            @test typeof(res.a) == Vector{Int}
-        else
-            @test res == df[df.x1, :]
-        end
     end
 end
 

From d461a0e6e1075a378e464ec352af82526aa3f815 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sat, 28 Dec 2019 01:38:01 +0100
Subject: [PATCH 6/8] add more tests

---
 test/grouping.jl | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/test/grouping.jl b/test/grouping.jl
index 2182257922..d269d1ecee 100644
--- a/test/grouping.jl
+++ b/test/grouping.jl
@@ -1486,11 +1486,19 @@ end
                NamedTuple(), rand(0,0), rand(5,0),
                DataFrame(x1=Int[]), DataFrame(x1=Any[]),
                (x1=Int[],), (x1=Any[],), rand(0,1)),
-        fr in (DataFrame(x1=[true]), (x1=[true],), hcat(true))
+        fr in (DataFrame(x1=[true]), (x1=[true],), hcat(true), [true])
         df = DataFrame(a = 1:N, x1 = x1)
         res = by(sdf -> sdf.x1[1] ? fr : er, df, :a)
         @test res == DataFrame(map(sdf -> sdf.x1[1] ? fr : er, groupby_checked(df, :a)))
-        @test res == by(df, :a, :x1 => x -> x[1] ? fr : er)
+        if fr == [true]
+            if df.x1[1]
+                @test rename(res, 2=>:x1_function) == by(df, :a, :x1 => x -> x[1] ? fr : er)
+            else
+                @test res == by(df, :a, :x1 => x -> x[1] ? fr : er)
+            end
+        else
+            @test res == by(df, :a, :x1 => x -> x[1] ? fr : er)
+        end
         @test res == by(df, :a, (:a, :x1) => x -> x.x1[1] ? fr : er)
         if nrow(res) == 0 && length(propertynames(er)) == 0 && er != rand(0, 1)
             @test res == DataFrame(a=[])

From 39ab56d3ea5ce6d3b2576629f867bbe7e00261f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sat, 28 Dec 2019 21:48:02 +0100
Subject: [PATCH 7/8] improve docstring

---
 src/groupeddataframe/splitapplycombine.jl | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 826654fb0a..3e0a6438f1 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -201,7 +201,8 @@ which is determined using the following rules:
   for each group as the length of the returned vector for that group.
 - A data frame, a named tuple of vectors or a matrix gives the same additional columns
   and as many rows for each group as the rows returned for that group.
-  (a table with zero columns drops the group)
+  As a special case, returning an empty table with zero columns is allowed,
+  whatever the number of columns returned for other groups.
 
 `f` must always return the same kind of object (as defined in the above list) for
 all groups, and if a named tuple or data frame, with the same fields or columns.
@@ -341,7 +342,8 @@ which is determined using the following rules:
   for each group as the length of the returned vector for that group.
 - A data frame, a named tuple of vectors or a matrix gives a `DataFrame` with the same
   additional columns and as many rows for each group as the rows returned for that group.
-  (a table with zero columns drops the group)
+  As a special case, returning an empty table with zero columns is allowed,
+  whatever the number of columns returned for other groups.
 
 `f` must always return the same kind of object (as defined in the above list) for
 all groups, and if a named tuple or data frame, with the same fields or columns.
@@ -1055,7 +1057,8 @@ which is determined using the following rules:
   for each group as the length of the returned vector for that group.
 - A data frame, a named tuple of vectors or a matrix gives a `DataFrame` with the same
   additional columns and as many rows for each group as the rows returned for that group.
-  (a table with zero columns drops the group)
+  As a special case, returning an empty table with zero columns is allowed,
+  whatever the number of columns returned for other groups.
 
 `f` must always return the same kind of object (as defined in the above list) for
 all groups, and if a named tuple or data frame, with the same fields or columns.

From 6b9b8aa68d74a131f4e2a4089861099b50c06ad2 Mon Sep 17 00:00:00 2001
From: Milan Bouchet-Valat <nalimilan@club.fr>
Date: Thu, 2 Jan 2020 18:32:49 +0100
Subject: [PATCH 8/8] Break lines at 92 chars

---
 src/groupeddataframe/splitapplycombine.jl | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl
index 3e0a6438f1..ab6cfc481c 100644
--- a/src/groupeddataframe/splitapplycombine.jl
+++ b/src/groupeddataframe/splitapplycombine.jl
@@ -831,11 +831,13 @@ function _combine_with_first(first::Union{NamedTuple, DataFrameRow, AbstractData
         initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], n), _ncol(first))
     end
     targetcolnames = tuple(propertynames(first)...)
-    outcols, finalcolnames = if first isa Union{AbstractDataFrame,
-                                                NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}}
-        _combine_tables_with_first!(first, initialcols, idx, 1, 1, f, gd, incols, targetcolnames)
+    if first isa Union{AbstractDataFrame,
+                       NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}}
+        outcols, finalcolnames = _combine_tables_with_first!(first, initialcols, idx, 1, 1,
+                                                             f, gd, incols, targetcolnames)
     else
-        _combine_rows_with_first!(first, initialcols, idx, 1, 1, f, gd, incols, targetcolnames)
+        outcols, finalcolnames = _combine_rows_with_first!(first, initialcols, idx, 1, 1,
+                                                           f, gd, incols, targetcolnames)
     end
     idx, outcols, collect(Symbol, finalcolnames)
 end
@@ -906,7 +908,8 @@ function _combine_rows_with_first!(first::Union{NamedTuple, DataFrameRow},
                     end
                 end
             end
-            return _combine_rows_with_first!(row, newcols, idx, i, j, f, gd, incols, colnames)
+            return _combine_rows_with_first!(row, newcols, idx, i, j,
+                                             f, gd, incols, colnames)
         end
         idx[i] = gdidx[starts[i]]
     end
@@ -977,7 +980,8 @@ function _combine_tables_with_first!(first::Union{AbstractDataFrame,
     # Handle remaining groups
     @inbounds for i in rowstart+1:len
         rows = wrap(do_call(f, gdidx, starts, ends, gd, incols, i))
-        if !(rows isa Union{AbstractDataFrame, NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}})
+        if !(rows isa Union{AbstractDataFrame,
+                            NamedTuple{<:Any, <:Tuple{Vararg{AbstractVector}}}})
             throw(ArgumentError("return value must not change its kind " *
                                 "(single row or variable number of rows) across groups"))
         end
@@ -990,7 +994,8 @@ function _combine_tables_with_first!(first::Union{AbstractDataFrame,
                 eltys = map(eltype, rows)
             end
             initialcols = ntuple(i -> Tables.allocatecolumn(eltys[i], 0), _ncol(rows))
-            return _combine_tables_with_first!(rows, initialcols, idx, i, 1, f, gd, incols, newcolnames)
+            return _combine_tables_with_first!(rows, initialcols, idx, i, 1,
+                                               f, gd, incols, newcolnames)
         end
         j = append_rows!(rows, outcols, 1, colnames)
         if j !== nothing # Need to widen column type
@@ -1007,7 +1012,8 @@ function _combine_tables_with_first!(first::Union{AbstractDataFrame,
                     end
                 end
             end
-            return _combine_tables_with_first!(rows, newcols, idx, i, j, f, gd, incols, colnames)
+            return _combine_tables_with_first!(rows, newcols, idx, i, j,
+                                               f, gd, incols, colnames)
         end
         append!(idx, Iterators.repeated(gdidx[starts[i]], _nrow(rows)))
     end