Skip to content

Commit

Permalink
Backport PR #43154: Updating _resolve_numeric_only function of GroupBy (
Browse files Browse the repository at this point in the history
#43481)

Co-authored-by: Prerana Chakraborty <[email protected]>
  • Loading branch information
meeseeksmachine and kurchi1205 authored Sep 9, 2021
1 parent 04328a7 commit ac09649
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrame.__getitem__` raising error for slice of :class:`DatetimeIndex` when index is non monotonic (:issue:`43223`)
- Fixed regression in :meth:`.Resampler.aggregate` when used after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`)
- Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`)
- Fixed regression in :meth:`DataFrame.groupby` where aggregation on columns with object types dropped results on those columns (:issue:`42395`, :issue:`43108`)
- Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when filling ``float`` ``Series`` with list-like fill value having a dtype which couldn't cast lostlessly (like ``float32`` filled with ``float64``) (:issue:`43424`)
-

Expand Down
8 changes: 8 additions & 0 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1126,6 +1126,14 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
if self.obj.ndim == 2:
# i.e. DataFrameGroupBy
numeric_only = True
# GH#42395 GH#43108 GH#43154
# Regression from 1.2.5 to 1.3 caused object columns to be dropped
obj = self._obj_with_exclusions
check = obj._get_numeric_data()
if len(obj.columns) and not len(check.columns) and not obj.empty:
numeric_only = False
# TODO: v1.4+ Add FutureWarning

else:
numeric_only = False

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/groupby/aggregate/test_cython.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ def test_cython_agg_nothing_to_agg():

frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25})

result = frame[["b"]].groupby(frame["a"]).mean()
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
result = frame[["b"]].groupby(frame["a"]).mean()
expected = DataFrame([], index=frame["a"].sort_values().drop_duplicates())
tm.assert_frame_equal(result, expected)

Expand Down
62 changes: 62 additions & 0 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
MultiIndex,
RangeIndex,
Series,
Timedelta,
Timestamp,
date_range,
read_csv,
Expand Down Expand Up @@ -2377,6 +2378,67 @@ def test_groupby_empty_multi_column(as_index, numeric_only):
tm.assert_frame_equal(result, expected)


def test_groupby_aggregation_non_numeric_dtype():
# GH #43108
df = DataFrame(
[["M", [1]], ["M", [1]], ["W", [10]], ["W", [20]]], columns=["MW", "v"]
)

expected = DataFrame(
{
"v": [[1, 1], [10, 20]],
},
index=Index(["M", "W"], dtype="object", name="MW"),
)

gb = df.groupby(by=["MW"])
result = gb.sum()
tm.assert_frame_equal(result, expected)


def test_groupby_aggregation_multi_non_numeric_dtype():
# GH #42395
df = DataFrame(
{
"x": [1, 0, 1, 1, 0],
"y": [Timedelta(i, "days") for i in range(1, 6)],
"z": [Timedelta(i * 10, "days") for i in range(1, 6)],
}
)

expected = DataFrame(
{
"y": [Timedelta(i, "days") for i in range(7, 9)],
"z": [Timedelta(i * 10, "days") for i in range(7, 9)],
},
index=Index([0, 1], dtype="int64", name="x"),
)

gb = df.groupby(by=["x"])
result = gb.sum()
tm.assert_frame_equal(result, expected)


def test_groupby_aggregation_numeric_with_non_numeric_dtype():
# GH #43108
df = DataFrame(
{
"x": [1, 0, 1, 1, 0],
"y": [Timedelta(i, "days") for i in range(1, 6)],
"z": list(range(1, 6)),
}
)

expected = DataFrame(
{"z": [7, 8]},
index=Index([0, 1], dtype="int64", name="x"),
)

gb = df.groupby(by=["x"])
result = gb.sum()
tm.assert_frame_equal(result, expected)


def test_groupby_filtered_df_std():
# GH 16174
dicts = [
Expand Down

0 comments on commit ac09649

Please sign in to comment.