Skip to content

Commit

Permalink
Backport PR #42640: BUG: Fix bug in SeriesGroupBy.value_counts when DataFrame has one row (#42618) (#42696)
Browse files Browse the repository at this point in the history

Co-authored-by: neelmraman <[email protected]>
  • Loading branch information
meeseeksmachine and neelmraman authored Jul 24, 2021
1 parent a6ddae7 commit 176e8d3
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 13 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Fixed regressions
- Fixed regression for ``SettingWithCopyWarning`` displaying incorrect stacklevel (:issue:`42570`)
- Fixed regression for :func:`merge_asof` raising ``KeyError`` when one of the ``by`` columns is in the index (:issue:`34488`)
- Fixed regression in :func:`to_datetime` returning pd.NaT for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`)

- Fixed regression in :meth:`SeriesGroupBy.value_counts` that resulted in an ``IndexError`` when called on a Series with one row (:issue:`42618`)

.. ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ def apply_series_value_counts():
# new values are where sorted labels change
lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
inc = np.r_[True, lchanges]
-if not len(lchanges):
+if not len(val):
inc = lchanges
inc[idx] = True # group boundaries are also new values
out = np.diff(np.nonzero(np.r_[inc, True])[0]) # value counts
Expand Down
26 changes: 15 additions & 11 deletions pandas/tests/groupby/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,23 +122,27 @@ def test_series_groupby_value_counts_with_grouper():
tm.assert_series_equal(result, expected)


-def test_series_groupby_value_counts_empty():
+@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]])
+def test_series_groupby_value_counts_empty(columns):
# GH39172
-df = DataFrame(columns=["A", "B"])
-dfg = df.groupby("A")
+df = DataFrame(columns=columns)
+dfg = df.groupby(columns[:-1])

-result = dfg["B"].value_counts()
-expected = Series([], name="B", dtype=result.dtype)
-expected.index = MultiIndex.from_arrays([[]] * 2, names=["A", "B"])
+result = dfg[columns[-1]].value_counts()
+expected = Series([], name=columns[-1], dtype=result.dtype)
+expected.index = MultiIndex.from_arrays([[]] * len(columns), names=columns)

tm.assert_series_equal(result, expected)

-df = DataFrame(columns=["A", "B", "C"])
-dfg = df.groupby(["A", "B"])

-result = dfg["C"].value_counts()
-expected = Series([], name="C", dtype=result.dtype)
-expected.index = MultiIndex.from_arrays([[]] * 3, names=["A", "B", "C"])
+@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]])
+def test_series_groupby_value_counts_one_row(columns):
+# GH42618
+df = DataFrame(data=[range(len(columns))], columns=columns)
+dfg = df.groupby(columns[:-1])

+result = dfg[columns[-1]].value_counts()
+expected = df.value_counts().rename(columns[-1])

tm.assert_series_equal(result, expected)

Expand Down

0 comments on commit 176e8d3

Please sign in to comment.