Skip to content

Commit

Permalink
ENH: Groupby on multiindex with missing data in group keys raises Ind…
Browse files Browse the repository at this point in the history
…exError (#20519)

* if all the values in a level of a MultiIndex were missing, fill with numpy nan
  • Loading branch information
proost committed Sep 14, 2019
1 parent def01cf commit 892126f
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 2 deletions.
4 changes: 3 additions & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,10 @@ Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^

-
-
- Bug in :meth:`DataFrame.groupby` where an ``IndexError`` was raised when grouping on the levels of a MultiIndex if all the values in one of the levels were missing (:issue:`20519`)
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
- Bug in :meth:`DataFrameGroupBy.agg` not able to use lambda function with named aggregation (:issue:`27519`)

Reshaping
^^^^^^^^^
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1279,7 +1279,10 @@ def _get_grouper_for_level(self, mapper, level):
# Remove unobserved levels from level_index
level_index = level_index.take(uniques)

grouper = level_index.take(codes)
if len(level_index):
grouper = level_index.take(codes)
else:
grouper = level_index.take(codes, fill_value=True)

return grouper, codes, level_index

Expand Down
34 changes: 34 additions & 0 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,40 @@ def test_groupby_empty(self):
# check name
assert s.groupby(s).grouper.names == ["name"]

def test_groupby_level_index_value_all_na(self):
# GH 20519: group by the levels of a MultiIndex when one of those levels
# contains only missing values. In both cases below every row has a
# missing key in at least one level, and the expected result is an empty
# frame (no rows, empty codes) that keeps the value column as int64.

# Case 1: a single row whose "B" key is NaN, so level "B" is all-NA.
df = pd.DataFrame([["x", np.nan, 10]], columns=["A", "B", "C"]).set_index(
["A", "B"]
)
result = df.groupby(level=["A", "B"]).sum()
# Level "A" still lists "x"; level "B" is an empty float64 index.
expected = DataFrame(
data=[],
index=MultiIndex(
levels=[
Index(["x"], dtype="object", name="A"),
Index([], dtype="float64", name="B"),
],
codes=[[], []],
),
columns=["C"],
dtype="int64",
)
tm.assert_frame_equal(result, expected)

# Case 2: three key levels; "A" is None in both rows (all-NA level),
# while "B" and "C" each hold one real value and one None.
df = pd.DataFrame(
[[None, None, "x", 10], [None, "y", None, 20]], columns=["A", "B", "C", "D"]
).set_index(["A", "B", "C"])
result = df.groupby(level=["A", "B", "C"]).sum()
# Level "A" is empty; "B" and "C" keep their single observed values.
expected = DataFrame(
data=[],
index=MultiIndex(
levels=[[], ["y"], ["x"]], codes=[[], [], []], names=["A", "B", "C"]
),
columns=["D"],
dtype="int64",
)
tm.assert_frame_equal(result, expected)


# get_group
# --------------------------------
Expand Down

0 comments on commit 892126f

Please sign in to comment.