Skip to content

Commit

Permalink
ENH: Groupby on multiindex with missing data in group keys raises Ind…
Browse files Browse the repository at this point in the history
…exError (#20519)

* If all index values in some level are NA, fill with NaN
  • Loading branch information
proost committed Sep 10, 2019
1 parent def01cf commit 6576242
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 2 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^

-
-
- Bug in :meth:`DataFrame.groupby` on a :class:`MultiIndex` raising ``IndexError`` when all index values in some index level are missing data (:issue:`20519`)
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)

Reshaping
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1279,7 +1279,10 @@ def _get_grouper_for_level(self, mapper, level):
# Remove unobserved levels from level_index
level_index = level_index.take(uniques)

grouper = level_index.take(codes)
if len(level_index):
grouper = level_index.take(codes)
else:
grouper = level_index.take(codes,fill_value=True)

return grouper, codes, level_index

Expand Down
32 changes: 32 additions & 0 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,38 @@ def test_groupby_empty(self):
# check name
assert s.groupby(s).grouper.names == ["name"]

def test_groupby_level_index_value_all_na(self):
# issue 20519
df = pd.DataFrame([["x", np.nan, 1]], columns=["A", "B", "C"]).set_index(
["A", "B"]
)
result = df.groupby(level=["A", "B"]).sum()
expected = DataFrame(
data=[],
index=MultiIndex(
levels=[["x"],[]],
codes=[[],[]],
names=["A","B"]
),
columns=["C"]
)
tm.assert_frame_equal(result, expected, check_index_type=False, check_dtype=False)

df = pd.DataFrame(
[[None, None, "x", 2], [np.nan, "y", np.nan, 4]], columns=["A", "B", "C", "D"]
).set_index(["A", "B", "C"])
result = df.groupby(level=["A", "B", "C"]).sum()
expected = DataFrame(
data=[],
index=MultiIndex(
levels=[[], ["y"], ["x"]],
codes=[[], [], []],
names=["A", "B", "C"]
),
columns=["D"]
)
tm.assert_frame_equal(result, expected, check_index_type=False, check_dtype=False)


# get_group
# --------------------------------
Expand Down

0 comments on commit 6576242

Please sign in to comment.