Skip to content

Commit

Permalink
ENH: Groupby on multiindex with missing data in group keys raises Ind…
Browse files Browse the repository at this point in the history
…exError (#20519)

* If all index values in some level are NA, fill with NaN
  • Loading branch information
proost committed Sep 10, 2019
1 parent def01cf commit ab639ca
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 2 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^

-
-
- Bug in :meth:`DataFrame.groupby` raising ``IndexError`` when all values in one level of a :class:`MultiIndex` used as group keys are missing (:issue:`20519`)
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)

Reshaping
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1279,7 +1279,10 @@ def _get_grouper_for_level(self, mapper, level):
# Remove unobserved levels from level_index
level_index = level_index.take(uniques)

grouper = level_index.take(codes)
if len(level_index):
grouper = level_index.take(codes)
else:
grouper = level_index.take(codes, fill_value=True)

return grouper, codes, level_index

Expand Down
43 changes: 43 additions & 0 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,49 @@ def test_groupby_empty(self):
# check name
assert s.groupby(s).grouper.names == ["name"]

def test_groupby_level_index_value_all_na(self):
    """Group by every index level when one level holds only missing values.

    Regression test for issue 20519. Each scenario builds a frame, moves the
    key columns into a MultiIndex, groups by all of those levels and sums.
    The result is compared against an empty frame whose MultiIndex has no
    codes; index type and dtype are deliberately not checked.
    """
    # issue 20519
    # Each scenario: (rows, key columns, value column, expected index levels)
    scenarios = [
        # Level "B" contains nothing but NaN.
        (
            [["x", np.nan, 1]],
            ["A", "B"],
            "C",
            [["x"], []],
        ),
        # Level "A" is entirely missing (None / NaN); "B" and "C" each have
        # one missing entry.
        (
            [[None, None, "x", 2], [np.nan, "y", np.nan, 4]],
            ["A", "B", "C"],
            "D",
            [[], ["y"], ["x"]],
        ),
    ]

    for rows, keys, value_col, levels in scenarios:
        frame = pd.DataFrame(rows, columns=keys + [value_col])
        frame = frame.set_index(keys)
        result = frame.groupby(level=keys).sum()

        # One empty codes list per index level.
        empty_index = MultiIndex(
            levels=levels,
            codes=[[] for _ in keys],
            names=keys,
        )
        expected = DataFrame(data=[], index=empty_index, columns=[value_col])

        tm.assert_frame_equal(
            result,
            expected,
            check_index_type=False,
            check_dtype=False,
        )


# get_group
# --------------------------------
Expand Down

0 comments on commit ab639ca

Please sign in to comment.