Skip to content

Commit

Permalink
Add unstack() support for non-multiindexed dataframes (#7054)
Browse files Browse the repository at this point in the history
Closes #6694 

When `unstack()` receives a DataFrame with a single-level index, it returns a Series to match pandas behavior.

Authors:
  - Michael Wang <[email protected]>

Approvers:
  - null

URL: #7054
  • Loading branch information
isVoid authored Jan 6, 2021
1 parent c0920e6 commit 1930432
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 6 deletions.
40 changes: 35 additions & 5 deletions python/cudf/cudf/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,11 @@ def unstack(df, level, fill_value=None):
Pivots the specified levels of the index labels of df to the innermost
levels of the columns labels of the result.
* If the index of ``df`` has multiple levels, returns a ``Dataframe`` with
specified level of the index pivoted to the column levels.
* If the index of ``df`` has single level, returns a ``Series`` with all
column levels pivoted to the index levels.
Parameters
----------
df : DataFrame
Expand All @@ -913,7 +918,7 @@ def unstack(df, level, fill_value=None):
Returns
-------
DataFrame with specified index levels pivoted to column levels
Series or DataFrame
Examples
--------
Expand Down Expand Up @@ -964,18 +969,43 @@ def unstack(df, level, fill_value=None):
a
1 5 <NA> 6 <NA> 7
2 <NA> 8 <NA> 9 <NA>
Unstacking single level index dataframe:
>>> df = cudf.DataFrame({('c', 1): [1, 2, 3], ('c', 2):[9, 8, 7]})
>>> df.unstack()
c 1 0 1
1 2
2 3
2 0 9
1 8
2 7
dtype: int64
"""
if not isinstance(df, cudf.DataFrame):
raise ValueError("`df` should be a cudf Dataframe object.")

if df.empty:
raise ValueError("Cannot unstack an empty dataframe.")

if fill_value is not None:
raise NotImplementedError("fill_value is not supported.")
if pd.api.types.is_list_like(level):
if not level:
return df
df = df.copy(deep=False)
if not isinstance(df.index, cudf.MultiIndex):
raise NotImplementedError(
"Calling unstack() on a DataFrame without a MultiIndex "
"is not supported"
)
dtype = df._columns[0].dtype
for col in df._columns:
if not col.dtype == dtype:
raise ValueError(
"Calling unstack() on single index dataframe"
" with different column datatype is not supported."
)
res = df.T.stack(dropna=False)
# Result's index is a multiindex
res.index.names = tuple(df.columns.names) + df.index.names
return res
else:
columns = df.index._poplevels(level)
index = df.index
Expand Down
51 changes: 50 additions & 1 deletion python/cudf/cudf/tests/test_reshape.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

import numpy as np
import pandas as pd
import pytest
Expand Down Expand Up @@ -402,7 +404,7 @@ def test_pivot_multi_values():
),
],
)
def test_unstack(level):
def test_unstack_multiindex(level):
pdf = pd.DataFrame(
{
"foo": ["one", "one", "one", "two", "two", "two"],
Expand All @@ -417,6 +419,53 @@ def test_unstack(level):
)


@pytest.mark.parametrize(
    "data",
    [{"A": [1.0, 2.0, 3.0, 4.0, 5.0], "B": [11.0, 12.0, 13.0, 14.0, 15.0]}],
)
@pytest.mark.parametrize(
    "index",
    [
        pd.Index(range(5), name=None),
        pd.Index(range(5), name="row_index"),
    ],
)
@pytest.mark.parametrize(
    "col_idx",
    [
        pd.Index(["a", "b"], name=None),
        pd.Index(["a", "b"], name="col_index"),
        pd.MultiIndex.from_tuples([("c", 1), ("c", 2)], names=[None, None]),
        pd.MultiIndex.from_tuples(
            [("c", 1), ("c", 2)], names=["col_index1", "col_index2"]
        ),
    ],
)
def test_unstack_index(data, index, col_idx):
    """Check ``unstack()`` parity with pandas for a single-level row index.

    The same two-column float frame is built in pandas and in cudf, then
    labelled with every combination of named/unnamed row index and
    flat/two-level, named/unnamed column index. The results of calling
    ``unstack()`` with no arguments on both frames must compare equal.
    """
    host_frame = pd.DataFrame(data)
    device_frame = cudf.from_pandas(host_frame)

    # Label both frames identically; the cudf side gets converted labels.
    host_frame.index = index
    device_frame.index = cudf.from_pandas(index)
    host_frame.columns = col_idx
    device_frame.columns = cudf.from_pandas(col_idx)

    expected = host_frame.unstack()
    actual = device_frame.unstack()
    assert_eq(expected, actual)


def test_unstack_index_invalid():
    """Check that ``unstack()`` raises ``ValueError`` for mixed column dtypes.

    The frame has a default (non-multi) index, one integer column and one
    string column; the error message is matched literally.
    """
    mixed_dtype_frame = cudf.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
    # Escape the message so "(" / ")" / "." are matched literally, not as regex.
    expected_message = re.escape(
        "Calling unstack() on single index dataframe with "
        "different column datatype is not supported."
    )
    with pytest.raises(ValueError, match=expected_message):
        mixed_dtype_frame.unstack()


def test_pivot_duplicate_error():
gdf = cudf.DataFrame(
{"a": [0, 1, 2, 2], "b": [1, 2, 3, 3], "d": [1, 2, 3, 4]}
Expand Down

0 comments on commit 1930432

Please sign in to comment.