Skip to content

Commit

Permalink
BUG: fix margin/dropna issues
Browse files Browse the repository at this point in the history
closes #12650
closes #12133
closes #12642
  • Loading branch information
OXPHOS authored and jreback committed Apr 17, 2016
1 parent 0734df0 commit 3bed097
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 5 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,8 @@ Bug Fixes


- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`)
- Bug in ``pivot_table`` when ``dropna=False`` where table index/column names disappear (:issue:`12133`)
- Bug in ``crosstab`` when ``margins=True`` and ``dropna=False`` which raised (:issue:`12642`)

- Bug in ``Series.name`` when ``name`` attribute can be a hashable type (:issue:`12610`)
- Bug in ``.describe()`` resets categorical columns information (:issue:`11558`)
Expand Down
6 changes: 4 additions & 2 deletions pandas/tools/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,15 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',

if not dropna:
try:
m = MultiIndex.from_arrays(cartesian_product(table.index.levels))
m = MultiIndex.from_arrays(cartesian_product(table.index.levels),
names=table.index.names)
table = table.reindex_axis(m, axis=0)
except AttributeError:
pass # it's a single level

try:
m = MultiIndex.from_arrays(cartesian_product(table.columns.levels))
m = MultiIndex.from_arrays(cartesian_product(table.columns.levels),
names=table.columns.names)
table = table.reindex_axis(m, axis=1)
except AttributeError:
pass # it's a single level or a series
Expand Down
62 changes: 59 additions & 3 deletions pandas/tools/tests/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,8 +899,8 @@ def test_crosstab_dropna(self):
'two', 'two', 'two'], dtype=object)
c = np.array(['dull', 'dull', 'dull', 'dull',
'dull', 'shiny', 'shiny'], dtype=object)
res = crosstab(a, [b, c], rownames=['a'],
colnames=['b', 'c'], dropna=False)
res = pd.crosstab(a, [b, c], rownames=['a'],
colnames=['b', 'c'], dropna=False)
m = MultiIndex.from_tuples([('one', 'dull'), ('one', 'shiny'),
('two', 'dull'), ('two', 'shiny')])
assert_equal(res.columns.values, m.values)
Expand Down Expand Up @@ -936,7 +936,7 @@ def test_crosstab_no_overlap(self):

tm.assert_frame_equal(actual, expected)

def test_margin_ignore_dropna_bug(self):
def test_margin_dropna(self):
# GH 12577
# pivot_table counts null into margin ('All')
# when margins=true and dropna=true
Expand Down Expand Up @@ -965,6 +965,62 @@ def test_margin_ignore_dropna_bug(self):
expected.columns = Index([3, 4, 'All'], name='b')
tm.assert_frame_equal(actual, expected)

# GH 12642
# _add_margins raises KeyError: Level None not found
# when margins=True and dropna=False
df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan],
'b': [3, 3, 4, 4, 4, 4]})
actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]])
expected.index = Index([1.0, 2.0, 'All'], name='a')
expected.columns = Index([3, 4, 'All'], name='b')
tm.assert_frame_equal(actual, expected)

df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan],
'b': [3, np.nan, 4, 4, 4, 4]})
actual = pd.crosstab(df.a, df.b, margins=True, dropna=False)
expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]])
expected.index = Index([1.0, 2.0, 'All'], name='a')
expected.columns = Index([3.0, 4.0, 'All'], name='b')
tm.assert_frame_equal(actual, expected)

a = np.array(['foo', 'foo', 'foo', 'bar',
'bar', 'foo', 'foo'], dtype=object)
b = np.array(['one', 'one', 'two', 'one',
'two', np.nan, 'two'], dtype=object)
c = np.array(['dull', 'dull', 'dull', 'dull',
'dull', 'shiny', 'shiny'], dtype=object)

actual = pd.crosstab(a, [b, c], rownames=['a'],
colnames=['b', 'c'], margins=True, dropna=False)
m = MultiIndex.from_arrays([['one', 'one', 'two', 'two', 'All'],
['dull', 'shiny', 'dull', 'shiny', '']],
names=['b', 'c'])
expected = DataFrame([[1, 0, 1, 0, 2], [2, 0, 1, 1, 5],
[3, 0, 2, 1, 7]], columns=m)
expected.index = Index(['bar', 'foo', 'All'], name='a')
tm.assert_frame_equal(actual, expected)

actual = pd.crosstab([a, b], c, rownames=['a', 'b'],
colnames=['c'], margins=True, dropna=False)
m = MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo', 'All'],
['one', 'two', 'one', 'two', '']],
names=['a', 'b'])
expected = DataFrame([[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2],
[5, 2, 7]], index=m)
expected.columns = Index(['dull', 'shiny', 'All'], name='c')
tm.assert_frame_equal(actual, expected)

actual = pd.crosstab([a, b], c, rownames=['a', 'b'],
colnames=['c'], margins=True, dropna=True)
m = MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo', 'All'],
['one', 'two', 'one', 'two', '']],
names=['a', 'b'])
expected = DataFrame([[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2],
[5, 1, 6]], index=m)
expected.columns = Index(['dull', 'shiny', 'All'], name='c')
tm.assert_frame_equal(actual, expected)

if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down

0 comments on commit 3bed097

Please sign in to comment.