From 3bed097b56db84891c891009c6bf57bb512040e5 Mon Sep 17 00:00:00 2001 From: OXPHOS Date: Wed, 13 Apr 2016 23:21:20 -0400 Subject: [PATCH] BUG: fix margin/dropna issues closes #12650 closes #12133 closes #12642 --- doc/source/whatsnew/v0.18.1.txt | 2 ++ pandas/tools/pivot.py | 6 ++-- pandas/tools/tests/test_pivot.py | 62 ++++++++++++++++++++++++++++++-- 3 files changed, 65 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index a9b42b563f931..c6642c5216262 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -315,6 +315,8 @@ Bug Fixes - Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`) +- Bug in ``pivot_table`` when ``dropna=False`` where table index/column names disappear (:issue:`12133`) +- Bug in ``crosstab`` when ``margins=True`` and ``dropna=False`` which raised (:issue:`12642`) - Bug in ``Series.name`` when ``name`` attribute can be a hashable type (:issue:`12610`) - Bug in ``.describe()`` resets categorical columns information (:issue:`11558`) diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 06b31b5d5dc30..d7798bf1e7982 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -128,13 +128,15 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', if not dropna: try: - m = MultiIndex.from_arrays(cartesian_product(table.index.levels)) + m = MultiIndex.from_arrays(cartesian_product(table.index.levels), + names=table.index.names) table = table.reindex_axis(m, axis=0) except AttributeError: pass # it's a single level try: - m = MultiIndex.from_arrays(cartesian_product(table.columns.levels)) + m = MultiIndex.from_arrays(cartesian_product(table.columns.levels), + names=table.columns.names) table = table.reindex_axis(m, axis=1) except AttributeError: pass # it's a single level or a series diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index ae0cd67ad77e6..bff82e32dccc0 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -899,8 +899,8 @@ def test_crosstab_dropna(self): 'two', 'two', 'two'], dtype=object) c = np.array(['dull', 'dull', 'dull', 'dull', 'dull', 'shiny', 'shiny'], dtype=object) - res = crosstab(a, [b, c], rownames=['a'], - colnames=['b', 'c'], dropna=False) + res = pd.crosstab(a, [b, c], rownames=['a'], + colnames=['b', 'c'], dropna=False) m = MultiIndex.from_tuples([('one', 'dull'), ('one', 'shiny'), ('two', 'dull'), ('two', 'shiny')]) assert_equal(res.columns.values, m.values) @@ -936,7 +936,7 @@ def test_crosstab_no_overlap(self): tm.assert_frame_equal(actual, expected) - def test_margin_ignore_dropna_bug(self): + def test_margin_dropna(self): # GH 12577 # pivot_table counts null into margin ('All') # when margins=true and dropna=true @@ -965,6 +965,62 @@ def test_margin_ignore_dropna_bug(self): expected.columns = Index([3, 4, 'All'], name='b') tm.assert_frame_equal(actual, expected) + # GH 12642 + # _add_margins raises KeyError: Level None not found + # when margins=True and dropna=False + df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan], + 'b': [3, 3, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) + expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]]) + expected.index = Index([1.0, 2.0, 'All'], name='a') + expected.columns = Index([3, 4, 'All'], name='b') + tm.assert_frame_equal(actual, expected) + + df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan], + 'b': [3, np.nan, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) + expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]]) + expected.index = Index([1.0, 2.0, 'All'], name='a') + expected.columns = Index([3.0, 4.0, 'All'], name='b') + tm.assert_frame_equal(actual, expected) + + a = np.array(['foo', 'foo', 'foo', 'bar', + 'bar', 'foo', 'foo'], dtype=object) + b = np.array(['one', 'one', 'two', 'one', + 'two', np.nan, 'two'], dtype=object) + c = np.array(['dull', 'dull', 'dull', 'dull', + 'dull', 'shiny', 'shiny'], dtype=object) + + actual = pd.crosstab(a, [b, c], rownames=['a'], + colnames=['b', 'c'], margins=True, dropna=False) + m = MultiIndex.from_arrays([['one', 'one', 'two', 'two', 'All'], + ['dull', 'shiny', 'dull', 'shiny', '']], + names=['b', 'c']) + expected = DataFrame([[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], + [3, 0, 2, 1, 7]], columns=m) + expected.index = Index(['bar', 'foo', 'All'], name='a') + tm.assert_frame_equal(actual, expected) + + actual = pd.crosstab([a, b], c, rownames=['a', 'b'], + colnames=['c'], margins=True, dropna=False) + m = MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo', 'All'], + ['one', 'two', 'one', 'two', '']], + names=['a', 'b']) + expected = DataFrame([[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], + [5, 2, 7]], index=m) + expected.columns = Index(['dull', 'shiny', 'All'], name='c') + tm.assert_frame_equal(actual, expected) + + actual = pd.crosstab([a, b], c, rownames=['a', 'b'], + colnames=['c'], margins=True, dropna=True) + m = MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo', 'All'], + ['one', 'two', 'one', 'two', '']], + names=['a', 'b']) + expected = DataFrame([[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], + [5, 1, 6]], index=m) + expected.columns = Index(['dull', 'shiny', 'All'], name='c') + tm.assert_frame_equal(actual, expected) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],