diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index dbe446f0a7b4f..00d8d5c145915 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -89,3 +89,5 @@ Bug Fixes ~~~~~~~~~ - Bug in ``value_counts`` when ``normalize=True`` and ``dropna=True`` where nulls still contributed to the normalized count (:issue:`12558`) +- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to the margin count (:issue:`12577`) +- Bug in ``pivot_table`` when ``margins=True`` and ``dropna=False`` where column names result in a ``KeyError`` diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index d699639c6c796..55fc4a1fd1b4b 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -149,6 +149,8 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = table.fillna(value=fill_value, downcast='infer') if margins: + if dropna: + data = data[data.notnull().all(axis=1)] table = _add_margins(table, data, values, rows=index, cols=columns, aggfunc=aggfunc, margins_name=margins_name) @@ -181,8 +183,9 @@ def _add_margins(table, data, values, rows, cols, aggfunc, # could be passed a Series object with no 'columns' if hasattr(table, 'columns'): for level in table.columns.names[1:]: - if margins_name in table.columns.get_level_values(level): - raise ValueError(exception_msg) + if level is not None: + if margins_name in table.columns.get_level_values(level): + raise ValueError(exception_msg) if len(rows) > 1: key = (margins_name,) + ('',) * (len(rows) - 1) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 994269d36cd85..44f1de4754e55 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -936,6 +936,59 @@ def test_crosstab_no_overlap(self): tm.assert_frame_equal(actual, expected) + def test_margin_ignore_dropna_bug(self): + # GH 12577 + # pivot_table counted nulls in the margin ('All') + # when 
margins=True and dropna=True + + df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan], + 'b': [3, 3, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=True) + expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 3, 5]]) + expected.index = Index([1.0, 2.0, 'All'], name='a') + expected.columns = Index([3, 4, 'All'], name='b') + tm.assert_frame_equal(actual, expected) + + df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan], + 'b': [3, np.nan, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=True) + expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]]) + expected.index = Index([1.0, 2.0, 'All'], name='a') + expected.columns = Index([3.0, 4.0, 'All'], name='b') + tm.assert_frame_equal(actual, expected) + + df = DataFrame({'a': [1, np.nan, np.nan, np.nan, np.nan, 2], + 'b': [3, 3, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=True) + expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]]) + expected.index = Index([1.0, 2.0, 'All'], name='a') + expected.columns = Index([3, 4, 'All'], name='b') + tm.assert_frame_equal(actual, expected) + + df = pd.DataFrame({'a': [1, 2, 2, 2, 2, np.nan], + 'b': [3, 3, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) + expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]]) + expected.index = Index([1.0, 2.0, 'All'], name='a') + expected.columns = Index([3, 4, 'All']) + tm.assert_frame_equal(actual, expected) + + df = DataFrame({'a': [1, np.nan, np.nan, np.nan, 2, np.nan], + 'b': [3, np.nan, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) + expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]]) + expected.index = Index([1.0, 2.0, 'All'], name='a') + expected.columns = Index([3.0, 4.0, 'All']) + tm.assert_frame_equal(actual, expected) + + df = DataFrame({'a': [1, np.nan, np.nan, np.nan, np.nan, 2], + 'b': [3, 3, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) + expected = 
pd.DataFrame([[1, 0, 1], [0, 1, 1], [2, 4, 6]]) + expected.index = Index([1.0, 2.0, 'All'], name='a') + expected.columns = Index([3, 4, 'All']) + tm.assert_frame_equal(actual, expected) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],