From 79289601c44b697a0f7b1a0934fdfaace5089799 Mon Sep 17 00:00:00 2001 From: P-Tillmann Date: Sat, 8 Jul 2017 00:28:11 +0200 Subject: [PATCH 1/8] BUG: GH16843 fix groupby Fix for GH16843 where groubpy gives wrong result when number of groupedby index+columns is equal to length of axis --- pandas/core/groupby.py | 9 +++++---- pandas/tests/groupby/test_groupby.py | 11 +++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index daf3381ae4e89..8e555c3fe9f39 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2628,13 +2628,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, try: if isinstance(obj, DataFrame): - all_in_columns = all(g in obj.columns for g in keys) + all_in_columns_index = all(g in obj.columns or g in obj.index.names + for g in keys) else: - all_in_columns = False + all_in_columns_index = False except Exception: - all_in_columns = False + all_in_columns_index = False - if not any_callable and not all_in_columns and \ + if not any_callable and not all_in_columns_index and \ not any_arraylike and not any_groupers and \ match_axis_length and level is None: keys = [com._asarray_tuplesafe(keys)] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 19124a33bdbcb..fab086bf0b8a6 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3890,6 +3890,17 @@ def predictions(tool): expected = df1.groupby('Key').apply(predictions).p1 result = df2.groupby('Key').apply(predictions).p1 tm.assert_series_equal(expected, result) + + def test_gb_key_len_equal_axis_len(self): + # GH16843 + df = pd.DataFrame([['foo','bar', 'B', 1], + ['foo','bar', 'B', 2], + ['foo','baz', 'C', 3]], + columns=['first','second','third','one']) + df.set_index(['first','second',], inplace=True) + df = df.groupby(['first','second','third']).size() + assert df.loc[('foo','bar','B')]==2 + assert df.loc[('foo','baz','C')]==1 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): From acd449f49c322d4fcdf1d6f16cfd6341659c79cf Mon Sep 17 00:00:00 2001 From: P-Tillmann Date: Sat, 8 Jul 2017 00:44:24 +0200 Subject: [PATCH 2/8] Added whitespaces Added whitespaces --- pandas/tests/groupby/test_groupby.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index fab086bf0b8a6..c00690c663e8c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3890,17 +3890,17 @@ def predictions(tool): expected = df1.groupby('Key').apply(predictions).p1 result = df2.groupby('Key').apply(predictions).p1 tm.assert_series_equal(expected, result) - + def test_gb_key_len_equal_axis_len(self): # GH16843 - df = pd.DataFrame([['foo','bar', 'B', 1], - ['foo','bar', 'B', 2], - ['foo','baz', 'C', 3]], - columns=['first','second','third','one']) - df.set_index(['first','second',], inplace=True) - df = df.groupby(['first','second','third']).size() - assert df.loc[('foo','bar','B')]==2 - assert df.loc[('foo','baz','C')]==1 + df = pd.DataFrame([['foo', 'bar', 'B', 1], + ['foo', 'bar', 'B', 2], + ['foo', 'baz', 'C', 3]], + columns=['first', 'second', 'third', 'one']) + df.set_index(['first', 'second'], inplace=True) + df = df.groupby(['first', 'second', 'third']).size() + assert df.loc[('foo', 'bar', 'B')] == 2 + assert df.loc[('foo', 'baz', 'C')] == 1 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): From d5ec7541a2242ed3ed46d7839999775cab16f5e7 Mon Sep 17 00:00:00 2001 From: P-Tillmann Date: Tue, 11 Jul 2017 20:54:43 +0200 Subject: [PATCH 3/8] Added test description Added test description --- pandas/tests/groupby/test_groupby.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c00690c663e8c..a870b6db9afd1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3893,6 +3893,8 @@ def predictions(tool): def test_gb_key_len_equal_axis_len(self): # GH16843 + #test ensures that index and column keys are recognized correctly + #when number of keys equals axis length of groupby df = pd.DataFrame([['foo', 'bar', 'B', 1], ['foo', 'bar', 'B', 2], ['foo', 'baz', 'C', 3]], From f34b10a12b5770208b22ba973d3ebad3b6d55d44 Mon Sep 17 00:00:00 2001 From: P-Tillmann Date: Tue, 11 Jul 2017 21:12:15 +0200 Subject: [PATCH 4/8] Added whats-new --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 95eab9e3b684f..5506800b581a4 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -168,7 +168,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in ``DataFrame.resample().size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) - +- Bug in ``DataFrame.groupby`` when called with index or index + column key numbers equal to the axis length of the groupby (:issue:`16859`) Sparse From 7980d79f929c5850ad95dd775b7cc6e3a2897ad8 Mon Sep 17 00:00:00 2001 From: P-Tillmann Date: Thu, 27 Jul 2017 14:11:36 +0200 Subject: [PATCH 5/8] updated whats new file (v0.21.0) --- doc/source/whatsnew/v0.21.0.txt | 330 ++++++++++++++++++++++++++++++++ 1 file changed, 330 insertions(+) create mode 100644 doc/source/whatsnew/v0.21.0.txt diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt new file mode 100644 index 0000000000000..5a16bf73d5ed9 --- /dev/null +++ b/doc/source/whatsnew/v0.21.0.txt @@ -0,0 +1,330 @@ +.. _whatsnew_0210: + +v0.21.0 (???) +------------- + +This is a major release from 0.20.x and includes a number of API changes, deprecations, new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. contents:: What's new in v0.21.0 + :local: + :backlinks: none + +.. _whatsnew_0210.enhancements: + +New features +~~~~~~~~~~~~ + +- Support for `PEP 519 -- Adding a file system path protocol + `_ on most readers and writers (:issue:`13823`) +- Added ``__fspath__`` method to :class:`~pandas.HDFStore`, :class:`~pandas.ExcelFile`, + and :class:`~pandas.ExcelWriter` to work properly with the file system path protocol (:issue:`13823`) +- Added ``skipna`` parameter to :func:`~pandas.api.types.infer_dtype` to + support type inference in the presence of missing values (:issue:`17059`). + + +.. _whatsnew_0210.enhancements.infer_objects: + +``infer_objects`` type conversion +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :meth:`DataFrame.infer_objects` and :meth:`Series.infer_objects` +methods have been added to perform dtype inference on object columns, replacing +some of the functionality of the deprecated ``convert_objects`` +method. See the documentation :ref:`here ` +for more details. (:issue:`11221`) + +This method only performs soft conversions on object columns, converting Python objects +to native types, but not any coercive conversions. For example: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': np.array([1, 2, 3], dtype='object'), + 'C': ['1', '2', '3']}) + df.dtypes + df.infer_objects().dtypes + +Note that column ``'C'`` was not converted - only scalar numeric types +will be inferred to a new type. Other types of conversion should be accomplished +using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedelta`). + +.. ipython:: python + + df = df.infer_objects() + df['C'] = pd.to_numeric(df['C'], errors='coerce') + df.dtypes + +.. _whatsnew_0210.enhancements.other: + +Other Enhancements +^^^^^^^^^^^^^^^^^^ + +- The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here ` (:issue:`16270`) +- :func:`Series.to_dict` and :func:`DataFrame.to_dict` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) +- :func:`RangeIndex.append` now returns a ``RangeIndex`` object when possible (:issue:`16212`) +- :func:`Series.rename_axis` and :func:`DataFrame.rename_axis` with ``inplace=True`` now return ``None`` while renaming the axis inplace. (:issue:`15704`) +- :func:`Series.set_axis` and :func:`DataFrame.set_axis` now support the ``inplace`` parameter. (:issue:`14636`) +- :func:`Series.to_pickle` and :func:`DataFrame.to_pickle` have gained a ``protocol`` parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ +- :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`) +- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) +- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`) +- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`) +- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) +- :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) +- :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) + +.. _whatsnew_0210.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0210.api_breaking.pandas_eval: + +Improved error handling during item assignment in pd.eval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`eval` will now raise a ``ValueError`` when item assignment malfunctions, or +inplace operations are specified, but there is no item assignment in the expression (:issue:`16732`) + +.. ipython:: python + + arr = np.array([1, 2, 3]) + +Previously, if you attempted the following expression, you would get a not very helpful error message: + +.. code-block:: ipython + + In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) + ... + IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) + and integer or boolean arrays are valid indices + +This is a very long way of saying numpy arrays don't support string-item indexing. With this +change, the error message is now this: + +.. code-block:: python + + In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) + ... + ValueError: Cannot assign expression output to target + +It also used to be possible to evaluate expressions inplace, even if there was no item assignment: + +.. code-block:: ipython + + In [4]: pd.eval("1 + 2", target=arr, inplace=True) + Out[4]: 3 + +However, this input does not make much sense because the output is not being assigned to +the target. Now, a ``ValueError`` will be raised when such an input is passed in: + +.. code-block:: ipython + + In [4]: pd.eval("1 + 2", target=arr, inplace=True) + ... + ValueError: Cannot operate inplace if there is no assignment + +Dtype Conversions +^^^^^^^^^^^^^^^^^ + +- Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignment, would coerce to + same the type (e.g. int / float), or raise for datetimelikes. These will now preseve the bools with ``object`` dtypes. (:issue:`16821`). + + .. ipython:: python + + s = Series([1, 2, 3]) + + .. code-block:: python + + In [5]: s[1] = True + + In [6]: s + Out[6]: + 0 1 + 1 1 + 2 3 + dtype: int64 + + New Behavior + + .. ipython:: python + + s[1] = True + s + +- Previously, as assignment to a datetimelike with a non-datetimelike would coerce the + non-datetime-like item being assigned (:issue:`14145`). + + .. ipython:: python + + s = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-01')]) + + .. code-block:: python + + In [1]: s[1] = 1 + + In [2]: s + Out[2]: + 0 2011-01-01 00:00:00.000000000 + 1 1970-01-01 00:00:00.000000001 + dtype: datetime64[ns] + + These now coerce to ``object`` dtype. + + .. ipython:: python + + s[1] = 1 + s + +- Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) +- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) + +.. _whatsnew_0210.api.na_changes: + +NA naming Changes +^^^^^^^^^^^^^^^^^ + +In order to promote more consistency among the pandas API, we have added additional top-level +functions :func:`isna` and :func:`notna` that are aliases for :func:`isnull` and :func:`notnull`. +The naming scheme is now more consistent with methods like ``.dropna()`` and ``.fillna()``. Furthermore +in all cases where ``.isnull()`` and ``.notnull()`` methods are defined, these have additional methods +named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical``, +``Index``, ``Series``, and ``DataFrame``. (:issue:`15001`). + +The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement. + +.. _whatsnew_0210.api: + +Other API Changes +^^^^^^^^^^^^^^^^^ + +- Support has been dropped for Python 3.4 (:issue:`15251`) +- The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`) +- Accessing a non-existent attribute on a closed :class:`~pandas.HDFStore` will now + raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) +- :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`) +- :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) +- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). +- Compression defaults in HDF stores now follow pytable standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) +- ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`) +- Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`) +- Moved definition of ``MergeError`` to the ``pandas.errors`` module. +- The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is still supported and causes a ``FutureWarning`` to be emitted (:issue:`14636`) + + +.. _whatsnew_0210.deprecations: + +Deprecations +~~~~~~~~~~~~ +- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). + +- ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). + +.. _whatsnew_0210.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- :func:`read_excel()` has dropped the ``has_index_names`` parameter (:issue:`10967`) +- The ``pd.options.display.height`` configuration has been dropped (:issue:`3663`) +- The ``pd.options.display.line_width`` configuration has been dropped (:issue:`2881`) +- The ``pd.options.display.mpl_style`` configuration has been dropped (:issue:`12190`) +- ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) +- ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) +- :func:`eval` and :func:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) +- The function ``get_offset_name`` has been dropped in favor of the ``.freqstr`` attribute for an offset (:issue:`11834`) + + +.. _whatsnew_0210.performance: + +Performance Improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`) + + +.. _whatsnew_0210.bug_fixes: + +Bug Fixes +~~~~~~~~~ + + +Conversion +^^^^^^^^^^ + +- Bug in assignment against datetime-like data with ``int`` may incorrectly converte to datetime-like (:issue:`14145`) +- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) + + +Indexing +^^^^^^^^ + +- When called with a null slice (e.g. ``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). +- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). +- Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`). +- Fixed :func:`TimedeltaIndex.get_loc` handling of ``np.timedelta64`` inputs (:issue:`16909`). +- Fix :func:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`). +- Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) +- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) +- Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) + +I/O +^^^ + +- Bug in :func:`read_csv` in which columns were not being thoroughly de-duplicated (:issue:`17060`) +- Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) +- Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). +- Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). +- Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) +- Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) + +Plotting +^^^^^^^^ +- Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) + + +Groupby/Resample/Rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in ``DataFrame.resample(...).size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) +- Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) +- Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) +- Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) +- Bug in ``DataFrame.groupby`` when called with index or index + column key numbers equal to the axis length of the groupby (:issue:`16859`) + +Sparse +^^^^^^ + +- Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`) +- Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`) + + +Reshaping +^^^^^^^^^ +- Joining/Merging with a non unique ``PeriodIndex`` raised a ``TypeError`` (:issue:`16871`) +- Bug in :func:`crosstab` where non-aligned series of integers were casted to float (:issue:`17005`) +- Bug in merging with categorical dtypes with datetimelikes incorrectly raised a ``TypeError`` (:issue:`16900`) +- Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) +- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) +- Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) + +Numeric +^^^^^^^ +- Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`) + + +Categorical +^^^^^^^^^^^ +- Bug in :func:`Series.isin` when called with a categorical (:issue`16639`) + + +Other +^^^^^ +- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) +- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) \ No newline at end of file From 2c48282643b9841f94cc7a8b9efdd68132184820 Mon Sep 17 00:00:00 2001 From: P-Tillmann Date: Thu, 27 Jul 2017 14:58:12 +0200 Subject: [PATCH 6/8] lint fix --- pandas/tests/groupby/test_groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e0db40954ddf4..4b15ae3b26e54 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3893,8 +3893,8 @@ def predictions(tool): def test_gb_key_len_equal_axis_len(self): # GH16843 - #test ensures that index and column keys are recognized correctly - #when number of keys equals axis length of groupby + # test ensures that index and column keys are recognized correctly + # when number of keys equals axis length of groupby df = pd.DataFrame([['foo', 'bar', 'B', 1], ['foo', 'bar', 'B', 2], ['foo', 'baz', 'C', 3]], From 9af8a47eaccdb962682fad2e1be508d2f1131a9b Mon Sep 17 00:00:00 2001 From: P-Tillmann Date: Fri, 28 Jul 2017 10:32:58 +0200 Subject: [PATCH 7/8] replaced inplace=True for set index in testcase, added newline at the end of whatsnew file --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/tests/groupby/test_groupby.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5a16bf73d5ed9..ec7aba681c5bb 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -327,4 +327,4 @@ Categorical Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) -- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) \ No newline at end of file +- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4b15ae3b26e54..6c53cb06bb91b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3899,7 +3899,7 @@ def test_gb_key_len_equal_axis_len(self): ['foo', 'bar', 'B', 2], ['foo', 'baz', 'C', 3]], columns=['first', 'second', 'third', 'one']) - df.set_index(['first', 'second'], inplace=True) + df = df.set_index(['first', 'second']) df = df.groupby(['first', 'second', 'third']).size() assert df.loc[('foo', 'bar', 'B')] == 2 assert df.loc[('foo', 'baz', 'C')] == 1 From a98db9aec22696d60f9581deba1d20573fe370a3 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 20 Aug 2017 23:50:48 -0700 Subject: [PATCH 8/8] DOC: Address reviewer comments --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1535cca22e161..944f88d257496 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -361,7 +361,7 @@ Groupby/Resample/Rolling - Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) - Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) - Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1` (:issue:`15305`) -- Bug in ``DataFrame.groupby`` when called with index or index + column key numbers equal to the axis length of the groupby (:issue:`16859`) +- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) Sparse ^^^^^^