From 5b224003b0bd29ef5e46852c4262e8f491484ead Mon Sep 17 00:00:00 2001 From: andrejonasson Date: Mon, 18 Sep 2017 20:14:03 +0200 Subject: [PATCH] ERR: get_indexer returns the correct indexer when Index is numeric and target is boolean (#16877) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/indexes/base.py | 6 +++++- pandas/tests/indexes/test_base.py | 7 +++++++ pandas/tests/series/test_indexing.py | 5 +++++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c808babeee5d99..260427bb6d5bb7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -433,7 +433,7 @@ Other API Changes - :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). - :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) - +- Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ``ValueError`` (:issue:`16877`) .. _whatsnew_0210.deprecations: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 008828cf4f309a..7219d4247b2464 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2588,6 +2588,11 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): if tolerance is not None: tolerance = self._convert_tolerance(tolerance) + # Treat boolean labels passed to a numeric index as not found. Without + # this fix False and True would be treated as 0 and 1 respectively. 
+ if target.is_boolean() and self.is_numeric(): + return _ensure_platform_int(np.repeat(-1, target.size)) + pself, ptarget = self._maybe_promote(target) if pself is not self or ptarget is not target: return pself.get_indexer(ptarget, method=method, limit=limit, @@ -2616,7 +2621,6 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): 'backfill or nearest reindexing') indexer = self._engine.get_indexer(target._values) - return _ensure_platform_int(indexer) def _convert_tolerance(self, tolerance): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index fa73c9fc7b7225..198e3a354672cb 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1141,6 +1141,13 @@ def test_get_indexer_strings(self): with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) + def test_get_indexer_numeric_index_boolean_target(self): + # GH 16877 + numeric_idx = pd.Index(range(4)) + result = numeric_idx.get_indexer([True, False, True]) + expected = np.array([-1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + def test_get_loc(self): idx = pd.Index([0, 1, 2]) all_methods = [None, 'pad', 'backfill', 'nearest'] diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 45a92f6d6f50b0..a2e18cbb3e1055 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1783,6 +1783,11 @@ def test_drop(self): expected = Series([3], index=[False]) assert_series_equal(result, expected) + # GH 16877 + s = Series([2, 3], index=[0, 1]) + with tm.assert_raises_regex(ValueError, 'not contained in axis'): + s.drop([False, True]) + def test_align(self): def _check_align(a, b, how='left', fill=None): aa, ab = a.align(b, join=how, fill_value=fill)