From 45a795e03c985aa3d456916879e3728b90276a7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Jonasson?= Date: Mon, 25 Sep 2017 23:50:18 +0200 Subject: [PATCH] ERR: get_indexer returns the correct indexer when Index is numeric and target is boolean (#16877) (#17343) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/indexes/base.py | 7 ++++++- pandas/tests/indexes/test_base.py | 7 +++++++ pandas/tests/series/test_indexing.py | 5 +++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 36551fa30c3ad..b6bd86bd79a1f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -481,7 +481,7 @@ Other API Changes - :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). - :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) - +- Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ``ValueError`` (:issue:`16877`) .. _whatsnew_0210.deprecations: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f28ff9697e517..be26720adb0bd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2609,6 +2609,12 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): if tolerance is not None: tolerance = self._convert_tolerance(tolerance) + # Treat boolean labels passed to a numeric index as not found. Without + # this fix False and True would be treated as 0 and 1 respectively. 
+ # (GH #16877) + if target.is_boolean() and self.is_numeric(): + return _ensure_platform_int(np.repeat(-1, target.size)) + pself, ptarget = self._maybe_promote(target) if pself is not self or ptarget is not target: return pself.get_indexer(ptarget, method=method, limit=limit, @@ -2637,7 +2643,6 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): 'backfill or nearest reindexing') indexer = self._engine.get_indexer(target._values) - return _ensure_platform_int(indexer) def _convert_tolerance(self, tolerance): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 0bd2861e060ed..81f113d58d680 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1141,6 +1141,13 @@ def test_get_indexer_strings(self): with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) + def test_get_indexer_numeric_index_boolean_target(self): + # GH 16877 + numeric_idx = pd.Index(range(4)) + result = numeric_idx.get_indexer([True, False, True]) + expected = np.array([-1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + def test_get_loc(self): idx = pd.Index([0, 1, 2]) all_methods = [None, 'pad', 'backfill', 'nearest'] diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 91187b709463a..2182e3fbfc212 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1783,6 +1783,11 @@ def test_drop(self): expected = Series([3], index=[False]) assert_series_equal(result, expected) + # GH 16877 + s = Series([2, 3], index=[0, 1]) + with tm.assert_raises_regex(ValueError, 'not contained in axis'): + s.drop([False, True]) + def test_align(self): def _check_align(a, b, how='left', fill=None): aa, ab = a.align(b, join=how, fill_value=fill)