From c24df004ef531d0b4d7784d31add048ace13eba7 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 1 Feb 2019 13:41:08 -0800 Subject: [PATCH] Backport PR #25063: API: change Index set ops sort=True -> sort=None (#25083) --- doc/source/whatsnew/v0.24.1.rst | 34 +++- pandas/_libs/lib.pyx | 3 +- pandas/core/indexes/api.py | 2 +- pandas/core/indexes/base.py | 82 ++++++-- pandas/core/indexes/datetimes.py | 6 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 43 +++- pandas/core/indexes/range.py | 8 +- pandas/tests/indexes/common.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 12 +- .../tests/indexes/interval/test_interval.py | 24 +-- pandas/tests/indexes/multi/test_set_ops.py | 155 ++++++++++++-- pandas/tests/indexes/period/test_period.py | 2 +- pandas/tests/indexes/period/test_setops.py | 20 +- pandas/tests/indexes/test_base.py | 189 ++++++++++++++---- pandas/tests/indexes/test_range.py | 2 +- .../indexes/timedeltas/test_timedelta.py | 8 +- 17 files changed, 465 insertions(+), 129 deletions(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index c8b5417a5b77f..1ee4397960b0b 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -15,10 +15,40 @@ Whats New in 0.24.1 (February XX, 2019) These are the changes in pandas 0.24.1. See :ref:`release` for a full changelog including other versions of pandas. +.. _whatsnew_0241.api: + +API Changes +~~~~~~~~~~~ + +Changing the ``sort`` parameter for :class:`Index` set operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None`` (:issue:`24959`). +The default *behavior*, however, remains the same: the result is sorted, unless + +1. ``self`` and ``other`` are identical +2. ``self`` or ``other`` is empty +3. ``self`` or ``other`` contain values that can not be compared (a ``RuntimeWarning`` is raised). + +This change will allow ``sort=True`` to mean "always sort" in a future release. + +The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which +would not sort the result when the values could not be compared. + +The `sort` option for :meth:`Index.intersection` has changed in three ways. + +1. The default has changed from ``True`` to ``False``, to restore the + pandas 0.23.4 and earlier behavior of not sorting by default. +2. The behavior of ``sort=True`` can now be obtained with ``sort=None``. + This will sort the result only if the values in ``self`` and ``other`` + are not identical. +3. The value ``sort=True`` is no longer allowed. A future version of pandas + will properly support ``sort=True`` meaning "always sort". + .. _whatsnew_0241.regressions: Fixed Regressions -^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~ - Bug in :meth:`DataFrame.itertuples` with ``records`` orient raising an ``AttributeError`` when the ``DataFrame`` contained more than 255 columns (:issue:`24939`) - Bug in :meth:`DataFrame.itertuples` orient converting integer column names to strings prepended with an underscore (:issue:`24940`) @@ -30,7 +60,7 @@ Fixed Regressions .. _whatsnew_0241.enhancements: Enhancements -^^^^^^^^^^^^ +~~~~~~~~~~~~ .. _whatsnew_0241.bug_fixes: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f845a5437ded4..333626b309e3a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -231,10 +231,11 @@ def fast_unique_multiple(list arrays, sort: bool=True): if val not in table: table[val] = stub uniques.append(val) - if sort: + if sort is None: try: uniques.sort() except Exception: + # TODO: RuntimeWarning? pass return uniques diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 684a19c56c92f..6299fc482d0df 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -112,7 +112,7 @@ def _get_combined_index(indexes, intersect=False, sort=False): elif intersect: index = indexes[0] for other in indexes[1:]: - index = index.intersection(other, sort=sort) + index = index.intersection(other) else: index = _union_indexes(indexes, sort=sort) index = ensure_index(index) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3d176012df22b..2fa034670e885 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2245,18 +2245,37 @@ def _get_reconciled_name_object(self, other): return self._shallow_copy(name=name) return self - def union(self, other, sort=True): + def _validate_sort_keyword(self, sort): + if sort not in [None, False]: + raise ValueError("The 'sort' keyword only takes the values of " + "None or False; {0} was passed.".format(sort)) + + def union(self, other, sort=None): """ Form the union of two Index objects. Parameters ---------- other : Index or array-like - sort : bool, default True - Sort the resulting index if possible + sort : bool or None, default None + Whether to sort the resulting Index. + + * None : Sort the result, except when + + 1. `self` and `other` are equal. + 2. `self` or `other` has length 0. + 3. Some values in `self` or `other` cannot be compared. + A RuntimeWarning is issued in this case. + + * False : do not sort the result. .. versionadded:: 0.24.0 + .. versionchanged:: 0.24.1 + + Changed the default value from ``True`` to ``None`` + (without change in behaviour). + Returns ------- union : Index @@ -2269,6 +2288,7 @@ def union(self, other, sort=True): >>> idx1.union(idx2) Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other = ensure_index(other) @@ -2319,7 +2339,7 @@ def union(self, other, sort=True): else: result = lvals - if sort: + if sort is None: try: result = sorting.safe_sort(result) except TypeError as e: @@ -2342,14 +2362,19 @@ def intersection(self, other, sort=False): Parameters ---------- other : Index or array-like - sort : bool, default False - Sort the resulting index if possible + sort : False or None, default False + Whether to sort the resulting index. + + * False : do not sort the result. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 - Changed the default from ``True`` to ``False``. + Changed the default from ``True`` to ``False``, to match + the behaviour of 0.23.4 and earlier. Returns ------- @@ -2363,6 +2388,7 @@ def intersection(self, other, sort=False): >>> idx1.intersection(idx2) Int64Index([3, 4], dtype='int64') """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other = ensure_index(other) @@ -2402,7 +2428,7 @@ def intersection(self, other, sort=False): taken = other.take(indexer) - if sort: + if sort is None: taken = sorting.safe_sort(taken.values) if self.name != other.name: name = None @@ -2415,7 +2441,7 @@ def intersection(self, other, sort=False): return taken - def difference(self, other, sort=True): + def difference(self, other, sort=None): """ Return a new Index with elements from the index that are not in `other`. @@ -2425,11 +2451,22 @@ def difference(self, other, sort=True): Parameters ---------- other : Index or array-like - sort : bool, default True - Sort the resulting index if possible + sort : False or None, default None + Whether to sort the resulting index. By default, the + values are attempted to be sorted, but any TypeError from + incomparable elements is caught by pandas. + + * None : Attempt to sort the result, but catch any TypeErrors + from comparing incomparable elements. + * False : Do not sort the result. .. versionadded:: 0.24.0 + .. versionchanged:: 0.24.1 + + Changed the default value from ``True`` to ``None`` + (without change in behaviour). + Returns ------- difference : Index @@ -2444,6 +2481,7 @@ def difference(self, other, sort=True): >>> idx1.difference(idx2, sort=False) Int64Index([2, 1], dtype='int64') """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) if self.equals(other): @@ -2460,7 +2498,7 @@ def difference(self, other, sort=True): label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) the_diff = this.values.take(label_diff) - if sort: + if sort is None: try: the_diff = sorting.safe_sort(the_diff) except TypeError: @@ -2468,7 +2506,7 @@ def difference(self, other, sort=True): return this._shallow_copy(the_diff, name=result_name, freq=None) - def symmetric_difference(self, other, result_name=None, sort=True): + def symmetric_difference(self, other, result_name=None, sort=None): """ Compute the symmetric difference of two Index objects. @@ -2476,11 +2514,22 @@ def symmetric_difference(self, other, result_name=None, sort=True): ---------- other : Index or array-like result_name : str - sort : bool, default True - Sort the resulting index if possible + sort : False or None, default None + Whether to sort the resulting index. By default, the + values are attempted to be sorted, but any TypeError from + incomparable elements is caught by pandas. + + * None : Attempt to sort the result, but catch any TypeErrors + from comparing incomparable elements. + * False : Do not sort the result. .. versionadded:: 0.24.0 + .. versionchanged:: 0.24.1 + + Changed the default value from ``True`` to ``None`` + (without change in behaviour). + Returns ------- symmetric_difference : Index @@ -2504,6 +2553,7 @@ def symmetric_difference(self, other, result_name=None, sort=True): >>> idx1 ^ idx2 Int64Index([1, 5], dtype='int64') """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other, result_name_update = self._convert_can_do_setop(other) if result_name is None: @@ -2524,7 +2574,7 @@ def symmetric_difference(self, other, result_name=None, sort=True): right_diff = other.values.take(right_indexer) the_diff = _concat._concat_compat([left_diff, right_diff]) - if sort: + if sort is None: try: the_diff = sorting.safe_sort(the_diff) except TypeError: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ef941ab87ba12..9c46860eb49d6 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -602,19 +602,21 @@ def intersection(self, other, sort=False): Parameters ---------- other : DatetimeIndex or array-like - sort : bool, default True + sort : False or None, default False Sort the resulting index if possible. .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 - Changed the default from ``True`` to ``False``. + Changed the default to ``False`` to match the behaviour + from before 0.24.0. Returns ------- y : Index or DatetimeIndex """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) if self.equals(other): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 736de94991181..2c63fe33c57fe 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1093,7 +1093,7 @@ def equals(self, other): def overlaps(self, other): return self._data.overlaps(other) - def _setop(op_name, sort=True): + def _setop(op_name, sort=None): def func(self, other, sort=sort): other = self._as_like_interval_index(other) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 16af3fe8eef26..14975dbbefa63 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2879,30 +2879,47 @@ def equal_levels(self, other): return False return True - def union(self, other, sort=True): + def union(self, other, sort=None): """ Form the union of two MultiIndex objects Parameters ---------- other : MultiIndex or array / Index of tuples - sort : bool, default True - Sort the resulting MultiIndex if possible + sort : False or None, default None + Whether to sort the resulting Index. + + * None : Sort the result, except when + + 1. `self` and `other` are equal. + 2. `self` has length 0. + 3. Some values in `self` or `other` cannot be compared. + A RuntimeWarning is issued in this case. + + * False : do not sort the result. .. versionadded:: 0.24.0 + .. versionchanged:: 0.24.1 + + Changed the default value from ``True`` to ``None`` + (without change in behaviour). + Returns ------- Index >>> index.union(index2) """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other, result_names = self._convert_can_do_setop(other) if len(other) == 0 or self.equals(other): return self + # TODO: Index.union returns other when `len(self)` is 0. + uniq_tuples = lib.fast_unique_multiple([self._ndarray_values, other._ndarray_values], sort=sort) @@ -2917,19 +2934,21 @@ def intersection(self, other, sort=False): Parameters ---------- other : MultiIndex or array / Index of tuples - sort : bool, default True + sort : False or None, default False Sort the resulting MultiIndex if possible .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 - Changed the default from ``True`` to ``False``. + Changed the default from ``True`` to ``False``, to match + behaviour from before 0.24.0 Returns ------- Index """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other, result_names = self._convert_can_do_setop(other) @@ -2940,7 +2959,7 @@ def intersection(self, other, sort=False): other_tuples = other._ndarray_values uniq_tuples = set(self_tuples) & set(other_tuples) - if sort: + if sort is None: uniq_tuples = sorted(uniq_tuples) if len(uniq_tuples) == 0: @@ -2951,22 +2970,28 @@ def intersection(self, other, sort=False): return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, names=result_names) - def difference(self, other, sort=True): + def difference(self, other, sort=None): """ Compute set difference of two MultiIndex objects Parameters ---------- other : MultiIndex - sort : bool, default True + sort : False or None, default None Sort the resulting MultiIndex if possible .. versionadded:: 0.24.0 + .. versionchanged:: 0.24.1 + + Changed the default value from ``True`` to ``None`` + (without change in behaviour). + Returns ------- diff : MultiIndex """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other, result_names = self._convert_can_do_setop(other) @@ -2986,7 +3011,7 @@ def difference(self, other, sort=True): label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) difference = this.values.take(label_diff) - if sort: + if sort is None: difference = sorted(difference) if len(difference) == 0: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e17a6a682af40..5aafe9734b6a0 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -350,19 +350,21 @@ def intersection(self, other, sort=False): Parameters ---------- other : Index or array-like - sort : bool, default True + sort : False or None, default False Sort the resulting index if possible .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 - Changed the default from ``True`` to ``False``. + Changed the default to ``False`` to match the behaviour + from before 0.24.0. Returns ------- intersection : Index """ + self._validate_sort_keyword(sort) if self.equals(other): return self._get_reconciled_name_object(other) @@ -405,7 +407,7 @@ def intersection(self, other, sort=False): if (self._step < 0 and other._step < 0) is not (new_index._step < 0): new_index = new_index[::-1] - if sort: + if sort is None: new_index = new_index.sort_values() return new_index diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 499f01f0e7f7b..e0259b83a8768 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -478,7 +478,7 @@ def test_union_base(self): with pytest.raises(TypeError, match=msg): first.union([1, 2, 3]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_base(self, sort): for name, idx in compat.iteritems(self.indices): first = idx[2:] diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index bd37cc815d0f7..19009e45ee83a 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -138,7 +138,7 @@ def test_intersection2(self): @pytest.mark.parametrize("tz", [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, tz, sort): # GH 4690 (with tz) base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx') @@ -187,7 +187,7 @@ def test_intersection(self, tz, sort): for (rng, expected) in [(rng2, expected2), (rng3, expected3), (rng4, expected4)]: result = base.intersection(rng, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) assert result.name == expected.name @@ -212,7 +212,7 @@ def test_intersection_bug_1708(self): assert len(result) == 0 @pytest.mark.parametrize("tz", tz) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference(self, tz, sort): rng_dates = ['1/2/2000', '1/3/2000', '1/1/2000', '1/4/2000', '1/5/2000'] @@ -233,11 +233,11 @@ def test_difference(self, tz, sort): (rng2, other2, expected2), (rng3, other3, expected3)]: result_diff = rng.difference(other, sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result_diff, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_freq(self, sort): # GH14323: difference of DatetimeIndex should not preserve frequency @@ -254,7 +254,7 @@ def test_difference_freq(self, sort): tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_datetimeindex_diff(self, sort): dti1 = date_range(freq='Q-JAN', start=datetime(1997, 12, 31), periods=100) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index db69258c1d3d2..f1fd06c9cab6e 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -783,19 +783,19 @@ def test_non_contiguous(self, closed): assert 1.5 not in index - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union(self, closed, sort): index = self.create_index(closed=closed) other = IntervalIndex.from_breaks(range(5, 13), closed=closed) expected = IntervalIndex.from_breaks(range(13), closed=closed) result = index[::-1].union(other, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) result = other[::-1].union(index, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) @@ -812,19 +812,19 @@ def test_union(self, closed, sort): result = index.union(other, sort=sort) tm.assert_index_equal(result, index) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, closed, sort): index = self.create_index(closed=closed) other = IntervalIndex.from_breaks(range(5, 13), closed=closed) expected = IntervalIndex.from_breaks(range(5, 11), closed=closed) result = index[::-1].intersection(other, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) result = other[::-1].intersection(index, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) @@ -842,14 +842,14 @@ def test_intersection(self, closed, sort): result = index.intersection(other, sort=sort) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference(self, closed, sort): index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed) result = index.difference(index[:1], sort=sort) expected = index[1:] - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) @@ -864,19 +864,19 @@ def test_difference(self, closed, sort): result = index.difference(other, sort=sort) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference(self, closed, sort): index = self.create_index(closed=closed) result = index[1:].symmetric_difference(index[:-1], sort=sort) expected = IntervalIndex([index[0], index[-1]]) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) # GH 19101: empty result, same dtype result = index.symmetric_difference(index, sort=sort) expected = IntervalIndex(np.array([], dtype='int64'), closed=closed) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) @@ -888,7 +888,7 @@ def test_symmetric_difference(self, closed, sort): @pytest.mark.parametrize('op_name', [ 'union', 'intersection', 'difference', 'symmetric_difference']) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_set_operation_errors(self, closed, op_name, sort): index = self.create_index(closed=closed) set_op = getattr(index, op_name) diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index 208d6cf1c639f..41a0e1e59e8a5 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -9,7 +9,7 @@ @pytest.mark.parametrize("case", [0.5, "xxx"]) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) @pytest.mark.parametrize("method", ["intersection", "union", "difference", "symmetric_difference"]) def test_set_ops_error_cases(idx, case, sort, method): @@ -19,13 +19,13 @@ def test_set_ops_error_cases(idx, case, sort, method): getattr(idx, method)(case, sort=sort) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_intersection_base(idx, sort): first = idx[:5] second = idx[:3] intersect = first.intersection(second, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(intersect, second.sort_values()) assert tm.equalContents(intersect, second) @@ -34,7 +34,7 @@ def test_intersection_base(idx, sort): for klass in [np.array, Series, list]] for case in cases: result = first.intersection(case, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, second.sort_values()) assert tm.equalContents(result, second) @@ -43,13 +43,13 @@ def test_intersection_base(idx, sort): first.intersection([1, 2, 3], sort=sort) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_union_base(idx, sort): first = idx[3:] second = idx[:5] everything = idx union = first.union(second, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(union, everything.sort_values()) assert tm.equalContents(union, everything) @@ -58,7 +58,7 @@ def test_union_base(idx, sort): for klass in [np.array, Series, list]] for case in cases: result = first.union(case, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, everything.sort_values()) assert tm.equalContents(result, everything) @@ -67,13 +67,13 @@ def test_union_base(idx, sort): first.union([1, 2, 3], sort=sort) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_difference_base(idx, sort): second = idx[4:] answer = idx[:4] result = idx.difference(second, sort=sort) - if sort: + if sort is None: answer = answer.sort_values() assert result.equals(answer) @@ -91,14 +91,14 @@ def test_difference_base(idx, sort): idx.difference([1, 2, 3], sort=sort) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference(idx, sort): first = idx[1:] second = idx[:-1] answer = idx[[-1, 0]] result = first.symmetric_difference(second, sort=sort) - if sort: + if sort is None: answer = answer.sort_values() tm.assert_index_equal(result, answer) @@ -121,14 +121,14 @@ def test_empty(idx): assert idx[:0].empty -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_difference(idx, sort): first = idx result = first.difference(idx[-3:], sort=sort) vals = idx[:-3].values - if sort: + if sort is None: vals = sorted(vals) expected = MultiIndex.from_tuples(vals, @@ -189,14 +189,62 @@ def test_difference(idx, sort): first.difference([1, 2, 3, 4, 5], sort=sort) -@pytest.mark.parametrize("sort", [True, False]) +def test_difference_sort_special(): + # GH-24959 + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + # sort=None, the default + result = idx.difference([]) + tm.assert_index_equal(result, idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_difference_sort_special_true(): + # TODO decide on True behaviour + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + result = idx.difference([], sort=True) + expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + tm.assert_index_equal(result, expected) + + +def test_difference_sort_incomparable(): + # GH-24959 + idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000'), 2], + ['a', 'b']]) + + other = pd.MultiIndex.from_product([[3, pd.Timestamp('2000'), 4], + ['c', 'd']]) + # sort=None, the default + # MultiIndex.difference deviates here from other difference + # implementations in not catching the TypeError + with pytest.raises(TypeError): + result = idx.difference(other) + + # sort=False + result = idx.difference(other, sort=False) + tm.assert_index_equal(result, idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_difference_sort_incomparable_true(): + # TODO decide on True behaviour + # # sort=True, raises + idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000'), 2], + ['a', 'b']]) + other = pd.MultiIndex.from_product([[3, pd.Timestamp('2000'), 4], + ['c', 'd']]) + + with pytest.raises(TypeError): + idx.difference(other, sort=True) + + +@pytest.mark.parametrize("sort", [None, False]) def test_union(idx, sort): piece1 = idx[:5][::-1] piece2 = idx[3:] the_union = piece1.union(piece2, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(the_union, idx.sort_values()) assert tm.equalContents(the_union, idx) @@ -225,14 +273,14 @@ def test_union(idx, sort): # assert result.equals(result2) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_intersection(idx, sort): piece1 = idx[:5][::-1] piece2 = idx[3:] the_int = piece1.intersection(piece2, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(the_int, idx[3:5]) assert tm.equalContents(the_int, idx[3:5]) @@ -249,3 +297,76 @@ def test_intersection(idx, sort): # tuples = _index.values # result = _index & tuples # assert result.equals(tuples) + + +def test_intersect_equal_sort(): + # GH-24959 + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_intersect_equal_sort_true(): + # TODO decide on True behaviour + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + sorted_ = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + + +@pytest.mark.parametrize('slice_', [slice(None), slice(0)]) +def test_union_sort_other_empty(slice_): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + # MultiIndex does not special case empty.union(idx) + # tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_union_sort_other_empty_sort(slice_): + # TODO decide on True behaviour + # # sort=True + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + other = idx[:0] + result = idx.union(other, sort=True) + expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + tm.assert_index_equal(result, expected) + + +def test_union_sort_other_incomparable(): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']]) + + # default, sort=None + result = idx.union(idx[:1]) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_union_sort_other_incomparable_sort(): + # TODO decide on True behaviour + # # sort=True + idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']]) + with pytest.raises(TypeError, match='Cannot compare'): + idx.union(idx[:1], sort=True) + + +@pytest.mark.parametrize("method", ['union', 'intersection', 'difference', + 'symmetric_difference']) +def test_setops_disallow_true(method): + idx1 = pd.MultiIndex.from_product([['a', 'b'], [1, 2]]) + idx2 = pd.MultiIndex.from_product([['b', 'c'], [1, 2]]) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=True) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 464ff7aa5d58d..dc9a32d75d272 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -77,7 +77,7 @@ def test_no_millisecond_field(self): with pytest.raises(AttributeError): DatetimeIndex([]).millisecond - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_freq(self, sort): # GH14323: difference of Period MUST preserve frequency # but the ability to union results must be preserved diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index a97ab47bcda16..bf29edad4841e 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -38,7 +38,7 @@ def test_join_does_not_recur(self): df.columns[0], df.columns[1]], object) tm.assert_index_equal(res, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union(self, sort): # union other1 = pd.period_range('1/1/2000', freq='D', periods=5) @@ -97,11 +97,11 @@ def test_union(self, sort): (rng8, other8, expected8)]: result_union = rng.union(other, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result_union, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union_misc(self, sort): index = period_range('1/1/2000', '1/20/2000', freq='D') @@ -110,7 +110,7 @@ def test_union_misc(self, sort): # not in order result = _permute(index[:-5]).union(_permute(index[10:]), sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, index) assert tm.equalContents(result, index) @@ -139,7 +139,7 @@ def test_union_dataframe_index(self): exp = pd.period_range('1/1/1980', '1/1/2012', freq='M') tm.assert_index_equal(df.index, exp) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, sort): index = period_range('1/1/2000', '1/20/2000', freq='D') @@ -150,7 +150,7 @@ def test_intersection(self, sort): left = _permute(index[:-5]) right = _permute(index[10:]) result = left.intersection(right, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, index[10:-5]) assert tm.equalContents(result, index[10:-5]) @@ -164,7 +164,7 @@ def test_intersection(self, sort): with pytest.raises(period.IncompatibleFrequency): index.intersection(index3, sort=sort) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_cases(self, sort): base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx') @@ -210,7 +210,7 @@ def test_intersection_cases(self, sort): for (rng, expected) in [(rng2, expected2), (rng3, expected3), (rng4, expected4)]: result = base.intersection(rng, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) assert result.name == expected.name @@ -224,7 +224,7 @@ def test_intersection_cases(self, sort): result = rng.intersection(rng[0:0]) assert len(result) == 0 - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference(self, sort): # diff period_rng = ['1/3/2000', '1/2/2000', '1/1/2000', '1/5/2000', @@ -276,6 +276,6 @@ def test_difference(self, sort): (rng6, other6, expected6), (rng7, other7, expected7), ]: result_difference = rng.difference(other, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result_difference, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 20e439de46bde..c99007cef90d4 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -3,6 +3,7 @@ from collections import defaultdict from datetime import datetime, timedelta import math +import operator import sys import numpy as np @@ -684,12 +685,12 @@ def test_empty_fancy_raises(self, attr): # np.ndarray only accepts ndarray of int & bool dtypes, so should Index pytest.raises(IndexError, index.__getitem__, empty_farr) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, sort): first = self.strIndex[:20] second = self.strIndex[:10] intersect = first.intersection(second, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(intersect, second.sort_values()) assert tm.equalContents(intersect, second) @@ -701,7 +702,7 @@ def test_intersection(self, sort): (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name (Index([3, 4, 5, 6, 7], name="other"), False), # drop diff names (Index([3, 4, 5, 6, 7]), False)]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_name_preservation(self, index2, keeps_name, sort): index1 = Index([1, 2, 3, 4, 5], name='index') expected = Index([3, 4, 5]) @@ -715,7 +716,7 @@ def test_intersection_name_preservation(self, index2, keeps_name, sort): @pytest.mark.parametrize("first_name,second_name,expected_name", [ ('A', 'A', 'A'), ('A', 'B', None), (None, 'B', None)]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_name_preservation2(self, first_name, second_name, expected_name, sort): first = self.strIndex[5:20] @@ -728,7 +729,7 @@ def test_intersection_name_preservation2(self, first_name, second_name, @pytest.mark.parametrize("index2,keeps_name", [ (Index([4, 7, 6, 5, 3], name='index'), True), (Index([4, 7, 6, 5, 3], name='other'), False)]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_monotonic(self, index2, keeps_name, sort): index1 = Index([5, 3, 2, 4, 1], name='index') expected = Index([5, 3, 4]) @@ -737,25 +738,25 @@ def test_intersection_monotonic(self, index2, keeps_name, sort): expected.name = "index" result = index1.intersection(index2, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) @pytest.mark.parametrize("index2,expected_arr", [ (Index(['B', 'D']), ['B']), (Index(['B', 'D', 'A']), ['A', 'B', 'A'])]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): # non-monotonic non-unique index1 = Index(['A', 'B', 'A', 'C']) expected = Index(expected_arr, dtype='object') result = index1.intersection(index2, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersect_str_dates(self, sort): dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] @@ -770,7 +771,19 @@ def test_intersect_nosort(self): expected = pd.Index(['b', 'a']) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + def test_intersection_equal_sort(self): + idx = pd.Index(['c', 'a', 'b']) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + @pytest.mark.xfail(reason="Not implemented") + def test_intersection_equal_sort_true(self): + # TODO decide on True behaviour + idx = pd.Index(['c', 'a', 'b']) + sorted_ = pd.Index(['a', 'b', 'c']) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + + @pytest.mark.parametrize("sort", [None, False]) def test_chained_union(self, sort): # Chained unions handles names correctly i1 = Index([1, 2], name='i1') @@ -787,7 +800,7 @@ def test_chained_union(self, sort): expected = j1.union(j2, sort=sort).union(j3, sort=sort) tm.assert_index_equal(union, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union(self, sort): # TODO: Replace with fixturesult first = self.strIndex[5:20] @@ -795,13 +808,65 @@ def test_union(self, sort): everything = self.strIndex[:20] union = first.union(second, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(union, everything.sort_values()) assert tm.equalContents(union, everything) + @pytest.mark.parametrize('slice_', [slice(None), slice(0)]) + def test_union_sort_other_special(self, slice_): + # https://github.com/pandas-dev/pandas/issues/24959 + + idx = pd.Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize('slice_', [slice(None), slice(0)]) + def test_union_sort_special_true(self, slice_): + # TODO decide on True behaviour + # sort=True + idx = pd.Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + + result = idx.union(other, sort=True) + expected = pd.Index([0, 1, 2]) + tm.assert_index_equal(result, expected) + + def test_union_sort_other_incomparable(self): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = pd.Index([1, pd.Timestamp('2000')]) + # default (sort=None) + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1]) + + tm.assert_index_equal(result, idx) + + # sort=None + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1], sort=None) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) + + @pytest.mark.xfail(reason="Not implemented") + def test_union_sort_other_incomparable_true(self): + # TODO decide on True behaviour + # sort=True + idx = pd.Index([1, pd.Timestamp('2000')]) + with pytest.raises(TypeError, match='.*'): + idx.union(idx[:1], sort=True) + @pytest.mark.parametrize("klass", [ np.array, Series, list]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union_from_iterables(self, klass, sort): # GH 10149 # TODO: Replace with fixturesult @@ -811,29 +876,30 @@ def test_union_from_iterables(self, klass, sort): case = klass(second.values) result = first.union(case, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, everything.sort_values()) assert tm.equalContents(result, everything) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union_identity(self, sort): # TODO: replace with fixturesult first = self.strIndex[5:20] union = first.union(first, sort=sort) - assert union is first + # i.e. identity is not preserved when sort is True + assert (union is first) is (not sort) union = first.union([], sort=sort) - assert union is first + assert (union is first) is (not sort) union = Index([]).union(first, sort=sort) - assert union is first + assert (union is first) is (not sort) @pytest.mark.parametrize("first_list", [list('ba'), list()]) @pytest.mark.parametrize("second_list", [list('ab'), list()]) @pytest.mark.parametrize("first_name, second_name, expected_name", [ ('A', 'B', None), (None, 'B', None), ('A', None, None)]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union_name_preservation(self, first_list, second_list, first_name, second_name, expected_name, sort): first = Index(first_list, name=first_name) @@ -842,14 +908,14 @@ def test_union_name_preservation(self, first_list, second_list, first_name, vals = set(first_list).union(second_list) - if sort and len(first_list) > 0 and len(second_list) > 0: + if sort is None and len(first_list) > 0 and len(second_list) > 0: expected = Index(sorted(vals), name=expected_name) tm.assert_index_equal(union, expected) else: expected = Index(vals, name=expected_name) assert tm.equalContents(union, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union_dt_as_obj(self, sort): # TODO: Replace with fixturesult firstCat = self.strIndex.union(self.dateIndex) @@ -866,6 +932,15 @@ def test_union_dt_as_obj(self, sort): tm.assert_contains_all(self.strIndex, secondCat) tm.assert_contains_all(self.dateIndex, firstCat) + @pytest.mark.parametrize("method", ['union', 'intersection', 'difference', + 'symmetric_difference']) + def test_setops_disallow_true(self, method): + idx1 = pd.Index(['a', 'b']) + idx2 = pd.Index(['b', 'c']) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=True) + def test_map_identity_mapping(self): # GH 12766 # TODO: replace with fixture @@ -987,7 +1062,7 @@ def test_append_empty_preserve_name(self, name, expected): @pytest.mark.parametrize("second_name,expected", [ (None, None), ('name', 'name')]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_name_preservation(self, second_name, expected, sort): # TODO: replace with fixturesult first = self.strIndex[5:20] @@ -1005,7 +1080,7 @@ def test_difference_name_preservation(self, second_name, expected, sort): else: assert result.name == expected - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_empty_arg(self, sort): first = self.strIndex[5:20] first.name == 'name' @@ -1014,7 +1089,7 @@ def test_difference_empty_arg(self, sort): assert tm.equalContents(result, first) assert result.name == first.name - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_identity(self, sort): first = self.strIndex[5:20] first.name == 'name' @@ -1023,7 +1098,7 @@ def test_difference_identity(self, sort): assert len(result) == 0 assert result.name == first.name - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_sort(self, sort): first = self.strIndex[5:20] second = self.strIndex[:10] @@ -1031,12 +1106,12 @@ def test_difference_sort(self, sort): result = first.difference(second, sort) expected = self.strIndex[10:20] - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference(self, sort): # smoke index1 = Index([5, 2, 3, 4], name='index1') @@ -1045,7 +1120,7 @@ def test_symmetric_difference(self, sort): expected = Index([5, 1]) assert tm.equalContents(result, expected) assert result.name is None - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) @@ -1054,13 +1129,43 @@ def test_symmetric_difference(self, sort): assert tm.equalContents(result, expected) assert result.name is None - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize('opname', ['difference', 'symmetric_difference']) + def test_difference_incomparable(self, opname): + a = pd.Index([3, pd.Timestamp('2000'), 1]) + b = pd.Index([2, pd.Timestamp('1999'), 1]) + op = operator.methodcaller(opname, b) + + # sort=None, the default + result = op(a) + expected = pd.Index([3, pd.Timestamp('2000'), 2, pd.Timestamp('1999')]) + if opname == 'difference': + expected = expected[:2] + tm.assert_index_equal(result, expected) + + # sort=False + op = operator.methodcaller(opname, b, sort=False) + result = op(a) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize('opname', ['difference', 'symmetric_difference']) + def test_difference_incomparable_true(self, opname): + # TODO decide on True behaviour + # # sort=True, raises + a = pd.Index([3, pd.Timestamp('2000'), 1]) + b = pd.Index([2, pd.Timestamp('1999'), 1]) + op = operator.methodcaller(opname, b, sort=True) + + with pytest.raises(TypeError, match='Cannot compare'): + op(a) + + @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference_mi(self, sort): index1 = MultiIndex.from_tuples(self.tuples) index2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)]) result = index1.symmetric_difference(index2, sort=sort) expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)]) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) @@ -1068,18 +1173,18 @@ def test_symmetric_difference_mi(self, sort): @pytest.mark.parametrize("index2,expected", [ (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])), (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0]))]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference_missing(self, index2, expected, sort): # GH 13514 change: {nan} - {nan} == {} # (GH 6444, sorting of nans, is no longer an issue) index1 = Index([1, np.nan, 2, 3]) result = index1.symmetric_difference(index2, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference_non_index(self, sort): index1 = Index([1, 2, 3, 4], name='index1') index2 = np.array([2, 3, 4, 5]) @@ -1093,7 +1198,7 @@ def test_symmetric_difference_non_index(self, sort): assert tm.equalContents(result, expected) assert result.name == 'new_name' - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_type(self, sort): # GH 20040 # If taking difference of a set and itself, it @@ -1104,7 +1209,7 @@ def test_difference_type(self, sort): expected = index.drop(index) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_difference(self, sort): # GH 20040 # Test that the intersection of an index with an @@ -1607,11 +1712,11 @@ def test_drop_tuple(self, values, to_drop): ('intersection', np.array([(1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')], dtype=[('num', int), ('let', 'a1')]), - True), + None), ('union', np.array([(1, 'A'), (1, 'B'), (1, 'C'), (2, 'A'), (2, 'B'), (2, 'C')], dtype=[('num', int), ('let', 'a1')]), - True) + None) ]) def test_tuple_union_bug(self, method, expected, sort): index1 = Index(np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')], @@ -2259,20 +2364,20 @@ def test_unique_na(self): result = idx.unique() tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_base(self, sort): # (same results for py2 and py3 but sortedness not tested elsewhere) index = self.create_index() first = index[:5] second = index[:3] - expected = Index([0, 1, 'a']) if sort else Index([0, 'a', 1]) + expected = Index([0, 1, 'a']) if sort is None else Index([0, 'a', 1]) result = first.intersection(second, sort=sort) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("klass", [ np.array, Series, list]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_different_type_base(self, klass, sort): # GH 10149 index = self.create_index() @@ -2282,7 +2387,7 @@ def test_intersection_different_type_base(self, klass, sort): result = first.intersection(klass(second.values), sort=sort) assert tm.equalContents(result, second) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_base(self, sort): # (same results for py2 and py3 but sortedness not tested elsewhere) index = self.create_index() @@ -2291,7 +2396,7 @@ def test_difference_base(self, sort): result = first.difference(second, sort) expected = Index([0, 'a', 1]) - if sort: + if sort is None: expected = Index(safe_sort(expected)) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index bbd1e0ccc19b1..96cf83d477376 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -503,7 +503,7 @@ def test_join_self(self): joined = self.index.join(self.index, how=kind) assert self.index is joined - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, sort): # intersect with Int64Index other = Index(np.arange(1, 6)) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 547366ec79094..79210705103ab 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -51,7 +51,7 @@ def test_fillna_timedelta(self): [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) tm.assert_index_equal(idx.fillna('x'), exp) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_freq(self, sort): # GH14323: Difference of TimedeltaIndex should not preserve frequency @@ -69,7 +69,7 @@ def test_difference_freq(self, sort): tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_sort(self, sort): index = pd.TimedeltaIndex(["5 days", "3 days", "2 days", "4 days", @@ -80,7 +80,7 @@ def test_difference_sort(self, sort): expected = TimedeltaIndex(["5 days", "0 days"], freq=None) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(idx_diff, expected) @@ -90,7 +90,7 @@ def test_difference_sort(self, sort): idx_diff = index.difference(other, sort) expected = TimedeltaIndex(["1 days", "0 days"], freq=None) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(idx_diff, expected)