Skip to content

Commit

Permalink
Backport PR pandas-dev#25063: API: change Index set ops sort=True -> …
Browse files Browse the repository at this point in the history
…sort=None (pandas-dev#25083)
  • Loading branch information
meeseeksmachine authored and TomAugspurger committed Feb 1, 2019
1 parent 5cb622a commit c24df00
Show file tree
Hide file tree
Showing 17 changed files with 465 additions and 129 deletions.
34 changes: 32 additions & 2 deletions doc/source/whatsnew/v0.24.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,40 @@ Whats New in 0.24.1 (February XX, 2019)
These are the changes in pandas 0.24.1. See :ref:`release` for a full changelog
including other versions of pandas.

.. _whatsnew_0241.api:

API Changes
~~~~~~~~~~~

Changing the ``sort`` parameter for :class:`Index` set operations
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None`` (:issue:`24959`).
The default *behavior*, however, remains the same: the result is sorted, unless

1. ``self`` and ``other`` are identical
2. ``self`` or ``other`` is empty
3. ``self`` or ``other`` contain values that can not be compared (a ``RuntimeWarning`` is raised).

This change will allow ``sort=True`` to mean "always sort" in a future release.

The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which
would not sort the result when the values could not be compared.

The `sort` option for :meth:`Index.intersection` has changed in three ways.

1. The default has changed from ``True`` to ``False``, to restore the
pandas 0.23.4 and earlier behavior of not sorting by default.
2. The behavior of ``sort=True`` can now be obtained with ``sort=None``.
This will sort the result only if the values in ``self`` and ``other``
are not identical.
3. The value ``sort=True`` is no longer allowed. A future version of pandas
will properly support ``sort=True`` meaning "always sort".

.. _whatsnew_0241.regressions:

Fixed Regressions
^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~

- Bug in :meth:`DataFrame.itertuples` with ``records`` orient raising an ``AttributeError`` when the ``DataFrame`` contained more than 255 columns (:issue:`24939`)
- Bug in :meth:`DataFrame.itertuples` orient converting integer column names to strings prepended with an underscore (:issue:`24940`)
Expand All @@ -30,7 +60,7 @@ Fixed Regressions
.. _whatsnew_0241.enhancements:

Enhancements
^^^^^^^^^^^^
~~~~~~~~~~~~


.. _whatsnew_0241.bug_fixes:
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,11 @@ def fast_unique_multiple(list arrays, sort: bool=True):
if val not in table:
table[val] = stub
uniques.append(val)
if sort:
if sort is None:
try:
uniques.sort()
except Exception:
# TODO: RuntimeWarning?
pass

return uniques
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def _get_combined_index(indexes, intersect=False, sort=False):
elif intersect:
index = indexes[0]
for other in indexes[1:]:
index = index.intersection(other, sort=sort)
index = index.intersection(other)
else:
index = _union_indexes(indexes, sort=sort)
index = ensure_index(index)
Expand Down
82 changes: 66 additions & 16 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2245,18 +2245,37 @@ def _get_reconciled_name_object(self, other):
return self._shallow_copy(name=name)
return self

def union(self, other, sort=True):
def _validate_sort_keyword(self, sort):
if sort not in [None, False]:
raise ValueError("The 'sort' keyword only takes the values of "
"None or False; {0} was passed.".format(sort))

def union(self, other, sort=None):
"""
Form the union of two Index objects.
Parameters
----------
other : Index or array-like
sort : bool, default True
Sort the resulting index if possible
sort : bool or None, default None
Whether to sort the resulting Index.
* None : Sort the result, except when
1. `self` and `other` are equal.
2. `self` or `other` has length 0.
3. Some values in `self` or `other` cannot be compared.
A RuntimeWarning is issued in this case.
* False : do not sort the result.
.. versionadded:: 0.24.0
.. versionchanged:: 0.24.1
Changed the default value from ``True`` to ``None``
(without change in behaviour).
Returns
-------
union : Index
Expand All @@ -2269,6 +2288,7 @@ def union(self, other, sort=True):
>>> idx1.union(idx2)
Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other = ensure_index(other)

Expand Down Expand Up @@ -2319,7 +2339,7 @@ def union(self, other, sort=True):
else:
result = lvals

if sort:
if sort is None:
try:
result = sorting.safe_sort(result)
except TypeError as e:
Expand All @@ -2342,14 +2362,19 @@ def intersection(self, other, sort=False):
Parameters
----------
other : Index or array-like
sort : bool, default False
Sort the resulting index if possible
sort : False or None, default False
Whether to sort the resulting index.
* False : do not sort the result.
* None : sort the result, except when `self` and `other` are equal
or when the values cannot be compared.
.. versionadded:: 0.24.0
.. versionchanged:: 0.24.1
Changed the default from ``True`` to ``False``.
Changed the default from ``True`` to ``False``, to match
the behaviour of 0.23.4 and earlier.
Returns
-------
Expand All @@ -2363,6 +2388,7 @@ def intersection(self, other, sort=False):
>>> idx1.intersection(idx2)
Int64Index([3, 4], dtype='int64')
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other = ensure_index(other)

Expand Down Expand Up @@ -2402,7 +2428,7 @@ def intersection(self, other, sort=False):

taken = other.take(indexer)

if sort:
if sort is None:
taken = sorting.safe_sort(taken.values)
if self.name != other.name:
name = None
Expand All @@ -2415,7 +2441,7 @@ def intersection(self, other, sort=False):

return taken

def difference(self, other, sort=True):
def difference(self, other, sort=None):
"""
Return a new Index with elements from the index that are not in
`other`.
Expand All @@ -2425,11 +2451,22 @@ def difference(self, other, sort=True):
Parameters
----------
other : Index or array-like
sort : bool, default True
Sort the resulting index if possible
sort : False or None, default None
Whether to sort the resulting index. By default, the
values are attempted to be sorted, but any TypeError from
incomparable elements is caught by pandas.
* None : Attempt to sort the result, but catch any TypeErrors
from comparing incomparable elements.
* False : Do not sort the result.
.. versionadded:: 0.24.0
.. versionchanged:: 0.24.1
Changed the default value from ``True`` to ``None``
(without change in behaviour).
Returns
-------
difference : Index
Expand All @@ -2444,6 +2481,7 @@ def difference(self, other, sort=True):
>>> idx1.difference(idx2, sort=False)
Int64Index([2, 1], dtype='int64')
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)

if self.equals(other):
Expand All @@ -2460,27 +2498,38 @@ def difference(self, other, sort=True):
label_diff = np.setdiff1d(np.arange(this.size), indexer,
assume_unique=True)
the_diff = this.values.take(label_diff)
if sort:
if sort is None:
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
pass

return this._shallow_copy(the_diff, name=result_name, freq=None)

def symmetric_difference(self, other, result_name=None, sort=True):
def symmetric_difference(self, other, result_name=None, sort=None):
"""
Compute the symmetric difference of two Index objects.
Parameters
----------
other : Index or array-like
result_name : str
sort : bool, default True
Sort the resulting index if possible
sort : False or None, default None
Whether to sort the resulting index. By default, the
values are attempted to be sorted, but any TypeError from
incomparable elements is caught by pandas.
* None : Attempt to sort the result, but catch any TypeErrors
from comparing incomparable elements.
* False : Do not sort the result.
.. versionadded:: 0.24.0
.. versionchanged:: 0.24.1
Changed the default value from ``True`` to ``None``
(without change in behaviour).
Returns
-------
symmetric_difference : Index
Expand All @@ -2504,6 +2553,7 @@ def symmetric_difference(self, other, result_name=None, sort=True):
>>> idx1 ^ idx2
Int64Index([1, 5], dtype='int64')
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other, result_name_update = self._convert_can_do_setop(other)
if result_name is None:
Expand All @@ -2524,7 +2574,7 @@ def symmetric_difference(self, other, result_name=None, sort=True):
right_diff = other.values.take(right_indexer)

the_diff = _concat._concat_compat([left_diff, right_diff])
if sort:
if sort is None:
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,19 +602,21 @@ def intersection(self, other, sort=False):
Parameters
----------
other : DatetimeIndex or array-like
sort : bool, default True
sort : False or None, default False
Sort the resulting index if possible.
.. versionadded:: 0.24.0
.. versionchanged:: 0.24.1
Changed the default from ``True`` to ``False``.
Changed the default to ``False`` to match the behaviour
from before 0.24.0.
Returns
-------
y : Index or DatetimeIndex
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)

if self.equals(other):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1093,7 +1093,7 @@ def equals(self, other):
def overlaps(self, other):
return self._data.overlaps(other)

def _setop(op_name, sort=True):
def _setop(op_name, sort=None):
def func(self, other, sort=sort):
other = self._as_like_interval_index(other)

Expand Down
Loading

0 comments on commit c24df00

Please sign in to comment.