From 5c27c024c2191817aadc86cf86f897bbf5eb5002 Mon Sep 17 00:00:00 2001 From: Douglas McNeil Date: Tue, 16 Aug 2016 06:08:47 -0400 Subject: [PATCH] BUG: Avoid sentinel-infinity comparison problems (#13445) The problem causing #13445 ultimately traces to the fact that our Infinity/NegInfinity objects were greater than/less than themselves, which violates an assumption numpy makes when sorting. This was separately reported as https://github.com/numpy/numpy/issues/7934, but we can fix and test downstream as well. closes #13445 Author: Douglas McNeil Closes #14006 from dsm054/fix_rank_segfault and squashes the following commits: 7d79370 [Douglas McNeil] BUG: Avoid sentinel-infinity comparison problems (#13445) --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/algos.pyx | 31 +++++++++++++++---------------- pandas/tests/test_algos.py | 30 ++++++++++++++++++++++++++++++ pandas/tests/test_stats.py | 7 +++++++ 4 files changed, 53 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 9cac79288ea89..0ee56f865f8c8 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -900,6 +900,7 @@ Bug Fixes - Bug in ``.rolling()`` that allowed a negative integer window in contruction of the ``Rolling()`` object, but would later fail on aggregation (:issue:`13383`) - Bug in printing ``pd.DataFrame`` where unusual elements with the ``object`` dtype were causing segfaults (:issue:`13717`) +- Bug in ranking ``Series`` which could result in segfaults (:issue:`13445`) - Bug in various index types, which did not propagate the name of passed index (:issue:`12309`) - Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`) - Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`) diff --git a/pandas/algos.pyx b/pandas/algos.pyx index 44288ab9621f1..d3e68ad2a5eee 100644 --- a/pandas/algos.pyx +++ 
b/pandas/algos.pyx @@ -567,28 +567,27 @@ cdef inline are_diff(object left, object right): except TypeError: return left != right -_return_false = lambda self, other: False -_return_true = lambda self, other: True class Infinity(object): + """ provide a positive Infinity comparison method for ranking """ - __lt__ = _return_false - __le__ = _return_false - __eq__ = _return_false - __ne__ = _return_true - __gt__ = _return_true - __ge__ = _return_true - __cmp__ = _return_false + __lt__ = lambda self, other: False + __le__ = lambda self, other: self is other + __eq__ = lambda self, other: self is other + __ne__ = lambda self, other: self is not other + __gt__ = lambda self, other: self is not other + __ge__ = lambda self, other: True class NegInfinity(object): + """ provide a negative Infinity comparison method for ranking """ + + __lt__ = lambda self, other: self is not other + __le__ = lambda self, other: True + __eq__ = lambda self, other: self is other + __ne__ = lambda self, other: self is not other + __gt__ = lambda self, other: False + __ge__ = lambda self, other: self is other - __lt__ = _return_true - __le__ = _return_true - __eq__ = _return_false - __ne__ = _return_true - __gt__ = _return_false - __ge__ = _return_false - __cmp__ = _return_true def rank_2d_generic(object in_arr, axis=0, ties_method='average', ascending=True, na_option='keep', pct=False): diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 66fd1861f08f9..452355541a79b 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -5,6 +5,7 @@ from numpy.random import RandomState from numpy import nan from datetime import datetime +from itertools import permutations from pandas import Series, Categorical, CategoricalIndex, Index import pandas as pd @@ -1270,6 +1271,35 @@ def test_groupsort_indexer(): assert (np.array_equal(result, expected)) +def test_infinity_sort(): + # GH 13445 + # numpy's argsort can be unhappy if something is less than + # itself. 
Instead, let's give our infinities a self-consistent + # ordering, but outside the float extended real line. + + Inf = _algos.Infinity() + NegInf = _algos.NegInfinity() + + ref_nums = [NegInf, float("-inf"), -1e100, 0, 1e100, float("inf"), Inf] + + assert all(Inf >= x for x in ref_nums) + assert all(Inf > x or x is Inf for x in ref_nums) + assert Inf >= Inf and Inf == Inf + assert not Inf < Inf and not Inf > Inf + + assert all(NegInf <= x for x in ref_nums) + assert all(NegInf < x or x is NegInf for x in ref_nums) + assert NegInf <= NegInf and NegInf == NegInf + assert not NegInf < NegInf and not NegInf > NegInf + + for perm in permutations(ref_nums): + assert sorted(perm) == ref_nums + + # smoke tests + np.array([_algos.Infinity()] * 32).argsort() + np.array([_algos.NegInfinity()] * 32).argsort() + + def test_ensure_platform_int(): arr = np.arange(100) diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py index 85ce1d5127512..41d25b9662b5b 100644 --- a/pandas/tests/test_stats.py +++ b/pandas/tests/test_stats.py @@ -179,6 +179,13 @@ def test_rank_int(self): expected.index = result.index assert_series_equal(result, expected) + def test_rank_object_bug(self): + # GH 13445 + + # smoke tests + Series([np.nan] * 32).astype(object).rank(ascending=True) + Series([np.nan] * 32).astype(object).rank(ascending=False) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],