From 4fe7c68728da2174f7ccd290a43e358f16a1a6f9 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 11 Jul 2015 09:36:37 -0500 Subject: [PATCH 1/8] ERR: Boolean comparisons of a Series vs None will now be equivalent to null comparisons, rather than raise TypeError, xref, #1079 --- doc/source/whatsnew/v0.17.0.txt | 2 +- pandas/core/ops.py | 71 ++++++++++++------ pandas/lib.pyx | 10 ++- pandas/tests/test_base.py | 42 ++++++++++- pandas/tests/test_index.py | 127 ++++++++++++++++---------------- pandas/tests/test_series.py | 5 -- pandas/util/testing.py | 15 ++-- 7 files changed, 169 insertions(+), 103 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 45f1412c65e3d..b2ed1d41bc153 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -231,7 +231,7 @@ Other API Changes - Enable serialization of lists and dicts to strings in ExcelWriter (:issue:`8188`) - Allow passing `kwargs` to the interpolation methods (:issue:`10378`). - Serialize metadata properties of subclasses of pandas objects (:issue:`10553`). - +- Boolean comparisons of a ``Series`` vs None will now be equivalent to comparing with np.nan, rather than raise ``TypeError``, xref (:issue:`1079`). .. 
_whatsnew_0170.deprecations: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 0b62eb1e53ddb..0a0cf98174270 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -13,8 +13,11 @@ from pandas.util.decorators import Appender import pandas.core.common as com import pandas.computation.expressions as expressions +from pandas.lib import isscalar +from pandas.tslib import iNaT from pandas.core.common import(bind_method, is_list_like, notnull, isnull, - _values_from_object, _maybe_match_name) + _values_from_object, _maybe_match_name, + needs_i8_conversion, is_integer_dtype) # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory @@ -257,7 +260,7 @@ class _TimeOp(object): Generally, you should use classmethod ``maybe_convert_for_time_op`` as an entry point. """ - fill_value = tslib.iNaT + fill_value = iNaT wrap_results = staticmethod(lambda x: x) dtype = None @@ -346,7 +349,7 @@ def _convert_to_array(self, values, name=None, other=None): if (other is not None and other.dtype == 'timedelta64[ns]' and all(isnull(v) for v in values)): values = np.empty(values.shape, dtype=other.dtype) - values[:] = tslib.iNaT + values[:] = iNaT # a datelike elif isinstance(values, pd.DatetimeIndex): @@ -381,7 +384,7 @@ def _convert_to_array(self, values, name=None, other=None): # all nan, so ok, use the other dtype (e.g. 
timedelta or datetime) if isnull(values).all(): values = np.empty(values.shape, dtype=other.dtype) - values[:] = tslib.iNaT + values[:] = iNaT else: raise TypeError( 'incompatible type [{0}] for a datetime/timedelta ' @@ -549,12 +552,12 @@ def na_op(x, y): elif com.is_categorical_dtype(y) and not lib.isscalar(y): return op(y,x) - if x.dtype == np.object_: + if com.is_object_dtype(x.dtype): if isinstance(y, list): y = lib.list_to_object_array(y) if isinstance(y, (np.ndarray, pd.Series)): - if y.dtype != np.object_: + if not com.is_object_dtype(y.dtype): result = lib.vec_compare(x, y.astype(np.object_), op) else: result = lib.vec_compare(x, y, op) @@ -562,13 +565,46 @@ def na_op(x, y): result = lib.scalar_compare(x, y, op) else: + # numpy does not like comparisons vs None + if lib.isscalar(y) and isnull(y): + y = np.nan + + # we want to compare like types + # we only want to convert to integer like if + # we are not NotImplemented, otherwise + # we would allow datetime64 (but viewed as i8) against + # integer comparisons + if needs_i8_conversion(x) and (not isscalar(y) and is_integer_dtype(y)): + raise TypeError("invalid type comparison") + elif (not isscalar(y) and needs_i8_conversion(y)) and is_integer_dtype(x): + raise TypeError("invalid type comparison") + + # we have a datetime/timedelta and may need to convert + mask = None + if needs_i8_conversion(x) or (not isscalar(y) and needs_i8_conversion(y)): + + if isscalar(y): + y = _index.convert_scalar(x,_values_from_object(y)) + else: + y = y.view('i8') + + if name == '__ne__': + mask = notnull(x) + else: + mask = isnull(x) + + x = x.view('i8') + try: result = getattr(x, name)(y) if result is NotImplemented: raise TypeError("invalid type comparison") - except (AttributeError): + except AttributeError: result = op(x, y) + if mask is not None and mask.any(): + result[mask] = False + return result def wrapper(self, other, axis=None): @@ -596,8 +632,6 @@ def wrapper(self, other, axis=None): raise 
TypeError(msg.format(op=op,typ=self.dtype)) - mask = isnull(self) - if com.is_categorical_dtype(self): # cats are a special case as get_values() would return an ndarray, which would then # not take categories ordering into account @@ -605,14 +639,11 @@ def wrapper(self, other, axis=None): res = op(self.values, other) else: values = self.get_values() - other = _index.convert_scalar(values,_values_from_object(other)) - - if issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): - values = values.view('i8') + if is_list_like(other): + other = np.asarray(other) - # scalars res = na_op(values, other) - if np.isscalar(res): + if lib.isscalar(res): raise TypeError('Could not compare %s type with Series' % type(other)) @@ -621,11 +652,6 @@ def wrapper(self, other, axis=None): res = pd.Series(res, index=self.index, name=self.name, dtype='bool') - - # mask out the invalids - if mask.any(): - res[mask] = masker - return res return wrapper @@ -643,8 +669,7 @@ def na_op(x, y): y = lib.list_to_object_array(y) if isinstance(y, (np.ndarray, pd.Series)): - if (x.dtype == np.bool_ and - y.dtype == np.bool_): # pragma: no cover + if (com.is_bool_dtype(x.dtype) and com.is_bool_dtype(y.dtype)): result = op(x, y) # when would this be hit? else: x = com._ensure_object(x) @@ -1046,7 +1071,7 @@ def na_op(x, y): # work only for scalars def f(self, other): - if not np.isscalar(other): + if not lib.isscalar(other): raise ValueError('Simple arithmetic with %s can only be ' 'done with scalar values' % self._constructor.__name__) diff --git a/pandas/lib.pyx b/pandas/lib.pyx index 44be74b78d6bb..4805a33e5b496 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -378,7 +378,7 @@ def isnullobj2d_old(ndarray[object, ndim=2] arr): @cython.boundscheck(False) cpdef ndarray[object] list_to_object_array(list obj): ''' - Convert list to object ndarray. Seriously can't believe I had to write this + Convert list to object ndarray. 
Seriously can\'t believe I had to write this function ''' cdef: @@ -682,6 +682,7 @@ def scalar_compare(ndarray[object] values, object val, object op): cdef: Py_ssize_t i, n = len(values) ndarray[uint8_t, cast=True] result + bint isnull_val int flag object x @@ -701,12 +702,15 @@ def scalar_compare(ndarray[object] values, object val, object op): raise ValueError('Unrecognized operator') result = np.empty(n, dtype=bool).view(np.uint8) + isnull_val = _checknull(val) if flag == cpython.Py_NE: for i in range(n): x = values[i] if _checknull(x): result[i] = True + elif isnull_val: + result[i] = True else: try: result[i] = cpython.PyObject_RichCompareBool(x, val, flag) @@ -717,6 +721,8 @@ def scalar_compare(ndarray[object] values, object val, object op): x = values[i] if _checknull(x): result[i] = False + elif isnull_val: + result[i] = False else: try: result[i] = cpython.PyObject_RichCompareBool(x, val, flag) @@ -728,6 +734,8 @@ def scalar_compare(ndarray[object] values, object val, object op): x = values[i] if _checknull(x): result[i] = False + elif isnull_val: + result[i] = False else: result[i] = cpython.PyObject_RichCompareBool(x, val, flag) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index e17910a2e14be..db23b13edd42b 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -12,6 +12,7 @@ from pandas.tseries.common import is_datetimelike from pandas import Series, Index, Int64Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, Timedelta import pandas.tslib as tslib +from pandas import _np_version_under1p9 import nose import pandas.util.testing as tm @@ -273,6 +274,45 @@ def setUp(self): self.is_valid_objs = [ o for o in self.objs if o._allow_index_ops ] self.not_valid_objs = [ o for o in self.objs if not o._allow_index_ops ] + def test_none_comparison(self): + + # bug brought up by #1079 + # changed from TypeError in 0.17.0 + for o in self.is_valid_objs: + if isinstance(o, Series): + + o[0] = np.nan + + result = o == None + 
self.assertFalse(result.iat[0]) + self.assertFalse(result.iat[1]) + + result = o != None + self.assertTrue(result.iat[0]) + self.assertTrue(result.iat[1]) + + result = None == o + self.assertFalse(result.iat[0]) + self.assertFalse(result.iat[1]) + + if _np_version_under1p9: + # fails as this tries not __eq__ which + # is not valid for numpy + pass + else: + result = None != o + self.assertTrue(result.iat[0]) + self.assertTrue(result.iat[1]) + + result = None > o + self.assertFalse(result.iat[0]) + self.assertFalse(result.iat[1]) + + result = o < None + self.assertFalse(result.iat[0]) + self.assertFalse(result.iat[1]) + + def test_ndarray_compat_properties(self): for o in self.objs: @@ -513,7 +553,7 @@ def test_value_counts_inferred(self): expected = Series([4, 3, 2], index=['b', 'a', 'd']) tm.assert_series_equal(s.value_counts(), expected) - self.assert_numpy_array_equal(s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O')) + self.assert_numpy_array_equivalent(s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O')) self.assertEqual(s.nunique(), 3) s = klass({}) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 3d901837f5123..569f7d84862ff 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -10,14 +10,13 @@ import os import numpy as np -from numpy.testing import assert_array_equal from pandas import (period_range, date_range, Categorical, Series, Index, Float64Index, Int64Index, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex) from pandas.core.index import InvalidIndexError, NumericIndex from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, - assert_copy) + assert_copy, assert_numpy_array_equivalent, assert_numpy_array_equal) from pandas import compat from pandas.compat import long, is_platform_windows @@ -101,7 +100,7 @@ def test_reindex_base(self): expected = np.arange(idx.size) actual = idx.get_indexer(idx) - assert_array_equal(expected, actual) + 
assert_numpy_array_equivalent(expected, actual) with tm.assertRaisesRegexp(ValueError, 'Invalid fill method'): idx.get_indexer(idx, method='invalid') @@ -449,7 +448,7 @@ def test_constructor(self): index = Index(arr, copy=True, name='name') tm.assertIsInstance(index, Index) self.assertEqual(index.name, 'name') - assert_array_equal(arr, index) + assert_numpy_array_equivalent(arr, index) arr[0] = "SOMEBIGLONGSTRING" self.assertNotEqual(index[0], "SOMEBIGLONGSTRING") @@ -506,7 +505,7 @@ def __array__(self, dtype=None): def test_index_ctor_infer_periodindex(self): xp = period_range('2012-1-1', freq='M', periods=3) rs = Index(xp) - assert_array_equal(rs, xp) + assert_numpy_array_equivalent(rs, xp) tm.assertIsInstance(rs, PeriodIndex) def test_constructor_simple_new(self): @@ -1111,11 +1110,11 @@ def test_get_indexer_nearest(self): all_methods = ['pad', 'backfill', 'nearest'] for method in all_methods: actual = idx.get_indexer([0, 5, 9], method=method) - self.assert_array_equal(actual, [0, 5, 9]) + self.assert_numpy_array_equivalent(actual, [0, 5, 9]) for method, expected in zip(all_methods, [[0, 1, 8], [1, 2, 9], [0, 2, 9]]): actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) - self.assert_array_equal(actual, expected) + self.assert_numpy_array_equivalent(actual, expected) with tm.assertRaisesRegexp(ValueError, 'limit argument'): idx.get_indexer([1, 0], method='nearest', limit=1) @@ -1126,22 +1125,22 @@ def test_get_indexer_nearest_decreasing(self): all_methods = ['pad', 'backfill', 'nearest'] for method in all_methods: actual = idx.get_indexer([0, 5, 9], method=method) - self.assert_array_equal(actual, [9, 4, 0]) + self.assert_numpy_array_equivalent(actual, [9, 4, 0]) for method, expected in zip(all_methods, [[8, 7, 0], [9, 8, 1], [9, 7, 0]]): actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) - self.assert_array_equal(actual, expected) + self.assert_numpy_array_equivalent(actual, expected) def test_get_indexer_strings(self): idx = pd.Index(['b', 'c']) 
actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='pad') expected = [-1, 0, 1, 1] - self.assert_array_equal(actual, expected) + self.assert_numpy_array_equivalent(actual, expected) actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='backfill') expected = [0, 0, 1, -1] - self.assert_array_equal(actual, expected) + self.assert_numpy_array_equivalent(actual, expected) with tm.assertRaises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='nearest') @@ -1447,7 +1446,7 @@ def test_str_attribute(self): # test boolean case, should return np.array instead of boolean Index idx = Index(['a1', 'a2', 'b1', 'b2']) expected = np.array([True, True, False, False]) - self.assert_array_equal(idx.str.startswith('a'), expected) + self.assert_numpy_array_equivalent(idx.str.startswith('a'), expected) self.assertIsInstance(idx.str.startswith('a'), np.ndarray) s = Series(range(4), index=idx) expected = Series(range(2), index=['a1', 'a2']) @@ -1557,8 +1556,8 @@ def test_equals_op(self): index_d = Index(['foo']) with tm.assertRaisesRegexp(ValueError, "Lengths must match"): index_a == index_b - assert_array_equal(index_a == index_a, np.array([True, True, True])) - assert_array_equal(index_a == index_c, np.array([True, True, False])) + assert_numpy_array_equivalent(index_a == index_a, np.array([True, True, True])) + assert_numpy_array_equivalent(index_a == index_c, np.array([True, True, False])) # test comparisons with numpy arrays array_a = np.array(['foo', 'bar', 'baz']) @@ -1567,8 +1566,8 @@ def test_equals_op(self): array_d = np.array(['foo']) with tm.assertRaisesRegexp(ValueError, "Lengths must match"): index_a == array_b - assert_array_equal(index_a == array_a, np.array([True, True, True])) - assert_array_equal(index_a == array_c, np.array([True, True, False])) + assert_numpy_array_equivalent(index_a == array_a, np.array([True, True, True])) + assert_numpy_array_equivalent(index_a == array_c, np.array([True, True, False])) # test comparisons with Series series_a = 
Series(['foo', 'bar', 'baz']) @@ -1577,8 +1576,8 @@ def test_equals_op(self): series_d = Series(['foo']) with tm.assertRaisesRegexp(ValueError, "Lengths must match"): index_a == series_b - assert_array_equal(index_a == series_a, np.array([True, True, True])) - assert_array_equal(index_a == series_c, np.array([True, True, False])) + assert_numpy_array_equivalent(index_a == series_a, np.array([True, True, True])) + assert_numpy_array_equivalent(index_a == series_c, np.array([True, True, False])) # cases where length is 1 for one of them with tm.assertRaisesRegexp(ValueError, "Lengths must match"): @@ -1593,27 +1592,26 @@ def test_equals_op(self): series_a == array_d # comparing with scalar should broadcast - assert_array_equal(index_a == 'foo', np.array([True, False, False])) - assert_array_equal(series_a == 'foo', np.array([True, False, False])) - assert_array_equal(array_a == 'foo', np.array([True, False, False])) + assert_numpy_array_equivalent(index_a == 'foo', np.array([True, False, False])) + assert_numpy_array_equivalent(series_a == 'foo', np.array([True, False, False])) + assert_numpy_array_equivalent(array_a == 'foo', np.array([True, False, False])) # GH9785 # test comparisons of multiindex from pandas.compat import StringIO df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1]) - assert_array_equal(df.index == df.index, np.array([True, True])) + assert_numpy_array_equivalent(df.index == df.index, np.array([True, True])) mi1 = MultiIndex.from_tuples([(1, 2), (4, 5)]) - assert_array_equal(df.index == mi1, np.array([True, True])) + assert_numpy_array_equivalent(df.index == mi1, np.array([True, True])) mi2 = MultiIndex.from_tuples([(1, 2), (4, 6)]) - assert_array_equal(df.index == mi2, np.array([True, False])) + assert_numpy_array_equivalent(df.index == mi2, np.array([True, False])) mi3 = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]) with tm.assertRaisesRegexp(ValueError, "Lengths must match"): df.index == mi3 with 
tm.assertRaisesRegexp(ValueError, "Lengths must match"): df.index == index_a - assert_array_equal(index_a == mi3, np.array([False, False, False])) - + assert_numpy_array_equivalent(index_a == mi3, np.array([False, False, False])) class TestCategoricalIndex(Base, tm.TestCase): _holder = CategoricalIndex @@ -1868,7 +1866,7 @@ def test_reindex_base(self): expected = np.array([4,0,1,5,2,3]) actual = idx.get_indexer(idx) - assert_array_equal(expected, actual) + assert_numpy_array_equivalent(expected, actual) with tm.assertRaisesRegexp(ValueError, 'Invalid fill method'): idx.get_indexer(idx, method='invalid') @@ -1883,7 +1881,7 @@ def test_reindexing(self): expected = oidx.get_indexer_non_unique(finder)[0] actual = ci.get_indexer(finder) - assert_array_equal(expected, actual) + assert_numpy_array_equivalent(expected, actual) def test_duplicates(self): @@ -2184,12 +2182,12 @@ def test_equals(self): def test_get_indexer(self): idx = Float64Index([0.0, 1.0, 2.0]) - self.assert_array_equal(idx.get_indexer(idx), [0, 1, 2]) + self.assert_numpy_array_equivalent(idx.get_indexer(idx), [0, 1, 2]) target = [-0.1, 0.5, 1.1] - self.assert_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) - self.assert_array_equal(idx.get_indexer(target, 'backfill'), [0, 1, 2]) - self.assert_array_equal(idx.get_indexer(target, 'nearest'), [0, 1, 1]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'pad'), [-1, 0, 1]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'backfill'), [0, 1, 2]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'nearest'), [0, 1, 1]) def test_get_loc(self): idx = Float64Index([0.0, 1.0, 2.0]) @@ -2227,15 +2225,15 @@ def test_doesnt_contain_all_the_things(self): def test_nan_multiple_containment(self): i = Float64Index([1.0, np.nan]) - np.testing.assert_array_equal(i.isin([1.0]), np.array([True, False])) - np.testing.assert_array_equal(i.isin([2.0, np.pi]), + assert_numpy_array_equivalent(i.isin([1.0]), np.array([True, False])) + 
assert_numpy_array_equivalent(i.isin([2.0, np.pi]), np.array([False, False])) - np.testing.assert_array_equal(i.isin([np.nan]), + assert_numpy_array_equivalent(i.isin([np.nan]), np.array([False, True])) - np.testing.assert_array_equal(i.isin([1.0, np.nan]), + assert_numpy_array_equivalent(i.isin([1.0, np.nan]), np.array([True, True])) i = Float64Index([1.0, 2.0]) - np.testing.assert_array_equal(i.isin([np.nan]), + assert_numpy_array_equivalent(i.isin([np.nan]), np.array([False, False])) def test_astype_from_object(self): @@ -2784,19 +2782,19 @@ def test_get_loc(self): # time indexing idx = pd.date_range('2000-01-01', periods=24, freq='H') - assert_array_equal(idx.get_loc(time(12)), [12]) - assert_array_equal(idx.get_loc(time(12, 30)), []) + assert_numpy_array_equivalent(idx.get_loc(time(12)), [12]) + assert_numpy_array_equivalent(idx.get_loc(time(12, 30)), []) with tm.assertRaises(NotImplementedError): idx.get_loc(time(12, 30), method='pad') def test_get_indexer(self): idx = pd.date_range('2000-01-01', periods=3) - self.assert_array_equal(idx.get_indexer(idx), [0, 1, 2]) + self.assert_numpy_array_equivalent(idx.get_indexer(idx), [0, 1, 2]) target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) - self.assert_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) - self.assert_array_equal(idx.get_indexer(target, 'backfill'), [0, 1, 2]) - self.assert_array_equal(idx.get_indexer(target, 'nearest'), [0, 1, 1]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'pad'), [-1, 0, 1]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'backfill'), [0, 1, 2]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'nearest'), [0, 1, 1]) def test_roundtrip_pickle_with_tz(self): @@ -2826,7 +2824,7 @@ def test_time_loc(self): # GH8667 ts = pd.Series(np.random.randn(n), index=idx) i = np.arange(start, n, step) - tm.assert_array_equal(ts.index.get_loc(key), i) + tm.assert_numpy_array_equivalent(ts.index.get_loc(key), i) 
tm.assert_series_equal(ts[key], ts.iloc[i]) left, right = ts.copy(), ts.copy() @@ -2906,13 +2904,13 @@ def test_get_loc(self): def test_get_indexer(self): idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') - self.assert_array_equal(idx.get_indexer(idx), [0, 1, 2]) + self.assert_numpy_array_equivalent(idx.get_indexer(idx), [0, 1, 2]) target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', '2000-01-02T01'], freq='H') - self.assert_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) - self.assert_array_equal(idx.get_indexer(target, 'backfill'), [0, 1, 2]) - self.assert_array_equal(idx.get_indexer(target, 'nearest'), [0, 1, 1]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'pad'), [-1, 0, 1]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'backfill'), [0, 1, 2]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'nearest'), [0, 1, 1]) with self.assertRaisesRegexp(ValueError, 'different freq'): idx.asfreq('D').get_indexer(idx) @@ -2950,12 +2948,12 @@ def test_get_loc(self): def test_get_indexer(self): idx = pd.to_timedelta(['0 days', '1 days', '2 days']) - self.assert_array_equal(idx.get_indexer(idx), [0, 1, 2]) + self.assert_numpy_array_equivalent(idx.get_indexer(idx), [0, 1, 2]) target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) - self.assert_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) - self.assert_array_equal(idx.get_indexer(target, 'backfill'), [0, 1, 2]) - self.assert_array_equal(idx.get_indexer(target, 'nearest'), [0, 1, 1]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'pad'), [-1, 0, 1]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'backfill'), [0, 1, 2]) + self.assert_numpy_array_equivalent(idx.get_indexer(target, 'nearest'), [0, 1, 1]) def test_numeric_compat(self): @@ -3535,7 +3533,7 @@ def test_from_product(self): ('buz', 'a'), ('buz', 'b'), ('buz', 'c')] expected = MultiIndex.from_tuples(tuples, names=names) - 
assert_array_equal(result, expected) + assert_numpy_array_equivalent(result, expected) self.assertEqual(result.names, names) def test_from_product_datetimeindex(self): @@ -3545,7 +3543,7 @@ def test_from_product_datetimeindex(self): (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp('2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) - assert_array_equal(mi.values, etalon) + assert_numpy_array_equivalent(mi.values, etalon) def test_values_boxed(self): tuples = [(1, pd.Timestamp('2000-01-01')), @@ -3555,9 +3553,9 @@ def test_values_boxed(self): (2, pd.Timestamp('2000-01-02')), (3, pd.Timestamp('2000-01-03'))] mi = pd.MultiIndex.from_tuples(tuples) - assert_array_equal(mi.values, pd.lib.list_to_object_array(tuples)) + assert_numpy_array_equivalent(mi.values, pd.lib.list_to_object_array(tuples)) # Check that code branches for boxed values produce identical results - assert_array_equal(mi.values[:4], mi[:4].values) + assert_numpy_array_equivalent(mi.values[:4], mi[:4].values) def test_append(self): result = self.index[:3].append(self.index[3:]) @@ -3597,28 +3595,28 @@ def test_get_level_values_na(self): index = pd.MultiIndex.from_arrays(arrays) values = index.get_level_values(1) expected = [1, np.nan, 2] - assert_array_equal(values.values.astype(float), expected) + assert_numpy_array_equivalent(values.values.astype(float), expected) arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]] index = pd.MultiIndex.from_arrays(arrays) values = index.get_level_values(1) expected = [np.nan, np.nan, 2] - assert_array_equal(values.values.astype(float), expected) + assert_numpy_array_equivalent(values.values.astype(float), expected) arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] index = pd.MultiIndex.from_arrays(arrays) values = index.get_level_values(0) expected = [np.nan, np.nan, np.nan] - assert_array_equal(values.values.astype(float), expected) + assert_numpy_array_equivalent(values.values.astype(float), expected) values = index.get_level_values(1) expected = np.array(['a', 
np.nan, 1],dtype=object) - assert_array_equal(values.values, expected) + assert_numpy_array_equivalent(values.values, expected) arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] index = pd.MultiIndex.from_arrays(arrays) values = index.get_level_values(1) expected = pd.DatetimeIndex([0, 1, pd.NaT]) - assert_array_equal(values.values, expected.values) + assert_numpy_array_equivalent(values.values, expected.values) arrays = [[], []] index = pd.MultiIndex.from_arrays(arrays) @@ -4644,14 +4642,14 @@ def check(nlevels, with_nulls): for take_last in [False, True]: left = mi.duplicated(take_last=take_last) right = pd.lib.duplicated(mi.values, take_last=take_last) - tm.assert_array_equal(left, right) + tm.assert_numpy_array_equivalent(left, right) # GH5873 for a in [101, 102]: mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) self.assertFalse(mi.has_duplicates) self.assertEqual(mi.get_duplicates(), []) - self.assert_array_equal(mi.duplicated(), np.zeros(2, dtype='bool')) + self.assert_numpy_array_equivalent(mi.duplicated(), np.zeros(2, dtype='bool')) for n in range(1, 6): # 1st level shape for m in range(1, 5): # 2nd level shape @@ -4662,7 +4660,7 @@ def check(nlevels, with_nulls): self.assertEqual(len(mi), (n + 1) * (m + 1)) self.assertFalse(mi.has_duplicates) self.assertEqual(mi.get_duplicates(), []) - self.assert_array_equal(mi.duplicated(), + self.assert_numpy_array_equivalent(mi.duplicated(), np.zeros(len(mi), dtype='bool')) def test_duplicate_meta_data(self): @@ -4866,7 +4864,6 @@ def test_equals_operator(self): # GH9785 self.assertTrue((self.index == self.index).all()) - def test_get_combined_index(): from pandas.core.index import _get_combined_index result = _get_combined_index([]) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 7326d7a9d811d..361cf4aba705f 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -504,11 +504,6 @@ def test_comparisons(self): s == s2 s2 == s - def 
test_none_comparison(self): - # bug brought up by #1079 - s = Series(np.random.randn(10), index=lrange(0, 20, 2)) - self.assertRaises(TypeError, s.__eq__, None) - def test_sum_zero(self): arr = np.array([]) self.assertEqual(nanops.nansum(arr), 0) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 7378e3504b5ca..8328e1a867b8c 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -59,7 +59,6 @@ def reset_testing_mode(): set_testing_mode() - class TestCase(unittest.TestCase): @classmethod @@ -634,7 +633,7 @@ def assert_categorical_equal(res, exp): raise AssertionError("name not the same") -def assert_numpy_array_equal(np_array, assert_equal): +def assert_numpy_array_equal(np_array, assert_equal, err_msg=None): """Checks that 'np_array' is equal to 'assert_equal' Note that the expected array should not contain `np.nan`! @@ -646,11 +645,12 @@ def assert_numpy_array_equal(np_array, assert_equal): """ if np.array_equal(np_array, assert_equal): return - raise AssertionError( - '{0} is not equal to {1}.'.format(np_array, assert_equal)) + if err_msg is None: + err_msg = '{0} is not equal to {1}.'.format(np_array, assert_equal) + raise AssertionError(err_msg) -def assert_numpy_array_equivalent(np_array, assert_equal, strict_nan=False): +def assert_numpy_array_equivalent(np_array, assert_equal, strict_nan=False, err_msg=None): """Checks that 'np_array' is equivalent to 'assert_equal' Two numpy arrays are equivalent if the arrays have equal non-NaN elements, @@ -664,8 +664,9 @@ def assert_numpy_array_equivalent(np_array, assert_equal, strict_nan=False): """ if array_equivalent(np_array, assert_equal, strict_nan=strict_nan): return - raise AssertionError( - '{0} is not equivalent to {1}.'.format(np_array, assert_equal)) + if err_msg is None: + err_msg = '{0} is not equivalent to {1}.'.format(np_array, assert_equal) + raise AssertionError(err_msg) # This could be refactored to use the NDFrame.equals method From 
dccf5ebf7224af8fa26e0fc529c9b873f0393431 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 16 Jul 2015 10:04:09 -0400 Subject: [PATCH 2/8] DEPR: remove numpy deprecation warnings for i8 vs integer comparisions --- pandas/core/common.py | 11 +++++++++++ pandas/core/generic.py | 9 ++++++++- pandas/core/internals.py | 11 +++++++++-- pandas/core/ops.py | 12 +++++------- 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 33a2fc0aea732..ccb5859ea53e5 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -462,6 +462,10 @@ def array_equivalent(left, right, strict_nan=False): if issubclass(left.dtype.type, (np.floating, np.complexfloating)): return ((left == right) | (np.isnan(left) & np.isnan(right))).all() + # numpy will will not allow this type of datetimelike vs integer comparison + elif is_datetimelike_v_integer(left, right): + return False + # NaNs cannot occur otherwise. return np.array_equal(left, right) @@ -2539,6 +2543,13 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype): return issubclass(tipo, (np.datetime64, np.timedelta64)) +def is_datetimelike_v_integer(a, b): + # return if we have an i8 convertible and and integer comparision + a = np.asarray(a) + b = np.asarray(b) + return (needs_i8_conversion(a) and is_integer_dtype(b)) or ( + needs_i8_conversion(b) and is_integer_dtype(a)) + needs_i8_conversion = is_datetime_or_timedelta_dtype def i8_boxer(arr_or_dtype): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c792309e83ecb..273c444b30b80 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3574,7 +3574,14 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, except ValueError: new_other = np.array(other) - matches = (new_other == np.array(other)) + # we can end up comparing integers and m8[ns] + # which is a numpy no no + is_i8 = com.needs_i8_conversion(self.dtype) + if is_i8: + matches = False + else: + matches = (new_other 
== np.array(other)) + if matches is False or not matches.all(): # coerce other to a common dtype if we can diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 37d6cb9c0d5b6..cb83d98b3cd46 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -14,7 +14,7 @@ is_null_datelike_scalar, _maybe_promote, is_timedelta64_dtype, is_datetime64_dtype, array_equivalent, _maybe_convert_string_to_object, - is_categorical) + is_categorical, needs_i8_conversion, is_datetimelike_v_integer) from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer from pandas.core.categorical import Categorical, maybe_to_categorical @@ -3885,9 +3885,16 @@ def _vstack(to_stack, dtype): def _possibly_compare(a, b, op): - res = op(a, b) + is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) + + # numpy deprecation warning to have i8 vs integer comparisions + if is_datetimelike_v_integer(a, b): + res = False + else: + res = op(a, b) + if np.isscalar(res) and (is_a_array or is_b_array): type_names = [type(a).__name__, type(b).__name__] diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 0a0cf98174270..6c6b777b66152 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -17,7 +17,7 @@ from pandas.tslib import iNaT from pandas.core.common import(bind_method, is_list_like, notnull, isnull, _values_from_object, _maybe_match_name, - needs_i8_conversion, is_integer_dtype) + needs_i8_conversion, is_datetimelike_v_integer, is_integer_dtype) # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory @@ -574,9 +574,7 @@ def na_op(x, y): # we are not NotImplemented, otherwise # we would allow datetime64 (but viewed as i8) against # integer comparisons - if needs_i8_conversion(x) and (not isscalar(y) and is_integer_dtype(y)): - raise TypeError("invalid type comparison") - 
elif (not isscalar(y) and needs_i8_conversion(y)) and is_integer_dtype(x): + if is_datetimelike_v_integer(x, y): raise TypeError("invalid type comparison") # we have a datetime/timedelta and may need to convert @@ -690,7 +688,7 @@ def na_op(x, y): return result def wrapper(self, other): - is_self_int_dtype = com.is_integer_dtype(self.dtype) + is_self_int_dtype = is_integer_dtype(self.dtype) fill_int = lambda x: x.fillna(0) fill_bool = lambda x: x.fillna(False).astype(bool) @@ -698,7 +696,7 @@ def wrapper(self, other): if isinstance(other, pd.Series): name = _maybe_match_name(self, other) other = other.reindex_like(self) - is_other_int_dtype = com.is_integer_dtype(other.dtype) + is_other_int_dtype = is_integer_dtype(other.dtype) other = fill_int(other) if is_other_int_dtype else fill_bool(other) filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool @@ -711,7 +709,7 @@ def wrapper(self, other): else: # scalars, list, tuple, np.array - filler = fill_int if is_self_int_dtype and com.is_integer_dtype(np.asarray(other)) else fill_bool + filler = fill_int if is_self_int_dtype and is_integer_dtype(np.asarray(other)) else fill_bool return filler(self._constructor(na_op(self.values, other), index=self.index)).__finalize__(self) From 8016a7f4443c7c1a8985a2acdd828fcfae6abedd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 16 Jul 2015 14:23:49 -0400 Subject: [PATCH 3/8] DEPR: remove visible deprecation warning for slicing in test_internals --- doc/source/whatsnew/v0.17.0.txt | 1 + pandas/core/common.py | 17 ++++++----- pandas/core/internals.py | 4 +-- pandas/core/ops.py | 12 ++++---- pandas/io/stata.py | 3 +- pandas/io/tests/test_stata.py | 38 ++++++++++++------------- pandas/tests/test_internals.py | 13 +++++++-- pandas/tseries/tests/test_timeseries.py | 4 +-- pandas/util/testing.py | 23 ++++++++++++--- 9 files changed, 70 insertions(+), 45 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 
b2ed1d41bc153..f11d35c8d0f1f 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -232,6 +232,7 @@ Other API Changes - Allow passing `kwargs` to the interpolation methods (:issue:`10378`). - Serialize metadata properties of subclasses of pandas objects (:issue:`10553`). - Boolean comparisons of a ``Series`` vs None will now be equivalent to comparing with np.nan, rather than raise ``TypeError``, xref (:issue:`1079`). +- Remove use of some deprecated numpy comparisons (:issue:`10569`) .. _whatsnew_0170.deprecations: diff --git a/pandas/core/common.py b/pandas/core/common.py index ccb5859ea53e5..4de734dcd3d10 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -463,7 +463,7 @@ def array_equivalent(left, right, strict_nan=False): return ((left == right) | (np.isnan(left) & np.isnan(right))).all() # numpy will will not allow this type of datetimelike vs integer comparison - elif is_datetimelike_v_integer(left, right): + elif is_datetimelike_v_numeric(left, right): return False # NaNs cannot occur otherwise. 
@@ -2543,12 +2543,15 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype): return issubclass(tipo, (np.datetime64, np.timedelta64)) -def is_datetimelike_v_integer(a, b): - # return if we have an i8 convertible and and integer comparision - a = np.asarray(a) - b = np.asarray(b) - return (needs_i8_conversion(a) and is_integer_dtype(b)) or ( - needs_i8_conversion(b) and is_integer_dtype(a)) +def is_datetimelike_v_numeric(a, b): + # return if we have an i8 convertible and numeric comparision + if not hasattr(a,'dtype'): + a = np.asarray(a) + if not hasattr(b, 'dtype'): + b = np.asarray(b) + f = lambda x: is_integer_dtype(x) or is_float_dtype(x) + return (needs_i8_conversion(a) and f(b)) or ( + needs_i8_conversion(b) and f(a)) needs_i8_conversion = is_datetime_or_timedelta_dtype diff --git a/pandas/core/internals.py b/pandas/core/internals.py index cb83d98b3cd46..67e25164537a7 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -14,7 +14,7 @@ is_null_datelike_scalar, _maybe_promote, is_timedelta64_dtype, is_datetime64_dtype, array_equivalent, _maybe_convert_string_to_object, - is_categorical, needs_i8_conversion, is_datetimelike_v_integer) + is_categorical, needs_i8_conversion, is_datetimelike_v_numeric) from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer from pandas.core.categorical import Categorical, maybe_to_categorical @@ -3890,7 +3890,7 @@ def _possibly_compare(a, b, op): is_b_array = isinstance(b, np.ndarray) # numpy deprecation warning to have i8 vs integer comparisions - if is_datetimelike_v_integer(a, b): + if is_datetimelike_v_numeric(a, b): res = False else: res = op(a, b) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 6c6b777b66152..3e18e514b2abf 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -17,7 +17,7 @@ from pandas.tslib import iNaT from pandas.core.common import(bind_method, is_list_like, notnull, isnull, _values_from_object, 
_maybe_match_name, - needs_i8_conversion, is_datetimelike_v_integer, is_integer_dtype) + needs_i8_conversion, is_datetimelike_v_numeric, is_integer_dtype) # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory @@ -565,18 +565,18 @@ def na_op(x, y): result = lib.scalar_compare(x, y, op) else: - # numpy does not like comparisons vs None - if lib.isscalar(y) and isnull(y): - y = np.nan - # we want to compare like types # we only want to convert to integer like if # we are not NotImplemented, otherwise # we would allow datetime64 (but viewed as i8) against # integer comparisons - if is_datetimelike_v_integer(x, y): + if is_datetimelike_v_numeric(x, y): raise TypeError("invalid type comparison") + # numpy does not like comparisons vs None + if lib.isscalar(y) and isnull(y): + y = np.nan + # we have a datetime/timedelta and may need to convert mask = None if needs_i8_conversion(x) or (not isscalar(y) and needs_i8_conversion(y)): diff --git a/pandas/io/stata.py b/pandas/io/stata.py index eecc225d06beb..53f2ff455d32e 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -23,6 +23,7 @@ from pandas.compat import lrange, lmap, lzip, text_type, string_types, range, \ zip, BytesIO from pandas.util.decorators import Appender +import pandas as pd import pandas.core.common as com from pandas.io.common import get_filepath_or_buffer from pandas.lib import max_len_string_array, infer_dtype @@ -291,7 +292,7 @@ def convert_delta_safe(base, deltas, unit): warn("Encountered %tC format. 
Leaving in Stata Internal Format.") conv_dates = Series(dates, dtype=np.object) if has_bad_values: - conv_dates[bad_locs] = np.nan + conv_dates[bad_locs] = pd.NaT return conv_dates elif fmt in ["%td", "td", "%d", "d"]: # Delta days relative to base base = stata_epoch diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index 8eb60b13fcc81..5937d3184f96c 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -180,9 +180,9 @@ def test_read_dta2(self): # buggy test because of the NaT comparison on certain platforms # Format 113 test fails since it does not support tc and tC formats # tm.assert_frame_equal(parsed_113, expected) - tm.assert_frame_equal(parsed_114, expected) - tm.assert_frame_equal(parsed_115, expected) - tm.assert_frame_equal(parsed_117, expected) + tm.assert_frame_equal(parsed_114, expected, check_datetimelike_compat=True) + tm.assert_frame_equal(parsed_115, expected, check_datetimelike_compat=True) + tm.assert_frame_equal(parsed_117, expected, check_datetimelike_compat=True) def test_read_dta3(self): parsed_113 = self.read_dta(self.dta3_113) @@ -684,6 +684,7 @@ def test_big_dates(self): expected.append([NaT] * 7) columns = ['date_tc', 'date_td', 'date_tw', 'date_tm', 'date_tq', 'date_th', 'date_ty'] + # Fixes for weekly, quarterly,half,year expected[2][2] = datetime(9999,12,24) expected[2][3] = datetime(9999,12,1) @@ -696,11 +697,10 @@ def test_big_dates(self): expected[5][5] = expected[5][6] = datetime(1678,1,1) expected = DataFrame(expected, columns=columns, dtype=np.object) - parsed_115 = read_stata(self.dta18_115) parsed_117 = read_stata(self.dta18_117) - tm.assert_frame_equal(expected, parsed_115) - tm.assert_frame_equal(expected, parsed_117) + tm.assert_frame_equal(expected, parsed_115, check_datetimelike_compat=True) + tm.assert_frame_equal(expected, parsed_117, check_datetimelike_compat=True) date_conversion = dict((c, c[-2:]) for c in columns) #{c : c[-2:] for c in columns} @@ -709,7 +709,8 @@ 
def test_big_dates(self): expected.to_stata(path, date_conversion) written_and_read_again = self.read_dta(path) tm.assert_frame_equal(written_and_read_again.set_index('index'), - expected) + expected, + check_datetimelike_compat=True) def test_dtype_conversion(self): expected = self.read_csv(self.csv15) @@ -903,6 +904,7 @@ def test_read_chunks_117(self): self.dta16_117, self.dta17_117, self.dta18_117, self.dta19_117, self.dta20_117] + raise nose.SkipTest("buggy test: #10606") for fname in files_117: for chunksize in 1,2: for convert_categoricals in False, True: @@ -923,12 +925,10 @@ def test_read_chunks_117(self): except StopIteration: break from_frame = parsed.iloc[pos:pos+chunksize, :] - try: - tm.assert_frame_equal(from_frame, chunk, check_dtype=False) - except AssertionError: - # datetime.datetime and pandas.tslib.Timestamp may hold - # equivalent values but fail assert_frame_equal - assert(all([x == y for x, y in zip(from_frame, chunk)])) + tm.assert_frame_equal(from_frame, + chunk, + check_dtype=False, + check_datetimelike_compat=True) pos += chunksize @@ -961,6 +961,7 @@ def test_read_chunks_115(self): self.dta17_115, self.dta18_115, self.dta19_115, self.dta20_115] + raise nose.SkipTest("buggy test: #10606") for fname in files_115: for chunksize in 1,2: for convert_categoricals in False, True: @@ -982,12 +983,10 @@ def test_read_chunks_115(self): except StopIteration: break from_frame = parsed.iloc[pos:pos+chunksize, :] - try: - tm.assert_frame_equal(from_frame, chunk, check_dtype=False) - except AssertionError: - # datetime.datetime and pandas.tslib.Timestamp may hold - # equivalent values but fail assert_frame_equal - assert(all([x == y for x, y in zip(from_frame, chunk)])) + tm.assert_frame_equal(from_frame, + chunk, + check_dtype=False, + check_datetimelike_compat=True) pos += chunksize @@ -1011,4 +1010,3 @@ def test_read_chunks_columns(self): if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) - 
diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index ef05b40827dfd..6d2c87a187995 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -753,15 +753,15 @@ def test_equals(self): def test_equals_block_order_different_dtypes(self): # GH 9330 - - mgr_strings = [ + + mgr_strings = [ "a:i8;b:f8", # basic case "a:i8;b:f8;c:c8;d:b", # many types "a:i8;e:dt;f:td;g:string", # more types "a:i8;b:category;c:category2;d:category2", # categories "c:sparse;d:sparse_na;b:f8", # sparse ] - + for mgr_string in mgr_strings: bm = create_mgr(mgr_string) block_perms = itertools.permutations(bm.blocks) @@ -812,6 +812,13 @@ def test_get_slice(self): def assert_slice_ok(mgr, axis, slobj): # import pudb; pudb.set_trace() mat = mgr.as_matrix() + + # we maybe using an ndarray to test slicing and + # might not be the full length of the axis + if isinstance(slobj, np.ndarray): + ax = mgr.axes[axis] + if len(ax) and len(slobj) and len(slobj) != len(ax): + slobj = np.concatenate([slobj, np.zeros(len(ax)-len(slobj),dtype=bool)]) sliced = mgr.get_slice(slobj, axis=axis) mat_slobj = (slice(None),) * axis + (slobj,) assert_almost_equal(mat[mat_slobj], sliced.as_matrix()) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index c8b96076b26bd..5b23d7123935e 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -849,11 +849,11 @@ def test_string_na_nat_conversion(self): result2 = to_datetime(strings) tm.assertIsInstance(result2, DatetimeIndex) - self.assert_numpy_array_equal(result, result2) + self.assert_numpy_array_equivalent(result, result2) malformed = np.array(['1/100/2000', np.nan], dtype=object) result = to_datetime(malformed) - self.assert_numpy_array_equal(result, malformed) + self.assert_numpy_array_equivalent(result, malformed) self.assertRaises(ValueError, to_datetime, malformed, errors='raise') diff --git a/pandas/util/testing.py 
b/pandas/util/testing.py index 8328e1a867b8c..80dc36a361c32 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -24,7 +24,7 @@ from numpy.testing import assert_array_equal import pandas as pd -from pandas.core.common import is_sequence, array_equivalent, is_list_like, is_number +from pandas.core.common import is_sequence, array_equivalent, is_list_like, is_number, is_datetimelike_v_numeric import pandas.compat as compat from pandas.compat import( filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter, @@ -675,7 +675,8 @@ def assert_series_equal(left, right, check_dtype=True, check_series_type=False, check_less_precise=False, check_exact=False, - check_names=True): + check_names=True, + check_datetimelike_compat=False): if check_series_type: assertIsInstance(left, type(right)) if check_dtype: @@ -684,6 +685,18 @@ def assert_series_equal(left, right, check_dtype=True, if not np.array_equal(left.values, right.values): raise AssertionError('{0} is not equal to {1}.'.format(left.values, right.values)) + elif check_datetimelike_compat: + # we want to check only if we have compat dtypes + # e.g. 
integer and M|m are NOT compat, but we can simply check the values in that case + if is_datetimelike_v_numeric(left, right): + # datetime.datetime and pandas.tslib.Timestamp may hold + # equivalent values but fail assert_frame_equal + if not all([x == y for x, y in zip(left, right)]): + raise AssertionError( + '[datetimelike_compat=True] {0} is not equal to {1}.'.format(left.values, + right.values)) + else: + assert_numpy_array_equivalent(left.values, right.values) else: assert_almost_equal(left.values, right.values, check_less_precise) if check_less_precise: @@ -716,7 +729,8 @@ def assert_frame_equal(left, right, check_dtype=True, check_less_precise=False, check_names=True, by_blocks=False, - check_exact=False): + check_exact=False, + check_datetimelike_compat=False): if check_frame_type: assertIsInstance(left, type(right)) assertIsInstance(left, DataFrame) @@ -750,7 +764,8 @@ def assert_frame_equal(left, right, check_dtype=True, check_index_type=check_index_type, check_less_precise=check_less_precise, check_exact=check_exact, - check_names=check_names) + check_names=check_names, + check_datetimelike_compat=check_datetimelike_compat) if check_index_type: for level in range(left.index.nlevels): From b381327acd2855b5b1e23ad87f25bee84cb322ce Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 16 Jul 2015 20:59:06 -0400 Subject: [PATCH 4/8] DEPR: remove unordered types depreceation usage in core/index.py --- pandas/core/index.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 7047f07280012..98e0214dbf073 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -164,18 +164,18 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, elif data is None or np.isscalar(data): cls._scalar_data_error(data) else: - if tupleize_cols and isinstance(data, list) and data: + if tupleize_cols and isinstance(data, list) and data and isinstance(data[0], 
tuple): try: - sorted(data) - has_mixed_types = False - except (TypeError, UnicodeDecodeError): - has_mixed_types = True # python3 only - if isinstance(data[0], tuple) and not has_mixed_types: - try: - return MultiIndex.from_tuples( - data, names=name or kwargs.get('names')) - except (TypeError, KeyError): - pass # python2 - MultiIndex fails on mixed types + + # must be orderable in py3 + if compat.PY3: + sorted(data) + return MultiIndex.from_tuples( + data, names=name or kwargs.get('names')) + except (TypeError, KeyError): + # python2 - MultiIndex fails on mixed types + pass + # other iterable of some kind subarr = com._asarray_tuplesafe(data, dtype=object) From 26ee43e82390e52121b563d15ca1c68425d5b5fc Mon Sep 17 00:00:00 2001 From: Kerby Shedden Date: Fri, 17 Jul 2015 02:44:10 -0400 Subject: [PATCH 5/8] Align options in chunk and full file read --- pandas/io/tests/test_stata.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index 5937d3184f96c..3bfb05943ed8b 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -914,7 +914,8 @@ def test_read_chunks_117(self): warnings.simplefilter("always") parsed = read_stata(fname, convert_categoricals=convert_categoricals, convert_dates=convert_dates) - itr = read_stata(fname, iterator=True) + itr = read_stata(fname, iterator=True, convert_categoricals=convert_categoricals, + convert_dates=convert_dates) pos = 0 for j in range(5): @@ -967,13 +968,15 @@ def test_read_chunks_115(self): for convert_categoricals in False, True: for convert_dates in False, True: + # Read the whole file with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") parsed = read_stata(fname, convert_categoricals=convert_categoricals, convert_dates=convert_dates) - itr = read_stata(fname, iterator=True, - convert_categoricals=convert_categoricals) + # Compare to what we get when reading by chunk + itr = 
read_stata(fname, iterator=True, convert_dates=convert_dates, + convert_categoricals=convert_categoricals) pos = 0 for j in range(5): with warnings.catch_warnings(record=True) as w: From 4def8e48d9aaa645b2a20a93ba0ad43417024ca1 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Jul 2015 08:58:44 -0400 Subject: [PATCH 6/8] TST: fixes stata datetimelike comparisons for #10606 --- pandas/core/common.py | 10 ++++++++++ pandas/io/tests/test_stata.py | 2 -- pandas/util/testing.py | 11 ++++++----- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 4de734dcd3d10..49db94c3bfa86 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2553,6 +2553,16 @@ def is_datetimelike_v_numeric(a, b): return (needs_i8_conversion(a) and f(b)) or ( needs_i8_conversion(b) and f(a)) +def is_datetimelike_v_object(a, b): + # return if we have an i8 convertible and object comparision + if not hasattr(a,'dtype'): + a = np.asarray(a) + if not hasattr(b, 'dtype'): + b = np.asarray(b) + f = lambda x: is_object_dtype(x) + return (needs_i8_conversion(a) and f(b)) or ( + needs_i8_conversion(b) and f(a)) + needs_i8_conversion = is_datetime_or_timedelta_dtype def i8_boxer(arr_or_dtype): diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index 3bfb05943ed8b..a06c4384d72c5 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -904,7 +904,6 @@ def test_read_chunks_117(self): self.dta16_117, self.dta17_117, self.dta18_117, self.dta19_117, self.dta20_117] - raise nose.SkipTest("buggy test: #10606") for fname in files_117: for chunksize in 1,2: for convert_categoricals in False, True: @@ -962,7 +961,6 @@ def test_read_chunks_115(self): self.dta17_115, self.dta18_115, self.dta19_115, self.dta20_115] - raise nose.SkipTest("buggy test: #10606") for fname in files_115: for chunksize in 1,2: for convert_categoricals in False, True: diff --git a/pandas/util/testing.py 
b/pandas/util/testing.py index 80dc36a361c32..9f75e42a8676a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -24,7 +24,8 @@ from numpy.testing import assert_array_equal import pandas as pd -from pandas.core.common import is_sequence, array_equivalent, is_list_like, is_number, is_datetimelike_v_numeric +from pandas.core.common import (is_sequence, array_equivalent, is_list_like, is_number, + is_datetimelike_v_numeric, is_datetimelike_v_object) import pandas.compat as compat from pandas.compat import( filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter, @@ -688,10 +689,10 @@ def assert_series_equal(left, right, check_dtype=True, elif check_datetimelike_compat: # we want to check only if we have compat dtypes # e.g. integer and M|m are NOT compat, but we can simply check the values in that case - if is_datetimelike_v_numeric(left, right): - # datetime.datetime and pandas.tslib.Timestamp may hold - # equivalent values but fail assert_frame_equal - if not all([x == y for x, y in zip(left, right)]): + if is_datetimelike_v_numeric(left, right) or is_datetimelike_v_object(left, right): + + # datetimelike may have different objects (e.g. 
datetime.datetime vs Timestamp) but will compare equal + if not Index(left.values).equals(Index(right.values)): raise AssertionError( '[datetimelike_compat=True] {0} is not equal to {1}.'.format(left.values, right.values)) From 0bd25ab0c44e5e0a82c1abcca57d517a5fcc7659 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Jul 2015 09:26:20 -0400 Subject: [PATCH 7/8] DOC: whatsnew changes --- doc/source/whatsnew/v0.17.0.txt | 141 ++++++++++++++++---------------- 1 file changed, 71 insertions(+), 70 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index f11d35c8d0f1f..d260e496c32ca 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -34,6 +34,7 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ + - Enable `read_hdf` to be used without specifying a key when the HDF file contains a single dataset (:issue:`10443`) - ``DatetimeIndex`` can be instantiated using strings contains ``NaT`` (:issue:`7599`) @@ -91,7 +92,7 @@ Backwards incompatible API changes Changes to convert_objects ^^^^^^^^^^^^^^^^^^^^^^^^^^ -- ``DataFrame.convert_objects`` keyword arguments have been shortened. (:issue:`10265`) +``DataFrame.convert_objects`` keyword arguments have been shortened. (:issue:`10265`) ===================== ============= Old New @@ -101,70 +102,65 @@ Changes to convert_objects ``convert_timedelta`` ``timedelta`` ===================== ============= -- Coercing types with ``DataFrame.convert_objects`` is now implemented using the - keyword argument ``coerce=True``. Previously types were coerced by setting a - keyword argument to ``'coerce'`` instead of ``True``, as in ``convert_dates='coerce'``. - - .. ipython:: python +Coercing types with ``DataFrame.convert_objects`` is now implemented using the +keyword argument ``coerce=True``. Previously types were coerced by setting a +keyword argument to ``'coerce'`` instead of ``True``, as in ``convert_dates='coerce'``. 
- df = pd.DataFrame({'i': ['1','2'], - 'f': ['apple', '4.2'], - 's': ['apple','banana']}) - df +.. ipython:: python - The old usage of ``DataFrame.convert_objects`` used `'coerce'` along with the - type. + df = pd.DataFrame({'i': ['1','2'], + 'f': ['apple', '4.2'], + 's': ['apple','banana']}) + df - .. code-block:: python +The old usage of ``DataFrame.convert_objects`` used `'coerce'` along with the +type. - In [2]: df.convert_objects(convert_numeric='coerce') - - Now the ``coerce`` keyword must be explicitly used. - - .. ipython:: python +.. code-block:: python - df.convert_objects(numeric=True, coerce=True) + In [2]: df.convert_objects(convert_numeric='coerce') -- In earlier versions of pandas, ``DataFrame.convert_objects`` would not coerce - numeric types when there were no values convertible to a numeric type. For example, +Now the ``coerce`` keyword must be explicitly used. - .. code-block:: python +.. ipython:: python - In [1]: df = pd.DataFrame({'s': ['a','b']}) - In [2]: df.convert_objects(convert_numeric='coerce') - Out[2]: - s - 0 a - 1 b + df.convert_objects(numeric=True, coerce=True) - returns the original DataFrame with no conversion. This change alters - this behavior so that +In earlier versions of pandas, ``DataFrame.convert_objects`` would not coerce +numeric types when there were no values convertible to a numeric type. This returns +the original DataFrame with no conversion. This change alters +this behavior so that it converts all non-number-like strings to ``NaN``. - .. ipython:: python +.. code-block:: python - pd.DataFrame({'s': ['a','b']}) - df.convert_objects(numeric=True, coerce=True) + In [1]: df = pd.DataFrame({'s': ['a','b']}) + In [2]: df.convert_objects(convert_numeric='coerce') + Out[2]: + s + 0 a + 1 b - converts all non-number-like strings to ``NaN``. +.. ipython:: python -- In earlier versions of pandas, the default behavior was to try and convert - datetimes and timestamps. 
The new default is for ``DataFrame.convert_objects`` - to do nothing, and so it is necessary to pass at least one conversion target - in the method call. + pd.DataFrame({'s': ['a','b']}) + df.convert_objects(numeric=True, coerce=True) -.. _whatsnew_0170.api_breaking.other: +In earlier versions of pandas, the default behavior was to try and convert +datetimes and timestamps. The new default is for ``DataFrame.convert_objects`` +to do nothing, and so it is necessary to pass at least one conversion target +in the method call. -Other API Changes -^^^^^^^^^^^^^^^^^ +Changes to Index Comparisons +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- Operator equal on Index should behavior similarly to Series (:issue:`9947`) +Operator equal on Index should behave similarly to Series (:issue:`9947`) - Starting in v0.17.0, comparing ``Index`` objects of different lengths will raise - a ``ValueError``. This is to be consistent with the behavior of ``Series``. +Starting in v0.17.0, comparing ``Index`` objects of different lengths will raise +a ``ValueError``. This is to be consistent with the behavior of ``Series``. - Previous behavior: +Previous behavior: - .. code-block:: python +.. code-block:: python In [2]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5]) Out[2]: array([ True, False, False], dtype=bool) @@ -188,9 +184,9 @@ Other API Changes In [7]: pd.Series([1, 2, 3]) == pd.Series([1, 2]) ValueError: Series lengths must match to compare - New behavior: +New behavior: - .. code-block:: python +.. code-block:: python In [8]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5]) Out[8]: array([ True, False, False], dtype=bool) @@ -214,24 +210,27 @@ Other API Changes In [13]: pd.Series([1, 2, 3]) == pd.Series([1, 2]) ValueError: Series lengths must match to compare - Note that this is different from the ``numpy`` behavior where a comparison can - be broadcast: +Note that this is different from the ``numpy`` behavior where a comparison can +be broadcast: - .. ipython:: python +.. 
ipython:: python np.array([1, 2, 3]) == np.array([1]) - or it can return False if broadcasting can not be done: +or it can return False if broadcasting can not be done: - .. ipython:: python +.. ipython:: python np.array([1, 2, 3]) == np.array([1, 2]) +Other API Changes +^^^^^^^^^^^^^^^^^ + - Enable writing Excel files in :ref:`memory <_io.excel_writing_buffer>` using StringIO/BytesIO (:issue:`7074`) - Enable serialization of lists and dicts to strings in ExcelWriter (:issue:`8188`) - Allow passing `kwargs` to the interpolation methods (:issue:`10378`). - Serialize metadata properties of subclasses of pandas objects (:issue:`10553`). -- Boolean comparisons of a ``Series`` vs None will now be equivalent to comparing with np.nan, rather than raise ``TypeError``, xref (:issue:`1079`). +- Boolean comparisons of a ``Series`` vs ``None`` will now be equivalent to comparing with ``np.nan``, rather than raise ``TypeError``, xref (:issue:`1079`). - Remove use of some deprecated numpy comparisons (:issue:`10569`) .. _whatsnew_0170.deprecations: @@ -288,46 +287,48 @@ Bug Fixes - Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`) - Bug in ``to_datetime`` with invalid dates and formats supplied (:issue:`10154`) - - Bug in ``Index.drop_duplicates`` dropping name(s) (:issue:`10115`) - - - Bug in ``pd.Series`` when setting a value on an empty ``Series`` whose index has a frequency. (:issue:`10193`) - - Bug in ``DataFrame.plot`` raises ``ValueError`` when color name is specified by multiple characters (:issue:`10387`) - Bug in ``DataFrame.reset_index`` when index contains `NaT`. 
(:issue:`10388`) +- Bug in ``ExcelReader`` when worksheet is empty (:issue:`6403`) +- Bug in ``Table.select_column`` where name is not preserved (:issue:`10392`) +- Bug in ``offsets.generate_range`` where ``start`` and ``end`` have finer precision than ``offset`` (:issue:`9907`) -- Bug in ``ExcelReader`` when worksheet is empty (:issue:`6403`) -- Bug in ``Table.select_column`` where name is not preserved (:issue:`10392`) -- Bug in ``offsets.generate_range`` where ``start`` and ``end`` have finer precision than ``offset`` (:issue:`9907`) - Bug in ``DataFrame.interpolate`` with ``axis=1`` and ``inplace=True`` (:issue:`10395`) - - Bug in ``io.sql.get_schema`` when specifying multiple columns as primary key (:issue:`10385`). - - - Bug in ``test_categorical`` on big-endian builds (:issue:`10425`) - Bug in ``Series.map`` using categorical ``Series`` raises ``AttributeError`` (:issue:`10324`) - Bug in ``MultiIndex.get_level_values`` including ``Categorical`` raises ``AttributeError`` (:issue:`10460`) + + + + + + - Bug that caused segfault when resampling an empty Series (:issue:`10228`) - Bug in ``DatetimeIndex`` and ``PeriodIndex.value_counts`` resets name from its result, but retains in result's ``Index``. 
(:issue:`10150`) - - Bug in `pandas.concat` with ``axis=0`` when column is of dtype ``category`` (:issue:`10177`) - - Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`) - - Bug in `pandas.read_csv` with ``index_col=False`` or with ``index_col=['a', 'b']`` (:issue:`10413`, :issue:`10467`) - - Bug in `Series.from_csv` with ``header`` kwarg not setting the ``Series.name`` or the ``Series.index.name`` (:issue:`10483`) - - Bug in `groupby.var` which caused variance to be inaccurate for small float values (:issue:`10448`) - - Bug in ``Series.plot(kind='hist')`` Y Label not informative (:issue:`10485`) + + + + + + + + + - Bug in operator equal on Index not being consistent with Series (:issue:`9947`) From effb6761e1e7608c44e9bd1e02dfef2137c928fc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 17 Jul 2015 09:30:49 -0400 Subject: [PATCH 8/8] misc import cleanups --- doc/source/whatsnew/v0.17.0.txt | 5 +++-- pandas/core/ops.py | 38 +++++++++++++++++---------------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index d260e496c32ca..c7e08910a5924 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -230,8 +230,6 @@ Other API Changes - Enable serialization of lists and dicts to strings in ExcelWriter (:issue:`8188`) - Allow passing `kwargs` to the interpolation methods (:issue:`10378`). - Serialize metadata properties of subclasses of pandas objects (:issue:`10553`). -- Boolean comparisons of a ``Series`` vs ``None`` will now be equivalent to comparing with ``np.nan``, rather than raise ``TypeError``, xref (:issue:`1079`). -- Remove use of some deprecated numpy comparisons (:issue:`10569`) .. _whatsnew_0170.deprecations: @@ -243,6 +241,8 @@ Deprecations Removal of prior version deprecations/changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- Remove use of some deprecated numpy comparison operations, mainly in tests. 
(:issue:`10569`) + .. _dask: https://dask.readthedocs.org/en/latest/ .. _whatsnew_0170.gil: @@ -285,6 +285,7 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Boolean comparisons of a ``Series`` vs ``None`` will now be equivalent to comparing with ``np.nan``, rather than raise ``TypeError``, xref (:issue:`1079`). - Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`) - Bug in ``to_datetime`` with invalid dates and formats supplied (:issue:`10154`) - Bug in ``Index.drop_duplicates`` dropping name(s) (:issue:`10115`) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 3e18e514b2abf..089ca21cb0ef3 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -17,7 +17,9 @@ from pandas.tslib import iNaT from pandas.core.common import(bind_method, is_list_like, notnull, isnull, _values_from_object, _maybe_match_name, - needs_i8_conversion, is_datetimelike_v_numeric, is_integer_dtype) + needs_i8_conversion, is_datetimelike_v_numeric, + is_integer_dtype, is_categorical_dtype, is_object_dtype, + is_timedelta64_dtype, is_datetime64_dtype, is_bool_dtype) # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory @@ -276,11 +278,11 @@ def __init__(self, left, right, name): lvalues = self._convert_to_array(left, name=name) rvalues = self._convert_to_array(right, name=name, other=lvalues) - self.is_timedelta_lhs = com.is_timedelta64_dtype(left) - self.is_datetime_lhs = com.is_datetime64_dtype(left) + self.is_timedelta_lhs = is_timedelta64_dtype(left) + self.is_datetime_lhs = is_datetime64_dtype(left) self.is_integer_lhs = left.dtype.kind in ['i', 'u'] - self.is_datetime_rhs = com.is_datetime64_dtype(rvalues) - self.is_timedelta_rhs = com.is_timedelta64_dtype(rvalues) + self.is_datetime_rhs = is_datetime64_dtype(rvalues) + self.is_timedelta_rhs = is_timedelta64_dtype(rvalues) self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u') 
self._validate() @@ -355,7 +357,7 @@ def _convert_to_array(self, values, name=None, other=None): elif isinstance(values, pd.DatetimeIndex): values = values.to_series() elif not (isinstance(values, (np.ndarray, pd.Series)) and - com.is_datetime64_dtype(values)): + is_datetime64_dtype(values)): values = tslib.array_to_datetime(values) elif inferred_type in ('timedelta', 'timedelta64'): # have a timedelta, convert to to ns here @@ -448,8 +450,8 @@ def maybe_convert_for_time_op(cls, left, right, name): that the data is not the right type for time ops. """ # decide if we can do it - is_timedelta_lhs = com.is_timedelta64_dtype(left) - is_datetime_lhs = com.is_datetime64_dtype(left) + is_timedelta_lhs = is_timedelta64_dtype(left) + is_datetime_lhs = is_datetime64_dtype(left) if not (is_datetime_lhs or is_timedelta_lhs): return None @@ -547,17 +549,17 @@ def na_op(x, y): # dispatch to the categorical if we have a categorical # in either operand - if com.is_categorical_dtype(x): + if is_categorical_dtype(x): return op(x,y) - elif com.is_categorical_dtype(y) and not lib.isscalar(y): + elif is_categorical_dtype(y) and not isscalar(y): return op(y,x) - if com.is_object_dtype(x.dtype): + if is_object_dtype(x.dtype): if isinstance(y, list): y = lib.list_to_object_array(y) if isinstance(y, (np.ndarray, pd.Series)): - if not com.is_object_dtype(y.dtype): + if not is_object_dtype(y.dtype): result = lib.vec_compare(x, y.astype(np.object_), op) else: result = lib.vec_compare(x, y, op) @@ -574,7 +576,7 @@ def na_op(x, y): raise TypeError("invalid type comparison") # numpy does not like comparisons vs None - if lib.isscalar(y) and isnull(y): + if isscalar(y) and isnull(y): y = np.nan # we have a datetime/timedelta and may need to convert @@ -624,13 +626,13 @@ def wrapper(self, other, axis=None): return self._constructor(na_op(self.values, np.asarray(other)), index=self.index).__finalize__(self) elif isinstance(other, pd.Categorical): - if not com.is_categorical_dtype(self): + if not 
is_categorical_dtype(self): msg = "Cannot compare a Categorical for op {op} with Series of dtype {typ}.\n"\ "If you want to compare values, use 'series np.asarray(other)'." raise TypeError(msg.format(op=op,typ=self.dtype)) - if com.is_categorical_dtype(self): + if is_categorical_dtype(self): # cats are a special case as get_values() would return an ndarray, which would then # not take categories ordering into account # we can go directly to op, as the na_op would just test again and dispatch to it. @@ -641,7 +643,7 @@ def wrapper(self, other, axis=None): other = np.asarray(other) res = na_op(values, other) - if lib.isscalar(res): + if isscalar(res): raise TypeError('Could not compare %s type with Series' % type(other)) @@ -667,7 +669,7 @@ def na_op(x, y): y = lib.list_to_object_array(y) if isinstance(y, (np.ndarray, pd.Series)): - if (com.is_bool_dtype(x.dtype) and com.is_bool_dtype(y.dtype)): + if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): result = op(x, y) # when would this be hit? else: x = com._ensure_object(x) @@ -1069,7 +1071,7 @@ def na_op(x, y): # work only for scalars def f(self, other): - if not lib.isscalar(other): + if not isscalar(other): raise ValueError('Simple arithmetic with %s can only be ' 'done with scalar values' % self._constructor.__name__)