From 2b016418f6b7c77628ad14f8d29f6224930da0dd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 25 Nov 2019 17:00:27 -0600 Subject: [PATCH] Revert "CI: workaround numpydev bug (#29433)" (#29553) --- ci/azure/posix.yml | 17 ++--- pandas/core/dtypes/common.py | 118 +++++++++++++++++++++++++++++ pandas/core/dtypes/missing.py | 5 ++ pandas/core/internals/managers.py | 8 +- pandas/core/missing.py | 14 +++- pandas/tests/dtypes/test_common.py | 28 +++++++ 6 files changed, 177 insertions(+), 13 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 66960ca2c6c102..a10fd402b67330 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -44,16 +44,13 @@ jobs: PATTERN: "not slow and not network" LOCALE_OVERRIDE: "zh_CN.UTF-8" - # https://github.com/pandas-dev/pandas/issues/29432 - # py37_np_dev: - # ENV_FILE: ci/deps/azure-37-numpydev.yaml - # CONDA_PY: "37" - # PATTERN: "not slow and not network" - # TEST_ARGS: "-W error" - # PANDAS_TESTING_MODE: "deprecate" - # EXTRA_APT: "xsel" - # # TODO: - # continueOnError: true + py37_np_dev: + ENV_FILE: ci/deps/azure-37-numpydev.yaml + CONDA_PY: "37" + PATTERN: "not slow and not network" + TEST_ARGS: "-W error" + PANDAS_TESTING_MODE: "deprecate" + EXTRA_APT: "xsel" steps: - script: | diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 783669688ea42c..d981a1d6e4aa4d 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1191,6 +1191,124 @@ def _is_unorderable_exception(e: TypeError) -> bool: return "'>' not supported between instances of" in str(e) +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_numeric_v_string_like(a, b): + """ + Check if we are comparing a string-like object to a numeric ndarray. + NumPy doesn't like to compare such objects, especially numeric arrays + and scalar string-likes. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. 
+ + Returns + ------- + boolean + Whether we are comparing a string-like object to a numeric array. + + Examples + -------- + >>> is_numeric_v_string_like(1, 1) + False + >>> is_numeric_v_string_like("foo", "foo") + False + >>> is_numeric_v_string_like(1, "foo") # non-array numeric + False + >>> is_numeric_v_string_like(np.array([1]), "foo") + True + >>> is_numeric_v_string_like("foo", np.array([1])) # symmetric check + True + >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + True + >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + True + >>> is_numeric_v_string_like(np.array([1]), np.array([2])) + False + >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + False + """ + + is_a_array = isinstance(a, np.ndarray) + is_b_array = isinstance(b, np.ndarray) + + is_a_numeric_array = is_a_array and is_numeric_dtype(a) + is_b_numeric_array = is_b_array and is_numeric_dtype(b) + is_a_string_array = is_a_array and is_string_like_dtype(a) + is_b_string_array = is_b_array and is_string_like_dtype(b) + + is_a_scalar_string_like = not is_a_array and isinstance(a, str) + is_b_scalar_string_like = not is_b_array and isinstance(b, str) + + return ( + (is_a_numeric_array and is_b_scalar_string_like) + or (is_b_numeric_array and is_a_scalar_string_like) + or (is_a_numeric_array and is_b_string_array) + or (is_b_numeric_array and is_a_string_array) + ) + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_datetimelike_v_numeric(a, b): + """ + Check if we are comparing a datetime-like object to a numeric object. + By "numeric," we mean an object that is either of an int or float dtype. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we are comparing a datetime-like to a numeric object. 
+ + Examples + -------- + >>> dt = np.datetime64(pd.datetime(2017, 1, 1)) + >>> + >>> is_datetimelike_v_numeric(1, 1) + False + >>> is_datetimelike_v_numeric(dt, dt) + False + >>> is_datetimelike_v_numeric(1, dt) + True + >>> is_datetimelike_v_numeric(dt, 1) # symmetric check + True + >>> is_datetimelike_v_numeric(np.array([dt]), 1) + True + >>> is_datetimelike_v_numeric(np.array([1]), dt) + True + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + True + >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) + False + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + False + """ + + if not hasattr(a, "dtype"): + a = np.asarray(a) + if not hasattr(b, "dtype"): + b = np.asarray(b) + + def is_numeric(x): + """ + Check if an object has a numeric dtype (i.e. integer or float). + """ + return is_integer_dtype(x) or is_float_dtype(x) + + return (needs_i8_conversion(a) and is_numeric(b)) or ( + needs_i8_conversion(b) and is_numeric(a) + ) + + def needs_i8_conversion(arr_or_dtype) -> bool: """ Check whether the array or dtype should be converted to int64. 
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 25d6f87143d728..cb4199272f5740 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -17,6 +17,7 @@ is_complex_dtype, is_datetime64_dtype, is_datetime64tz_dtype, + is_datetimelike_v_numeric, is_dtype_equal, is_extension_array_dtype, is_float_dtype, @@ -465,6 +466,10 @@ def array_equivalent(left, right, strict_nan: bool = False) -> bool: return True return ((left == right) | (isna(left) & isna(right))).all() + elif is_datetimelike_v_numeric(left, right): + # GH#29553 avoid numpy deprecation warning + return False + elif needs_i8_conversion(left) or needs_i8_conversion(right): # datetime64, timedelta64, Period if not is_dtype_equal(left.dtype, right.dtype): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5e60440f1577e0..c37a8ea5e42a40 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -18,8 +18,10 @@ ) from pandas.core.dtypes.common import ( _NS_DTYPE, + is_datetimelike_v_numeric, is_extension_array_dtype, is_list_like, + is_numeric_v_string_like, is_scalar, is_sparse, ) @@ -1917,7 +1919,11 @@ def _compare_or_regex_search(a, b, regex=False): is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) - result = op(a) + if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b): + # GH#29553 avoid deprecation warnings from numpy + result = False + else: + result = op(a) if is_scalar(result) and (is_a_array or is_b_array): type_names = [type(a).__name__, type(b).__name__] diff --git a/pandas/core/missing.py b/pandas/core/missing.py index fc54c03c042b7a..044b083b8e9392 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1,6 +1,7 @@ """ Routines for filling missing data. 
""" + import numpy as np from pandas._libs import algos, lib @@ -12,6 +13,7 @@ is_datetime64_dtype, is_datetime64tz_dtype, is_integer_dtype, + is_numeric_v_string_like, is_scalar, is_timedelta64_dtype, needs_i8_conversion, @@ -38,14 +40,22 @@ def mask_missing(arr, values_to_mask): mask = None for x in nonna: if mask is None: - mask = arr == x + if is_numeric_v_string_like(arr, x): + # GH#29553 prevent numpy deprecation warnings + mask = False + else: + mask = arr == x # if x is a string and arr is not, then we get False and we must # expand the mask to size arr.shape if is_scalar(mask): mask = np.zeros(arr.shape, dtype=bool) else: - mask |= arr == x + if is_numeric_v_string_like(arr, x): + # GH#29553 prevent numpy deprecation warnings + mask |= False + else: + mask |= arr == x if na_mask.any(): if mask is None: diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 912fce6339716e..667ee467f2f29b 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -474,6 +474,34 @@ def test_is_datetime_or_timedelta_dtype(): assert com.is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) +def test_is_numeric_v_string_like(): + assert not com.is_numeric_v_string_like(1, 1) + assert not com.is_numeric_v_string_like(1, "foo") + assert not com.is_numeric_v_string_like("foo", "foo") + assert not com.is_numeric_v_string_like(np.array([1]), np.array([2])) + assert not com.is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + + assert com.is_numeric_v_string_like(np.array([1]), "foo") + assert com.is_numeric_v_string_like("foo", np.array([1])) + assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + + +def test_is_datetimelike_v_numeric(): + dt = np.datetime64(pd.datetime(2017, 1, 1)) + + assert not com.is_datetimelike_v_numeric(1, 1) + assert not com.is_datetimelike_v_numeric(dt, dt) + assert not 
com.is_datetimelike_v_numeric(np.array([1]), np.array([2])) + assert not com.is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(dt, 1) # symmetric check + assert com.is_datetimelike_v_numeric(np.array([dt]), 1) + assert com.is_datetimelike_v_numeric(np.array([1]), dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + + def test_needs_i8_conversion(): assert not com.needs_i8_conversion(str) assert not com.needs_i8_conversion(np.int64)