From c0c4c779702a14c5b8e09f7ef82b3dd60aeeb067 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 11 Nov 2019 15:12:41 -0600 Subject: [PATCH 1/6] Revert "CI: workaround numpydev bug (#29433)" This reverts commit b4adb71aee21fcbda3fed32471c60817a8bd5c9f. --- ci/azure/posix.yml | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index d6afb263b447f..6b2b01c2bf2c7 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -44,16 +44,13 @@ jobs: PATTERN: "not slow and not network" LOCALE_OVERRIDE: "zh_CN.UTF-8" - # https://github.com/pandas-dev/pandas/issues/29432 - # py37_np_dev: - # ENV_FILE: ci/deps/azure-37-numpydev.yaml - # CONDA_PY: "37" - # PATTERN: "not slow and not network" - # TEST_ARGS: "-W error" - # PANDAS_TESTING_MODE: "deprecate" - # EXTRA_APT: "xsel" - # # TODO: - # continueOnError: true + py37_np_dev: + ENV_FILE: ci/deps/azure-37-numpydev.yaml + CONDA_PY: "37" + PATTERN: "not slow and not network" + TEST_ARGS: "-W error" + PANDAS_TESTING_MODE: "deprecate" + EXTRA_APT: "xsel" steps: - script: | From 61d43c3fd2febc7ab3f038373f96d4a5d6b37d53 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 11 Nov 2019 15:12:41 -0600 Subject: [PATCH 2/6] Revert "CI: workaround numpydev bug (#29433)" This reverts commit b4adb71aee21fcbda3fed32471c60817a8bd5c9f. 
--- ci/azure/posix.yml | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index d6afb263b447f..6b2b01c2bf2c7 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -44,16 +44,13 @@ jobs: PATTERN: "not slow and not network" LOCALE_OVERRIDE: "zh_CN.UTF-8" - # https://github.com/pandas-dev/pandas/issues/29432 - # py37_np_dev: - # ENV_FILE: ci/deps/azure-37-numpydev.yaml - # CONDA_PY: "37" - # PATTERN: "not slow and not network" - # TEST_ARGS: "-W error" - # PANDAS_TESTING_MODE: "deprecate" - # EXTRA_APT: "xsel" - # # TODO: - # continueOnError: true + py37_np_dev: + ENV_FILE: ci/deps/azure-37-numpydev.yaml + CONDA_PY: "37" + PATTERN: "not slow and not network" + TEST_ARGS: "-W error" + PANDAS_TESTING_MODE: "deprecate" + EXTRA_APT: "xsel" steps: - script: | From 5c38be70380ded8383dc244e5077991293a3761a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 12 Nov 2019 09:06:39 -0800 Subject: [PATCH 3/6] COMPAT: suppress numpy warnings --- pandas/core/internals/managers.py | 4 +++- pandas/core/missing.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index fbe1db1c23cdb..97161816f0437 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1924,7 +1924,9 @@ def _compare_or_regex_search(a, b, regex=False): is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) - result = op(a) + with np.errstate(all="ignore"): + # suppress FutureWarning about elementwise comparison + result = op(a) if is_scalar(result) and (is_a_array or is_b_array): type_names = [type(a).__name__, type(b).__name__] diff --git a/pandas/core/missing.py b/pandas/core/missing.py index fb148b39c8a86..4312a79261e8c 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -38,7 +38,9 @@ def mask_missing(arr, values_to_mask): mask = None for x in nonna: if mask is None: - 
mask = arr == x + with np.errstate(all="ignore"): + # suppress FutureWarning about elementwise comparison + mask = arr == x # if x is a string and arr is not, then we get False and we must # expand the mask to size arr.shape From 922f419aad1ac3bbe975b1399aa38800ba815f3f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 12 Nov 2019 09:49:10 -0800 Subject: [PATCH 4/6] np.errstate->catch_warnings --- pandas/core/internals/managers.py | 4 +++- pandas/core/missing.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 97161816f0437..28d6a429c64a1 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -4,6 +4,7 @@ import operator import re from typing import List, Optional, Sequence, Tuple, Union +import warnings import numpy as np @@ -1924,8 +1925,9 @@ def _compare_or_regex_search(a, b, regex=False): is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) - with np.errstate(all="ignore"): + with warnings.catch_warnings(): # suppress FutureWarning about elementwise comparison + warnings.simplefilter("always") result = op(a) if is_scalar(result) and (is_a_array or is_b_array): diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 4312a79261e8c..cbd01a32a4307 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1,6 +1,8 @@ """ Routines for filling missing data. 
""" +import warnings + import numpy as np from pandas._libs import algos, lib @@ -38,8 +40,9 @@ def mask_missing(arr, values_to_mask): mask = None for x in nonna: if mask is None: - with np.errstate(all="ignore"): + with warnings.catch_warnings(): # suppress FutureWarning about elementwise comparison + warnings.simplefilter("always") mask = arr == x # if x is a string and arr is not, then we get False and we must From 58df3f1339354ec656dc5ed7a5b9b8c907fc1fdf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 12 Nov 2019 10:28:20 -0800 Subject: [PATCH 5/6] TST: catch one more warning --- pandas/core/missing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index cbd01a32a4307..e8c28323b5b43 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -50,7 +50,10 @@ def mask_missing(arr, values_to_mask): if is_scalar(mask): mask = np.zeros(arr.shape, dtype=bool) else: - mask |= arr == x + with warnings.catch_warnings(): + # suppress FutureWarning about elementwise comparison + warnings.simplefilter("always") + mask |= arr == x if na_mask.any(): if mask is None: From 6a45b1567ddce8339117500d91d562fac6d9c2ba Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 15 Nov 2019 17:27:17 -0800 Subject: [PATCH 6/6] restorecom.is_datetimelike_v_numeric, com.is_numeric_v_string_like --- pandas/core/dtypes/common.py | 118 +++++++++++++++++++++++++++++ pandas/core/dtypes/missing.py | 5 ++ pandas/core/internals/managers.py | 12 +-- pandas/core/missing.py | 16 ++-- pandas/tests/dtypes/test_common.py | 28 +++++++ 5 files changed, 167 insertions(+), 12 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index dcc8a274492ee..0c8d4a2b543ad 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1278,6 +1278,124 @@ def _is_unorderable_exception(e: TypeError) -> bool: return "'>' not supported between instances of" in str(e) +# This exists to 
silence numpy deprecation warnings, see GH#29553 +def is_numeric_v_string_like(a, b): + """ + Check if we are comparing a string-like object to a numeric ndarray. + NumPy doesn't like to compare such objects, especially numeric arrays + and scalar string-likes. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we are comparing a string-like object to a numeric array. + + Examples + -------- + >>> is_numeric_v_string_like(1, 1) + False + >>> is_numeric_v_string_like("foo", "foo") + False + >>> is_numeric_v_string_like(1, "foo") # non-array numeric + False + >>> is_numeric_v_string_like(np.array([1]), "foo") + True + >>> is_numeric_v_string_like("foo", np.array([1])) # symmetric check + True + >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + True + >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + True + >>> is_numeric_v_string_like(np.array([1]), np.array([2])) + False + >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + False + """ + + is_a_array = isinstance(a, np.ndarray) + is_b_array = isinstance(b, np.ndarray) + + is_a_numeric_array = is_a_array and is_numeric_dtype(a) + is_b_numeric_array = is_b_array and is_numeric_dtype(b) + is_a_string_array = is_a_array and is_string_like_dtype(a) + is_b_string_array = is_b_array and is_string_like_dtype(b) + + is_a_scalar_string_like = not is_a_array and isinstance(a, str) + is_b_scalar_string_like = not is_b_array and isinstance(b, str) + + return ( + (is_a_numeric_array and is_b_scalar_string_like) + or (is_b_numeric_array and is_a_scalar_string_like) + or (is_a_numeric_array and is_b_string_array) + or (is_b_numeric_array and is_a_string_array) + ) + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_datetimelike_v_numeric(a, b): + """ + Check if we are comparing a datetime-like object to a numeric object. 
+ By "numeric," we mean an object that is either of an int or float dtype. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we are comparing a datetime-like to a numeric object. + + Examples + -------- + >>> dt = np.datetime64(pd.datetime(2017, 1, 1)) + >>> + >>> is_datetimelike_v_numeric(1, 1) + False + >>> is_datetimelike_v_numeric(dt, dt) + False + >>> is_datetimelike_v_numeric(1, dt) + True + >>> is_datetimelike_v_numeric(dt, 1) # symmetric check + True + >>> is_datetimelike_v_numeric(np.array([dt]), 1) + True + >>> is_datetimelike_v_numeric(np.array([1]), dt) + True + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + True + >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) + False + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + False + """ + + if not hasattr(a, "dtype"): + a = np.asarray(a) + if not hasattr(b, "dtype"): + b = np.asarray(b) + + def is_numeric(x): + """ + Check if an object has a numeric dtype (i.e. integer or float). + """ + return is_integer_dtype(x) or is_float_dtype(x) + + return (needs_i8_conversion(a) and is_numeric(b)) or ( + needs_i8_conversion(b) and is_numeric(a) + ) + + def needs_i8_conversion(arr_or_dtype) -> bool: """ Check whether the array or dtype should be converted to int64. 
diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index aeba4eebc498e..81d1e9df3b591 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -17,6 +17,7 @@ is_complex_dtype, is_datetime64_dtype, is_datetime64tz_dtype, + is_datetimelike_v_numeric, is_dtype_equal, is_extension_array_dtype, is_float_dtype, @@ -465,6 +466,10 @@ def array_equivalent(left, right, strict_nan: bool = False) -> bool: return True return ((left == right) | (isna(left) & isna(right))).all() + elif is_datetimelike_v_numeric(left, right): + # GH#29553 avoid numpy deprecation warning + return False + elif needs_i8_conversion(left) or needs_i8_conversion(right): # datetime64, timedelta64, Period if not is_dtype_equal(left.dtype, right.dtype): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 9f783caceff2f..fa814784e2d62 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -4,7 +4,6 @@ import operator import re from typing import List, Optional, Sequence, Tuple, Union -import warnings import numpy as np @@ -19,8 +18,10 @@ ) from pandas.core.dtypes.common import ( _NS_DTYPE, + is_datetimelike_v_numeric, is_extension_array_dtype, is_list_like, + is_numeric_v_string_like, is_scalar, is_sparse, ) @@ -1861,7 +1862,7 @@ def _shape_compat(x): def _interleaved_dtype( - blocks: List[Block] + blocks: List[Block], ) -> Optional[Union[np.dtype, ExtensionDtype]]: """Find the common dtype for `blocks`. 
@@ -1925,9 +1926,10 @@ def _compare_or_regex_search(a, b, regex=False): is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) - with warnings.catch_warnings(): - # suppress FutureWarning about elementwise comparison - warnings.simplefilter("always") + if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b): + # GH#29553 avoid deprecation warnings from numpy + result = False + else: result = op(a) if is_scalar(result) and (is_a_array or is_b_array): diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 7d98865ce962e..044b083b8e939 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1,7 +1,6 @@ """ Routines for filling missing data. """ -import warnings import numpy as np @@ -14,6 +13,7 @@ is_datetime64_dtype, is_datetime64tz_dtype, is_integer_dtype, + is_numeric_v_string_like, is_scalar, is_timedelta64_dtype, needs_i8_conversion, @@ -40,9 +40,10 @@ def mask_missing(arr, values_to_mask): mask = None for x in nonna: if mask is None: - with warnings.catch_warnings(): - # suppress FutureWarning about elementwise comparison - warnings.simplefilter("always") + if is_numeric_v_string_like(arr, x): + # GH#29553 prevent numpy deprecation warnings + mask = False + else: mask = arr == x # if x is a string and arr is not, then we get False and we must @@ -50,9 +51,10 @@ def mask_missing(arr, values_to_mask): if is_scalar(mask): mask = np.zeros(arr.shape, dtype=bool) else: - with warnings.catch_warnings(): - # suppress FutureWarning about elementwise comparison - warnings.simplefilter("always") + if is_numeric_v_string_like(arr, x): + # GH#29553 prevent numpy deprecation warnings + mask |= False + else: mask |= arr == x if na_mask.any(): diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index d8420673104d5..6e019f7cc3b4f 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -493,6 +493,34 @@ def test_is_datetime_or_timedelta_dtype(): assert 
com.is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) +def test_is_numeric_v_string_like(): + assert not com.is_numeric_v_string_like(1, 1) + assert not com.is_numeric_v_string_like(1, "foo") + assert not com.is_numeric_v_string_like("foo", "foo") + assert not com.is_numeric_v_string_like(np.array([1]), np.array([2])) + assert not com.is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + + assert com.is_numeric_v_string_like(np.array([1]), "foo") + assert com.is_numeric_v_string_like("foo", np.array([1])) + assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + + +def test_is_datetimelike_v_numeric(): + dt = np.datetime64(pd.datetime(2017, 1, 1)) + + assert not com.is_datetimelike_v_numeric(1, 1) + assert not com.is_datetimelike_v_numeric(dt, dt) + assert not com.is_datetimelike_v_numeric(np.array([1]), np.array([2])) + assert not com.is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), 1) + assert com.is_datetimelike_v_numeric(np.array([1]), dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + + def test_needs_i8_conversion(): assert not com.needs_i8_conversion(str) assert not com.needs_i8_conversion(np.int64)