Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Internal clean-up of isnull() to avoid relying on pandas #3132

Merged
merged 5 commits into from
Aug 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions properties/test_encode_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
These ones pass, just as you'd hope!

"""
from __future__ import absolute_import, division, print_function

import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
from hypothesis import given, settings
Expand Down
52 changes: 42 additions & 10 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,51 @@ def fail_on_dask_array_input(values, msg=None, func_name=None):

around = _dask_or_eager_func('around')
isclose = _dask_or_eager_func('isclose')

if hasattr(np, 'isnat') and (
        dask_array is None or hasattr(dask_array_type, '__array_ufunc__')):
    # np.isnat is available since NumPy 1.13, so __array_ufunc__ is always
    # supported.
    isnat = np.isnat
else:
    # Otherwise fall back to the ``isnull`` wrapper built on pandas.
    isnat = _dask_or_eager_func('isnull', eager_module=pd)
isnan = _dask_or_eager_func('isnan')
zeros_like = _dask_or_eager_func('zeros_like')


pandas_isnull = _dask_or_eager_func('isnull', eager_module=pd)


def isnull(data):
    """Return a boolean mask marking the null elements of ``data``.

    ``data`` is first converted with ``asarray`` and the check is then
    chosen from the scalar type of its dtype:

    * ``datetime64`` / ``timedelta64``: null means NaT (``isnat``).
    * inexact (float and complex) types: null means NaN (``isnan``).
    * bool, integer, character and void (structured) types: these cannot
      hold a missing value, so an all-False mask is returned.
    * anything else (expected to be ``dtype=object``): ``pandas_isnull``
      for numpy/dask arrays, ``data != data`` for other array types.

    Parameters
    ----------
    data : array-like
        Values to test.

    Returns
    -------
    Boolean array-like with the same shape as ``data``.
    """
    data = asarray(data)
    scalar_type = data.dtype.type
    if issubclass(scalar_type, (np.datetime64, np.timedelta64)):
        # datetime types use NaT for null
        # note: must check timedelta64 before integers, because currently
        # timedelta64 inherits from np.integer
        return isnat(data)
    elif issubclass(scalar_type, np.inexact):
        # float types use NaN for null
        return isnan(data)
    elif issubclass(
        scalar_type, (np.bool_, np.integer, np.character, np.void)
    ):
        # these types cannot represent missing values
        return zeros_like(data, dtype=bool)
    else:
        # at this point, array should have dtype=object
        if isinstance(data, (np.ndarray, dask_array_type)):
            return pandas_isnull(data)
        else:
            # Not reachable yet, but intended for use with other duck array
            # types. For full consistency with pandas, we should accept None
            # as a null value as well as NaN, but it isn't clear how to do
            # this with duck typing.
            return data != data


def notnull(data):
    """Return the element-wise inverse of ``isnull(data)``."""
    null_mask = isnull(data)
    return ~null_mask


transpose = _dask_or_eager_func('transpose')
Expand Down
25 changes: 21 additions & 4 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,18 @@ def test_wrong_shape(self):
assert not array_notnull_equiv(a, b)

@pytest.mark.parametrize("val1, val2, val3, null", [
    (1, 2, 3, None),
    (np.datetime64('2000'),
     np.datetime64('2001'),
     np.datetime64('2002'),
     np.datetime64('NaT')),
    (1., 2., 3., np.nan),
    (1., 2., 3., None),
    ('foo', 'bar', 'baz', None),
    ('foo', 'bar', 'baz', np.nan),
])
def test_types(self, val1, val2, val3, null):
    """Check ``array_notnull_equiv`` across value types and null markers.

    Two arrays are built that differ only at positions where at least one
    of them holds ``null``; they must be reported as equivalent.
    """
    # For string cases request dtype=object explicitly so the null marker
    # is stored as given; other cases let numpy infer the dtype.
    dtype = object if isinstance(val1, str) else None
    arr1 = np.array([val1, null, val3, null], dtype=dtype)
    arr2 = np.array([val1, val2, null, null], dtype=dtype)
    assert array_notnull_equiv(arr1, arr2)


Expand Down Expand Up @@ -432,6 +436,19 @@ def test_argmin_max_error():
da.argmin(dim='y')


@pytest.mark.parametrize('array', [
    np.array([np.datetime64('2000-01-01'), np.datetime64('NaT')]),
    np.array([np.timedelta64(1, 'h'), np.timedelta64('NaT')]),
    np.array([0.0, np.nan]),
    np.array([1j, np.nan]),
    np.array(['foo', np.nan], dtype=object),
])
def test_isnull(array):
    """``isnull`` flags only the second element of each two-item array.

    Each parametrised array holds one valid value followed by that
    dtype's null marker (NaT or NaN).
    """
    expected = np.array([False, True])
    actual = duck_array_ops.isnull(array)
    # numpy's signature is assert_equal(actual, desired); keeping that
    # order makes failure messages label the two arrays correctly.
    np.testing.assert_equal(actual, expected)


@requires_dask
def test_isnull_with_dask():
da = construct_dataarray(2, np.float32, contains_nan=True, dask=True)
Expand Down