Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test failure with dask master #3603

Closed
spencerkclark opened this issue Dec 7, 2019 · 2 comments · Fixed by #3604
Closed

Test failure with dask master #3603

spencerkclark opened this issue Dec 7, 2019 · 2 comments · Fixed by #3604

Comments

@spencerkclark
Copy link
Member

It looks like dask/dask#5684, which adds nanmedian to dask (nice!), changed the error message that is raised when one tries to reduce an array over all axes via median. The message no longer contains 'dask', because xarray now dispatches to the newly added dask function instead of failing before trying that.

@dcherian do you have thoughts on how to best address this? Should we just remove that check in test_reduce?

=================================== FAILURES ===================================
___________________________ TestVariable.test_reduce ___________________________

error = <class 'NotImplementedError'>, pattern = 'dask'

    @contextmanager
    def raises_regex(error, pattern):
        __tracebackhide__ = True
        with pytest.raises(error) as excinfo:
>           yield

xarray/tests/__init__.py:104: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <xarray.tests.test_dask.TestVariable object at 0x7fd14f8e9c88>

    def test_reduce(self):
        u = self.eager_var
        v = self.lazy_var
        self.assertLazyAndAllClose(u.mean(), v.mean())
        self.assertLazyAndAllClose(u.std(), v.std())
        with raise_if_dask_computes():
            actual = v.argmax(dim="x")
        self.assertLazyAndAllClose(u.argmax(dim="x"), actual)
        with raise_if_dask_computes():
            actual = v.argmin(dim="x")
        self.assertLazyAndAllClose(u.argmin(dim="x"), actual)
        self.assertLazyAndAllClose((u > 1).any(), (v > 1).any())
        self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x"))
        with raises_regex(NotImplementedError, "dask"):
>           v.median()

xarray/tests/test_dask.py:220: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <xarray.Variable (x: 4, y: 6)>
dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2), chunktype=numpy.ndarray>
dim = None, axis = None, skipna = None, kwargs = {}

    def wrapped_func(self, dim=None, axis=None, skipna=None, **kwargs):
>       return self.reduce(func, dim, axis, skipna=skipna, **kwargs)

xarray/core/common.py:46: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <xarray.Variable (x: 4, y: 6)>
dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2), chunktype=numpy.ndarray>
func = <function _create_nan_agg_method.<locals>.f at 0x7fd16c228378>
dim = None, axis = None, keep_attrs = None, keepdims = False, allow_lazy = True
kwargs = {'skipna': None}
input_data = dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2), chunktype=numpy.ndarray>

    def reduce(
        self,
        func,
        dim=None,
        axis=None,
        keep_attrs=None,
        keepdims=False,
        allow_lazy=None,
        **kwargs,
    ):
        """Reduce this array by applying `func` along some dimension(s).
    
        Parameters
        ----------
        func : function
            Function which can be called in the form
            `func(x, axis=axis, **kwargs)` to return the result of reducing an
            np.ndarray over an integer valued axis.
        dim : str or sequence of str, optional
            Dimension(s) over which to apply `func`.
        axis : int or sequence of int, optional
            Axis(es) over which to apply `func`. Only one of the 'dim'
            and 'axis' arguments can be supplied. If neither are supplied, then
            the reduction is calculated over the flattened array (by calling
            `func(x)` without an axis argument).
        keep_attrs : bool, optional
            If True, the variable's attributes (`attrs`) will be copied from
            the original object to the new one.  If False (default), the new
            object will be returned without attributes.
        keepdims : bool, default False
            If True, the dimensions which are reduced are left in the result
            as dimensions of size one
        **kwargs : dict
            Additional keyword arguments passed on to `func`.
    
        Returns
        -------
        reduced : Array
            Array with summarized data and the indicated dimension(s)
            removed.
        """
        if dim == ...:
            dim = None
        if dim is not None and axis is not None:
            raise ValueError("cannot supply both 'axis' and 'dim' arguments")
    
        if dim is not None:
            axis = self.get_axis_num(dim)
    
        if allow_lazy is not None:
            warnings.warn(
                "allow_lazy is deprecated and will be removed in version 0.16.0. It is now True by default.",
                DeprecationWarning,
            )
        else:
            allow_lazy = True
    
        input_data = self.data if allow_lazy else self.values
    
        if axis is not None:
            data = func(input_data, axis=axis, **kwargs)
        else:
>           data = func(input_data, **kwargs)

xarray/core/variable.py:1534: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

values = dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2), chunktype=numpy.ndarray>
axis = None, skipna = None, kwargs = {}
func = <function nanmedian at 0x7fd16c226bf8>, nanname = 'nanmedian'

    def f(values, axis=None, skipna=None, **kwargs):
        if kwargs.pop("out", None) is not None:
            raise TypeError(f"`out` is not valid for {name}")
    
        values = asarray(values)
    
        if coerce_strings and values.dtype.kind in "SU":
            values = values.astype(object)
    
        func = None
        if skipna or (skipna is None and values.dtype.kind in "cfO"):
            nanname = "nan" + name
            func = getattr(nanops, nanname)
        else:
            func = _dask_or_eager_func(name)
    
        try:
>           return func(values, axis=axis, **kwargs)

xarray/core/duck_array_ops.py:307: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

a = dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2), chunktype=numpy.ndarray>
axis = None, out = None

    def nanmedian(a, axis=None, out=None):
>       return _dask_or_eager_func("nanmedian", eager_module=nputils)(a, axis=axis)

xarray/core/nanops.py:144: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

args = (dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2), chunktype=numpy.ndarray>,)
kwargs = {'axis': None}
dispatch_args = (dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2), chunktype=numpy.ndarray>,)
wrapped = <function nanmedian at 0x7fd1737bcea0>

    def f(*args, **kwargs):
        if list_of_args:
            dispatch_args = args[0]
        else:
            dispatch_args = args[array_args]
        if any(isinstance(a, dask_array.Array) for a in dispatch_args):
            try:
                wrapped = getattr(dask_module, name)
            except AttributeError as e:
                raise AttributeError(f"{e}: requires dask >={requires_dask}")
        else:
            wrapped = getattr(eager_module, name)
>       return wrapped(*args, **kwargs)

xarray/core/duck_array_ops.py:47: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

a = dask.array<array, shape=(4, 6), dtype=float64, chunksize=(2, 2), chunktype=numpy.ndarray>
axis = None, keepdims = False, out = None

    @derived_from(np)
    def nanmedian(a, axis=None, keepdims=False, out=None):
        """
        This works by automatically chunking the reduced axes to a single chunk
        and then calling ``numpy.nanmedian`` function across the remaining dimensions
        """
        if axis is None:
            raise NotImplementedError(
>               "The da.nanmedian function only works along an axis or a subset of axes.  "
                "The full algorithm is difficult to do in parallel"
            )
E           NotImplementedError: The da.nanmedian function only works along an axis or a subset of axes.  The full algorithm is difficult to do in parallel

/usr/share/miniconda/envs/xarray-tests/lib/python3.7/site-packages/dask/array/reductions.py:1299: NotImplementedError

During handling of the above exception, another exception occurred:

self = <xarray.tests.test_dask.TestVariable object at 0x7fd14f8e9c88>

    def test_reduce(self):
        u = self.eager_var
        v = self.lazy_var
        self.assertLazyAndAllClose(u.mean(), v.mean())
        self.assertLazyAndAllClose(u.std(), v.std())
        with raise_if_dask_computes():
            actual = v.argmax(dim="x")
        self.assertLazyAndAllClose(u.argmax(dim="x"), actual)
        with raise_if_dask_computes():
            actual = v.argmin(dim="x")
        self.assertLazyAndAllClose(u.argmin(dim="x"), actual)
        self.assertLazyAndAllClose((u > 1).any(), (v > 1).any())
        self.assertLazyAndAllClose((u < 1).all("x"), (v < 1).all("x"))
        with raises_regex(NotImplementedError, "dask"):
>           v.median()

xarray/tests/test_dask.py:220: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <contextlib._GeneratorContextManager object at 0x7fd14f8bcc50>
type = <class 'NotImplementedError'>
value = NotImplementedError('The da.nanmedian function only works along an axis or a subset of axes.  The full algorithm is difficult to do in parallel')
traceback = <traceback object at 0x7fd154597bc8>

    def __exit__(self, type, value, traceback):
        if type is None:
            try:
                next(self.gen)
            except StopIteration:
                return False
            else:
                raise RuntimeError("generator didn't stop")
        else:
            if value is None:
                # Need to force instantiation so we can reliably
                # tell if we get the same exception back
                value = type()
            try:
>               self.gen.throw(type, value, traceback)
E               AssertionError: exception NotImplementedError('The da.nanmedian function only works along an axis or a subset of axes.  The full algorithm is difficult to do in parallel') did not match pattern 'dask'
@dcherian
Copy link
Contributor

dcherian commented Dec 7, 2019

I'm implementing median as a reduction that can be injected. This error is still raised, so let's change the test to check for that specific error message instead.

spencerkclark added a commit to spencerkclark/xarray that referenced this issue Dec 7, 2019
@spencerkclark
Copy link
Member Author

Awesome, thanks @dcherian; I didn't realize you already had some xarray work in progress on this.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants