Skip to content
forked from pydata/xarray

Commit

Permalink
Merge branch 'master' into deprecate/inplace
Browse files Browse the repository at this point in the history
* master:
  Global option to always keep/discard attrs on operations (pydata#2482)
  Remove tests where answers change in cftime 1.0.2.1 (pydata#2522)
  Finish deprecation cycle for DataArray.__contains__ checking array values (pydata#2520)
  Fix bug where OverflowError is not being raised (pydata#2519)
  • Loading branch information
dcherian committed Oct 30, 2018
2 parents 66d3cea + 6d55f99 commit 4359403
Show file tree
Hide file tree
Showing 13 changed files with 266 additions and 76 deletions.
3 changes: 2 additions & 1 deletion doc/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ conventions`_. (An exception is serialization to and from netCDF files.)

An implication of this choice is that we do not propagate ``attrs`` through
most operations unless explicitly flagged (some methods have a ``keep_attrs``
option). Similarly, xarray does not check for conflicts between ``attrs`` when
option, and there is a global flag for setting this to be always True or
False). Similarly, xarray does not check for conflicts between ``attrs`` when
combining arrays and datasets, unless explicitly requested with the option
``compat='identical'``. The guiding principle is that metadata should not be
allowed to get in the way.
Expand Down
24 changes: 18 additions & 6 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,14 @@ v0.11.0 (unreleased)
Breaking changes
~~~~~~~~~~~~~~~~

- ``Dataset.T`` has been removed as a shortcut for :py:meth:`Dataset.transpose`.
Call :py:meth:`Dataset.transpose` directly instead.
- Iterating over a ``Dataset`` now includes only data variables, not coordinates.
Similarly, calling ``len`` and ``bool`` on a ``Dataset`` now
includes only data variables
- Finished deprecation cycles:
- ``Dataset.T`` has been removed as a shortcut for :py:meth:`Dataset.transpose`.
Call :py:meth:`Dataset.transpose` directly instead.
- Iterating over a ``Dataset`` now includes only data variables, not coordinates.
Similarly, calling ``len`` and ``bool`` on a ``Dataset`` now
includes only data variables.
- ``DataArray.__contains__`` (used by Python's ``in`` operator) now checks
array data, not coordinates.
- Xarray's storage backends now automatically open and close files when
necessary, rather than requiring opening a file with ``autoclose=True``. A
global least-recently-used cache is used to store open files; the default
Expand Down Expand Up @@ -82,7 +85,12 @@ Enhancements
:py:meth:`~xarray.Dataset.differentiate`,
:py:meth:`~xarray.DataArray.interp`, and
:py:meth:`~xarray.Dataset.interp`.
By `Spencer Clark <https://github.com/spencerkclark>`_.
By `Spencer Clark <https://github.com/spencerkclark>`_.
- There is now a global option to either always keep or always discard
dataset and dataarray attrs upon operations. The option is set with
``xarray.set_options(keep_attrs=True)``, and the default is to use the old
behaviour.
By `Tom Nicholas <https://github.com/TomNicholas>`_.
- Added a new backend for the GRIB file format based on ECMWF *cfgrib*
python driver and *ecCodes* C-library. (:issue:`2475`)
By `Alessandro Amici <https://github.com/alexamici>`_,
Expand Down Expand Up @@ -126,6 +134,10 @@ Bug fixes
By `Spencer Clark <https://github.com/spencerkclark>`_.
- Avoid use of Dask's deprecated ``get=`` parameter in tests
by `Matthew Rocklin <https://github.com/mrocklin/>`_.
- An ``OverflowError`` is now accurately raised and caught during the
encoding process if a reference date is used that is so distant that
the dates must be encoded using cftime rather than NumPy (:issue:`2272`).
By `Spencer Clark <https://github.com/spencerkclark>`_.

.. _whats-new.0.10.9:

Expand Down
7 changes: 6 additions & 1 deletion xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,12 @@ def encode_cf_datetime(dates, units=None, calendar=None):
delta_units = _netcdf_to_numpy_timeunit(delta)
time_delta = np.timedelta64(1, delta_units).astype('timedelta64[ns]')
ref_date = np.datetime64(pd.Timestamp(ref_date))
num = (dates - ref_date) / time_delta

# Wrap the dates in a DatetimeIndex to do the subtraction to ensure
# an OverflowError is raised if the ref_date is too far away from
# dates to be encoded (GH 2272).
num = (pd.DatetimeIndex(dates.ravel()) - ref_date) / time_delta
num = num.values.reshape(dates.shape)

except (OutOfBoundsDatetime, OverflowError):
num = _encode_datetime_with_cftime(dates, units, calendar)
Expand Down
22 changes: 13 additions & 9 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .arithmetic import SupportsArithmetic
from .pycompat import OrderedDict, basestring, dask_array_type, suppress
from .utils import Frozen, ReprObject, SortedKeysDict, either_dict_or_kwargs
from .options import _get_keep_attrs

# Used as a sentinel value to indicate all dimensions
ALL_DIMS = ReprObject('<all-dims>')
Expand All @@ -21,13 +22,13 @@ class ImplementsArrayReduce(object):
def _reduce_method(cls, func, include_skipna, numeric_only):
if include_skipna:
def wrapped_func(self, dim=None, axis=None, skipna=None,
keep_attrs=False, **kwargs):
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
**kwargs):
return self.reduce(func, dim, axis,
skipna=skipna, allow_lazy=True, **kwargs)
else:
def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
def wrapped_func(self, dim=None, axis=None,
**kwargs):
return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
return self.reduce(func, dim, axis,
allow_lazy=True, **kwargs)
return wrapped_func

Expand All @@ -51,14 +52,14 @@ class ImplementsDatasetReduce(object):
@classmethod
def _reduce_method(cls, func, include_skipna, numeric_only):
if include_skipna:
def wrapped_func(self, dim=None, keep_attrs=False, skipna=None,
def wrapped_func(self, dim=None, skipna=None,
**kwargs):
return self.reduce(func, dim, keep_attrs, skipna=skipna,
return self.reduce(func, dim, skipna=skipna,
numeric_only=numeric_only, allow_lazy=True,
**kwargs)
else:
def wrapped_func(self, dim=None, keep_attrs=False, **kwargs):
return self.reduce(func, dim, keep_attrs,
def wrapped_func(self, dim=None, **kwargs):
return self.reduce(func, dim,
numeric_only=numeric_only, allow_lazy=True,
**kwargs)
return wrapped_func
Expand Down Expand Up @@ -591,7 +592,7 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
center=center)

def resample(self, freq=None, dim=None, how=None, skipna=None,
closed=None, label=None, base=0, keep_attrs=False, **indexer):
closed=None, label=None, base=0, keep_attrs=None, **indexer):
"""Returns a Resample object for performing resampling operations.
Handles both downsampling and upsampling. If any intervals contain no
Expand Down Expand Up @@ -659,6 +660,9 @@ def resample(self, freq=None, dim=None, how=None, skipna=None,
from .dataarray import DataArray
from .resample import RESAMPLE_DIM

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

if dim is not None:
if how is None:
how = 'mean'
Expand Down
16 changes: 7 additions & 9 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
assert_coordinate_consistent, remap_label_indexers)
from .dataset import Dataset, merge_indexes, split_indexes
from .formatting import format_item
from .options import OPTIONS
from .options import OPTIONS, _get_keep_attrs
from .pycompat import OrderedDict, basestring, iteritems, range, zip
from .utils import (
_check_inplace, decode_numpy_dict_values, either_dict_or_kwargs,
Expand Down Expand Up @@ -504,11 +504,7 @@ def _item_sources(self):
LevelCoordinatesSource(self)]

def __contains__(self, key):
warnings.warn(
'xarray.DataArray.__contains__ currently checks membership in '
'DataArray.coords, but in xarray v0.11 will change to check '
'membership in array values.', FutureWarning, stacklevel=2)
return key in self._coords
return key in self.data

@property
def loc(self):
Expand Down Expand Up @@ -1564,7 +1560,7 @@ def combine_first(self, other):
"""
return ops.fillna(self, other, join="outer")

def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
def reduce(self, func, dim=None, axis=None, keep_attrs=None, **kwargs):
"""Reduce this array by applying `func` along some dimension(s).
Parameters
Expand Down Expand Up @@ -1593,6 +1589,7 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False, **kwargs):
DataArray with this object's array replaced with an array with
summarized data and the indicated dimension(s) removed.
"""

var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs)
return self._replace_maybe_drop_dims(var)

Expand Down Expand Up @@ -2275,7 +2272,7 @@ def sortby(self, variables, ascending=True):
ds = self._to_temp_dataset().sortby(variables, ascending=ascending)
return self._from_temp_dataset(ds)

def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
def quantile(self, q, dim=None, interpolation='linear', keep_attrs=None):
"""Compute the qth quantile of the data along the specified dimension.
Returns the qth quantiles(s) of the array elements.
Expand Down Expand Up @@ -2321,7 +2318,7 @@ def quantile(self, q, dim=None, interpolation='linear', keep_attrs=False):
q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation)
return self._from_temp_dataset(ds)

def rank(self, dim, pct=False, keep_attrs=False):
def rank(self, dim, pct=False, keep_attrs=None):
"""Ranks the data.
Equal values are assigned a rank that is the average of the ranks that
Expand Down Expand Up @@ -2357,6 +2354,7 @@ def rank(self, dim, pct=False, keep_attrs=False):
array([ 1., 2., 3.])
Dimensions without coordinates: x
"""

ds = self._to_temp_dataset().rank(dim, pct=pct, keep_attrs=keep_attrs)
return self._from_temp_dataset(ds)

Expand Down
25 changes: 18 additions & 7 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from .merge import (
dataset_merge_method, dataset_update_method, merge_data_and_coords,
merge_variables)
from .options import OPTIONS
from .options import OPTIONS, _get_keep_attrs
from .pycompat import (
OrderedDict, basestring, dask_array_type, integer_types, iteritems, range)
from .utils import (
Expand Down Expand Up @@ -2851,7 +2851,7 @@ def combine_first(self, other):
out = ops.fillna(self, other, join="outer", dataset_join="outer")
return out

def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False,
allow_lazy=False, **kwargs):
"""Reduce this dataset by applying `func` along some dimension(s).
Expand Down Expand Up @@ -2893,6 +2893,9 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
raise ValueError('Dataset does not contain the dimensions: %s'
% missing_dimensions)

if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)

variables = OrderedDict()
for name, var in iteritems(self._variables):
reduce_dims = [dim for dim in var.dims if dim in dims]
Expand Down Expand Up @@ -2921,7 +2924,7 @@ def reduce(self, func, dim=None, keep_attrs=False, numeric_only=False,
attrs = self.attrs if keep_attrs else None
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)

def apply(self, func, keep_attrs=False, args=(), **kwargs):
def apply(self, func, keep_attrs=None, args=(), **kwargs):
"""Apply a function over the data variables in this dataset.
Parameters
Expand Down Expand Up @@ -2966,6 +2969,8 @@ def apply(self, func, keep_attrs=False, args=(), **kwargs):
variables = OrderedDict(
(k, maybe_wrap_array(v, func(v, *args, **kwargs)))
for k, v in iteritems(self.data_vars))
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
attrs = self.attrs if keep_attrs else None
return type(self)(variables, attrs=attrs)

Expand Down Expand Up @@ -3630,7 +3635,7 @@ def sortby(self, variables, ascending=True):
return aligned_self.isel(**indices)

def quantile(self, q, dim=None, interpolation='linear',
numeric_only=False, keep_attrs=False):
numeric_only=False, keep_attrs=None):
"""Compute the qth quantile of the data along the specified dimension.
Returns the qth quantiles(s) of the array elements for each variable
Expand Down Expand Up @@ -3708,6 +3713,8 @@ def quantile(self, q, dim=None, interpolation='linear',

# construct the new dataset
coord_names = set(k for k in self.coords if k in variables)
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
attrs = self.attrs if keep_attrs else None
new = self._replace_vars_and_dims(variables, coord_names, attrs=attrs)
if 'quantile' in new.dims:
Expand All @@ -3716,7 +3723,7 @@ def quantile(self, q, dim=None, interpolation='linear',
new.coords['quantile'] = q
return new

def rank(self, dim, pct=False, keep_attrs=False):
def rank(self, dim, pct=False, keep_attrs=None):
"""Ranks the data.
Equal values are assigned a rank that is the average of the ranks that
Expand Down Expand Up @@ -3756,6 +3763,8 @@ def rank(self, dim, pct=False, keep_attrs=False):
variables[name] = var

coord_names = set(self.coords)
if keep_attrs is None:
keep_attrs = _get_keep_attrs(default=False)
attrs = self.attrs if keep_attrs else None
return self._replace_vars_and_dims(variables, coord_names, attrs=attrs)

Expand Down Expand Up @@ -3819,11 +3828,13 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):

@property
def real(self):
return self._unary_op(lambda x: x.real, keep_attrs=True)(self)
return self._unary_op(lambda x: x.real,
keep_attrs=True)(self)

@property
def imag(self):
return self._unary_op(lambda x: x.imag, keep_attrs=True)(self)
return self._unary_op(lambda x: x.imag,
keep_attrs=True)(self)

def filter_by_attrs(self, **kwargs):
"""Returns a ``Dataset`` with variables that match specific conditions.
Expand Down
Loading

0 comments on commit 4359403

Please sign in to comment.