diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000000..e37b41de303 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,18 @@ +version = 1 + +test_patterns = [ + "*/tests/**", + "*/test_*.py" +] + +exclude_patterns = [ + "doc/**", + "ci/**" +] + +[[analyzers]] +name = "python" +enabled = true + + [analyzers.meta] + runtime_version = "3.x.x" \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 37dbcd2ebb0..c712cf27979 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -27,7 +27,7 @@ assignees: '' #### Versions -
Output of `xr.show_versions()` +
Output of xr.show_versions() diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index cc9517a98ba..313428c29d2 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -18,6 +18,8 @@ Dataset.any Dataset.argmax Dataset.argmin + Dataset.idxmax + Dataset.idxmin Dataset.max Dataset.min Dataset.mean @@ -160,6 +162,8 @@ DataArray.any DataArray.argmax DataArray.argmin + DataArray.idxmax + DataArray.idxmin DataArray.max DataArray.min DataArray.mean diff --git a/doc/api.rst b/doc/api.rst index b37c84e7a81..8ec6843d24a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -173,6 +173,7 @@ Computation Dataset.quantile Dataset.differentiate Dataset.integrate + Dataset.map_blocks Dataset.polyfit **Aggregation**: @@ -358,6 +359,8 @@ Computation DataArray.integrate DataArray.polyfit DataArray.str + DataArray.map_blocks + **Aggregation**: :py:attr:`~DataArray.all` @@ -518,7 +521,6 @@ Dataset methods Dataset.load Dataset.chunk Dataset.unify_chunks - Dataset.map_blocks Dataset.filter_by_attrs Dataset.info @@ -550,7 +552,6 @@ DataArray methods DataArray.load DataArray.chunk DataArray.unify_chunks - DataArray.map_blocks Coordinates objects =================== diff --git a/doc/combining.rst b/doc/combining.rst index 05b7f2efc50..ffc6575c579 100644 --- a/doc/combining.rst +++ b/doc/combining.rst @@ -4,11 +4,12 @@ Combining data -------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) * For combining datasets or data arrays along a single dimension, see concatenate_. @@ -28,11 +29,10 @@ that dimension: .. ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray(np.random.randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])]) arr[:, :1] # this resembles how you would use np.concatenate - xr.concat([arr[:, :1], arr[:, 1:]], dim='y') + xr.concat([arr[:, :1], arr[:, 1:]], dim="y") In addition to combining along an existing dimension, ``concat`` can create a new dimension by stacking lower dimensional arrays together: @@ -41,7 +41,7 @@ new dimension by stacking lower dimensional arrays together: arr[0] # to combine these 1d arrays into a 2d array in numpy, you would use np.array - xr.concat([arr[0], arr[1]], 'x') + xr.concat([arr[0], arr[1]], "x") If the second argument to ``concat`` is a new dimension name, the arrays will be concatenated along that new dimension, which is always inserted as the first @@ -49,7 +49,7 @@ dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], 'new_dim') + xr.concat([arr[0], arr[1]], "new_dim") The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or :py:class:`~xarray.DataArray` object as well as a string, in which case it is @@ -57,14 +57,14 @@ used to label the values along the new dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name='new_dim')) + xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name="new_dim")) Of course, ``concat`` also works on ``Dataset`` objects: .. ipython:: python - ds = arr.to_dataset(name='foo') - xr.concat([ds.sel(x='a'), ds.sel(x='b')], 'x') + ds = arr.to_dataset(name="foo") + xr.concat([ds.sel(x="a"), ds.sel(x="b")], "x") :py:func:`~xarray.concat` has a number of options which provide deeper control over which variables are concatenated and how it handles conflicting variables @@ -84,8 +84,8 @@ To combine variables and coordinates between multiple ``DataArray`` and/or .. 
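The ``idxmax``/``idxmin`` entries added to the API listings above ship without a usage example in this patch; a minimal sketch of the behaviour, assuming a build of xarray that includes these methods (they return the coordinate label of the extremum, where ``argmax``/``argmin`` return the integer position):

.. code-block:: python

    import xarray as xr

    da = xr.DataArray([3, 1, 4, 1], coords=[("x", [10, 20, 30, 40])])
    da.argmax()  # integer position of the maximum: 2
    da.idxmax(dim="x")  # coordinate label of the maximum: 30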
ipython:: python - xr.merge([ds, ds.rename({'foo': 'bar'})]) - xr.merge([xr.DataArray(n, name='var%d' % n) for n in range(5)]) + xr.merge([ds, ds.rename({"foo": "bar"})]) + xr.merge([xr.DataArray(n, name="var%d" % n) for n in range(5)]) If you merge another dataset (or a dictionary including data array objects), by default the resulting dataset will be aligned on the **union** of all index @@ -93,7 +93,7 @@ coordinates: .. ipython:: python - other = xr.Dataset({'bar': ('x', [1, 2, 3, 4]), 'x': list('abcd')}) + other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")}) xr.merge([ds, other]) This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised @@ -116,7 +116,7 @@ used in the :py:class:`~xarray.Dataset` constructor: .. ipython:: python - xr.Dataset({'a': arr[:-1], 'b': arr[1:]}) + xr.Dataset({"a": arr[:-1], "b": arr[1:]}) .. _combine: @@ -131,8 +131,8 @@ are filled with ``NaN``. For example: .. ipython:: python - ar0 = xr.DataArray([[0, 0], [0, 0]], [('x', ['a', 'b']), ('y', [-1, 0])]) - ar1 = xr.DataArray([[1, 1], [1, 1]], [('x', ['b', 'c']), ('y', [0, 1])]) + ar0 = xr.DataArray([[0, 0], [0, 0]], [("x", ["a", "b"]), ("y", [-1, 0])]) + ar1 = xr.DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])]) ar0.combine_first(ar1) ar1.combine_first(ar0) @@ -152,7 +152,7 @@ variables with new values: .. ipython:: python - ds.update({'space': ('space', [10.2, 9.4, 3.9])}) + ds.update({"space": ("space", [10.2, 9.4, 3.9])}) However, dimensions are still required to be consistent between different Dataset variables, so you cannot change the size of a dimension unless you @@ -170,7 +170,7 @@ syntax: .. ipython:: python - ds['baz'] = xr.DataArray([9, 9, 9, 9, 9], coords=[('x', list('abcde'))]) + ds["baz"] = xr.DataArray([9, 9, 9, 9, 9], coords=[("x", list("abcde"))]) ds.baz Equals and identical @@ -193,7 +193,7 @@ object: .. ipython:: python - arr.identical(arr.rename('bar')) + arr.identical(arr.rename("bar")) :py:attr:`~xarray.Dataset.broadcast_equals` does a more relaxed form of equality check that allows variables to have different dimensions, as long as values @@ -201,8 +201,8 @@ are constant along those new dimensions: .. ipython:: python - left = xr.Dataset(coords={'x': 0}) - right = xr.Dataset({'x': [0, 0, 0]}) + left = xr.Dataset(coords={"x": 0}) + right = xr.Dataset({"x": [0, 0, 0]}) left.broadcast_equals(right) Like pandas objects, two xarray objects are still equal or identical if they have @@ -231,9 +231,9 @@ coordinates as long as any non-missing values agree or are disjoint: .. ipython:: python - ds1 = xr.Dataset({'a': ('x', [10, 20, 30, np.nan])}, {'x': [1, 2, 3, 4]}) - ds2 = xr.Dataset({'a': ('x', [np.nan, 30, 40, 50])}, {'x': [2, 3, 4, 5]}) - xr.merge([ds1, ds2], compat='no_conflicts') + ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]}) + ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]}) + xr.merge([ds1, ds2], compat="no_conflicts") Note that due to the underlying representation of missing values as floating point numbers (``NaN``), variable data type is not always preserved when merging @@ -273,10 +273,12 @@ datasets into a doubly-nested list, e.g: .. 
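The merge discussion above notes that ``xarray.MergeError`` is raised for conflicting values but does not show a failing case; a hedged sketch (the exact message may vary between versions):

.. code-block:: python

    import xarray as xr

    try:
        # scalar variables with the same name but different values conflict
        xr.merge([xr.Dataset({"a": 1}), xr.Dataset({"a": 2})])
    except xr.MergeError as err:
        print(err)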
ipython:: python - arr = xr.DataArray(name='temperature', data=np.random.randint(5, size=(2, 2)), dims=['x', 'y']) + arr = xr.DataArray( + name="temperature", data=np.random.randint(5, size=(2, 2)), dims=["x", "y"] + ) arr ds_grid = [[arr, arr], [arr, arr]] - xr.combine_nested(ds_grid, concat_dim=['x', 'y']) + xr.combine_nested(ds_grid, concat_dim=["x", "y"]) :py:func:`~xarray.combine_nested` can also be used to explicitly merge datasets with different variables. For example if we have 4 datasets, which are divided @@ -286,10 +288,10 @@ we wish to use ``merge`` instead of ``concat``: .. ipython:: python - temp = xr.DataArray(name='temperature', data=np.random.randn(2), dims=['t']) - precip = xr.DataArray(name='precipitation', data=np.random.randn(2), dims=['t']) + temp = xr.DataArray(name="temperature", data=np.random.randn(2), dims=["t"]) + precip = xr.DataArray(name="precipitation", data=np.random.randn(2), dims=["t"]) ds_grid = [[temp, precip], [temp, precip]] - xr.combine_nested(ds_grid, concat_dim=['t', None]) + xr.combine_nested(ds_grid, concat_dim=["t", None]) :py:func:`~xarray.combine_by_coords` is for combining objects which have dimension coordinates which specify their relationship to and order relative to one @@ -302,8 +304,8 @@ coordinates, not on their position in the list passed to ``combine_by_coords``. .. ipython:: python :okwarning: - x1 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [0, 1, 2])]) - x2 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [3, 4, 5])]) + x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])]) + x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])]) xr.combine_by_coords([x2, x1]) These functions can be used by :py:func:`~xarray.open_mfdataset` to open many diff --git a/doc/computation.rst b/doc/computation.rst index 4b8014c4782..3660aed93ed 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -18,17 +18,19 @@ Arithmetic operations with a single DataArray automatically vectorize (like numpy) over all array values: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. ipython:: python - arr = xr.DataArray(np.random.RandomState(0).randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray( + np.random.RandomState(0).randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])] + ) arr - 3 abs(arr) @@ -45,7 +47,7 @@ Use :py:func:`~xarray.where` to conditionally switch between values: .. ipython:: python - xr.where(arr > 0, 'positive', 'negative') + xr.where(arr > 0, "positive", "negative") Use `@` to perform matrix multiplication: @@ -73,14 +75,14 @@ methods for working with missing data from pandas: .. ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x']) + x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.isnull() x.notnull() x.count() - x.dropna(dim='x') + x.dropna(dim="x") x.fillna(-1) - x.ffill('x') - x.bfill('x') + x.ffill("x") + x.bfill("x") Like pandas, xarray uses the float value ``np.nan`` (not-a-number) to represent missing values. @@ -90,9 +92,12 @@ for filling missing values via 1D interpolation. .. 
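The note above about using ``@`` for matrix multiplication has its example elided from this hunk; a small sketch, assuming ``@`` dispatches to ``DataArray.dot`` and contracts over the dimensions shared by both operands:

.. code-block:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(6).reshape(2, 3), dims=("x", "y"))
    da @ da  # like xr.dot(da, da): sums the product over the shared dims x and y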
ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x'], - coords={'xx': xr.Variable('x', [0, 1, 1.1, 1.9, 3])}) - x.interpolate_na(dim='x', method='linear', use_coordinate='xx') + x = xr.DataArray( + [0, 1, np.nan, np.nan, 2], + dims=["x"], + coords={"xx": xr.Variable("x", [0, 1, 1.1, 1.9, 3])}, + ) + x.interpolate_na(dim="x", method="linear", use_coordinate="xx") Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification @@ -110,8 +115,8 @@ applied along particular dimension(s): .. ipython:: python - arr.sum(dim='x') - arr.std(['x', 'y']) + arr.sum(dim="x") + arr.std(["x", "y"]) arr.min() @@ -121,7 +126,7 @@ for wrapping code designed to work with numpy arrays), you can use the .. ipython:: python - arr.get_axis_num('y') + arr.get_axis_num("y") These operations automatically skip missing values, like in pandas: @@ -142,8 +147,7 @@ method supports rolling window aggregation: .. ipython:: python - arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) arr :py:meth:`~xarray.DataArray.rolling` is applied along one dimension using the @@ -194,8 +198,9 @@ We can also manually iterate through ``Rolling`` objects: .. code:: python - for label, arr_window in r: - # arr_window is a view of x + for label, arr_window in r: + # arr_window is a view of x + ... .. _comput.rolling_exp: @@ -222,9 +227,9 @@ windowed rolling, convolution, short-time FFT etc. .. ipython:: python # rolling with 2-point stride - rolling_da = r.construct('window_dim', stride=2) + rolling_da = r.construct("window_dim", stride=2) rolling_da - rolling_da.mean('window_dim', skipna=False) + rolling_da.mean("window_dim", skipna=False) Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view of the original array, it is memory efficient. @@ -232,8 +237,8 @@ You can also use ``construct`` to compute a weighted rolling sum: .. ipython:: python - weight = xr.DataArray([0.25, 0.5, 0.25], dims=['window']) - arr.rolling(y=3).construct('window').dot(weight) + weight = xr.DataArray([0.25, 0.5, 0.25], dims=["window"]) + arr.rolling(y=3).construct("window").dot(weight) .. note:: numpy's Nan-aggregation functions such as ``nansum`` copy the original array. @@ -254,52 +259,52 @@ support weighted ``sum`` and weighted ``mean``. .. ipython:: python - coords = dict(month=('month', [1, 2, 3])) + coords = dict(month=("month", [1, 2, 3])) - prec = xr.DataArray([1.1, 1.0, 0.9], dims=('month', ), coords=coords) - weights = xr.DataArray([31, 28, 31], dims=('month', ), coords=coords) + prec = xr.DataArray([1.1, 1.0, 0.9], dims=("month",), coords=coords) + weights = xr.DataArray([31, 28, 31], dims=("month",), coords=coords) Create a weighted object: .. ipython:: python - weighted_prec = prec.weighted(weights) - weighted_prec + weighted_prec = prec.weighted(weights) + weighted_prec Calculate the weighted sum: .. ipython:: python - weighted_prec.sum() + weighted_prec.sum() Calculate the weighted mean: .. ipython:: python - weighted_prec.mean(dim="month") + weighted_prec.mean(dim="month") The weighted sum corresponds to: .. ipython:: python - weighted_sum = (prec * weights).sum() - weighted_sum + weighted_sum = (prec * weights).sum() + weighted_sum and the weighted mean to: .. 
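The rolling-window API discussed above also accepts ``center`` and ``min_periods`` keywords; a brief illustrative sketch (window settings chosen arbitrarily):

.. code-block:: python

    import numpy as np
    import xarray as xr

    arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y"))
    # center the window on each label and tolerate one missing point at the edges
    arr.rolling(y=3, center=True, min_periods=2).mean()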
ipython:: python - weighted_mean = weighted_sum / weights.sum() - weighted_mean + weighted_mean = weighted_sum / weights.sum() + weighted_mean However, the functions also take missing values in the data into account: .. ipython:: python - data = xr.DataArray([np.NaN, 2, 4]) - weights = xr.DataArray([8, 1, 1]) + data = xr.DataArray([np.NaN, 2, 4]) + weights = xr.DataArray([8, 1, 1]) - data.weighted(weights).mean() + data.weighted(weights).mean() Using ``(data * weights).sum() / weights.sum()`` would (incorrectly) result in 0.6. @@ -309,16 +314,16 @@ If the weights add up to to 0, ``sum`` returns 0: .. ipython:: python - data = xr.DataArray([1.0, 1.0]) - weights = xr.DataArray([-1.0, 1.0]) + data = xr.DataArray([1.0, 1.0]) + weights = xr.DataArray([-1.0, 1.0]) - data.weighted(weights).sum() + data.weighted(weights).sum() and ``mean`` returns ``NaN``: .. ipython:: python - data.weighted(weights).mean() + data.weighted(weights).mean() .. note:: @@ -336,18 +341,21 @@ methods. This supports the block aggregation along multiple dimensions, .. ipython:: python - x = np.linspace(0, 10, 300) - t = pd.date_range('15/12/1999', periods=364) - da = xr.DataArray(np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), - dims=['time', 'x'], coords={'time': t, 'x': x}) - da + x = np.linspace(0, 10, 300) + t = pd.date_range("15/12/1999", periods=364) + da = xr.DataArray( + np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), + dims=["time", "x"], + coords={"time": t, "x": x}, + ) + da In order to take a block mean for every 7 days along ``time`` dimension and every 2 points along ``x`` dimension, .. ipython:: python - da.coarsen(time=7, x=2).mean() + da.coarsen(time=7, x=2).mean() :py:meth:`~xarray.DataArray.coarsen` raises an ``ValueError`` if the data length is not a multiple of the corresponding window size. @@ -356,14 +364,14 @@ the excess entries or padding ``nan`` to insufficient entries, .. ipython:: python - da.coarsen(time=30, x=2, boundary='trim').mean() + da.coarsen(time=30, x=2, boundary="trim").mean() If you want to apply a specific function to coordinate, you can pass the function or method name to ``coord_func`` option, .. ipython:: python - da.coarsen(time=7, x=2, coord_func={'time': 'min'}).mean() + da.coarsen(time=7, x=2, coord_func={"time": "min"}).mean() .. _compute.using_coordinates: @@ -377,24 +385,25 @@ central finite differences using their coordinates, .. ipython:: python - a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[[0.1, 0.11, 0.2, 0.3]]) + a = xr.DataArray([0, 1, 2, 3], dims=["x"], coords=[[0.1, 0.11, 0.2, 0.3]]) a - a.differentiate('x') + a.differentiate("x") This method can be used also for multidimensional arrays, .. ipython:: python - a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'], - coords={'x': [0.1, 0.11, 0.2, 0.3]}) - a.differentiate('x') + a = xr.DataArray( + np.arange(8).reshape(4, 2), dims=["x", "y"], coords={"x": [0.1, 0.11, 0.2, 0.3]} + ) + a.differentiate("x") :py:meth:`~xarray.DataArray.integrate` computes integration based on trapezoidal rule using their coordinates, .. ipython:: python - a.integrate('x') + a.integrate("x") .. note:: These methods are limited to simple cartesian geometry. Differentiation @@ -412,9 +421,9 @@ best fitting coefficients along a given dimension and for a given order, .. 
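The weighted reductions above are described as taking missing values into account; a hedged sketch of the NaN-aware computation done by hand, masking the weights wherever the data are missing:

.. code-block:: python

    import numpy as np
    import xarray as xr

    data = xr.DataArray([np.nan, 2, 4])
    weights = xr.DataArray([8, 1, 1])

    masked_weights = weights.where(data.notnull())
    # 3.0, matching data.weighted(weights).mean()
    (data * weights).sum() / masked_weights.sum()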
ipython:: python - x = xr.DataArray(np.arange(10), dims=['x'], name='x') - a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x}) - out = a.polyfit(dim='x', deg=1, full=True) + x = xr.DataArray(np.arange(10), dims=["x"], name="x") + a = xr.DataArray(3 + 4 * x, dims=["x"], coords={"x": x}) + out = a.polyfit(dim="x", deg=1, full=True) out The method outputs a dataset containing the coefficients (and more if `full=True`). @@ -443,9 +452,9 @@ arrays with different sizes aligned along different dimensions: .. ipython:: python - a = xr.DataArray([1, 2], [('x', ['a', 'b'])]) + a = xr.DataArray([1, 2], [("x", ["a", "b"])]) a - b = xr.DataArray([-1, -2, -3], [('y', [10, 20, 30])]) + b = xr.DataArray([-1, -2, -3], [("y", [10, 20, 30])]) b With xarray, we can apply binary mathematical operations to these arrays, and @@ -460,7 +469,7 @@ appeared: .. ipython:: python - c = xr.DataArray(np.arange(6).reshape(3, 2), [b['y'], a['x']]) + c = xr.DataArray(np.arange(6).reshape(3, 2), [b["y"], a["x"]]) c a + c @@ -494,7 +503,7 @@ operations. The default result of a binary operation is by the *intersection* .. ipython:: python - arr = xr.DataArray(np.arange(3), [('x', range(3))]) + arr = xr.DataArray(np.arange(3), [("x", range(3))]) arr + arr[:-1] If coordinate values for a dimension are missing on either argument, all @@ -503,7 +512,7 @@ matching dimensions must have the same size: .. ipython:: :verbatim: - In [1]: arr + xr.DataArray([1, 2], dims='x') + In [1]: arr + xr.DataArray([1, 2], dims="x") ValueError: arguments without labels along dimension 'x' cannot be aligned because they have different dimension size(s) {2} than the size of the aligned dimension labels: 3 @@ -562,16 +571,20 @@ variables: .. ipython:: python - ds = xr.Dataset({'x_and_y': (('x', 'y'), np.random.randn(3, 5)), - 'x_only': ('x', np.random.randn(3))}, - coords=arr.coords) + ds = xr.Dataset( + { + "x_and_y": (("x", "y"), np.random.randn(3, 5)), + "x_only": ("x", np.random.randn(3)), + }, + coords=arr.coords, + ) ds > 0 Datasets support most of the same methods found on data arrays: .. ipython:: python - ds.mean(dim='x') + ds.mean(dim="x") abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or @@ -594,7 +607,7 @@ Arithmetic between two datasets matches data variables of the same name: .. ipython:: python - ds2 = xr.Dataset({'x_and_y': 0, 'x_only': 100}) + ds2 = xr.Dataset({"x_and_y": 0, "x_only": 100}) ds - ds2 Similarly to index based alignment, the result has the intersection of all @@ -638,7 +651,7 @@ any additional arguments: .. ipython:: python squared_error = lambda x, y: (x - y) ** 2 - arr1 = xr.DataArray([0, 1, 2, 3], dims='x') + arr1 = xr.DataArray([0, 1, 2, 3], dims="x") xr.apply_ufunc(squared_error, arr1, 1) For using more complex operations that consider some array values collectively, @@ -658,21 +671,21 @@ to set ``axis=-1``. As an example, here is how we would wrap .. code-block:: python def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. ipython:: python - :suppress: + :suppress: def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. 
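As a companion to the ``vector_norm`` wrapper defined above, here is a sketch of the same ``apply_ufunc`` pattern for a function that keeps its core dimension, so ``output_core_dims`` is needed as well (``standardize`` is a hypothetical helper, not part of xarray):

.. code-block:: python

    import numpy as np
    import xarray as xr

    def standardize(x):
        # operate on the core dimension, which apply_ufunc moves to the last axis
        return (x - x.mean(axis=-1, keepdims=True)) / x.std(axis=-1, keepdims=True)

    arr1 = xr.DataArray([0, 1, 2, 3], dims="x")
    xr.apply_ufunc(standardize, arr1, input_core_dims=[["x"]], output_core_dims=[["x"]])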
ipython:: python - vector_norm(arr1, dim='x') + vector_norm(arr1, dim="x") Because ``apply_ufunc`` follows a standard convention for ufuncs, it plays nicely with tools for building vectorized functions, like diff --git a/doc/contributing.rst b/doc/contributing.rst index f581bcd9741..51dba2bb0cc 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -261,13 +261,13 @@ Some other important things to know about the docs: .. ipython:: python x = 2 - x**3 + x ** 3 will be rendered as:: In [1]: x = 2 - In [2]: x**3 + In [2]: x ** 3 Out[2]: 8 Almost all code examples in the docs are run (and the output saved) during the @@ -467,7 +467,7 @@ typically find tests wrapped in a class. .. code-block:: python class TestReallyCoolFeature: - .... + ... Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer @@ -477,7 +477,7 @@ writing test classes, we will write test functions like this: .. code-block:: python def test_really_cool_feature(): - .... + ... Using ``pytest`` ~~~~~~~~~~~~~~~~ @@ -508,17 +508,23 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place from xarray.testing import assert_equal - @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64']) + @pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"]) def test_dtypes(dtype): assert str(np.dtype(dtype)) == dtype - @pytest.mark.parametrize('dtype', ['float32', - pytest.param('int16', marks=pytest.mark.skip), - pytest.param('int32', marks=pytest.mark.xfail( - reason='to show how it works'))]) + @pytest.mark.parametrize( + "dtype", + [ + "float32", + pytest.param("int16", marks=pytest.mark.skip), + pytest.param( + "int32", marks=pytest.mark.xfail(reason="to show how it works") + ), + ], + ) def test_mark(dtype): - assert str(np.dtype(dtype)) == 'float32' + assert str(np.dtype(dtype)) == "float32" @pytest.fixture @@ -526,7 +532,7 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place return xr.DataArray([1, 2, 3]) - @pytest.fixture(params=['int8', 'int16', 'int32', 'int64']) + @pytest.fixture(params=["int8", "int16", "int32", "int64"]) def dtype(request): return request.param diff --git a/doc/dask.rst b/doc/dask.rst index 07b3939af6e..df223982ba4 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -56,19 +56,26 @@ argument to :py:func:`~xarray.open_dataset` or using the import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) np.set_printoptions(precision=3, linewidth=100, threshold=100, edgeitems=3) - ds = xr.Dataset({'temperature': (('time', 'latitude', 'longitude'), - np.random.randn(30, 180, 180)), - 'time': pd.date_range('2015-01-01', periods=30), - 'longitude': np.arange(180), - 'latitude': np.arange(89.5, -90.5, -1)}) - ds.to_netcdf('example-data.nc') + ds = xr.Dataset( + { + "temperature": ( + ("time", "latitude", "longitude"), + np.random.randn(30, 180, 180), + ), + "time": pd.date_range("2015-01-01", periods=30), + "longitude": np.arange(180), + "latitude": np.arange(89.5, -90.5, -1), + } + ) + ds.to_netcdf("example-data.nc") .. ipython:: python - ds = xr.open_dataset('example-data.nc', chunks={'time': 10}) + ds = xr.open_dataset("example-data.nc", chunks={"time": 10}) ds In this example ``latitude`` and ``longitude`` do not appear in the ``chunks`` @@ -106,7 +113,7 @@ usual way. .. 
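To round out the testing guidance above, a sketch of a test that consumes the parametrized ``dtype`` fixture defined earlier (the test name and assertion are illustrative only):

.. code-block:: python

    import numpy as np
    import xarray as xr

    def test_constructor_preserves_dtype(dtype):
        da = xr.DataArray(np.arange(3, dtype=dtype))
        assert da.dtype == np.dtype(dtype)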
ipython:: python - ds.to_netcdf('manipulated-example-data.nc') + ds.to_netcdf("manipulated-example-data.nc") By setting the ``compute`` argument to ``False``, :py:meth:`~xarray.Dataset.to_netcdf` will return a ``dask.delayed`` object that can be computed later. @@ -114,8 +121,9 @@ will return a ``dask.delayed`` object that can be computed later. .. ipython:: python from dask.diagnostics import ProgressBar + # or distributed.progress when using the distributed scheduler - delayed_obj = ds.to_netcdf('manipulated-example-data.nc', compute=False) + delayed_obj = ds.to_netcdf("manipulated-example-data.nc", compute=False) with ProgressBar(): results = delayed_obj.compute() @@ -141,8 +149,9 @@ Dask DataFrames do not support multi-indexes so the coordinate variables from th :suppress: import os - os.remove('example-data.nc') - os.remove('manipulated-example-data.nc') + + os.remove("example-data.nc") + os.remove("manipulated-example-data.nc") Using Dask with xarray ---------------------- @@ -199,7 +208,7 @@ Dask arrays using the :py:meth:`~xarray.Dataset.persist` method: .. ipython:: python - ds = ds.persist() + ds = ds.persist() :py:meth:`~xarray.Dataset.persist` is particularly useful when using a distributed cluster because the data will be loaded into distributed memory @@ -224,11 +233,11 @@ sizes of Dask arrays is done with the :py:meth:`~xarray.Dataset.chunk` method: .. ipython:: python :suppress: - ds = ds.chunk({'time': 10}) + ds = ds.chunk({"time": 10}) .. ipython:: python - rechunked = ds.chunk({'latitude': 100, 'longitude': 100}) + rechunked = ds.chunk({"latitude": 100, "longitude": 100}) You can view the size of existing chunks on an array by viewing the :py:attr:`~xarray.Dataset.chunks` attribute: @@ -256,6 +265,7 @@ lazy Dask arrays, in the :ref:`xarray.ufuncs ` module: .. ipython:: python import xarray.ufuncs as xu + xu.sin(rechunked) To access Dask arrays directly, use the new @@ -274,12 +284,21 @@ loaded into Dask or not: .. _dask.automatic-parallelization: -Automatic parallelization -------------------------- +Automatic parallelization with ``apply_ufunc`` and ``map_blocks`` +----------------------------------------------------------------- Almost all of xarray's built-in operations work on Dask arrays. If you want to -use a function that isn't wrapped by xarray, one option is to extract Dask -arrays from xarray objects (``.data``) and use Dask directly. +use a function that isn't wrapped by xarray, and have it applied in parallel on +each block of your xarray object, you have three options: + +1. Extract Dask arrays from xarray objects (``.data``) and use Dask directly. +2. Use :py:func:`~xarray.apply_ufunc` to apply functions that consume and return NumPy arrays. +3. Use :py:func:`~xarray.map_blocks`, :py:meth:`Dataset.map_blocks` or :py:meth:`DataArray.map_blocks` + to apply functions that consume and return xarray objects. + + +``apply_ufunc`` +~~~~~~~~~~~~~~~ Another option is to use xarray's :py:func:`~xarray.apply_ufunc`, which can automate `embarrassingly parallel @@ -302,24 +321,32 @@ we use to calculate `Spearman's rank-correlation coefficient ` and @@ -453,15 +470,15 @@ dataset variables: .. ipython:: python - ds.rename({'temperature': 'temp', 'precipitation': 'precip'}) + ds.rename({"temperature": "temp", "precipitation": "precip"}) The related :py:meth:`~xarray.Dataset.swap_dims` method allows you do to swap dimension and non-dimension variables: .. 
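The renamed section above introduces ``map_blocks`` alongside ``apply_ufunc``; a minimal sketch, assuming dask is installed and a function that returns a block of the same shape (``anomaly`` is a hypothetical helper):

.. code-block:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(10.0), dims="x").chunk({"x": 5})

    def anomaly(block):
        # demean each block; the shape is preserved, as map_blocks expects here
        return block - block.mean()

    xr.map_blocks(anomaly, da).compute()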
ipython:: python - ds.coords['day'] = ('time', [6, 7, 8]) - ds.swap_dims({'time': 'day'}) + ds.coords["day"] = ("time", [6, 7, 8]) + ds.swap_dims({"time": "day"}) .. _coordinates: @@ -519,8 +536,8 @@ To convert back and forth between data and coordinates, you can use the .. ipython:: python ds.reset_coords() - ds.set_coords(['temperature', 'precipitation']) - ds['temperature'].reset_coords(drop=True) + ds.set_coords(["temperature", "precipitation"]) + ds["temperature"].reset_coords(drop=True) Notice that these operations skip coordinates with names given by dimensions, as used for indexing. This mostly because we are not entirely sure how to @@ -544,7 +561,7 @@ logic used for merging coordinates in arithmetic operations .. ipython:: python - alt = xr.Dataset(coords={'z': [10], 'lat': 0, 'lon': 0}) + alt = xr.Dataset(coords={"z": [10], "lat": 0, "lon": 0}) ds.coords.merge(alt.coords) The ``coords.merge`` method may be useful if you want to implement your own @@ -560,7 +577,7 @@ To convert a coordinate (or any ``DataArray``) into an actual .. ipython:: python - ds['time'].to_index() + ds["time"].to_index() A useful shortcut is the ``indexes`` property (on both ``DataArray`` and ``Dataset``), which lazily constructs a dictionary whose keys are given by each @@ -577,9 +594,10 @@ Xarray supports labeling coordinate values with a :py:class:`pandas.MultiIndex`: .. ipython:: python - midx = pd.MultiIndex.from_arrays([['R', 'R', 'V', 'V'], [.1, .2, .7, .9]], - names=('band', 'wn')) - mda = xr.DataArray(np.random.rand(4), coords={'spec': midx}, dims='spec') + midx = pd.MultiIndex.from_arrays( + [["R", "R", "V", "V"], [0.1, 0.2, 0.7, 0.9]], names=("band", "wn") + ) + mda = xr.DataArray(np.random.rand(4), coords={"spec": midx}, dims="spec") mda For convenience multi-index levels are directly accessible as "virtual" or @@ -587,8 +605,8 @@ For convenience multi-index levels are directly accessible as "virtual" or .. ipython:: python - mda['band'] - mda.wn + mda["band"] + mda.wn Indexing with multi-index levels is also possible using the ``sel`` method (see :ref:`multi-level indexing`). diff --git a/doc/faq.rst b/doc/faq.rst index 576cec5c2b1..a2b8be47e06 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -4,11 +4,12 @@ Frequently Asked Questions ========================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) @@ -103,21 +104,21 @@ code fragment .. ipython:: python arr = xr.DataArray([1, 2, 3]) - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}) does not yield the pandas DataFrame we expected. We need to specify the type conversion ourselves: .. ipython:: python - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}, dtype=float) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}, dtype=float) Alternatively, we could use the ``item`` method or the ``float`` constructor to convert values one at a time .. ipython:: python - pd.Series({'x': arr[0].item(), 'mean': float(arr.mean())}) + pd.Series({"x": arr[0].item(), "mean": float(arr.mean())}) .. _approach to metadata: diff --git a/doc/groupby.rst b/doc/groupby.rst index 223185bd0d5..c72a26c45ea 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -26,11 +26,12 @@ Split Let's create a simple example dataset: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. 
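The multi-level indexing referenced above can be previewed here; ``sel`` accepts multi-index level names directly as keyword arguments:

.. code-block:: python

    import numpy as np
    import pandas as pd
    import xarray as xr

    midx = pd.MultiIndex.from_arrays(
        [["R", "R", "V", "V"], [0.1, 0.2, 0.7, 0.9]], names=("band", "wn")
    )
    mda = xr.DataArray(np.random.rand(4), coords={"spec": midx}, dims="spec")
    mda.sel(band="R")  # every value in the "R" band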
ipython:: python @@ -47,20 +48,20 @@ use a DataArray directly), we get back a ``GroupBy`` object: .. ipython:: python - ds.groupby('letters') + ds.groupby("letters") This object works very similarly to a pandas GroupBy object. You can view the group indices with the ``groups`` attribute: .. ipython:: python - ds.groupby('letters').groups + ds.groupby("letters").groups You can also iterate over groups in ``(label, group)`` pairs: .. ipython:: python - list(ds.groupby('letters')) + list(ds.groupby("letters")) Just like in pandas, creating a GroupBy object is cheap: it does not actually split the data until you access particular values. @@ -75,8 +76,8 @@ a customized coordinate, but xarray facilitates this via the .. ipython:: python - x_bins = [0,25,50] - ds.groupby_bins('x', x_bins).groups + x_bins = [0, 25, 50] + ds.groupby_bins("x", x_bins).groups The binning is implemented via :func:`pandas.cut`, whose documentation details how the bins are assigned. As seen in the example above, by default, the bins are @@ -86,8 +87,8 @@ choose `float` labels which identify the bin centers: .. ipython:: python - x_bin_labels = [12.5,37.5] - ds.groupby_bins('x', x_bins, labels=x_bin_labels).groups + x_bin_labels = [12.5, 37.5] + ds.groupby_bins("x", x_bins, labels=x_bin_labels).groups Apply @@ -102,7 +103,8 @@ concatenated back together along the group axis: def standardize(x): return (x - x.mean()) / x.std() - arr.groupby('letters').map(standardize) + + arr.groupby("letters").map(standardize) GroupBy objects also have a :py:meth:`~xarray.core.groupby.DatasetGroupBy.reduce` method and methods like :py:meth:`~xarray.core.groupby.DatasetGroupBy.mean` as shortcuts for applying an @@ -110,14 +112,14 @@ aggregation function: .. ipython:: python - arr.groupby('letters').mean(dim='x') + arr.groupby("letters").mean(dim="x") Using a groupby is thus also a convenient shortcut for aggregating over all dimensions *other than* the provided one: .. ipython:: python - ds.groupby('x').std(...) + ds.groupby("x").std(...) .. note:: @@ -134,7 +136,7 @@ values for group along the grouped dimension: .. ipython:: python - ds.groupby('letters').first(...) + ds.groupby("letters").first(...) By default, they skip missing values (control this with ``skipna``). @@ -149,9 +151,9 @@ coordinates. For example: .. ipython:: python - alt = arr.groupby('letters').mean(...) + alt = arr.groupby("letters").mean(...) alt - ds.groupby('letters') - alt + ds.groupby("letters") - alt This last line is roughly equivalent to the following:: @@ -169,11 +171,11 @@ the ``squeeze`` parameter: .. ipython:: python - next(iter(arr.groupby('x'))) + next(iter(arr.groupby("x"))) .. ipython:: python - next(iter(arr.groupby('x', squeeze=False))) + next(iter(arr.groupby("x", squeeze=False))) Although xarray will attempt to automatically :py:attr:`~xarray.DataArray.transpose` dimensions back into their original order @@ -197,13 +199,17 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen .. ipython:: python - da = xr.DataArray([[0,1],[2,3]], - coords={'lon': (['ny','nx'], [[30,40],[40,50]] ), - 'lat': (['ny','nx'], [[10,10],[20,20]] ),}, - dims=['ny','nx']) + da = xr.DataArray( + [[0, 1], [2, 3]], + coords={ + "lon": (["ny", "nx"], [[30, 40], [40, 50]]), + "lat": (["ny", "nx"], [[10, 10], [20, 20]]), + }, + dims=["ny", "nx"], + ) da - da.groupby('lon').sum(...) - da.groupby('lon').map(lambda x: x - x.mean(), shortcut=False) + da.groupby("lon").sum(...) 
+ da.groupby("lon").map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`~xarray.Dataset.groupby_bins` @@ -211,7 +217,7 @@ may be desirable: .. ipython:: python - da.groupby_bins('lon', [0,45,50]).sum() + da.groupby_bins("lon", [0, 45, 50]).sum() These methods group by `lon` values. It is also possible to groupby each cell in a grid, regardless of value, by stacking multiple dimensions, @@ -219,5 +225,5 @@ applying your function, and then unstacking the result: .. ipython:: python - stacked = da.stack(gridcell=['ny', 'nx']) - stacked.groupby('gridcell').sum(...).unstack('gridcell') + stacked = da.stack(gridcell=["ny", "nx"]) + stacked.groupby("gridcell").sum(...).unstack("gridcell") \ No newline at end of file diff --git a/doc/indexing.rst b/doc/indexing.rst index cfbb84a8343..af8e44fb80b 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -4,11 +4,12 @@ Indexing and selecting data =========================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers extremely flexible indexing routines that combine the best @@ -60,9 +61,13 @@ DataArray: .. ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) da[:2] da[0, 0] da[:, [2, 1]] @@ -81,7 +86,7 @@ fast. To do label based indexing, use the :py:attr:`~xarray.DataArray.loc` attri .. ipython:: python - da.loc['2000-01-01':'2000-01-02', 'IA'] + da.loc["2000-01-01":"2000-01-02", "IA"] In this example, the selected is a subpart of the array in the range '2000-01-01':'2000-01-02' along the first coordinate `time` @@ -98,7 +103,7 @@ Setting values with label based indexing is also supported: .. ipython:: python - da.loc['2000-01-01', ['IL', 'IN']] = -10 + da.loc["2000-01-01", ["IL", "IN"]] = -10 da @@ -117,7 +122,7 @@ use them explicitly to slice data. There are two ways to do this: da[dict(space=0, time=slice(None, 2))] # index by dimension coordinate labels - da.loc[dict(time=slice('2000-01-01', '2000-01-02'))] + da.loc[dict(time=slice("2000-01-01", "2000-01-02"))] 2. Use the :py:meth:`~xarray.DataArray.sel` and :py:meth:`~xarray.DataArray.isel` convenience methods: @@ -128,7 +133,7 @@ use them explicitly to slice data. There are two ways to do this: da.isel(space=0, time=slice(None, 2)) # index by dimension coordinate labels - da.sel(time=slice('2000-01-01', '2000-01-02')) + da.sel(time=slice("2000-01-01", "2000-01-02")) The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, @@ -156,16 +161,16 @@ enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, .. ipython:: python - da = xr.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - da.sel(x=[1.1, 1.9], method='nearest') - da.sel(x=0.1, method='backfill') - da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + da.sel(x=[1.1, 1.9], method="nearest") + da.sel(x=0.1, method="backfill") + da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") Tolerance limits the maximum distance for valid matches with an inexact lookup: .. 
ipython:: python - da.reindex(x=[1.1, 1.5], method='nearest', tolerance=0.2) + da.reindex(x=[1.1, 1.5], method="nearest", tolerance=0.2) The method parameter is not yet supported if any of the arguments to ``.sel()`` is a ``slice`` object: @@ -173,7 +178,7 @@ to ``.sel()`` is a ``slice`` object: .. ipython:: :verbatim: - In [1]: da.sel(x=slice(1, 3), method='nearest') + In [1]: da.sel(x=slice(1, 3), method="nearest") NotImplementedError However, you don't need to use ``method`` to do inexact slicing. Slicing @@ -182,15 +187,15 @@ labels are monotonic increasing: .. ipython:: python - da.sel(x=slice(0.9, 3.1)) + da.sel(x=slice(0.9, 3.1)) Indexing axes with monotonic decreasing labels also works, as long as the ``slice`` or ``.loc`` arguments are also decreasing: .. ipython:: python - reversed_da = da[::-1] - reversed_da.loc[3.1:0.9] + reversed_da = da[::-1] + reversed_da.loc[3.1:0.9] .. note:: @@ -227,7 +232,7 @@ arrays). However, you can do normal indexing with dimension names: .. ipython:: python ds[dict(space=[0], time=[0])] - ds.loc[dict(time='2000-01-01')] + ds.loc[dict(time="2000-01-01")] Using indexing to *assign* values to a subset of dataset (e.g., ``ds[dict(space=0)] = 1``) is not yet supported. @@ -240,7 +245,7 @@ index labels along a dimension dropped: .. ipython:: python - ds.drop_sel(space=['IN', 'IL']) + ds.drop_sel(space=["IN", "IL"]) ``drop_sel`` is both a ``Dataset`` and ``DataArray`` method. @@ -249,7 +254,7 @@ Any variables with these dimensions are also dropped: .. ipython:: python - ds.drop_dims('time') + ds.drop_dims("time") .. _masking with where: @@ -263,7 +268,7 @@ xarray, use :py:meth:`~xarray.DataArray.where`: .. ipython:: python - da = xr.DataArray(np.arange(16).reshape(4, 4), dims=['x', 'y']) + da = xr.DataArray(np.arange(16).reshape(4, 4), dims=["x", "y"]) da.where(da.x + da.y < 4) This is particularly useful for ragged indexing of multi-dimensional data, @@ -296,7 +301,7 @@ multiple values, use :py:meth:`~xarray.DataArray.isin`: .. ipython:: python - da = xr.DataArray([1, 2, 3, 4, 5], dims=['x']) + da = xr.DataArray([1, 2, 3, 4, 5], dims=["x"]) da.isin([2, 4]) :py:meth:`~xarray.DataArray.isin` works particularly well with @@ -305,7 +310,7 @@ already labels of an array: .. ipython:: python - lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=['x']) + lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=["x"]) da.where(lookup.isin([-2, -4]), drop=True) However, some caution is in order: when done repeatedly, this type of indexing @@ -328,7 +333,6 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper: .. ipython:: python - da = xr.DataArray( np.arange(12).reshape((3, 4)), dims=["x", "y"], @@ -344,8 +348,8 @@ dimensions: .. ipython:: python - ind_x = xr.DataArray([0, 1], dims=['x']) - ind_y = xr.DataArray([0, 1], dims=['y']) + ind_x = xr.DataArray([0, 1], dims=["x"]) + ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] # orthogonal indexing da[ind_x, ind_x] # vectorized indexing @@ -364,7 +368,7 @@ indexers' dimension: .. ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da[ind] Similar to how NumPy's `advanced indexing`_ works, vectorized @@ -378,18 +382,18 @@ Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: .. 
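The masking example above pairs naturally with ``drop=True``, which additionally trims any labels that end up fully masked; a short sketch:

.. code-block:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(16).reshape(4, 4), dims=["x", "y"])
    da.where(da.x + da.y < 4, drop=True)  # all-NaN rows and columns are dropped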
ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da.isel(y=ind) # same as da[:, ind] - ind = xr.DataArray([['a', 'b'], ['b', 'a']], dims=['a', 'b']) + ind = xr.DataArray([["a", "b"], ["b", "a"]], dims=["a", "b"]) da.loc[:, ind] # same as da.sel(y=ind) These methods may also be applied to ``Dataset`` objects .. ipython:: python - ds = da.to_dataset(name='bar') - ds.isel(x=xr.DataArray([0, 1, 2], dims=['points'])) + ds = da.to_dataset(name="bar") + ds.isel(x=xr.DataArray([0, 1, 2], dims=["points"])) .. tip:: @@ -476,8 +480,8 @@ Like ``numpy.ndarray``, value assignment sometimes works differently from what o .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) - ind = xr.DataArray([0, 0, 0], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) + ind = xr.DataArray([0, 0, 0], dims=["x"]) da[ind] -= 1 da @@ -511,7 +515,7 @@ __ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-t .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) # DO NOT do this da.isel(x=[0, 1, 2])[1] = -1 da @@ -581,15 +585,15 @@ To reindex a particular dimension, use :py:meth:`~xarray.DataArray.reindex`: .. ipython:: python - da.reindex(space=['IA', 'CA']) + da.reindex(space=["IA", "CA"]) The :py:meth:`~xarray.DataArray.reindex_like` method is a useful shortcut. To demonstrate, we will make a subset DataArray with new values: .. ipython:: python - foo = da.rename('foo') - baz = (10 * da[:2, :2]).rename('baz') + foo = da.rename("foo") + baz = (10 * da[:2, :2]).rename("baz") baz Reindexing ``foo`` with ``baz`` selects out the first two values along each @@ -611,8 +615,8 @@ The :py:func:`~xarray.align` function lets us perform more flexible database-lik .. ipython:: python - xr.align(foo, baz, join='inner') - xr.align(foo, baz, join='outer') + xr.align(foo, baz, join="inner") + xr.align(foo, baz, join="outer") Both ``reindex_like`` and ``align`` work interchangeably between :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects, and with any number of matching dimension names: @@ -621,7 +625,7 @@ Both ``reindex_like`` and ``align`` work interchangeably between ds ds.reindex_like(baz) - other = xr.DataArray(['a', 'b', 'c'], dims='other') + other = xr.DataArray(["a", "b", "c"], dims="other") # this is a no-op, because there are no shared dimension names ds.reindex_like(other) @@ -636,7 +640,7 @@ integer-based indexing as a fallback for dimensions without a coordinate label: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da.sel(x=[0, -1]) Alignment between xarray objects where one or both do not have coordinate labels @@ -675,9 +679,9 @@ labels: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da - da.get_index('x') + da.get_index("x") .. _copies_vs_views: @@ -721,7 +725,6 @@ pandas: .. ipython:: python - midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) mda @@ -732,20 +735,20 @@ a slice of tuples: .. ipython:: python - mda.sel(x=[('a', 0), ('b', 1)]) + mda.sel(x=[("a", 0), ("b", 1)]) Additionally, xarray supports dictionaries: .. ipython:: python - mda.sel(x={'one': 'a', 'two': 0}) + mda.sel(x={"one": "a", "two": 0}) For convenience, ``sel`` also accepts multi-index levels directly as keyword arguments: .. 
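Besides the ``inner`` and ``outer`` joins shown above, ``align`` also accepts ``left``, ``right`` and ``exact``; a sketch with ``join="left"``, which reindexes the second object onto the first object's labels (the arrays here are illustrative stand-ins):

.. code-block:: python

    import numpy as np
    import xarray as xr

    foo = xr.DataArray(np.random.rand(4, 3), dims=["x", "y"], coords={"x": list("abcd")})
    baz = xr.DataArray(np.random.rand(2, 3), dims=["x", "y"], coords={"x": list("ab")})
    xr.align(foo, baz, join="left")  # baz is padded with NaN for labels "c" and "d"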
ipython:: python - mda.sel(one='a', two=0) + mda.sel(one="a", two=0) Note that using ``sel`` it is not possible to mix a dimension indexer with level indexers for that dimension @@ -757,7 +760,7 @@ multi-index is reduced to a single index. .. ipython:: python - mda.loc[{'one': 'a'}, ...] + mda.loc[{"one": "a"}, ...] Unlike pandas, xarray does not guess whether you provide index levels or dimensions when using ``loc`` in some ambiguous cases. For example, for diff --git a/doc/internals.rst b/doc/internals.rst index a4870f2316a..c23aab8c5d7 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -46,11 +46,12 @@ Extending xarray ---------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray is designed as a general purpose library, and hence tries to avoid @@ -87,11 +88,12 @@ defined that returns an instance of your class: .. code-block:: python - class Dataset: - ... - @property - def geo(self) - return GeoAccessor(self) + class Dataset: + ... + + @property + def geo(self): + return GeoAccessor(self) However, using the register accessor decorators is preferable to simply adding your own ad-hoc property (i.e., ``Dataset.geo = property(...)``), for several @@ -116,14 +118,13 @@ reasons: Back in an interactive IPython session, we can use these properties: .. ipython:: python - :suppress: + :suppress: - exec(open("examples/_code/accessor_example.py").read()) + exec(open("examples/_code/accessor_example.py").read()) .. ipython:: python - ds = xr.Dataset({'longitude': np.linspace(0, 10), - 'latitude': np.linspace(0, 20)}) + ds = xr.Dataset({"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)}) ds.geo.center ds.geo.plot() diff --git a/doc/interpolation.rst b/doc/interpolation.rst index 63e9a7cd35e..c2922813e15 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst @@ -4,11 +4,12 @@ Interpolating data ================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers flexible interpolation routines, which have a similar interface @@ -27,9 +28,10 @@ indexing of a :py:class:`~xarray.DataArray`, .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) # label lookup da.sel(time=3) @@ -52,16 +54,17 @@ To interpolate data with a :py:doc:`numpy.datetime64 .. ipython:: python - da_dt64 = xr.DataArray([1, 3], - [('time', pd.date_range('1/1/2000', '1/3/2000', periods=2))]) - da_dt64.interp(time='2000-01-02') + da_dt64 = xr.DataArray( + [1, 3], [("time", pd.date_range("1/1/2000", "1/3/2000", periods=2))] + ) + da_dt64.interp(time="2000-01-02") The interpolated data can be merged into the original :py:class:`~xarray.DataArray` by specifying the time periods required. .. ipython:: python - da_dt64.interp(time=pd.date_range('1/1/2000', '1/3/2000', periods=3)) + da_dt64.interp(time=pd.date_range("1/1/2000", "1/3/2000", periods=3)) Interpolation of data indexed by a :py:class:`~xarray.CFTimeIndex` is also allowed. See :ref:`CFTimeIndex` for examples. @@ -108,9 +111,10 @@ different coordinates, .. 
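The accessor pattern described above is registered with a decorator rather than an ad-hoc property; a condensed sketch of what ``accessor_example.py`` might contain (the names are illustrative, not the file's actual contents):

.. code-block:: python

    import xarray as xr

    @xr.register_dataset_accessor("geo")
    class GeoAccessor:
        def __init__(self, xarray_obj):
            self._obj = xarray_obj

        @property
        def center(self):
            # hypothetical center: the mean of the coordinate variables
            return (float(self._obj.longitude.mean()), float(self._obj.latitude.mean()))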
ipython:: python - other = xr.DataArray(np.sin(0.4 * np.arange(9).reshape(3, 3)), - [('time', [0.9, 1.9, 2.9]), - ('space', [0.15, 0.25, 0.35])]) + other = xr.DataArray( + np.sin(0.4 * np.arange(9).reshape(3, 3)), + [("time", [0.9, 1.9, 2.9]), ("space", [0.15, 0.25, 0.35])], + ) it might be a good idea to first interpolate ``da`` so that it will stay on the same coordinates of ``other``, and then subtract it. @@ -118,9 +122,9 @@ same coordinates of ``other``, and then subtract it. .. ipython:: python - # interpolate da along other's coordinates - interpolated = da.interp_like(other) - interpolated + # interpolate da along other's coordinates + interpolated = da.interp_like(other) + interpolated It is now possible to safely compute the difference ``other - interpolated``. @@ -135,12 +139,15 @@ The interpolation method can be specified by the optional ``method`` argument. .. ipython:: python - da = xr.DataArray(np.sin(np.linspace(0, 2 * np.pi, 10)), dims='x', - coords={'x': np.linspace(0, 1, 10)}) + da = xr.DataArray( + np.sin(np.linspace(0, 2 * np.pi, 10)), + dims="x", + coords={"x": np.linspace(0, 1, 10)}, + ) - da.plot.line('o', label='original') - da.interp(x=np.linspace(0, 1, 100)).plot.line(label='linear (default)') - da.interp(x=np.linspace(0, 1, 100), method='cubic').plot.line(label='cubic') + da.plot.line("o", label="original") + da.interp(x=np.linspace(0, 1, 100)).plot.line(label="linear (default)") + da.interp(x=np.linspace(0, 1, 100), method="cubic").plot.line(label="cubic") @savefig interpolation_sample1.png width=4in plt.legend() @@ -149,9 +156,16 @@ Additional keyword arguments can be passed to scipy's functions. .. ipython:: python # fill 0 for the outside of the original coordinates. - da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 0.0}) - # extrapolation - da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 'extrapolate'}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": 0.0}) + # 1-dimensional extrapolation + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": "extrapolate"}) + # multi-dimensional extrapolation + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) + + da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={"fill_value": None}) Advanced Interpolation @@ -175,17 +189,18 @@ For example: .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(20).reshape(5, 4)), - [('x', np.arange(5)), - ('y', [0.1, 0.2, 0.3, 0.4])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(20).reshape(5, 4)), + [("x", np.arange(5)), ("y", [0.1, 0.2, 0.3, 0.4])], + ) # advanced indexing - x = xr.DataArray([0, 2, 4], dims='z') - y = xr.DataArray([0.1, 0.2, 0.3], dims='z') + x = xr.DataArray([0, 2, 4], dims="z") + y = xr.DataArray([0.1, 0.2, 0.3], dims="z") da.sel(x=x, y=y) # advanced interpolation - x = xr.DataArray([0.5, 1.5, 2.5], dims='z') - y = xr.DataArray([0.15, 0.25, 0.35], dims='z') + x = xr.DataArray([0.5, 1.5, 2.5], dims="z") + y = xr.DataArray([0.15, 0.25, 0.35], dims="z") da.interp(x=x, y=y) where values on the original coordinates @@ -197,9 +212,8 @@ If you want to add a coordinate to the new dimension ``z``, you can supply .. 
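The ``method`` argument demonstrated above also accepts scipy's other 1-D interpolants such as ``nearest``; a quick sketch (scipy must be installed, as interpolation relies on it):

.. code-block:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.sin(np.linspace(0, 2 * np.pi, 10)),
        dims="x",
        coords={"x": np.linspace(0, 1, 10)},
    )
    da.interp(x=[0.16, 0.7], method="nearest")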
ipython:: python - x = xr.DataArray([0.5, 1.5, 2.5], dims='z', coords={'z': ['a', 'b','c']}) - y = xr.DataArray([0.15, 0.25, 0.35], dims='z', - coords={'z': ['a', 'b','c']}) + x = xr.DataArray([0.5, 1.5, 2.5], dims="z", coords={"z": ["a", "b", "c"]}) + y = xr.DataArray([0.15, 0.25, 0.35], dims="z", coords={"z": ["a", "b", "c"]}) da.interp(x=x, y=y) For the details of the advanced indexing, @@ -218,19 +232,18 @@ while other methods such as ``cubic`` or ``quadratic`` return all NaN arrays. .. ipython:: python - da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims='x', - coords={'x': range(5)}) + da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims="x", coords={"x": range(5)}) da.interp(x=[0.5, 1.5, 2.5]) - da.interp(x=[0.5, 1.5, 2.5], method='cubic') + da.interp(x=[0.5, 1.5, 2.5], method="cubic") To avoid this, you can drop NaN by :py:meth:`~xarray.DataArray.dropna`, and then make the interpolation .. ipython:: python - dropped = da.dropna('x') + dropped = da.dropna("x") dropped - dropped.interp(x=[0.5, 1.5, 2.5], method='cubic') + dropped.interp(x=[0.5, 1.5, 2.5], method="cubic") If NaNs are distributed randomly in your multidimensional array, dropping all the columns containing more than one NaNs by @@ -240,7 +253,7 @@ which is similar to :py:meth:`pandas.Series.interpolate`. .. ipython:: python - filled = da.interpolate_na(dim='x') + filled = da.interpolate_na(dim="x") filled This fills NaN by interpolating along the specified dimension. @@ -248,7 +261,7 @@ After filling NaNs, you can interpolate: .. ipython:: python - filled.interp(x=[0.5, 1.5, 2.5], method='cubic') + filled.interp(x=[0.5, 1.5, 2.5], method="cubic") For the details of :py:meth:`~xarray.DataArray.interpolate_na`, see :ref:`Missing values `. @@ -262,18 +275,18 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. .. ipython:: python # Raw data - ds = xr.tutorial.open_dataset('air_temperature').isel(time=0) + ds = xr.tutorial.open_dataset("air_temperature").isel(time=0) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) - axes[0].set_title('Raw data') + axes[0].set_title("Raw data") # Interpolated data - new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims['lon'] * 4) - new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims['lat'] * 4) + new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims["lon"] * 4) + new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample3.png width=8in - axes[1].set_title('Interpolated data') + axes[1].set_title("Interpolated data") Our advanced interpolation can be used to remap the data to the new coordinate. Consider the new coordinates x and z on the two dimensional plane. @@ -285,20 +298,23 @@ The remapping can be done as follows x = np.linspace(240, 300, 100) z = np.linspace(20, 70, 100) # relation between new and original coordinates - lat = xr.DataArray(z, dims=['z'], coords={'z': z}) - lon = xr.DataArray((x[:, np.newaxis]-270)/np.cos(z*np.pi/180)+270, - dims=['x', 'z'], coords={'x': x, 'z': z}) + lat = xr.DataArray(z, dims=["z"], coords={"z": z}) + lon = xr.DataArray( + (x[:, np.newaxis] - 270) / np.cos(z * np.pi / 180) + 270, + dims=["x", "z"], + coords={"x": x, "z": z}, + ) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) # draw the new coordinate on the original coordinates. 
for idx in [0, 33, 66, 99]: - axes[0].plot(lon.isel(x=idx), lat, '--k') + axes[0].plot(lon.isel(x=idx), lat, "--k") for idx in [0, 33, 66, 99]: - axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), '--k') - axes[0].set_title('Raw data') + axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), "--k") + axes[0].set_title("Raw data") dsi = ds.interp(lon=lon, lat=lat) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample4.png width=8in - axes[1].set_title('Remapped data') + axes[1].set_title("Remapped data") \ No newline at end of file diff --git a/doc/io.rst b/doc/io.rst index 0c666099df8..738d8d2b7ab 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -9,11 +9,12 @@ simple :ref:`io.pickle` files to the more flexible :ref:`io.netcdf` format (recommended). .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. _io.netcdf: @@ -52,12 +53,16 @@ We can save a Dataset to disk using the .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) - ds.to_netcdf('saved_on_disk.nc') + ds.to_netcdf("saved_on_disk.nc") By default, the file is saved as netCDF4 (assuming netCDF4-Python is installed). You can control the format and engine used to write the file with @@ -76,7 +81,7 @@ We can load netCDF files to create a new Dataset using .. ipython:: python - ds_disk = xr.open_dataset('saved_on_disk.nc') + ds_disk = xr.open_dataset("saved_on_disk.nc") ds_disk Similarly, a DataArray can be saved to disk using the @@ -117,7 +122,7 @@ netCDF file. However, it's often cleaner to use a ``with`` statement: .. ipython:: python # this automatically closes the dataset after use - with xr.open_dataset('saved_on_disk.nc') as ds: + with xr.open_dataset("saved_on_disk.nc") as ds: print(ds.keys()) Although xarray provides reasonable support for incremental reads of files on @@ -171,7 +176,7 @@ You can view this encoding information (among others) in the .. ipython:: :verbatim: - In [1]: ds_disk['y'].encoding + In [1]: ds_disk["y"].encoding Out[1]: {'zlib': False, 'shuffle': False, @@ -469,7 +474,7 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :okwarning: # Writing complex valued data - da = xr.DataArray([1.+1.j, 2.+2.j, 3.+3.j]) + da = xr.DataArray([1.0 + 1.0j, 2.0 + 2.0j, 3.0 + 3.0j]) da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True) # Reading it back @@ -479,7 +484,8 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :suppress: import os - os.remove('complex.nc') + + os.remove("complex.nc") .. warning:: @@ -499,9 +505,11 @@ installed xarray can convert a ``DataArray`` into a ``Cube`` using .. 
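Complementing the encoding details above, per-variable encoding can also be supplied when writing; a hedged sketch, assuming the netCDF4 engine where ``zlib`` and ``complevel`` control compression:

.. code-block:: python

    import numpy as np
    import xarray as xr

    ds = xr.Dataset({"foo": (("x", "y"), np.random.rand(4, 5))})
    ds.to_netcdf("compressed.nc", encoding={"foo": {"zlib": True, "complevel": 4}})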
ipython:: python - da = xr.DataArray(np.random.rand(4, 5), dims=['x', 'y'], - coords=dict(x=[10, 20, 30, 40], - y=pd.date_range('2000-01-01', periods=5))) + da = xr.DataArray( + np.random.rand(4, 5), + dims=["x", "y"], + coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), + ) cube = da.to_iris() cube @@ -548,8 +556,9 @@ __ http://iri.columbia.edu/ :verbatim: In [3]: remote_data = xr.open_dataset( - ...: 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', - ...: decode_times=False) + ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", + ...: decode_times=False, + ...: ) In [4]: remote_data Out[4]: @@ -587,7 +596,7 @@ over the network until we look at particular values: .. ipython:: :verbatim: - In [4]: tmax = remote_data['tmax'][:500, ::3, ::3] + In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] In [5]: tmax Out[5]: @@ -715,7 +724,8 @@ search indices or other automated data discovery tools. :suppress: import os - os.remove('saved_on_disk.nc') + + os.remove("saved_on_disk.nc") .. _io.rasterio: @@ -729,7 +739,7 @@ rasterio is installed. Here is an example of how to use .. ipython:: :verbatim: - In [7]: rio = xr.open_rasterio('RGB.byte.tif') + In [7]: rio = xr.open_rasterio("RGB.byte.tif") In [8]: rio Out[8]: @@ -769,7 +779,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [1]: import rioxarray - In [2]: rds = rioxarray.open_rasterio('RGB.byte.tif') + In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") In [3]: rds Out[3]: @@ -799,7 +809,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [6]: rds4326.rio.crs Out[6]: CRS.from_epsg(4326) - In [7]: rds4326.rio.to_raster('RGB.byte.4326.tif') + In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") .. _rasterio: https://rasterio.readthedocs.io/en/latest/ @@ -831,17 +841,21 @@ xarray. To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` meth To write to a local directory, we pass a path to a directory .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) - ds.to_zarr('path/to/directory.zarr') + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) + ds.to_zarr("path/to/directory.zarr") (The suffix ``.zarr`` is optional--just a reminder that a zarr store lives there.) If the directory does not exist, it will be created. If a zarr @@ -854,22 +868,30 @@ It is also possible to append to an existing store. For that, set can be omitted as it will internally be set to ``'a'``. .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. 
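
For orientation before the append example below, the two write modes of ``to_zarr`` can be sketched as follows (the store path and dimension name here are illustrative):

.. ipython::
    :verbatim:

    In [1]: ds.to_zarr("path/to/directory.zarr", mode="w")  # create or overwrite the store

    In [2]: ds.to_zarr("path/to/directory.zarr", mode="a", append_dim="t")  # extend along "t"

..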
ipython:: python - ds1 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-01', periods=2)}) - ds1.to_zarr('path/to/directory.zarr') - ds2 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-03', periods=2)}) - ds2.to_zarr('path/to/directory.zarr', append_dim='t') + ds1 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-01", periods=2), + }, + ) + ds1.to_zarr("path/to/directory.zarr") + ds2 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-03", periods=2), + }, + ) + ds2.to_zarr("path/to/directory.zarr", append_dim="t") To store variable length strings use ``dtype=object``. @@ -878,7 +900,7 @@ To read back a zarr dataset that has been created this way, we use the .. ipython:: python - ds_zarr = xr.open_zarr('path/to/directory.zarr') + ds_zarr = xr.open_zarr("path/to/directory.zarr") ds_zarr Cloud Storage Buckets @@ -912,15 +934,16 @@ These options can be passed to the ``to_zarr`` method as variable encoding. For example: .. ipython:: python - :suppress: + :suppress: ! rm -rf foo.zarr .. ipython:: python import zarr - compressor = zarr.Blosc(cname='zstd', clevel=3, shuffle=2) - ds.to_zarr('foo.zarr', encoding={'foo': {'compressor': compressor}}) + + compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2) + ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) .. note:: @@ -959,11 +982,12 @@ be done directly from zarr, as described in the .. _io.cfgrib: .. ipython:: python - :suppress: + :suppress: import shutil - shutil.rmtree('foo.zarr') - shutil.rmtree('path/to/directory.zarr') + + shutil.rmtree("foo.zarr") + shutil.rmtree("path/to/directory.zarr") GRIB format via cfgrib ---------------------- @@ -975,7 +999,7 @@ to :py:func:`open_dataset`: .. ipython:: :verbatim: - In [1]: ds_grib = xr.open_dataset('example.grib', engine='cfgrib') + In [1]: ds_grib = xr.open_dataset("example.grib", engine="cfgrib") We recommend installing ecCodes via conda:: diff --git a/doc/pandas.rst b/doc/pandas.rst index b0ec2a117dc..acf1d16b6ee 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -20,6 +20,7 @@ __ http://seaborn.pydata.org/ import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Hierarchical and tidy data @@ -47,10 +48,15 @@ To convert any dataset to a ``DataFrame`` in tidy form, use the .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.randn(2, 3))}, - coords={'x': [10, 20], 'y': ['a', 'b', 'c'], - 'along_x': ('x', np.random.randn(2)), - 'scalar': 123}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.randn(2, 3))}, + coords={ + "x": [10, 20], + "y": ["a", "b", "c"], + "along_x": ("x", np.random.randn(2)), + "scalar": 123, + }, + ) ds df = ds.to_dataframe() df @@ -91,7 +97,7 @@ DataFrames: .. ipython:: python - s = ds['foo'].to_series() + s = ds["foo"].to_series() s # or equivalently, with Series.to_xarray() xr.DataArray.from_series(s) @@ -117,8 +123,9 @@ available in pandas (i.e., a 1D array is converted to a .. 
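
The conversion also goes the other way: a short sketch using the ``df`` created by ``to_dataframe`` above, whose ``MultiIndex`` levels become dimensions again:

.. ipython::
    :verbatim:

    In [1]: xr.Dataset.from_dataframe(df)

..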
ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - coords=[('x', [10, 20]), ('y', ['a', 'b', 'c'])]) + arr = xr.DataArray( + np.random.randn(2, 3), coords=[("x", [10, 20]), ("y", ["a", "b", "c"])] + ) df = arr.to_pandas() df @@ -136,9 +143,10 @@ preserve all use of multi-indexes: .. ipython:: python - index = pd.MultiIndex.from_arrays([['a', 'a', 'b'], [0, 1, 2]], - names=['one', 'two']) - df = pd.DataFrame({'x': 1, 'y': 2}, index=index) + index = pd.MultiIndex.from_arrays( + [["a", "a", "b"], [0, 1, 2]], names=["one", "two"] + ) + df = pd.DataFrame({"x": 1, "y": 2}, index=index) ds = xr.Dataset(df) ds @@ -175,9 +183,9 @@ Let's take a look: .. ipython:: python data = np.random.RandomState(0).rand(2, 3, 4) - items = list('ab') - major_axis = list('mno') - minor_axis = pd.date_range(start='2000', periods=4, name='date') + items = list("ab") + major_axis = list("mno") + minor_axis = pd.date_range(start="2000", periods=4, name="date") With old versions of pandas (prior to 0.25), this could stored in a ``Panel``: @@ -207,7 +215,7 @@ You can also easily convert this data into ``Dataset``: .. ipython:: python - array.to_dataset(dim='dim_0') + array.to_dataset(dim="dim_0") Here, there are two data variables, each representing a DataFrame on panel's ``items`` axis, and labeled as such. Each variable is a 2D array of the diff --git a/doc/plotting.rst b/doc/plotting.rst index f3d9c0213de..40c0ca1a496 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -56,6 +56,7 @@ Imports # Use defaults so we don't get gridlines in generated docs import matplotlib as mpl + mpl.rcdefaults() The following imports are necessary for all of the examples. @@ -71,7 +72,7 @@ For these examples we'll use the North American air temperature dataset. .. ipython:: python - airtemps = xr.tutorial.open_dataset('air_temperature') + airtemps = xr.tutorial.open_dataset("air_temperature") airtemps # Convert to celsius @@ -79,7 +80,7 @@ For these examples we'll use the North American air temperature dataset. # copy attributes to get nice figure labels and change Kelvin to Celsius air.attrs = airtemps.air.attrs - air.attrs['units'] = 'deg C' + air.attrs["units"] = "deg C" .. note:: Until :issue:`1614` is solved, you might need to copy over the metadata in ``attrs`` to get informative figure labels (as was done above). @@ -126,7 +127,7 @@ can be used: .. ipython:: python @savefig plotting_1d_additional_args.png width=4in - air1d[:200].plot.line('b-^') + air1d[:200].plot.line("b-^") .. note:: Not all xarray plotting methods support passing positional arguments @@ -138,7 +139,7 @@ Keyword arguments work the same way, and are more explicit. .. ipython:: python @savefig plotting_example_sin3.png width=4in - air1d[:200].plot.line(color='purple', marker='o') + air1d[:200].plot.line(color="purple", marker="o") ========================= Adding to Existing Axis @@ -219,7 +220,7 @@ plots to check the variation of air temperature at three different latitudes alo .. ipython:: python @savefig plotting_example_multiple_lines_x_kwarg.png - air.isel(lon=10, lat=[19,21,22]).plot.line(x='time') + air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time") It is required to explicitly specify either @@ -240,7 +241,7 @@ It is also possible to make line plots such that the data are on the x-axis and .. ipython:: python @savefig plotting_example_xy_kwarg.png - air.isel(time=10, lon=[10, 11]).plot(y='lat', hue='lon') + air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon") ============ Step plots @@ -253,7 +254,7 @@ made using 1D data. 
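
A quick sketch of the alternative step placements first (``"pre"`` is the default; ``"post"`` is shown here):

.. ipython::
    :verbatim:

    In [1]: air1d[:20].plot.step(where="post")

.. ipython:: python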
:okwarning: @savefig plotting_example_step.png width=4in - air1d[:20].plot.step(where='mid') + air1d[:20].plot.step(where="mid") The argument ``where`` defines where the steps should be placed, options are ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy @@ -261,15 +262,15 @@ when plotting data grouped with :py:meth:`Dataset.groupby_bins`. .. ipython:: python - air_grp = air.mean(['time','lon']).groupby_bins('lat',[0,23.5,66.5,90]) + air_grp = air.mean(["time", "lon"]).groupby_bins("lat", [0, 23.5, 66.5, 90]) air_mean = air_grp.mean() air_std = air_grp.std() air_mean.plot.step() - (air_mean + air_std).plot.step(ls=':') - (air_mean - air_std).plot.step(ls=':') - plt.ylim(-20,30) + (air_mean + air_std).plot.step(ls=":") + (air_mean - air_std).plot.step(ls=":") + plt.ylim(-20, 30) @savefig plotting_example_step_groupby.png width=4in - plt.title('Zonal mean temperature') + plt.title("Zonal mean temperature") In this case, the actual boundaries of the bins are used and the ``where`` argument is ignored. @@ -284,7 +285,9 @@ The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes d .. ipython:: python @savefig plotting_example_xincrease_yincrease_kwarg.png - air.isel(time=10, lon=[10, 11]).plot.line(y='lat', hue='lon', xincrease=False, yincrease=False) + air.isel(time=10, lon=[10, 11]).plot.line( + y="lat", hue="lon", xincrease=False, yincrease=False + ) In addition, one can use ``xscale, yscale`` to set axes scaling; ``xticks, yticks`` to set axes ticks and ``xlim, ylim`` to set axes limits. These accept the same values as the matplotlib methods ``Axes.set_(x,y)scale()``, ``Axes.set_(x,y)ticks()``, ``Axes.set_(x,y)lim()`` respectively. @@ -348,7 +351,7 @@ produce plots with nonuniform coordinates. b = air2d.copy() # Apply a nonlinear transformation to one of the coords - b.coords['lat'] = np.log(b.coords['lat']) + b.coords["lat"] = np.log(b.coords["lat"]) @savefig plotting_nonuniform_coords.png width=4in b.plot() @@ -363,9 +366,9 @@ matplotlib is available. .. ipython:: python air2d.plot(cmap=plt.cm.Blues) - plt.title('These colors prove North America\nhas fallen in the ocean') - plt.ylabel('latitude') - plt.xlabel('longitude') + plt.title("These colors prove North America\nhas fallen in the ocean") + plt.ylabel("latitude") + plt.xlabel("longitude") plt.tight_layout() @savefig plotting_2d_call_matplotlib.png width=4in @@ -381,7 +384,7 @@ matplotlib is available. .. ipython:: python - plt.xlabel('Never gonna see this.') + plt.xlabel("Never gonna see this.") air2d.plot() @savefig plotting_2d_call_matplotlib2.png width=4in @@ -473,10 +476,10 @@ if using ``imshow`` or ``pcolormesh`` (but not with ``contour`` or ``contourf``, since levels are chosen automatically). .. ipython:: python - :okwarning: + :okwarning: @savefig plotting_seaborn_palette.png width=4in - air2d.plot(levels=10, cmap='husl') + air2d.plot(levels=10, cmap="husl") plt.draw() .. _plotting.faceting: @@ -520,14 +523,16 @@ arguments to the xarray plotting methods/functions. This returns a .. ipython:: python @savefig plot_facet_dataarray.png - g_simple = t.plot(x='lon', y='lat', col='time', col_wrap=3) + g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3) Faceting also works for line plots. .. 
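
The axis-control keywords described earlier (``xscale``, ``yscale``, ``xlim``, ``ylim``, ...) compose with these plots as well; a minimal sketch with arbitrary values:

.. ipython::
    :verbatim:

    In [1]: air.isel(time=10, lon=10).plot.line(y="lat", yscale="log", ylim=(10, 75))

..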
ipython:: python @savefig plot_facet_dataarray_line.png - g_simple_line = t.isel(lat=slice(0,None,4)).plot(x='lon', hue='lat', col='time', col_wrap=3) + g_simple_line = t.isel(lat=slice(0, None, 4)).plot( + x="lon", hue="lat", col="time", col_wrap=3 + ) =============== 4 dimensional @@ -541,12 +546,12 @@ one were much hotter. .. ipython:: python t2 = t.isel(time=slice(0, 2)) - t4d = xr.concat([t2, t2 + 40], pd.Index(['normal', 'hot'], name='fourth_dim')) + t4d = xr.concat([t2, t2 + 40], pd.Index(["normal", "hot"], name="fourth_dim")) # This is a 4d array t4d.coords @savefig plot_facet_4d.png - t4d.plot(x='lon', y='lat', col='time', row='fourth_dim') + t4d.plot(x="lon", y="lat", col="time", row="fourth_dim") ================ Other features @@ -555,9 +560,9 @@ one were much hotter. Faceted plotting supports other arguments common to xarray 2d plots. .. ipython:: python - :suppress: + :suppress: - plt.close('all') + plt.close("all") .. ipython:: python @@ -566,9 +571,15 @@ Faceted plotting supports other arguments common to xarray 2d plots. hasoutliers[-1, -1, -1] = 400 @savefig plot_facet_robust.png - g = hasoutliers.plot.pcolormesh('lon', 'lat', col='time', col_wrap=3, - robust=True, cmap='viridis', - cbar_kwargs={'label': 'this has outliers'}) + g = hasoutliers.plot.pcolormesh( + "lon", + "lat", + col="time", + col_wrap=3, + robust=True, + cmap="viridis", + cbar_kwargs={"label": "this has outliers"}, + ) =================== FacetGrid Objects @@ -594,20 +605,20 @@ It's possible to select the :py:class:`xarray.DataArray` or .. ipython:: python - g.data.loc[g.name_dicts[0, 0]] + g.data.loc[g.name_dicts[0, 0]] Here is an example of using the lower level API and then modifying the axes after they have been plotted. .. ipython:: python - g = t.plot.imshow('lon', 'lat', col='time', col_wrap=3, robust=True) + g = t.plot.imshow("lon", "lat", col="time", col_wrap=3, robust=True) for i, ax in enumerate(g.axes.flat): - ax.set_title('Air Temperature %d' % i) + ax.set_title("Air Temperature %d" % i) bottomright = g.axes[-1, -1] - bottomright.annotate('bottom right', (240, 40)) + bottomright.annotate("bottom right", (240, 40)) @savefig plot_facet_iterator.png plt.draw() @@ -632,8 +643,8 @@ Consider this dataset .. ipython:: python - ds = xr.tutorial.scatter_example_dataset() - ds + ds = xr.tutorial.scatter_example_dataset() + ds Suppose we want to scatter ``A`` against ``B`` @@ -641,14 +652,14 @@ Suppose we want to scatter ``A`` against ``B`` .. ipython:: python @savefig ds_simple_scatter.png - ds.plot.scatter(x='A', y='B') + ds.plot.scatter(x="A", y="B") The ``hue`` kwarg lets you vary the color by variable value .. ipython:: python @savefig ds_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w') + ds.plot.scatter(x="A", y="B", hue="w") When ``hue`` is specified, a colorbar is added for numeric ``hue`` DataArrays by default and a legend is added for non-numeric ``hue`` DataArrays (as above). @@ -659,21 +670,21 @@ Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display ds = ds.assign(w=[1, 2, 3, 5]) @savefig ds_discrete_legend_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", hue="w", hue_style="discrete") The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes. .. 
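
The ``add_guide`` keyword mentioned above can be sketched in one line (suppressing the colorbar or legend entirely):

.. ipython::
    :verbatim:

    In [1]: ds.plot.scatter(x="A", y="B", hue="w", add_guide=False)

..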
ipython:: python @savefig ds_hue_size_scatter.png - ds.plot.scatter(x='A', y='B', hue='z', hue_style='discrete', markersize='z') + ds.plot.scatter(x="A", y="B", hue="z", hue_style="discrete", markersize="z") Faceting is also possible .. ipython:: python @savefig ds_facet_scatter.png - ds.plot.scatter(x='A', y='B', col='x', row='z', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", col="x", row="z", hue="w", hue_style="discrete") For more advanced scatter plots, we recommend converting the relevant data variables to a pandas DataFrame and using the extensive plotting capabilities of ``seaborn``. @@ -691,25 +702,33 @@ This script will plot the air temperature on a map. .. ipython:: python import cartopy.crs as ccrs - air = xr.tutorial.open_dataset('air_temperature').air + + air = xr.tutorial.open_dataset("air_temperature").air + ax = plt.axes(projection=ccrs.Orthographic(-80, 35)) - air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()); + air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()) + ax.set_global() + @savefig plotting_maps_cartopy.png width=100% - ax.set_global(); ax.coastlines(); + ax.coastlines() When faceting on maps, the projection can be transferred to the ``plot`` function using the ``subplot_kws`` keyword. The axes for the subplots created by faceting are accessible in the object returned by ``plot``: .. ipython:: python + :okwarning: - p = air.isel(time=[0, 4]).plot(transform=ccrs.PlateCarree(), col='time', - subplot_kws={'projection': ccrs.Orthographic(-80, 35)}) + p = air.isel(time=[0, 4]).plot( + transform=ccrs.PlateCarree(), + col="time", + subplot_kws={"projection": ccrs.Orthographic(-80, 35)}, + ) for ax in p.axes.flat: ax.coastlines() ax.gridlines() @savefig plotting_maps_cartopy_facetting.png width=100% - plt.draw(); + plt.draw() Details @@ -732,6 +751,7 @@ These are provided for user convenience; they all call the same code. .. ipython:: python import xarray.plot as xplt + da = xr.DataArray(range(5)) fig, axes = plt.subplots(ncols=2, nrows=2) da.plot(ax=axes[0, 0]) @@ -766,8 +786,7 @@ read on. .. ipython:: python - a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=('y', 'x', 'z'), - name='temperature') + a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=("y", "x", "z"), name="temperature") a0[0, 0, 0] = 1 a = a0.isel(z=0) a @@ -801,14 +820,16 @@ instead of the default ones: .. ipython:: python lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) - lon += lat/10 - lat += lon/10 - da = xr.DataArray(np.arange(20).reshape(4, 5), dims=['y', 'x'], - coords = {'lat': (('y', 'x'), lat), - 'lon': (('y', 'x'), lon)}) + lon += lat / 10 + lat += lon / 10 + da = xr.DataArray( + np.arange(20).reshape(4, 5), + dims=["y", "x"], + coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)}, + ) @savefig plotting_example_2d_irreg.png width=4in - da.plot.pcolormesh('lon', 'lat'); + da.plot.pcolormesh("lon", "lat") Note that in this case, xarray still follows the pixel centered convention. This might be undesirable in some cases, for example when your data is defined @@ -818,22 +839,25 @@ this convention when plotting on a map: .. 
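
(As an aside before the map example: once you have the cartopy ``ax`` created below, the visible region can be narrowed with ``set_extent``; the extents in this sketch are illustrative.)

.. ipython::
    :verbatim:

    In [1]: ax.set_extent([-160, -30, 10, 80], crs=ccrs.PlateCarree())

..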
ipython:: python import cartopy.crs as ccrs - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) You can however decide to infer the cell boundaries and use the ``infer_intervals`` keyword: .. ipython:: python - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax, infer_intervals=True); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax, infer_intervals=True) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map_infer.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) .. note:: The data model of xarray does not support datasets with `cell boundaries`_ @@ -847,6 +871,6 @@ One can also make line plots with multidimensional coordinates. In this case, `` .. ipython:: python f, ax = plt.subplots(2, 1) - da.plot.line(x='lon', hue='y', ax=ax[0]); + da.plot.line(x="lon", hue="y", ax=ax[0]) @savefig plotting_example_2d_hue_xy.png - da.plot.line(x='lon', hue='x', ax=ax[1]); + da.plot.line(x="lon", hue="x", ax=ax[1]) diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst index 741b3d1a5fe..09b0d4c6fbb 100644 --- a/doc/quick-overview.rst +++ b/doc/quick-overview.rst @@ -22,16 +22,14 @@ array or list, with optional *dimensions* and *coordinates*: .. ipython:: python - data = xr.DataArray(np.random.randn(2, 3), - dims=('x', 'y'), - coords={'x': [10, 20]}) + data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]}) data In this case, we have generated a 2D array, assigned the names *x* and *y* to the two dimensions respectively and associated two *coordinate labels* '10' and '20' with the two locations along the x dimension. If you supply a pandas :py:class:`~pandas.Series` or :py:class:`~pandas.DataFrame`, metadata is copied directly: .. ipython:: python - xr.DataArray(pd.Series(range(3), index=list('abc'), name='foo')) + xr.DataArray(pd.Series(range(3), index=list("abc"), name="foo")) Here are the key properties for a ``DataArray``: @@ -75,13 +73,13 @@ While you're setting up your DataArray, it's often a good idea to set metadata a .. ipython:: python - data.attrs['long_name'] = 'random velocity' - data.attrs['units'] = 'metres/sec' - data.attrs['description'] = 'A random variable created as an example.' - data.attrs['random_attribute'] = 123 + data.attrs["long_name"] = "random velocity" + data.attrs["units"] = "metres/sec" + data.attrs["description"] = "A random variable created as an example." + data.attrs["random_attribute"] = 123 data.attrs # you can add metadata to coordinates too - data.x.attrs['units'] = 'x units' + data.x.attrs["units"] = "x units" Computation @@ -102,15 +100,15 @@ numbers: .. ipython:: python - data.mean(dim='x') + data.mean(dim="x") Arithmetic operations broadcast based on dimension name. This means you don't need to insert dummy dimensions for alignment: .. 
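
One more aggregation note: with no ``dim`` argument at all, the reduction runs over every dimension and returns a zero-dimensional result; a one-line sketch:

.. ipython::
    :verbatim:

    In [1]: data.mean()

..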
ipython:: python - a = xr.DataArray(np.random.randn(3), [data.coords['y']]) - b = xr.DataArray(np.random.randn(4), dims='z') + a = xr.DataArray(np.random.randn(3), [data.coords["y"]]) + b = xr.DataArray(np.random.randn(4), dims="z") a b @@ -139,9 +137,9 @@ xarray supports grouped operations using a very similar API to pandas (see :ref: .. ipython:: python - labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels') + labels = xr.DataArray(["E", "F", "E"], [data.coords["y"]], name="labels") labels - data.groupby(labels).mean('y') + data.groupby(labels).mean("y") data.groupby(labels).map(lambda x: x - x.min()) Plotting @@ -178,7 +176,7 @@ objects. You can think of it as a multi-dimensional generalization of the .. ipython:: python - ds = xr.Dataset({'foo': data, 'bar': ('x', [1, 2]), 'baz': np.pi}) + ds = xr.Dataset({"foo": data, "bar": ("x", [1, 2]), "baz": np.pi}) ds @@ -186,7 +184,7 @@ This creates a dataset with three DataArrays named ``foo``, ``bar`` and ``baz``. .. ipython:: python - ds['foo'] + ds["foo"] ds.foo @@ -216,14 +214,15 @@ You can directly read and write xarray objects to disk using :py:meth:`~xarray.D .. ipython:: python - ds.to_netcdf('example.nc') - xr.open_dataset('example.nc') + ds.to_netcdf("example.nc") + xr.open_dataset("example.nc") .. ipython:: python - :suppress: + :suppress: import os - os.remove('example.nc') + + os.remove("example.nc") It is common for datasets to be distributed across multiple files (commonly one file per timestep). xarray supports this use-case by providing the :py:meth:`~xarray.open_mfdataset` and the :py:meth:`~xarray.save_mfdataset` methods. For more, see :ref:`io`. diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 465ca14dfc2..40de9ea799a 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -7,11 +7,12 @@ Reshaping and reorganizing data These methods allow you to reorganize .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Reordering dimensions @@ -23,9 +24,9 @@ ellipsis (`...`) can be use to represent all other dimensions: .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])}) - ds.transpose('y', 'z', 'x') - ds.transpose(..., 'x') # equivalent + ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])}) + ds.transpose("y", "z", "x") + ds.transpose(..., "x") # equivalent ds.transpose() # reverses all dimensions Expand and squeeze dimensions @@ -37,7 +38,7 @@ use :py:meth:`~xarray.DataArray.expand_dims` .. ipython:: python - expanded = ds.expand_dims('w') + expanded = ds.expand_dims("w") expanded This method attaches a new dimension with size 1 to all data variables. @@ -48,7 +49,7 @@ use :py:meth:`~xarray.DataArray.squeeze` .. ipython:: python - expanded.squeeze('w') + expanded.squeeze("w") Converting between datasets and arrays -------------------------------------- @@ -69,14 +70,14 @@ To convert back from a DataArray to a Dataset, use .. ipython:: python - arr.to_dataset(dim='variable') + arr.to_dataset(dim="variable") The broadcasting behavior of ``to_array`` means that the resulting array includes the union of data variable dimensions: .. ipython:: python - ds2 = xr.Dataset({'a': 0, 'b': ('x', [3, 4, 5])}) + ds2 = xr.Dataset({"a": 0, "b": ("x", [3, 4, 5])}) # the input dataset has 4 elements ds2 @@ -90,7 +91,7 @@ If you use ``to_dataset`` without supplying the ``dim`` argument, the DataArray .. 
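
The stacked dimension created by ``to_array`` can also be given a custom name instead of the default ``"variable"``; a one-line sketch:

.. ipython::
    :verbatim:

    In [1]: ds2.to_array(dim="var")

..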
ipython:: python - arr.to_dataset(name='combined') + arr.to_dataset(name="combined") .. _reshape.stack: @@ -103,11 +104,12 @@ implemented :py:meth:`~xarray.DataArray.stack` and .. ipython:: python - array = xr.DataArray(np.random.randn(2, 3), - coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) - stacked = array.stack(z=('x', 'y')) + array = xr.DataArray( + np.random.randn(2, 3), coords=[("x", ["a", "b"]), ("y", [0, 1, 2])] + ) + stacked = array.stack(z=("x", "y")) stacked - stacked.unstack('z') + stacked.unstack("z") As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: @@ -128,15 +130,15 @@ possible levels. Missing levels are filled in with ``NaN`` in the resulting obje stacked2 = stacked[::2] stacked2 - stacked2.unstack('z') + stacked2.unstack("z") However, xarray's ``stack`` has an important difference from pandas: unlike pandas, it does not automatically drop missing values. Compare: .. ipython:: python - array = xr.DataArray([[np.nan, 1], [2, 3]], dims=['x', 'y']) - array.stack(z=('x', 'y')) + array = xr.DataArray([[np.nan, 1], [2, 3]], dims=["x", "y"]) + array.stack(z=("x", "y")) array.to_pandas().stack() We departed from pandas's behavior here because predictable shapes for new @@ -166,16 +168,15 @@ like this: .. ipython:: python - data = xr.Dataset( - data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), - 'b': ('x', [6, 7])}, - coords={'y': ['u', 'v', 'w']} - ) - data - stacked = data.to_stacked_array("z", sample_dims=['x']) - stacked - unstacked = stacked.to_unstacked_dataset("z") - unstacked + data = xr.Dataset( + data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])}, + coords={"y": ["u", "v", "w"]}, + ) + data + stacked = data.to_stacked_array("z", sample_dims=["x"]) + stacked + unstacked = stacked.to_unstacked_dataset("z") + unstacked In this example, ``stacked`` is a two dimensional array that we can easily pass to a scikit-learn or another generic numerical method. @@ -202,19 +203,23 @@ coordinates using :py:meth:`~xarray.DataArray.set_index`: .. ipython:: python - da = xr.DataArray(np.random.rand(4), - coords={'band': ('x', ['a', 'a', 'b', 'b']), - 'wavenumber': ('x', np.linspace(200, 400, 4))}, - dims='x') - da - mda = da.set_index(x=['band', 'wavenumber']) - mda + da = xr.DataArray( + np.random.rand(4), + coords={ + "band": ("x", ["a", "a", "b", "b"]), + "wavenumber": ("x", np.linspace(200, 400, 4)), + }, + dims="x", + ) + da + mda = da.set_index(x=["band", "wavenumber"]) + mda These coordinates can now be used for indexing, e.g., .. ipython:: python - mda.sel(band='a') + mda.sel(band="a") Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` to extract multi-index levels as coordinates (this is mainly useful @@ -222,14 +227,14 @@ for serialization): .. ipython:: python - mda.reset_index('x') + mda.reset_index("x") :py:meth:`~xarray.DataArray.reorder_levels` allows changing the order of multi-index levels: .. ipython:: python - mda.reorder_levels(x=['wavenumber', 'band']) + mda.reorder_levels(x=["wavenumber", "band"]) As of xarray v0.9 coordinate labels for each dimension are optional. You can also use ``.set_index`` / ``.reset_index`` to add / remove @@ -237,12 +242,12 @@ labels for one or several dimensions: .. 
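
The ellipsis form of ``stack`` mentioned earlier looks like this; a sketch using the 2-D ``array`` from the stacking examples above (all unlisted dimensions are placed before ``"x"``):

.. ipython::
    :verbatim:

    In [1]: array.stack(z=[..., "x"])

..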
ipython:: python - array = xr.DataArray([1, 2, 3], dims='x') + array = xr.DataArray([1, 2, 3], dims="x") array - array['c'] = ('x', ['a', 'b', 'c']) - array.set_index(x='c') - array = array.set_index(x='c') - array = array.reset_index('x', drop=True) + array["c"] = ("x", ["a", "b", "c"]) + array.set_index(x="c") + array = array.set_index(x="c") + array = array.reset_index("x", drop=True) .. _reshape.shift_and_roll: @@ -254,9 +259,9 @@ To adjust coordinate labels, you can use the :py:meth:`~xarray.Dataset.shift` an .. ipython:: python - array = xr.DataArray([1, 2, 3, 4], dims='x') - array.shift(x=2) - array.roll(x=2, roll_coords=True) + array = xr.DataArray([1, 2, 3, 4], dims="x") + array.shift(x=2) + array.roll(x=2, roll_coords=True) .. _reshape.sort: @@ -269,17 +274,18 @@ One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and .. ipython:: python - ds = xr.Dataset({'A': (('x', 'y'), [[1, 2], [3, 4]]), - 'B': (('x', 'y'), [[5, 6], [7, 8]])}, - coords={'x': ['b', 'a'], 'y': [1, 0]}) - dax = xr.DataArray([100, 99], [('x', [0, 1])]) - day = xr.DataArray([90, 80], [('y', [0, 1])]) - ds.sortby([day, dax]) + ds = xr.Dataset( + {"A": (("x", "y"), [[1, 2], [3, 4]]), "B": (("x", "y"), [[5, 6], [7, 8]])}, + coords={"x": ["b", "a"], "y": [1, 0]}, + ) + dax = xr.DataArray([100, 99], [("x", [0, 1])]) + day = xr.DataArray([90, 80], [("y", [0, 1])]) + ds.sortby([day, dax]) As a shortcut, you can refer to existing coordinates by name: .. ipython:: python - ds.sortby('x') - ds.sortby(['y', 'x']) - ds.sortby(['y', 'x'], ascending=False) + ds.sortby("x") + ds.sortby(["y", "x"]) + ds.sortby(["y", "x"], ascending=False) \ No newline at end of file diff --git a/doc/time-series.rst b/doc/time-series.rst index d838dbbd4cd..96a2edc0ea5 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -10,11 +10,12 @@ data in pandas such a joy to xarray. In most cases, we rely on pandas for the core functionality. .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Creating datetime64 data @@ -29,8 +30,8 @@ using :py:func:`pandas.to_datetime` and :py:func:`pandas.date_range`: .. ipython:: python - pd.to_datetime(['2000-01-01', '2000-02-02']) - pd.date_range('2000-01-01', periods=365) + pd.to_datetime(["2000-01-01", "2000-02-02"]) + pd.date_range("2000-01-01", periods=365) Alternatively, you can supply arrays of Python ``datetime`` objects. These get converted automatically when used as arguments in xarray objects: @@ -38,7 +39,8 @@ converted automatically when used as arguments in xarray objects: .. ipython:: python import datetime - xr.Dataset({'time': datetime.datetime(2000, 1, 1)}) + + xr.Dataset({"time": datetime.datetime(2000, 1, 1)}) When reading or writing netCDF files, xarray automatically decodes datetime and timedelta arrays using `CF conventions`_ (that is, by using a ``units`` @@ -62,8 +64,8 @@ You can manual decode arrays in this form by passing a dataset to .. ipython:: python - attrs = {'units': 'hours since 2000-01-01'} - ds = xr.Dataset({'time': ('time', [0, 1, 2, 3], attrs)}) + attrs = {"units": "hours since 2000-01-01"} + ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) xr.decode_cf(ds) One unfortunate limitation of using ``datetime64[ns]`` is that it limits the @@ -87,10 +89,10 @@ items and with the `slice` object: .. 
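
Relatedly, the automatic CF decoding described earlier can be turned off at read time, keeping the raw numeric values; a sketch (the filename is illustrative):

.. ipython::
    :verbatim:

    In [1]: xr.open_dataset("example.nc", decode_times=False)

..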
ipython:: python - time = pd.date_range('2000-01-01', freq='H', periods=365 * 24) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 24)), 'time': time}) - ds.sel(time='2000-01') - ds.sel(time=slice('2000-06-01', '2000-06-10')) + time = pd.date_range("2000-01-01", freq="H", periods=365 * 24) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 24)), "time": time}) + ds.sel(time="2000-01") + ds.sel(time=slice("2000-06-01", "2000-06-10")) You can also select a particular time by indexing with a :py:class:`datetime.time` object: @@ -113,8 +115,8 @@ given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. .. ipython:: python - time = pd.date_range('2000-01-01', freq='6H', periods=365 * 4) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 4)), 'time': time}) + time = pd.date_range("2000-01-01", freq="6H", periods=365 * 4) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 4)), "time": time}) ds.time.dt.hour ds.time.dt.dayofweek @@ -130,16 +132,16 @@ __ http://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. ipython:: python - ds['time.month'] - ds['time.dayofyear'] + ds["time.month"] + ds["time.dayofyear"] For use as a derived coordinate, xarray adds ``'season'`` to the list of datetime components supported by pandas: .. ipython:: python - ds['time.season'] - ds['time'].dt.season + ds["time.season"] + ds["time"].dt.season The set of valid seasons consists of 'DJF', 'MAM', 'JJA' and 'SON', labeled by the first letters of the corresponding months. @@ -152,7 +154,7 @@ __ http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases .. ipython:: python - ds['time'].dt.floor('D') + ds["time"].dt.floor("D") The ``.dt`` accessor can also be used to generate formatted datetime strings for arrays utilising the same formatting as the standard `datetime.strftime`_. @@ -161,7 +163,7 @@ for arrays utilising the same formatting as the standard `datetime.strftime`_. .. ipython:: python - ds['time'].dt.strftime('%a, %b %d %H:%M') + ds["time"].dt.strftime("%a, %b %d %H:%M") .. _resampling: @@ -173,9 +175,9 @@ Datetime components couple particularly well with grouped operations (see calculate the mean by time of day: .. ipython:: python - :okwarning: + :okwarning: - ds.groupby('time.hour').mean() + ds.groupby("time.hour").mean() For upsampling or downsampling temporal resolutions, xarray offers a :py:meth:`~xarray.Dataset.resample` method building on the core functionality @@ -187,25 +189,25 @@ same api as ``resample`` `in pandas`_. For example, we can downsample our dataset from hourly to 6-hourly: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H') + ds.resample(time="6H") This will create a specialized ``Resample`` object which saves information necessary for resampling. All of the reduction methods which work with ``Resample`` objects can also be used for resampling: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H').mean() + ds.resample(time="6H").mean() You can also supply an arbitrary reduction function to aggregate over each resampling group: .. ipython:: python - ds.resample(time='6H').reduce(np.mean) + ds.resample(time="6H").reduce(np.mean) For upsampling, xarray provides six methods: ``asfreq``, ``ffill``, ``bfill``, ``pad``, ``nearest`` and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d`` @@ -218,7 +220,7 @@ Data that has indices outside of the given ``tolerance`` are set to ``NaN``. .. 
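
An upsampling counterpart to the example below: refining the 6-hourly ``ds`` above to 3-hourly via linear interpolation, a one-line sketch:

.. ipython::
    :verbatim:

    In [1]: ds.resample(time="3H").interpolate("linear")

..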
ipython:: python - ds.resample(time='1H').nearest(tolerance='1H') + ds.resample(time="1H").nearest(tolerance="1H") For more examples of using grouped operations on a time dimension, see diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 768cf6556f9..1eb63d24630 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -4,7 +4,7 @@ Weather and climate data ======================== .. ipython:: python - :suppress: + :suppress: import xarray as xr @@ -56,11 +56,14 @@ coordinate with dates from a no-leap calendar and a .. ipython:: python - from itertools import product - from cftime import DatetimeNoLeap - dates = [DatetimeNoLeap(year, month, 1) for year, month in - product(range(1, 3), range(1, 13))] - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + from itertools import product + from cftime import DatetimeNoLeap + + dates = [ + DatetimeNoLeap(year, month, 1) + for year, month in product(range(1, 3), range(1, 13)) + ] + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") xarray also includes a :py:func:`~xarray.cftime_range` function, which enables creating a :py:class:`~xarray.CFTimeIndex` with regularly-spaced dates. For @@ -68,8 +71,8 @@ instance, we can create the same dates and DataArray we created above using: .. ipython:: python - dates = xr.cftime_range(start='0001', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + dates = xr.cftime_range(start="0001", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the @@ -80,8 +83,8 @@ using the same formatting as the standard `datetime.strftime`_ convention . .. ipython:: python - dates.strftime('%c') - da['time'].dt.strftime('%Y%m%d') + dates.strftime("%c") + da["time"].dt.strftime("%Y%m%d") For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: @@ -90,8 +93,8 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.sel(time='0001') - da.sel(time=slice('0001-05', '0002-02')) + da.sel(time="0001") + da.sel(time=slice("0001-05", "0002-02")) - Access of basic datetime components via the ``dt`` accessor (in this case just "year", "month", "day", "hour", "minute", "second", "microsecond", @@ -99,64 +102,65 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.time.dt.year - da.time.dt.month - da.time.dt.season - da.time.dt.dayofyear - da.time.dt.dayofweek - da.time.dt.days_in_month + da.time.dt.year + da.time.dt.month + da.time.dt.season + da.time.dt.dayofyear + da.time.dt.dayofweek + da.time.dt.days_in_month - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: .. ipython:: python - da.time.dt.ceil('3D') - da.time.dt.floor('5D') - da.time.dt.round('2D') + da.time.dt.ceil("3D") + da.time.dt.floor("5D") + da.time.dt.round("2D") - Group-by operations based on datetime accessor attributes (e.g. by month of the year): .. ipython:: python - da.groupby('time.month').sum() + da.groupby("time.month").sum() - Interpolation using :py:class:`cftime.datetime` objects: .. 
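ipython::
    :verbatim:

    In [1]: da.interp(time=[DatetimeNoLeap(1, 1, 10), DatetimeNoLeap(1, 1, 20)])  # a sketch; the mid-month dates are illustrative

..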
ipython:: python - da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) + da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) - Interpolation using datetime strings: .. ipython:: python - da.interp(time=['0001-01-15', '0001-02-15']) + da.interp(time=["0001-01-15", "0001-02-15"]) - Differentiation: .. ipython:: python - da.differentiate('time') + da.differentiate("time") - Serialization: .. ipython:: python - da.to_netcdf('example-no-leap.nc') - xr.open_dataset('example-no-leap.nc') + da.to_netcdf("example-no-leap.nc") + xr.open_dataset("example-no-leap.nc") .. ipython:: python :suppress: import os - os.remove('example-no-leap.nc') + + os.remove("example-no-leap.nc") - And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: .. ipython:: python - da.resample(time='81T', closed='right', label='right', base=3).mean() + da.resample(time="81T", closed="right", label="right", base=3).mean() .. note:: @@ -168,13 +172,13 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: method: .. ipython:: python - :okwarning: + :okwarning: - modern_times = xr.cftime_range('2000', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(range(24), [('time', modern_times)]) + modern_times = xr.cftime_range("2000", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(range(24), [("time", modern_times)]) da - datetimeindex = da.indexes['time'].to_datetimeindex() - da['time'] = datetimeindex + datetimeindex = da.indexes["time"].to_datetimeindex() + da["time"] = datetimeindex However in this case one should use caution to only perform operations which do not depend on differences between dates (e.g. differentiation, diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 996bef6253e..4fbeb7033c7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -4,13 +4,14 @@ What's New ========== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xray import xarray import xarray as xr + np.random.seed(123456) .. _whats-new.0.16.0: @@ -20,6 +21,12 @@ v0.16.0 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ + +- ``groupby`` operations will restore coord dimension order. Pass ``restore_coord_dims=False`` + to revert to previous behavior. +- :meth:`DataArray.transpose` will now transpose coordinates by default. + Pass ``transpose_coords=False`` to revert to previous behaviour. + By `Maximilian Roos `_ - Alternate draw styles for :py:meth:`plot.step` must be passed using the ``drawstyle`` (or ``ds``) keyword argument, instead of the ``linestyle`` (or ``ls``) keyword argument, in line with the `upstream change in Matplotlib @@ -49,6 +56,11 @@ New Features - Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:pull:`3922`) By `Kai Mühlbauer `_. +- More support for unit aware arrays with pint (:pull:`3643`) + By `Justus Magin `_. +- Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even + without ``append_dim``, as long as dimension sizes do not change. + By `Stephan Hoyer `_. - Allow plotting of boolean arrays. (:pull:`3766`) By `Marek Jacob `_ - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to @@ -58,16 +70,23 @@ New Features the :py:class:`~core.accessor_dt.DatetimeAccessor` (:pull:`3935`). This feature requires cftime version 1.1.0 or greater. By `Spencer Clark `_. +- :py:meth:`map_blocks` now accepts a ``template`` kwarg. 
This allows use cases + where the result of a computation could not be inferred automatically. + By `Deepak Cherian `_ Bug fixes ~~~~~~~~~ +- Support dark mode in VS code (:issue:`4024`) + By `Keisuke Fujii `_. +- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue:`3977`) + By `Huite Bootsma `_. - Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`) By `Keisuke Fujii `_. - Fix renaming of coords when one or more stacked coords is not in sorted order during stack+groupby+apply operations. (:issue:`3287`, :pull:`3906`) By `Spencer Hill `_ - Fix a regression where deleting a coordinate from a copied :py:class:`DataArray` - can affect the original :py:class:`Dataarray`. (:issue:`3899`, :pull:`3871`) + can affect the original :py:class:`DataArray`. (:issue:`3899`, :pull:`3871`) By `Todd Jennings `_ - Fix :py:class:`~xarray.plot.FacetGrid` plots with a single contour. (:issue:`3569`, :pull:`3915`). By `Deepak Cherian `_ @@ -80,6 +99,15 @@ Bug fixes By `Tom Nicholas `_. - Fix ``RasterioDeprecationWarning`` when using a ``vrt`` in ``open_rasterio``. (:issue:`3964`) By `Taher Chegini `_. +- Fix ``AttributeError`` on displaying a :py:class:`Variable` + in a notebook context. (:issue:`3972`, :pull:`3973`) + By `Ian Castleden `_. +- Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes, + and added `keep_attrs` argument. (:issue:`3968`) + By `Tom Nicholas `_. +- Fix bug in time parsing failing to fall back to cftime. This was causing time + variables with a time unit of `'msecs'` to fail to parse. (:pull:`3998`) + By `Ryan May `_. Documentation ~~~~~~~~~~~~~ @@ -97,16 +125,30 @@ Documentation (:pull:`3935`). By `Spencer Clark `_. - Updated the list of current core developers. (:issue:`3892`) By `Tom Nicholas `_. +- Add example for multi-dimensional extrapolation and note different behavior + of ``kwargs`` in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp` + for 1-d and n-d interpolation (:pull:`3956`). + By `Matthias Riße `_. +- Apply ``black`` to all the code in the documentation (:pull:`4012`) + By `Justus Magin `_. +- Narrative documentation now describes :py:meth:`map_blocks`. :ref:`dask.automatic-parallelization`. + By `Deepak Cherian `_. Internal Changes ~~~~~~~~~~~~~~~~ +- Raise more informative error messages for chunk size conflicts when writing to zarr files. + By `Deepak Cherian `_. - Run the ``isort`` pre-commit hook only on python source files and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`) By `Justus Magin `_. - Add a CI job that runs the tests with every optional dependency except ``dask``. (:issue:`3794`, :pull:`3919`) By `Justus Magin `_. - +- Use ``async`` / ``await`` for the asynchronous distributed + tests. (:issue:`3987`, :pull:`3989`) + By `Justus Magin `_. +- Various internal code clean-ups (:pull:`4026`, :pull:`4038`). + By `Prajjwal Nijhara `_. .. _whats-new.0.15.1: @@ -193,13 +235,13 @@ Bug fixes - xarray now respects the over, under and bad colors if set on a provided colormap. (:issue:`3590`, :pull:`3601`) By `johnomotani `_. -- :py:func:`coarsen` now respects ``xr.set_options(keep_attrs=True)`` +- ``coarsen`` and ``rolling`` now respect ``xr.set_options(keep_attrs=True)`` to preserve attributes. :py:meth:`Dataset.coarsen` accepts a keyword argument ``keep_attrs`` to change this setting. (:issue:`3376`, :pull:`3801`) By `Andrew Thomas `_. - Delete associated indexes when deleting coordinate variables. (:issue:`3746`). 
By `Deepak Cherian `_. -- Fix :py:meth:`xarray.core.dataset.Dataset.to_zarr` when using `append_dim` and `group` +- Fix :py:meth:`Dataset.to_zarr` when using ``append_dim`` and ``group`` simultaneously. (:issue:`3170`). By `Matthias Meyer `_. - Fix html repr on :py:class:`Dataset` with non-string keys (:pull:`3807`). By `Maximilian Roos `_. @@ -237,7 +279,7 @@ Internal Changes By `Maximilian Roos `_ - Remove xfails for scipy 1.0.1 for tests that append to netCDF files (:pull:`3805`). By `Mathias Hauser `_. -- Remove conversion to :py:class:`pandas.Panel`, given its removal in pandas +- Remove conversion to ``pandas.Panel``, given its removal in pandas in favor of xarray's objects. By `Maximilian Roos `_ @@ -1949,8 +1991,8 @@ Enhancements .. ipython:: python - ds = xr.Dataset({'a': 1}) - np.sin(ds) + ds = xr.Dataset({"a": 1}) + np.sin(ds) This obliviates the need for the ``xarray.ufuncs`` module, which will be deprecated in the future when xarray drops support for older versions of @@ -2041,8 +2083,8 @@ Enhancements .. ipython:: python - da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims='x') - da.sum() + da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims="x") + da.sum() (:issue:`1866`) By `Keisuke Fujii `_. @@ -2196,7 +2238,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample('24H', dim='time', how='max') + In [1]: ds.resample("24H", dim="time", how="max") Out[1]: [...] @@ -2206,7 +2248,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample(time='24H').max() + In [1]: ds.resample(time="24H").max() Out[1]: [...] @@ -2276,9 +2318,9 @@ Enhancements In [1]: import xarray as xr - In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=('x', 'y')) + In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("x", "y")) - In [3]: xr.where(arr % 2, 'even', 'odd') + In [3]: xr.where(arr % 2, "even", "odd") Out[3]: array([['even', 'odd', 'even'], @@ -2799,7 +2841,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: xr.Dataset({'foo': (('x', 'y'), [[1, 2]])}) + In [1]: xr.Dataset({"foo": (("x", "y"), [[1, 2]])}) Out[1]: Dimensions: (x: 1, y: 2) @@ -3256,10 +3298,10 @@ Enhancements .. ipython:: :verbatim: - In [1]: import xarray as xr; import numpy as np + In [1]: import xarray as xr + ...: import numpy as np - In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) In [3]: arr Out[3]: @@ -3398,7 +3440,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3410,7 +3452,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3433,13 +3475,11 @@ Enhancements .. 
ipython:: :verbatim: - In [7]: df = pd.DataFrame({'foo': range(3), - ...: 'x': ['a', 'b', 'b'], - ...: 'y': [0, 0, 1]}) + In [7]: df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]}) - In [8]: s = df.set_index(['x', 'y'])['foo'] + In [8]: s = df.set_index(["x", "y"])["foo"] - In [12]: arr = xray.DataArray(s, dims='z') + In [12]: arr = xray.DataArray(s, dims="z") In [13]: arr Out[13]: @@ -3448,13 +3488,13 @@ Enhancements Coordinates: * z (z) object ('a', 0) ('b', 0) ('b', 1) - In [19]: arr.indexes['z'] + In [19]: arr.indexes["z"] Out[19]: MultiIndex(levels=[[u'a', u'b'], [0, 1]], labels=[[0, 1, 1], [0, 0, 1]], names=[u'x', u'y']) - In [14]: arr.unstack('z') + In [14]: arr.unstack("z") Out[14]: array([[ 0., nan], @@ -3463,7 +3503,7 @@ Enhancements * x (x) object 'a' 'b' * y (y) int64 0 1 - In [26]: arr.unstack('z').stack(z=('x', 'y')) + In [26]: arr.unstack("z").stack(z=("x", "y")) Out[26]: array([ 0., nan, 1., 2.]) @@ -3491,9 +3531,9 @@ Enhancements for shifting/rotating datasets or arrays along a dimension: .. ipython:: python - :okwarning: + :okwarning: - array = xray.DataArray([5, 6, 7, 8], dims='x') + array = xray.DataArray([5, 6, 7, 8], dims="x") array.shift(x=2) array.roll(x=2) @@ -3508,8 +3548,8 @@ Enhancements .. ipython:: python - a = xray.DataArray([1, 2, 3], dims='x') - b = xray.DataArray([5, 6], dims='y') + a = xray.DataArray([1, 2, 3], dims="x") + b = xray.DataArray([5, 6], dims="y") a b a2, b2 = xray.broadcast(a, b) @@ -3579,9 +3619,9 @@ Enhancements .. ipython:: :verbatim: - In [5]: array = xray.DataArray([1, 2, 3], dims='x') + In [5]: array = xray.DataArray([1, 2, 3], dims="x") - In [6]: array.reindex(x=[0.9, 1.5], method='nearest', tolerance=0.2) + In [6]: array.reindex(x=[0.9, 1.5], method="nearest", tolerance=0.2) Out[6]: array([ 2., nan]) @@ -3661,10 +3701,11 @@ Enhancements .. ipython:: :verbatim: - In [1]: da = xray.DataArray(np.arange(56).reshape((7, 8)), - ...: coords={'x': list('abcdefg'), - ...: 'y': 10 * np.arange(8)}, - ...: dims=['x', 'y']) + In [1]: da = xray.DataArray( + ...: np.arange(56).reshape((7, 8)), + ...: coords={"x": list("abcdefg"), "y": 10 * np.arange(8)}, + ...: dims=["x", "y"], + ...: ) In [2]: da Out[2]: @@ -3681,7 +3722,7 @@ Enhancements * x (x) |S1 'a' 'b' 'c' 'd' 'e' 'f' 'g' # we can index by position along each dimension - In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') + In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim="points") Out[3]: array([ 0, 9, 48]) @@ -3691,7 +3732,7 @@ Enhancements * points (points) int64 0 1 2 # or equivalently by label - In [9]: da.sel_points(x=['a', 'b', 'g'], y=[0, 10, 0], dim='points') + In [9]: da.sel_points(x=["a", "b", "g"], y=[0, 10, 0], dim="points") Out[9]: array([ 0, 9, 48]) @@ -3705,11 +3746,11 @@ Enhancements .. ipython:: python - ds = xray.Dataset(coords={'x': range(100), 'y': range(100)}) - ds['distance'] = np.sqrt(ds.x ** 2 + ds.y ** 2) + ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) + ds["distance"] = np.sqrt(ds.x ** 2 + ds.y ** 2) - @savefig where_example.png width=4in height=4in - ds.distance.where(ds.distance < 100).plot() + @savefig where_example.png width=4in height=4in + ds.distance.where(ds.distance < 100).plot() - Added new methods ``xray.DataArray.diff`` and ``xray.Dataset.diff`` for finite difference calculations along a given axis. @@ -3719,9 +3760,9 @@ Enhancements .. 
ipython:: python - da = xray.DataArray(np.random.random_sample(size=(5, 4))) - da.where(da < 0.5) - da.where(da < 0.5).to_masked_array(copy=True) + da = xray.DataArray(np.random.random_sample(size=(5, 4))) + da.where(da < 0.5) + da.where(da < 0.5).to_masked_array(copy=True) - Added new flag "drop_variables" to ``xray.open_dataset`` for excluding variables from being parsed. This may be useful to drop @@ -3779,9 +3820,9 @@ Enhancements .. ipython:: :verbatim: - In [1]: years, datasets = zip(*ds.groupby('time.year')) + In [1]: years, datasets = zip(*ds.groupby("time.year")) - In [2]: paths = ['%s.nc' % y for y in years] + In [2]: paths = ["%s.nc" % y for y in years] In [3]: xray.save_mfdataset(datasets, paths) @@ -3854,9 +3895,9 @@ Backwards incompatible changes .. ipython:: :verbatim: - In [1]: ds = xray.Dataset({'x': 0}) + In [1]: ds = xray.Dataset({"x": 0}) - In [2]: xray.concat([ds, ds], dim='y') + In [2]: xray.concat([ds, ds], dim="y") Out[2]: Dimensions: () @@ -3868,13 +3909,13 @@ Backwards incompatible changes Now, the default always concatenates data variables: .. ipython:: python - :suppress: + :suppress: - ds = xray.Dataset({'x': 0}) + ds = xray.Dataset({"x": 0}) .. ipython:: python - xray.concat([ds, ds], dim='y') + xray.concat([ds, ds], dim="y") To obtain the old behavior, supply the argument ``concat_over=[]``. @@ -3887,17 +3928,20 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'a': 1, 'b': ('x', [1, 2, 3])}, - coords={'c': 42}, attrs={'Conventions': 'None'}) + ds = xray.Dataset( + {"a": 1, "b": ("x", [1, 2, 3])}, + coords={"c": 42}, + attrs={"Conventions": "None"}, + ) ds.to_array() - ds.to_array().to_dataset(dim='variable') + ds.to_array().to_dataset(dim="variable") - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: .. ipython:: python - array = xray.DataArray([np.nan, 1, np.nan, 3], dims='x') + array = xray.DataArray([np.nan, 1, np.nan, 3], dims="x") array.fillna(0) ``fillna`` works on both ``Dataset`` and ``DataArray`` objects, and uses @@ -3910,9 +3954,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'y': ('x', [1, 2, 3])}) - ds.assign(z = lambda ds: ds.y ** 2) - ds.assign_coords(z = ('x', ['a', 'b', 'c'])) + ds = xray.Dataset({"y": ("x", [1, 2, 3])}) + ds.assign(z=lambda ds: ds.y ** 2) + ds.assign_coords(z=("x", ["a", "b", "c"])) These methods return a new Dataset (or DataArray) with updated data or coordinate variables. @@ -3925,7 +3969,7 @@ Enhancements .. ipython:: :verbatim: - In [12]: ds.sel(x=1.1, method='nearest') + In [12]: ds.sel(x=1.1, method="nearest") Out[12]: Dimensions: () @@ -3934,7 +3978,7 @@ Enhancements Data variables: y int64 2 - In [13]: ds.sel(x=[1.1, 2.1], method='pad') + In [13]: ds.sel(x=[1.1, 2.1], method="pad") Out[13]: Dimensions: (x: 2) @@ -3960,7 +4004,7 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': np.arange(1000)}) + ds = xray.Dataset({"x": np.arange(1000)}) with xray.set_options(display_width=40): print(ds) @@ -3998,42 +4042,42 @@ Enhancements need to supply the time dimension explicitly: .. 
ipython:: python - :verbatim: + :verbatim: - time = pd.date_range('2000-01-01', freq='6H', periods=10) - array = xray.DataArray(np.arange(10), [('time', time)]) - array.resample('1D', dim='time') + time = pd.date_range("2000-01-01", freq="6H", periods=10) + array = xray.DataArray(np.arange(10), [("time", time)]) + array.resample("1D", dim="time") You can specify how to do the resampling with the ``how`` argument and other options such as ``closed`` and ``label`` let you control labeling: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='sum', label='right') + array.resample("1D", dim="time", how="sum", label="right") If the desired temporal resolution is higher than the original data (upsampling), xray will insert missing values: .. ipython:: python - :verbatim: + :verbatim: - array.resample('3H', 'time') + array.resample("3H", "time") - ``first`` and ``last`` methods on groupby objects let you take the first or last examples from each group along the grouped axis: .. ipython:: python - :verbatim: + :verbatim: - array.groupby('time.day').first() + array.groupby("time.day").first() These methods combine well with ``resample``: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='first') + array.resample("1D", dim="time", how="first") - ``xray.Dataset.swap_dims`` allows for easily swapping one dimension @@ -4041,9 +4085,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': range(3), 'y': ('x', list('abc'))}) - ds - ds.swap_dims({'x': 'y'}) + ds = xray.Dataset({"x": range(3), "y": ("x", list("abc"))}) + ds + ds.swap_dims({"x": "y"}) This was possible in earlier versions of xray, but required some contortions. - ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now @@ -4089,8 +4133,8 @@ Breaking changes .. ipython:: python - lhs = xray.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - rhs = xray.DataArray([2, 3, 4], [('x', [1, 2, 3])]) + lhs = xray.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + rhs = xray.DataArray([2, 3, 4], [("x", [1, 2, 3])]) lhs + rhs :ref:`For dataset construction and merging`, we align based on the @@ -4098,14 +4142,14 @@ Breaking changes .. ipython:: python - xray.Dataset({'foo': lhs, 'bar': rhs}) + xray.Dataset({"foo": lhs, "bar": rhs}) :ref:`For update and __setitem__`, we align based on the **original** object: .. ipython:: python - lhs.coords['rhs'] = rhs + lhs.coords["rhs"] = rhs lhs - Aggregations like ``mean`` or ``median`` now skip missing values by default: @@ -4128,8 +4172,8 @@ Breaking changes .. ipython:: python - a = xray.DataArray([1, 2], coords={'c': 0}, dims='x') - b = xray.DataArray([1, 2], coords={'c': ('x', [0, 0])}, dims='x') + a = xray.DataArray([1, 2], coords={"c": 0}, dims="x") + b = xray.DataArray([1, 2], coords={"c": ("x", [0, 0])}, dims="x") (a + b).coords This functionality can be controlled through the ``compat`` option, which @@ -4140,9 +4184,10 @@ Breaking changes .. ipython:: python - time = xray.DataArray(pd.date_range('2000-01-01', periods=365), - dims='time', name='time') - counts = time.groupby('time.month').count() + time = xray.DataArray( + pd.date_range("2000-01-01", periods=365), dims="time", name="time" + ) + counts = time.groupby("time.month").count() counts.sel(month=2) Previously, you would need to use something like @@ -4152,8 +4197,8 @@ Breaking changes .. 
ipython:: python - ds = xray.Dataset({'t': pd.date_range('2000-01-01', periods=12, freq='M')}) - ds['t.season'] + ds = xray.Dataset({"t": pd.date_range("2000-01-01", periods=12, freq="M")}) + ds["t.season"] Previously, it returned numbered seasons 1 through 4. - We have updated our use of the terms of "coordinates" and "variables". What @@ -4176,8 +4221,8 @@ Enhancements .. ipython:: python - data = xray.DataArray([1, 2, 3], [('x', range(3))]) - data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + data = xray.DataArray([1, 2, 3], [("x", range(3))]) + data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") This will be especially useful once pandas 0.16 is released, at which point xray will immediately support reindexing with @@ -4196,15 +4241,15 @@ Enhancements makes it easy to drop explicitly listed variables or index labels: .. ipython:: python - :okwarning: + :okwarning: # drop variables - ds = xray.Dataset({'x': 0, 'y': 1}) - ds.drop('x') + ds = xray.Dataset({"x": 0, "y": 1}) + ds.drop("x") # drop index labels - arr = xray.DataArray([1, 2, 3], coords=[('x', list('abc'))]) - arr.drop(['a', 'c'], dim='x') + arr = xray.DataArray([1, 2, 3], coords=[("x", list("abc"))]) + arr.drop(["a", "c"], dim="x") - ``xray.Dataset.broadcast_equals`` has been added to correspond to the new ``compat`` option. @@ -4272,7 +4317,8 @@ Backwards incompatible changes .. ipython:: python from datetime import datetime - xray.Dataset({'t': [datetime(2000, 1, 1)]}) + + xray.Dataset({"t": [datetime(2000, 1, 1)]}) - xray now has support (including serialization to netCDF) for :py:class:`~pandas.TimedeltaIndex`. :py:class:`datetime.timedelta` objects @@ -4288,8 +4334,8 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'tmin': ([], 25, {'units': 'celsius'})}) - ds.tmin.units + ds = xray.Dataset({"tmin": ([], 25, {"units": "celsius"})}) + ds.tmin.units Tab-completion for these variables should work in editors such as IPython. However, setting variables or attributes in this fashion is not yet @@ -4299,7 +4345,7 @@ Enhancements .. ipython:: python - array = xray.DataArray(np.zeros(5), dims=['x']) + array = xray.DataArray(np.zeros(5), dims=["x"]) array[dict(x=slice(3))] = 1 array diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c7481e22b59..184aad579a2 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1279,18 +1279,35 @@ def _validate_append_dim_and_encoding( return if append_dim: if append_dim not in ds.dims: - raise ValueError(f"{append_dim} not a valid dimension in the Dataset") - for data_var in ds_to_append: - if data_var in ds: - if append_dim is None: + raise ValueError( + f"append_dim={append_dim!r} does not match any existing " + f"dataset dimensions {ds.dims}" + ) + for var_name in ds_to_append: + if var_name in ds: + if ds_to_append[var_name].dims != ds[var_name].dims: + raise ValueError( + f"variable {var_name!r} already exists with different " + f"dimension names {ds[var_name].dims} != " + f"{ds_to_append[var_name].dims}, but changing variable " + "dimensions is not supported by to_zarr()." + ) + existing_sizes = { + k: v for k, v in ds[var_name].sizes.items() if k != append_dim + } + new_sizes = { + k: v for k, v in ds_to_append[var_name].sizes.items() if k != append_dim + } + if existing_sizes != new_sizes: raise ValueError( - "variable '{}' already exists, but append_dim " - "was not set".format(data_var) + f"variable {var_name!r} already exists with different " + "dimension sizes: {existing_sizes} != {new_sizes}. 
" + "to_zarr() only supports changing dimension sizes when " + f"explicitly appending, but append_dim={append_dim!r}." ) - if data_var in encoding.keys(): + if var_name in encoding.keys(): raise ValueError( - "variable '{}' already exists, but encoding was" - "provided".format(data_var) + f"variable {var_name!r} already exists, but encoding was provided" ) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c262dae2811..de6b627447e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -65,7 +65,7 @@ def __getitem__(self, key): # could possibly have a work-around for 0d data here -def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): +def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name): """ Given encoding chunks (possibly None) and variable chunks (possibly None) """ @@ -88,15 +88,16 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): if var_chunks and enc_chunks is None: if any(len(set(chunks[:-1])) > 1 for chunks in var_chunks): raise ValueError( - "Zarr requires uniform chunk sizes except for final chunk." - " Variable dask chunks %r are incompatible. Consider " - "rechunking using `chunk()`." % (var_chunks,) + "Zarr requires uniform chunk sizes except for final chunk. " + f"Variable named {name!r} has incompatible dask chunks: {var_chunks!r}. " + "Consider rechunking using `chunk()`." ) if any((chunks[0] < chunks[-1]) for chunks in var_chunks): raise ValueError( "Final chunk of Zarr array must be the same size or smaller " - "than the first. Variable Dask chunks %r are incompatible. " - "Consider rechunking using `chunk()`." % var_chunks + f"than the first. Variable named {name!r} has incompatible Dask chunks {var_chunks!r}." + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) # return the first chunk for each dimension return tuple(chunk[0] for chunk in var_chunks) @@ -114,13 +115,15 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): if len(enc_chunks_tuple) != ndim: # throw away encoding chunks, start over - return _determine_zarr_chunks(None, var_chunks, ndim) + return _determine_zarr_chunks(None, var_chunks, ndim, name) for x in enc_chunks_tuple: if not isinstance(x, int): raise TypeError( - "zarr chunks must be an int or a tuple of ints. " - "Instead found %r" % (enc_chunks_tuple,) + "zarr chunk sizes specified in `encoding['chunks']` " + "must be an int or a tuple of ints. " + f"Instead found encoding['chunks']={enc_chunks_tuple!r} " + f"for variable named {name!r}." ) # if there are chunks in encoding and the variable data is a numpy array, @@ -142,19 +145,22 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim): for dchunk in dchunks[:-1]: if dchunk % zchunk: raise NotImplementedError( - "Specified zarr chunks %r would overlap multiple dask " - "chunks %r. This is not implemented in xarray yet. " - " Consider rechunking the data using " - "`chunk()` or specifying different chunks in encoding." - % (enc_chunks_tuple, var_chunks) + f"Specified zarr chunks encoding['chunks']={enc_chunks_tuple!r} for " + f"variable named {name!r} would overlap multiple dask chunks {var_chunks!r}. " + "This is not implemented in xarray yet. " + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) if dchunks[-1] > zchunk: raise ValueError( "Final chunk of Zarr array must be the same size or " - "smaller than the first. The specified Zarr chunk " - "encoding is %r, but %r in variable Dask chunks %r is " - "incompatible. 
Consider rechunking using `chunk()`." - % (enc_chunks_tuple, dchunks, var_chunks) + "smaller than the first. " + f"Specified Zarr chunk encoding['chunks']={enc_chunks_tuple}, " + f"for variable named {name!r} " + f"but {dchunks} in the variable's Dask chunks {var_chunks} is " + "incompatible with this encoding. " + "Consider either rechunking using `chunk()` or instead deleting " + "or modifying `encoding['chunks']`." ) return enc_chunks_tuple @@ -177,7 +183,7 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key): return dimensions, attributes -def extract_zarr_variable_encoding(variable, raise_on_invalid=False): +def extract_zarr_variable_encoding(variable, raise_on_invalid=False, name=None): """ Extract zarr encoding dictionary from xarray Variable @@ -207,7 +213,7 @@ def extract_zarr_variable_encoding(variable, raise_on_invalid=False): del encoding[k] chunks = _determine_zarr_chunks( - encoding.get("chunks"), variable.chunks, variable.ndim + encoding.get("chunks"), variable.chunks, variable.ndim, name ) encoding["chunks"] = chunks return encoding @@ -439,21 +445,28 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No fill_value = attrs.pop("_FillValue", None) if v.encoding == {"_FillValue": None} and fill_value is None: v.encoding = {} - if name in self.ds: + + if self.append_dim is not None and self.append_dim in dims: + # resize existing variable + zarr_array = self.ds[name] + append_axis = dims.index(self.append_dim) + + new_region = [slice(None)] * len(dims) + new_region[append_axis] = slice(zarr_array.shape[append_axis], None) + region = tuple(new_region) + + new_shape = list(zarr_array.shape) + new_shape[append_axis] += v.shape[append_axis] + zarr_array.resize(new_shape) + elif name in self.ds: + # override existing variable zarr_array = self.ds[name] - if self.append_dim in dims: - # this is the DataArray that has append_dim as a - # dimension - append_axis = dims.index(self.append_dim) - new_shape = list(zarr_array.shape) - new_shape[append_axis] += v.shape[append_axis] - new_region = [slice(None)] * len(new_shape) - new_region[append_axis] = slice(zarr_array.shape[append_axis], None) - zarr_array.resize(new_shape) - writer.add(v.data, zarr_array, region=tuple(new_region)) + region = None else: # new variable - encoding = extract_zarr_variable_encoding(v, raise_on_invalid=check) + encoding = extract_zarr_variable_encoding( + v, raise_on_invalid=check, name=vn + ) encoded_attrs = {} # the magic for storing the hidden dimension data encoded_attrs[DIMENSION_KEY] = dims @@ -466,7 +479,9 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No name, shape=shape, dtype=dtype, fill_value=fill_value, **encoding ) zarr_array.attrs.put(encoded_attrs) - writer.add(v.data, zarr_array) + region = None + + writer.add(v.data, zarr_array, region=region) def close(self): if self._consolidate_on_close: diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 965ddd8f043..d923f1ad088 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -155,7 +155,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): if use_cftime is None: try: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) - except (OutOfBoundsDatetime, OverflowError): + except (KeyError, OutOfBoundsDatetime, OverflowError): dates = _decode_datetime_with_cftime( flat_num_dates.astype(np.float), units, calendar ) diff --git a/xarray/convert.py b/xarray/convert.py index 4974a55d8e2..0c86b090f34 100644 --- 
a/xarray/convert.py +++ b/xarray/convert.py @@ -229,11 +229,11 @@ def _iris_cell_methods_to_str(cell_methods_obj): """ cell_methods = [] for cell_method in cell_methods_obj: - names = "".join([f"{n}: " for n in cell_method.coord_names]) + names = "".join(f"{n}: " for n in cell_method.coord_names) intervals = " ".join( - [f"interval: {interval}" for interval in cell_method.intervals] + f"interval: {interval}" for interval in cell_method.intervals ) - comments = " ".join([f"comment: {comment}" for comment in cell_method.comments]) + comments = " ".join(f"comment: {comment}" for comment in cell_method.comments) extra = " ".join([intervals, comments]).strip() if extra: extra = f" ({extra})" diff --git a/xarray/core/common.py b/xarray/core/common.py index 8f6d57e9f12..e343f342040 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -25,7 +25,7 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .rolling_exp import RollingExp -from .utils import Frozen, either_dict_or_kwargs +from .utils import Frozen, either_dict_or_kwargs, is_scalar # Used as a sentinel value to indicate a all dimensions ALL_DIMS = ... @@ -447,7 +447,7 @@ def assign_coords(self, coords=None, **coords_kwargs): New coordinate can also be attached to an existing dimension: >>> lon_2 = np.array([300, 289, 0, 1]) - >>> da.assign_coords(lon_2=('lon', lon_2)) + >>> da.assign_coords(lon_2=("lon", lon_2)) array([0.28298 , 0.667347, 0.657938, 0.177683]) Coordinates: @@ -456,7 +456,7 @@ def assign_coords(self, coords=None, **coords_kwargs): Note that the same result can also be obtained with a dict e.g. - >>> _ = da.assign_coords({"lon_2": ('lon', lon_2)}) + >>> _ = da.assign_coords({"lon_2": ("lon", lon_2)}) Notes ----- @@ -1397,6 +1397,9 @@ def full_like(other, fill_value, dtype: DTypeLike = None): from .dataset import Dataset from .variable import Variable + if not is_scalar(fill_value): + raise ValueError(f"fill_value must be scalar. Received {fill_value} instead.") + if isinstance(other, Dataset): data_vars = { k: _full_like_variable(v, fill_value, dtype) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 1937b31015e..28bf818e4a3 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1191,10 +1191,10 @@ def dot(*arrays, dims=None, **kwargs): # construct einsum subscripts, such as '...abc,...ab->...c' # Note: input_core_dims are always moved to the last position subscripts_list = [ - "..." + "".join([dim_map[d] for d in ds]) for ds in input_core_dims + "..." + "".join(dim_map[d] for d in ds) for ds in input_core_dims ] subscripts = ",".join(subscripts_list) - subscripts += "->..." + "".join([dim_map[d] for d in output_core_dims[0]]) + subscripts += "->..." + "".join(dim_map[d] for d in output_core_dims[0]) join = OPTIONS["arithmetic_join"] # using "inner" emulates `(a * b).sum()` for all joins (except "exact") diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 63cba53b689..236938bac74 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1,6 +1,5 @@ import datetime import functools -import warnings from numbers import Number from typing import ( TYPE_CHECKING, @@ -1366,7 +1365,9 @@ def interp( first. If True, x has to be an array of monotonically increasing values. kwargs: dictionary - Additional keyword passed to scipy's interpolator. + Additional keyword arguments passed to scipy's interpolator. 
Valid + options and their behavior depend on if 1-dimensional or + multi-dimensional interpolation is used. ``**coords_kwargs`` : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. @@ -1913,7 +1914,7 @@ def to_unstacked_dataset(self, dim, level=0): # unstacked dataset return Dataset(data_dict) - def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArray": + def transpose(self, *dims: Hashable, transpose_coords: bool = True) -> "DataArray": """Return a new DataArray object with transposed dimensions. Parameters @@ -1921,7 +1922,7 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra *dims : hashable, optional By default, reverse the dimensions. Otherwise, reorder the dimensions to this order. - transpose_coords : boolean, optional + transpose_coords : boolean, default True If True, also transpose the coordinates of this DataArray. Returns @@ -1950,15 +1951,6 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra coords[name] = coord.variable.transpose(*coord_dims) return self._replace(variable, coords) else: - if transpose_coords is None and any(self[c].ndim > 1 for c in self.coords): - warnings.warn( - "This DataArray contains multi-dimensional " - "coordinates. In the future, these coordinates " - "will be transposed as well unless you specify " - "transpose_coords=False.", - FutureWarning, - stacklevel=2, - ) return self._replace(variable) @property @@ -2098,6 +2090,7 @@ def interpolate_na( max_gap: Union[ int, float, str, pd.Timedelta, np.timedelta64, datetime.timedelta ] = None, + keep_attrs: bool = None, **kwargs: Any, ) -> "DataArray": """Fill in NaNs by interpolating according to different methods. @@ -2152,6 +2145,10 @@ def interpolate_na( * x (x) int64 0 1 2 3 4 5 6 7 8 The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively + keep_attrs : bool, default True + If True, the dataarray's attributes (`attrs`) will be copied from + the original object to the new one. If False, the new + object will be returned without attributes. kwargs : dict, optional parameters passed verbatim to the underlying interpolation function @@ -2174,6 +2171,7 @@ def interpolate_na( limit=limit, use_coordinate=use_coordinate, max_gap=max_gap, + keep_attrs=keep_attrs, **kwargs, ) @@ -3252,27 +3250,25 @@ def map_blocks( func: "Callable[..., T_DSorDA]", args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union["DataArray", "Dataset"] = None, ) -> "T_DSorDA": """ - Apply a function to each chunk of this DataArray. This method is experimental - and its signature may change. + Apply a function to each block of this DataArray. + + .. warning:: + This method is experimental and its signature may change. Parameters ---------- func: callable - User-provided function that accepts a DataArray as its first parameter. The - function will receive a subset of this DataArray, corresponding to one chunk - along each chunked dimension. ``func`` will be executed as - ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like this array - but has sizes 0, to determine properties of the returned object such as - dtype, variable names, new dimensions and new indexes (if any). + User-provided function that accepts a DataArray as its first + parameter. The function will receive a subset, i.e. one block, of this DataArray + (see below), corresponding to one chunk along each chunked dimension. 
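+        A block is formed by selecting one chunk along every chunked dimension;
+        unchunked dimensions are passed to ``func`` whole.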
``func`` will be + executed as ``func(block_subset, *args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. args: Sequence Passed verbatim to func after unpacking, after the sliced DataArray. xarray objects, if any, will not be split by chunks. Passing dask collections is @@ -3280,6 +3276,12 @@ def map_blocks( kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be split by chunks. Passing dask collections is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like 'obj' but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, new dimensions and new indexes (if any). + 'template' must be provided if the function changes the size of existing dimensions. Returns ------- @@ -3302,7 +3304,7 @@ def map_blocks( """ from .parallel import map_blocks - return map_blocks(func, self, args, kwargs) + return map_blocks(func, self, args, kwargs, template) def polyfit( self, @@ -3487,17 +3489,18 @@ def pad( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0,1,2])]) - >>> arr.pad(x=(1,2), constant_values=0) + >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0, 1, 2])]) + >>> arr.pad(x=(1, 2), constant_values=0) array([0, 5, 6, 7, 0, 0]) Coordinates: * x (x) float64 nan 0.0 1.0 2.0 nan nan - >>> da = xr.DataArray([[0,1,2,3], [10,11,12,13]], - dims=["x", "y"], - coords={"x": [0,1], "y": [10, 20 ,30, 40], "z": ("x", [100, 200])} - ) + >>> da = xr.DataArray( + ... [[0, 1, 2, 3], [10, 11, 12, 13]], + ... dims=["x", "y"], + ... coords={"x": [0, 1], "y": [10, 20, 30, 40], "z": ("x", [100, 200])}, + ... ) >>> da.pad(x=1) array([[nan, nan, nan, nan], @@ -3584,8 +3587,9 @@ def idxmin( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.min() array(-2) @@ -3596,13 +3600,15 @@ def idxmin( array('e', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... ) >>> array.min(dim="x") array([-2., -4., 1.]) @@ -3678,8 +3684,9 @@ def idxmax( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.max() array(2) @@ -3690,13 +3697,15 @@ def idxmax( array('b', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... 
[np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... ) >>> array.max(dim="x") array([2., 2., 1.]) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dd1e31cc61a..3a55f3eca27 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1055,9 +1055,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": structure of the original object, but with the new data. Original object is unaffected. - >>> ds.copy( - ... data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]} - ... ) + >>> ds.copy(data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]}) Dimensions: (dim_0: 2, dim_1: 3, x: 2) Coordinates: @@ -1537,7 +1535,7 @@ def to_netcdf( ``dask.delayed.Delayed`` object that can be computed later. invalid_netcdf: boolean Only valid along with engine='h5netcdf'. If True, allow writing - hdf5 files which are valid netcdf as described in + hdf5 files which are invalid netcdf as described in https://github.com/shoyer/h5netcdf. Default: False. """ if encoding is None: @@ -1581,7 +1579,7 @@ def to_zarr( mode : {'w', 'w-', 'a', None} Persistence mode: 'w' means create (overwrite if exists); 'w-' means create (fail if exists); - 'a' means append (create if does not exist). + 'a' means override existing variables (create if does not exist). If ``append_dim`` is set, ``mode`` can be omitted as it is internally set to ``'a'``. Otherwise, ``mode`` will default to `w-` if not set. @@ -1600,7 +1598,8 @@ def to_zarr( If True, apply zarr's `consolidate_metadata` function to the store after writing. append_dim: hashable, optional - If set, the dimension on which the data will be appended. + If set, the dimension along which the data will be appended. All + other dimensions on overriden variables must remain the same size. References ---------- @@ -1768,7 +1767,7 @@ def maybe_chunk(name, var, chunks): return self._replace(variables) def _validate_indexers( - self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise", + self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise" ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: """ Here we make sure + indexer has a valid keys @@ -2570,7 +2569,9 @@ def interp( coordinates are assumed to be an array of monotonically increasing values. kwargs: dictionary, optional - Additional keyword passed to scipy's interpolator. + Additional keyword arguments passed to scipy's interpolator. Valid + options and their behavior depend on if 1-dimensional or + multi-dimensional interpolation is used. **coords_kwargs : {dim: coordinate, ...}, optional The keyword arguments form of ``coords``. One of coords or coords_kwargs must be provided. @@ -4596,6 +4597,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas See also -------- xarray.DataArray.from_series + pandas.DataFrame.to_xarray """ # TODO: Add an option to remove dimensions along which the variables # are constant, to enable consistent serialization to/from a dataframe, @@ -5707,27 +5709,25 @@ def map_blocks( func: "Callable[..., T_DSorDA]", args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union["DataArray", "Dataset"] = None, ) -> "T_DSorDA": """ - Apply a function to each chunk of this Dataset. This method is experimental and - its signature may change. + Apply a function to each block of this Dataset. + + .. warning:: + This method is experimental and its signature may change. 
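+        A block is the subset of this Dataset formed by selecting one chunk along
+        every chunked dimension; unchunked dimensions are passed to ``func`` whole.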
Parameters ---------- func: callable - User-provided function that accepts a Dataset as its first parameter. The - function will receive a subset of this Dataset, corresponding to one chunk - along each chunked dimension. ``func`` will be executed as - ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like this - Dataset but has sizes 0, to determine properties of the returned object such - as dtype, variable names, new dimensions and new indexes (if any). + User-provided function that accepts a Dataset as its first + parameter. The function will receive a subset, i.e. one block, of this Dataset + (see below), corresponding to one chunk along each chunked dimension. ``func`` will be + executed as ``func(block_subset, *args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. args: Sequence Passed verbatim to func after unpacking, after the sliced DataArray. xarray objects, if any, will not be split by chunks. Passing dask collections is @@ -5735,6 +5735,12 @@ def map_blocks( kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be split by chunks. Passing dask collections is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like 'obj' but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, new dimensions and new indexes (if any). + 'template' must be provided if the function changes the size of existing dimensions. Returns ------- @@ -5757,7 +5763,7 @@ def map_blocks( """ from .parallel import map_blocks - return map_blocks(func, self, args, kwargs) + return map_blocks(func, self, args, kwargs, template) def polyfit( self, @@ -5932,7 +5938,7 @@ def polyfit( "The number of data points must exceed order to scale the covariance matrix." ) fac = residuals / (x.shape[0] - order) - covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j"),) * fac + covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j")) * fac variables[name + "polyfit_covariance"] = covariance return Dataset(data_vars=variables, attrs=self.attrs.copy()) @@ -6058,8 +6064,8 @@ def pad( Examples -------- - >>> ds = xr.Dataset({'foo': ('x', range(5))}) - >>> ds.pad(x=(1,2)) + >>> ds = xr.Dataset({"foo": ("x", range(5))}) + >>> ds.pad(x=(1, 2)) Dimensions: (x: 8) Dimensions without coordinates: x @@ -6153,17 +6159,20 @@ def idxmin( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.min(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... 
) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.min(dim="x") Dimensions: (y: 3) Coordinates: @@ -6171,7 +6180,7 @@ def idxmin( Data variables: int int64 -2 float (y) float64 -2.0 -4.0 1.0 - >>> ds.argmin(dim='x') + >>> ds.argmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6179,7 +6188,7 @@ def idxmin( Data variables: int int64 4 float (y) int64 4 0 2 - >>> ds.idxmin(dim='x') + >>> ds.idxmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6195,7 +6204,7 @@ def idxmin( skipna=skipna, fill_value=fill_value, keep_attrs=keep_attrs, - ), + ) ) def idxmax( @@ -6248,17 +6257,20 @@ def idxmax( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.max(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... ) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.max(dim="x") Dimensions: (y: 3) Coordinates: @@ -6266,7 +6278,7 @@ def idxmax( Data variables: int int64 2 float (y) float64 2.0 2.0 1.0 - >>> ds.argmax(dim='x') + >>> ds.argmax(dim="x") Dimensions: (y: 3) Coordinates: @@ -6274,7 +6286,7 @@ def idxmax( Data variables: int int64 1 float (y) int64 0 2 2 - >>> ds.idxmax(dim='x') + >>> ds.idxmax(dim="x") Dimensions: (y: 3) Coordinates: @@ -6290,7 +6302,7 @@ def idxmax( skipna=skipna, fill_value=fill_value, keep_attrs=keep_attrs, - ), + ) ) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 534d253ecc8..d6732fc182e 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -298,12 +298,10 @@ def _summarize_coord_multiindex(coord, col_width, marker): def _summarize_coord_levels(coord, col_width, marker="-"): return "\n".join( - [ - summarize_variable( - lname, coord.get_level_variable(lname), col_width, marker=marker - ) - for lname in coord.level_names - ] + summarize_variable( + lname, coord.get_level_variable(lname), col_width, marker=marker + ) + for lname in coord.level_names ) @@ -562,7 +560,7 @@ def extra_items_repr(extra_keys, mapping, ab_side): for m in (a_mapping, b_mapping): attr_s = "\n".join( - [summarize_attr(ak, av) for ak, av in m[k].attrs.items()] + summarize_attr(ak, av) for ak, av in m[k].attrs.items() ) attrs_summary.append(attr_s) diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 8678a58b381..6e345582ed0 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -183,7 +183,8 @@ def array_section(obj): # "unique" id to expand/collapse the section data_id = "section-" + str(uuid.uuid4()) collapsed = "" - preview = escape(inline_variable_array_repr(obj.variable, max_width=70)) + variable = getattr(obj, "variable", obj) + preview = escape(inline_variable_array_repr(variable, max_width=70)) data_repr = short_data_repr_html(obj) data_icon = _icon("icon-database") diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 5a5f4c0d296..299cb8ec4fa 100644 --- 
a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -29,7 +29,7 @@ def check_reduce_dims(reduce_dims, dimensions): if reduce_dims is not ...: if is_scalar(reduce_dims): reduce_dims = [reduce_dims] - if any([dim not in dimensions for dim in reduce_dims]): + if any(dim not in dimensions for dim in reduce_dims): raise ValueError( "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r." % (reduce_dims, dimensions) @@ -272,8 +272,8 @@ def __init__( squeeze=False, grouper=None, bins=None, - restore_coord_dims=None, - cut_kwargs={}, + restore_coord_dims=True, + cut_kwargs=None, ): """Create a GroupBy object @@ -292,13 +292,15 @@ def __init__( bins : array-like, optional If `bins` is specified, the groups will be discretized into the specified bins by `pandas.cut`. - restore_coord_dims : bool, optional + restore_coord_dims : bool, default True If True, also restore the dimension order of multi-dimensional coordinates. cut_kwargs : dict, optional Extra keyword arguments to pass to `pandas.cut` """ + if cut_kwargs is None: + cut_kwargs = {} from .dataarray import DataArray if grouper is not None and bins is not None: @@ -387,21 +389,6 @@ def __init__( "Failed to group data. Are you grouping by a variable that is all NaN?" ) - if ( - isinstance(obj, DataArray) - and restore_coord_dims is None - and any(obj[c].ndim > 1 for c in obj.coords) - ): - warnings.warn( - "This DataArray contains multi-dimensional " - "coordinates. In the future, the dimension order " - "of these coordinates will be restored as well " - "unless you specify restore_coord_dims=False.", - FutureWarning, - stacklevel=2, - ) - restore_coord_dims = False - # specification for the groupby operation self._obj = obj self._group = group diff --git a/xarray/core/merge.py b/xarray/core/merge.py index fea94246471..35b77d700a0 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -841,7 +841,7 @@ def merge( from .dataarray import DataArray from .dataset import Dataset - dict_like_objects = list() + dict_like_objects = [] for obj in objects: if not isinstance(obj, (DataArray, Dataset, dict)): raise TypeError( diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 40f010b3514..f973b4a5468 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -11,6 +11,7 @@ from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc from .duck_array_ops import dask_array_type, datetime_to_numeric, timedelta_to_numeric +from .options import _get_keep_attrs from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables @@ -294,6 +295,7 @@ def interp_na( method: str = "linear", limit: int = None, max_gap: Union[int, float, str, pd.Timedelta, np.timedelta64, dt.timedelta] = None, + keep_attrs: bool = None, **kwargs, ): """Interpolate values according to different methods. 
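# Illustrative sketch (not part of this patch) of what the new ``keep_attrs``
# argument to interp_na enables: attributes are now taken from the DataArray
# being interpolated rather than from the index, are kept by default, and can
# be dropped on request. The array below is hypothetical example data.
import numpy as np
import xarray as xr

da = xr.DataArray(
    [0.0, np.nan, 2.0], dims="x", coords={"x": [0, 1, 2]}, attrs={"units": "m"}
)
assert da.interpolate_na(dim="x").attrs == {"units": "m"}  # kept by default
assert da.interpolate_na(dim="x", keep_attrs=False).attrs == {}  # dropped on request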
@@ -330,19 +332,22 @@ def interp_na( interp_class, kwargs = _get_interpolator(method, **kwargs) interpolator = partial(func_interpolate_na, interp_class, **kwargs) + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + with warnings.catch_warnings(): warnings.filterwarnings("ignore", "overflow", RuntimeWarning) warnings.filterwarnings("ignore", "invalid value", RuntimeWarning) arr = apply_ufunc( interpolator, - index, self, + index, input_core_dims=[[dim], [dim]], output_core_dims=[[dim]], output_dtypes=[self.dtype], dask="parallelized", vectorize=True, - keep_attrs=True, + keep_attrs=keep_attrs, ).transpose(*self.dims) if limit is not None: @@ -359,8 +364,9 @@ def interp_na( return arr -def func_interpolate_na(interpolator, x, y, **kwargs): +def func_interpolate_na(interpolator, y, x, **kwargs): """helper function to apply interpolation along 1 dimension""" + # reversed arguments are so that attrs are preserved from da, not index # it would be nice if this wasn't necessary, works around: # "ValueError: assignment destination is read-only" in assignment below out = y.copy() diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 6f1668f698f..d91dfb4a275 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -31,6 +31,30 @@ T_DSorDA = TypeVar("T_DSorDA", DataArray, Dataset) +def check_result_variables( + result: Union[DataArray, Dataset], expected: Mapping[str, Any], kind: str +): + + if kind == "coords": + nice_str = "coordinate" + elif kind == "data_vars": + nice_str = "data" + + # check that coords and data variables are as expected + missing = expected[kind] - set(getattr(result, kind)) + if missing: + raise ValueError( + "Result from applying user function does not contain " + f"{nice_str} variables {missing}." + ) + extra = set(getattr(result, kind)) - expected[kind] + if extra: + raise ValueError( + "Result from applying user function has unexpected " + f"{nice_str} variables {extra}." + ) + + def dataset_to_dataarray(obj: Dataset) -> DataArray: if not isinstance(obj, Dataset): raise TypeError("Expected Dataset, got %s" % type(obj)) @@ -80,7 +104,8 @@ def infer_template( template = func(*meta_args, **kwargs) except Exception as e: raise Exception( - "Cannot infer object returned from running user provided function." + "Cannot infer object returned from running user provided function. " + "Please supply the 'template' kwarg to map_blocks." ) from e if not isinstance(template, (Dataset, DataArray)): @@ -102,14 +127,24 @@ def make_dict(x: Union[DataArray, Dataset]) -> Dict[Hashable, Any]: return {k: v.data for k, v in x.variables.items()} +def _get_chunk_slicer(dim: Hashable, chunk_index: Mapping, chunk_bounds: Mapping): + if dim in chunk_index: + which_chunk = chunk_index[dim] + return slice(chunk_bounds[dim][which_chunk], chunk_bounds[dim][which_chunk + 1]) + return slice(None) + + def map_blocks( func: Callable[..., T_DSorDA], obj: Union[DataArray, Dataset], args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union[DataArray, Dataset] = None, ) -> T_DSorDA: - """Apply a function to each chunk of a DataArray or Dataset. This function is - experimental and its signature may change. + """Apply a function to each block of a DataArray or Dataset. + + .. warning:: + This function is experimental and its signature may change. Parameters ---------- @@ -119,14 +154,10 @@ def map_blocks( corresponding to one chunk along each chunked dimension. ``func`` will be executed as ``func(obj_subset, *args, **kwargs)``. 
- The function will be first run on mocked-up data, that looks like 'obj' but - has sizes 0, to determine properties of the returned object such as dtype, - variable names, new dimensions and new indexes (if any). - This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. + obj: DataArray, Dataset Passed to the function as its first argument, one dask chunk at a time. args: Sequence @@ -135,6 +166,15 @@ def map_blocks( kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be split by chunks. Passing dask collections is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like 'obj' but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, attributes, new dimensions and new indexes (if any). + 'template' must be provided if the function changes the size of existing dimensions. + When provided, `attrs` on variables in `template` are copied over to the result. Any + `attrs` set by `func` will be ignored. + Returns ------- @@ -201,22 +241,47 @@ def map_blocks( * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 """ - def _wrapper(func, obj, to_array, args, kwargs): + def _wrapper(func, obj, to_array, args, kwargs, expected): + check_shapes = dict(obj.dims) + check_shapes.update(expected["shapes"]) + if to_array: obj = dataset_to_dataarray(obj) result = func(obj, *args, **kwargs) + # check all dims are present + missing_dimensions = set(expected["shapes"]) - set(result.sizes) + if missing_dimensions: + raise ValueError( + f"Dimensions {missing_dimensions} missing on returned object." + ) + + # check that index lengths and values are as expected for name, index in result.indexes.items(): - if name in obj.indexes: - if len(index) != len(obj.indexes[name]): + if name in check_shapes: + if len(index) != check_shapes[name]: raise ValueError( - "Length of the %r dimension has changed. This is not allowed." - % name + f"Received dimension {name!r} of length {len(index)}. Expected length {check_shapes[name]}." ) + if name in expected["indexes"]: + expected_index = expected["indexes"][name] + if not index.equals(expected_index): + raise ValueError( + f"Expected index {name!r} to be {expected_index!r}. Received {index!r} instead." + ) + + # check that all expected variables were returned + check_result_variables(result, expected, "coords") + if isinstance(result, Dataset): + check_result_variables(result, expected, "data_vars") return make_dict(result) + if template is not None and not isinstance(template, (DataArray, Dataset)): + raise TypeError( + f"template must be a DataArray or Dataset. Received {type(template).__name__} instead." 
+ ) if not isinstance(args, Sequence): raise TypeError("args must be a sequence (for example, a list or tuple).") if kwargs is None: @@ -248,8 +313,38 @@ def _wrapper(func, obj, to_array, args, kwargs): input_is_array = False input_chunks = dataset.chunks + dataset_indexes = set(dataset.indexes) + if template is None: + # infer template by providing zero-shaped arrays + template = infer_template(func, obj, *args, **kwargs) + template_indexes = set(template.indexes) + preserved_indexes = template_indexes & dataset_indexes + new_indexes = template_indexes - dataset_indexes + indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes} + indexes.update({k: template.indexes[k] for k in new_indexes}) + output_chunks = { + dim: input_chunks[dim] for dim in template.dims if dim in input_chunks + } + + else: + # template xarray object has been provided with proper sizes and chunk shapes + template_indexes = set(template.indexes) + indexes = {dim: dataset.indexes[dim] for dim in dataset_indexes} + indexes.update({k: template.indexes[k] for k in template_indexes}) + if isinstance(template, DataArray): + output_chunks = dict(zip(template.dims, template.chunks)) # type: ignore + else: + output_chunks = template.chunks # type: ignore + + for dim in output_chunks: + if dim in input_chunks and len(input_chunks[dim]) != len(output_chunks[dim]): + raise ValueError( + "map_blocks requires that one block of the input maps to one block of output. " + f"Expected number of output chunks along dimension {dim!r} to be {len(input_chunks[dim])}. " + f"Received {len(output_chunks[dim])} instead. Please provide template if not provided, or " + "fix the provided template." + ) - template: Union[DataArray, Dataset] = infer_template(func, obj, *args, **kwargs) if isinstance(template, DataArray): result_is_array = True template_name = template.name @@ -261,13 +356,6 @@ def _wrapper(func, obj, to_array, args, kwargs): f"func output must be DataArray or Dataset; got {type(template)}" ) - template_indexes = set(template.indexes) - dataset_indexes = set(dataset.indexes) - preserved_indexes = template_indexes & dataset_indexes - new_indexes = template_indexes - dataset_indexes - indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes} - indexes.update({k: template.indexes[k] for k in new_indexes}) - # We're building a new HighLevelGraph hlg. We'll have one new layer # for each variable in the dataset, which is the result of the # func applied to the values. 
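# Rough usage sketch for the new ``template`` argument (patterned on the
# test_map_blocks_da_ds_with_template test added in test_dask.py; the array
# here is hypothetical and dask must be installed). Three input chunks along
# "x" must map to three output chunks, so a function returning one element per
# block needs a template with the matching shape, index values and chunking:
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(12), dims="x", coords={"x": np.arange(12)}).chunk({"x": 4})
template = da.isel(x=[1, 5, 9]).chunk({"x": 1})  # one element per input block
actual = xr.map_blocks(lambda block: block.isel(x=[1]), da, template=template)
xr.testing.assert_identical(actual.compute(), template.compute())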
@@ -281,13 +369,16 @@ def _wrapper(func, obj, to_array, args, kwargs): # map dims to list of chunk indexes ichunk = {dim: range(len(chunks_v)) for dim, chunks_v in input_chunks.items()} # mapping from chunk index to slice bounds - chunk_index_bounds = { + input_chunk_bounds = { dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in input_chunks.items() } + output_chunk_bounds = { + dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in output_chunks.items() + } # iterate over all possible chunk combinations for v in itertools.product(*ichunk.values()): - chunk_index_dict = dict(zip(dataset.dims, v)) + chunk_index = dict(zip(dataset.dims, v)) # this will become [[name1, variable1], # [name2, variable2], @@ -302,9 +393,9 @@ def _wrapper(func, obj, to_array, args, kwargs): # recursively index into dask_keys nested list to get chunk chunk = variable.__dask_keys__() for dim in variable.dims: - chunk = chunk[chunk_index_dict[dim]] + chunk = chunk[chunk_index[dim]] - chunk_variable_task = (f"{gname}-{chunk[0]}",) + v + chunk_variable_task = (f"{gname}-{name}-{chunk[0]}",) + v graph[chunk_variable_task] = ( tuple, [variable.dims, chunk, variable.attrs], @@ -312,15 +403,10 @@ def _wrapper(func, obj, to_array, args, kwargs): else: # non-dask array with possibly chunked dimensions # index into variable appropriately - subsetter = {} - for dim in variable.dims: - if dim in chunk_index_dict: - which_chunk = chunk_index_dict[dim] - subsetter[dim] = slice( - chunk_index_bounds[dim][which_chunk], - chunk_index_bounds[dim][which_chunk + 1], - ) - + subsetter = { + dim: _get_chunk_slicer(dim, chunk_index, input_chunk_bounds) + for dim in variable.dims + } subset = variable.isel(subsetter) chunk_variable_task = ( "{}-{}".format(gname, dask.base.tokenize(subset)), @@ -336,6 +422,20 @@ def _wrapper(func, obj, to_array, args, kwargs): else: data_vars.append([name, chunk_variable_task]) + # expected["shapes", "coords", "data_vars", "indexes"] are used to raise nice error messages in _wrapper + expected = {} + # input chunk 0 along a dimension maps to output chunk 0 along the same dimension + # even if length of dimension is changed by the applied function + expected["shapes"] = { + k: output_chunks[k][v] for k, v in chunk_index.items() if k in output_chunks + } + expected["data_vars"] = set(template.data_vars.keys()) # type: ignore + expected["coords"] = set(template.coords.keys()) # type: ignore + expected["indexes"] = { + dim: indexes[dim][_get_chunk_slicer(dim, chunk_index, output_chunk_bounds)] + for dim in indexes + } + from_wrapper = (gname,) + v graph[from_wrapper] = ( _wrapper, @@ -344,6 +444,7 @@ def _wrapper(func, obj, to_array, args, kwargs): input_is_array, args, kwargs, + expected, ) # mapping from variable name to dask graph key @@ -356,10 +457,11 @@ def _wrapper(func, obj, to_array, args, kwargs): key: Tuple[Any, ...] 
= (gname_l,) for dim in variable.dims: - if dim in chunk_index_dict: - key += (chunk_index_dict[dim],) + if dim in chunk_index: + key += (chunk_index[dim],) else: # unchunked dimensions in the input have one chunk in the result + # output can have new dimensions with exactly one chunk key += (0,) # We're adding multiple new layers to the graph: @@ -382,8 +484,8 @@ def _wrapper(func, obj, to_array, args, kwargs): dims = template[name].dims var_chunks = [] for dim in dims: - if dim in input_chunks: - var_chunks.append(input_chunks[dim]) + if dim in output_chunks: + var_chunks.append(output_chunks[dim]) elif dim in indexes: var_chunks.append((len(indexes[dim]),)) elif dim in template.dims: diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index f2e4518e0dc..f2e22329fc8 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -55,4 +55,4 @@ def count_not_none(*args) -> int: Copied from pandas.core.common.count_not_none (not part of the public API) """ - return sum([arg is not None for arg in args]) + return sum(arg is not None for arg in args) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 68e823ca426..e19132b1b06 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2412,7 +2412,7 @@ def assert_unique_multiindex_level_names(variables): duplicate_names = [v for v in level_names.values() if len(v) > 1] if duplicate_names: - conflict_str = "\n".join([", ".join(v) for v in duplicate_names]) + conflict_str = "\n".join(", ".join(v) for v in duplicate_names) raise ValueError("conflicting MultiIndex level name(s):\n%s" % conflict_str) # Check confliction between level names and dimensions GH:2299 for k, v in variables.items(): diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 4657bee9415..4d6033bf00d 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -30,7 +30,7 @@ def _infer_line_data(darray, x, y, hue): error_msg = "must be either None or one of ({:s})".format( - ", ".join([repr(dd) for dd in darray.dims]) + ", ".join(repr(dd) for dd in darray.dims) ) ndims = len(darray.dims) diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css index 7e382de3b5b..acfe85d5ac7 100644 --- a/xarray/static/css/style.css +++ b/xarray/static/css/style.css @@ -13,6 +13,18 @@ --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee); } +html[theme=dark], +body.vscode-dark { + --xr-font-color0: rgba(255, 255, 255, 1); + --xr-font-color2: rgba(255, 255, 255, 0.54); + --xr-font-color3: rgba(255, 255, 255, 0.38); + --xr-border-color: #1F1F1F; + --xr-disabled-color: #515151; + --xr-background-color: #111111; + --xr-background-color-row-even: #111111; + --xr-background-color-row-odd: #313131; +} + .xr-wrap { min-width: 300px; max-width: 700px; diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3fde292c04f..90deea51d2a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1526,12 +1526,6 @@ def roundtrip( with self.open(store_target, **open_kwargs) as ds: yield ds - @contextlib.contextmanager - def roundtrip_append( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False - ): - pytest.skip("zarr backend does not support appending") - def test_roundtrip_consolidated(self): pytest.importorskip("zarr", minversion="2.2.1.dev2") expected = create_test_data() @@ -1685,11 +1679,27 @@ def test_chunk_encoding_with_dask(self): # should fail if dask_chunks are irregular... 
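        # zarr stores one uniform chunk size (only the final chunk may be
        # smaller), so the mid-array chunk of 4 in (5, 4, 3) must be rejected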
ds_chunk_irreg = ds.chunk({"x": (5, 4, 3)}) - with pytest.raises(ValueError) as e_info: + with raises_regex(ValueError, "uniform chunk sizes."): with self.roundtrip(ds_chunk_irreg) as actual: pass - # make sure this error message is correct and not some other error - assert e_info.match("chunks") + + # should fail if encoding["chunks"] clashes with dask_chunks + badenc = ds.chunk({"x": 4}) + badenc.var1.encoding["chunks"] = (6,) + with raises_regex(NotImplementedError, "named 'var1' would overlap"): + with self.roundtrip(badenc) as actual: + pass + + badenc.var1.encoding["chunks"] = (2,) + with raises_regex(ValueError, "Specified Zarr chunk encoding"): + with self.roundtrip(badenc) as actual: + pass + + badenc = badenc.chunk({"x": (3, 3, 6)}) + badenc.var1.encoding["chunks"] = (3,) + with raises_regex(ValueError, "incompatible with this encoding"): + with self.roundtrip(badenc) as actual: + pass # ... except if the last chunk is smaller than the first ds_chunk_irreg = ds.chunk({"x": (5, 5, 2)}) @@ -1810,7 +1820,7 @@ def test_encoding_kwarg_fixed_width_string(self): # not relevant for zarr, since we don't use EncodedStringCoder pass - # TODO: someone who understand caching figure out whether chaching + # TODO: someone who understand caching figure out whether caching # makes sense for Zarr backend @pytest.mark.xfail(reason="Zarr caching not implemented") def test_dataset_caching(self): @@ -1818,55 +1828,44 @@ def test_dataset_caching(self): @pytest.mark.skipif(LooseVersion(dask_version) < "2.4", reason="dask GH5334") def test_append_write(self): - ds, ds_to_append, _ = create_append_test_data() - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") - ds_to_append.to_zarr(store_target, append_dim="time") - original = xr.concat([ds, ds_to_append], dim="time") - assert_identical(original, xr.open_zarr(store_target)) - - @pytest.mark.xfail(reason="Zarr stores can not be appended to") - def test_append_overwrite_values(self): - super().test_append_overwrite_values() + super().test_append_write() def test_append_with_invalid_dim_raises(self): - ds, ds_to_append, _ = create_append_test_data() - - # check failure when append_dim not valid - with pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises( + ValueError, match="does not match any existing dataset dimensions" + ): ds_to_append.to_zarr(store_target, append_dim="notvalid") - def test_append_with_append_dim_not_set_raises(self): + def test_append_with_no_dims_raises(self): + with self.create_zarr_target() as store_target: + Dataset({"foo": ("x", [1])}).to_zarr(store_target, mode="w") + with pytest.raises(ValueError, match="different dimension names"): + Dataset({"foo": ("y", [2])}).to_zarr(store_target, mode="a") + def test_append_with_append_dim_not_set_raises(self): ds, ds_to_append, _ = create_append_test_data() - - # check failure when append_dim not set - with pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises(ValueError, match="different dimension sizes"): ds_to_append.to_zarr(store_target, mode="a") def test_append_with_mode_not_a_raises(self): - ds, ds_to_append, _ = create_append_test_data() - - # check failure when append_dim is set and mode != 'a' - with 
pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises( + ValueError, match="append_dim was set along with mode='w'" + ): ds_to_append.to_zarr(store_target, mode="w", append_dim="time") def test_append_with_existing_encoding_raises(self): - ds, ds_to_append, _ = create_append_test_data() - - # check failure when providing encoding to existing variable - with pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises(ValueError, match="but encoding was provided"): ds_to_append.to_zarr( store_target, append_dim="time", diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 00c34940ce4..1efd4b02bf8 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -432,6 +432,18 @@ def test_decode_360_day_calendar(): assert_array_equal(actual, expected) +@requires_cftime +def test_decode_abbreviation(): + """Test making sure we properly fall back to cftime on abbreviated units.""" + import cftime + + val = np.array([1586628000000.0]) + units = "msecs since 1970-01-01T00:00:00Z" + actual = coding.times.decode_cf_datetime(val, units) + expected = coding.times.cftime_to_nptime(cftime.num2date(val, units)) + assert_array_equal(actual, expected) + + @arm_xfail @requires_cftime @pytest.mark.parametrize( diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 538dbbfb58b..75beb3757ca 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1039,7 +1039,7 @@ def test_map_blocks_error(map_da, map_ds): def bad_func(darray): return (darray * darray.x + 5 * darray.y)[:1, :1] - with raises_regex(ValueError, "Length of the.* has changed."): + with raises_regex(ValueError, "Received dimension 'x' of length 1"): xr.map_blocks(bad_func, map_da).compute() def returns_numpy(darray): @@ -1109,6 +1109,11 @@ def add_attrs(obj): assert_identical(actual, expected) + # when template is specified, attrs are copied from template, not set by function + with raise_if_dask_computes(): + actual = xr.map_blocks(add_attrs, obj, template=obj) + assert_identical(actual, obj) + def test_map_blocks_change_name(map_da): def change_name(obj): @@ -1150,7 +1155,7 @@ def test_map_blocks_to_array(map_ds): lambda x: x.expand_dims(k=3), lambda x: x.assign_coords(new_coord=("y", x.y * 2)), lambda x: x.astype(np.int32), - # TODO: [lambda x: x.isel(x=1).drop_vars("x"), map_da], + lambda x: x.x, ], ) def test_map_blocks_da_transformations(func, map_da): @@ -1170,7 +1175,7 @@ def test_map_blocks_da_transformations(func, map_da): lambda x: x.expand_dims(k=[1, 2, 3]), lambda x: x.expand_dims(k=3), lambda x: x.rename({"a": "new1", "b": "new2"}), - # TODO: [lambda x: x.isel(x=1)], + lambda x: x.x, ], ) def test_map_blocks_ds_transformations(func, map_ds): @@ -1180,6 +1185,64 @@ def test_map_blocks_ds_transformations(func, map_ds): assert_identical(actual, func(map_ds)) +@pytest.mark.parametrize("obj", [make_da(), make_ds()]) +def test_map_blocks_da_ds_with_template(obj): + func = lambda x: x.isel(x=[1]) + template = obj.isel(x=[1, 5, 9]) + with raise_if_dask_computes(): + actual = xr.map_blocks(func, obj, template=template) + assert_identical(actual, template) + + with raise_if_dask_computes(): + actual = obj.map_blocks(func, 
template=template) + assert_identical(actual, template) + + +def test_map_blocks_template_convert_object(): + da = make_da() + func = lambda x: x.to_dataset().isel(x=[1]) + template = da.to_dataset().isel(x=[1, 5, 9]) + with raise_if_dask_computes(): + actual = xr.map_blocks(func, da, template=template) + assert_identical(actual, template) + + ds = da.to_dataset() + func = lambda x: x.to_array().isel(x=[1]) + template = ds.to_array().isel(x=[1, 5, 9]) + with raise_if_dask_computes(): + actual = xr.map_blocks(func, ds, template=template) + assert_identical(actual, template) + + +@pytest.mark.parametrize("obj", [make_da(), make_ds()]) +def test_map_blocks_errors_bad_template(obj): + with raises_regex(ValueError, "unexpected coordinate variables"): + xr.map_blocks(lambda x: x.assign_coords(a=10), obj, template=obj).compute() + with raises_regex(ValueError, "does not contain coordinate variables"): + xr.map_blocks(lambda x: x.drop_vars("cxy"), obj, template=obj).compute() + with raises_regex(ValueError, "Dimensions {'x'} missing"): + xr.map_blocks(lambda x: x.isel(x=1), obj, template=obj).compute() + with raises_regex(ValueError, "Received dimension 'x' of length 1"): + xr.map_blocks(lambda x: x.isel(x=[1]), obj, template=obj).compute() + with raises_regex(TypeError, "must be a DataArray"): + xr.map_blocks(lambda x: x.isel(x=[1]), obj, template=(obj,)).compute() + with raises_regex(ValueError, "map_blocks requires that one block"): + xr.map_blocks( + lambda x: x.isel(x=[1]).assign_coords(x=10), obj, template=obj.isel(x=[1]) + ).compute() + with raises_regex(ValueError, "Expected index 'x' to be"): + xr.map_blocks( + lambda a: a.isel(x=[1]).assign_coords(x=[120]), # assign bad index values + obj, + template=obj.isel(x=[1, 5, 9]), + ).compute() + + +def test_map_blocks_errors_bad_template_2(map_ds): + with raises_regex(ValueError, "unexpected data variables {'xyz'}"): + xr.map_blocks(lambda x: x.assign(xyz=1), map_ds, template=map_ds).compute() + + @pytest.mark.parametrize("obj", [make_da(), make_ds()]) def test_map_blocks_object_method(obj): def func(obj): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 6eeaed66f9f..a01234616a4 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2163,9 +2163,6 @@ def test_transpose(self): with pytest.raises(ValueError): da.transpose("x", "y") - with pytest.warns(FutureWarning): - da.transpose() - def test_squeeze(self): assert_equal(self.dv.variable.squeeze(), self.dv.squeeze().variable) @@ -2755,9 +2752,6 @@ def test_groupby_restore_coord_dims(self): )["c"] assert result.dims == expected_dims - with pytest.warns(FutureWarning): - array.groupby("x").map(lambda x: x.squeeze()) - def test_groupby_first_and_last(self): array = DataArray([1, 2, 3, 4, 5], dims="x") by = DataArray(["a"] * 2 + ["b"] * 3, dims="x", name="ab") diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index a1cb7361e77..2a89920766c 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -32,7 +32,6 @@ from . 
import ( InaccessibleArray, - LooseVersion, UnexpectedDataAccess, assert_allclose, assert_array_equal, @@ -496,16 +495,11 @@ def test_constructor_pandas_single(self): DataArray(np.random.rand(4, 3), dims=["a", "b"]), # df ] - if LooseVersion(pd.__version__) < "0.25.0": - das.append(DataArray(np.random.rand(4, 3, 2), dims=["a", "b", "c"])) - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", r"\W*Panel is deprecated") - for a in das: - pandas_obj = a.to_pandas() - ds_based_on_pandas = Dataset(pandas_obj) - for dim in ds_based_on_pandas.data_vars: - assert_array_equal(ds_based_on_pandas[dim], pandas_obj[dim]) + for a in das: + pandas_obj = a.to_pandas() + ds_based_on_pandas = Dataset(pandas_obj) + for dim in ds_based_on_pandas.data_vars: + assert_array_equal(ds_based_on_pandas[dim], pandas_obj[dim]) def test_constructor_compat(self): data = {"x": DataArray(0, coords={"y": 1}), "y": ("z", [1, 1, 1])} diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index b3c0ce37a54..8011171d223 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -186,7 +186,7 @@ def test_dask_distributed_cfgrib_integration_test(loop): reason="Need recent distributed version to clean up get", ) @gen_cluster(client=True, timeout=None) -def test_async(c, s, a, b): +async def test_async(c, s, a, b): x = create_test_data() assert not dask.is_dask_collection(x) y = x.chunk({"dim2": 4}) + 10 @@ -206,7 +206,7 @@ def test_async(c, s, a, b): assert futures_of(z) future = c.compute(z) - w = yield future + w = await future assert not dask.is_dask_collection(w) assert_allclose(x + 10, w) @@ -218,7 +218,7 @@ def test_hdf5_lock(): @gen_cluster(client=True) -def test_serializable_locks(c, s, a, b): +async def test_serializable_locks(c, s, a, b): def f(x, lock=None): with lock: return x + 1 @@ -233,7 +233,7 @@ def f(x, lock=None): ]: futures = c.map(f, list(range(10)), lock=lock) - yield c.gather(futures) + await c.gather(futures) lock2 = pickle.loads(pickle.dumps(lock)) assert type(lock) == type(lock2) diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index 239f339208d..94653016416 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -137,3 +137,15 @@ def test_repr_of_dataset(dataset): ) assert "<U4" in formatted or ">U4" in formatted assert "<IA>" in formatted + + +def test_variable_repr_html(): + v = xr.Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"}) + assert hasattr(v, "_repr_html_") + with xr.set_options(display_style="html"): + html = v._repr_html_().strip() + # We don't do a complete string identity since + # html output is probably subject to change, is long and... reasons. + # Just test that something reasonable was produced. 
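# A brief orientation note on the API exercised just below (a hedged sketch;
# xr.set_options and display_style are taken from this diff, the "text" value
# is the documented alternative to "html"): the option works both globally
# and as a context manager, so the HTML repr can be toggled per block:
#
#     import xarray as xr
#     xr.set_options(display_style="html")        # make HTML the default repr
#     with xr.set_options(display_style="text"):  # temporarily switch back
#         print(repr(xr.DataArray([1, 2, 3])))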
+ assert html.startswith("<div") and html.endswith("</div>") + assert "xarray.Variable" in html diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 35c71c2854c..731cd165244 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -231,6 +231,17 @@ def test_interpolate_kwargs(): assert_equal(actual, expected) +def test_interpolate_keep_attrs(): + vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) + mvals = vals.copy() + mvals[2] = np.nan + missing = xr.DataArray(mvals, dims="x") + missing.attrs = {"test": "value"} + + actual = missing.interpolate_na(dim="x", keep_attrs=True) + assert actual.attrs == {"test": "value"} + + def test_interpolate(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 2826dc2479c..5dd4a42cff0 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1660,7 +1660,7 @@ def test_missing_value_fillna(self, unit, error): method("equals"), pytest.param( method("identical"), - marks=pytest.mark.skip(reason="behaviour of identical is unclear"), + marks=pytest.mark.skip(reason="behavior of identical is undecided"), ), ), ids=repr, @@ -1885,7 +1885,10 @@ def test_squeeze(self, dtype): method("coarsen", windows={"y": 2}, func=np.mean), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", + ), ), pytest.param( method("rank", dim="x"), @@ -2161,8 +2164,8 @@ class TestDataArray: "with_dims", marks=pytest.mark.xfail(reason="units in indexes are not supported"), ), - pytest.param("with_coords"), - pytest.param("without_coords"), + "with_coords", + "without_coords", ), ) def test_init(self, variant, dtype): @@ -2224,21 +2227,17 @@ def test_repr(self, func, variant, dtype): @pytest.mark.parametrize( "func", ( - pytest.param( - function("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - function("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), + function("all"), + function("any"), function("argmax"), function("argmin"), function("max"), function("mean"), pytest.param( function("median"), - marks=pytest.mark.xfail(reason="not implemented by xarray"), + marks=pytest.mark.xfail( + reason="median does not work with dataarrays yet" + ), ), function("min"), pytest.param( @@ -2249,18 +2248,9 @@ def test_repr(self, func, variant, dtype): function("std"), function("var"), function("cumsum"), - pytest.param( - function("cumprod"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), + function("cumprod"), + method("all"), + method("any"), method("argmax"), method("argmin"), method("max"), @@ -2269,18 +2259,13 @@ def test_repr(self, func, variant, dtype): method("min"), pytest.param( method("prod"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), + marks=pytest.mark.xfail(reason="not implemented by pint yet"), ), method("sum"), method("std"), method("var"), method("cumsum"), - pytest.param( - method("cumprod"), - marks=pytest.mark.xfail(reason="pint does not implement cumprod yet"), - ), + method("cumprod"), ), ids=repr, ) @@ -2296,7 +2281,8 @@
def test_aggregation(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) @pytest.mark.parametrize( "func", @@ -2314,7 +2300,8 @@ def test_unary_operations(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -2333,7 +2320,8 @@ def test_binary_operations(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "comparison", @@ -2383,7 +2371,8 @@ def test_comparison_operations(self, comparison, unit, error, dtype): strip_units(convert_units(to_compare_with, expected_units)), ) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "units,error", @@ -2411,9 +2400,10 @@ def test_univariate_ufunc(self, units, error, dtype): ) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="xarray's `np.maximum` strips units") + @pytest.mark.xfail(reason="needs the type register system for __array_ufunc__") @pytest.mark.parametrize( "unit,error", ( @@ -2422,7 +2412,12 @@ def test_univariate_ufunc(self, units, error, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param( + unit_registry.mm, + None, + id="compatible_unit", + marks=pytest.mark.xfail(reason="pint converts to the wrong units"), + ), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -2433,7 +2428,7 @@ def test_bivariate_ufunc(self, unit, error, dtype): if error is not None: with pytest.raises(error): - np.maximum(data_array, 0 * unit) + np.maximum(data_array, 1 * unit) return @@ -2441,16 +2436,18 @@ def test_bivariate_ufunc(self, unit, error, dtype): expected = attach_units( np.maximum( strip_units(data_array), - strip_units(convert_units(0 * unit, expected_units)), + strip_units(convert_units(1 * unit, expected_units)), ), expected_units, ) - actual = np.maximum(data_array, 0 * unit) - assert_equal_with_units(expected, actual) + actual = np.maximum(data_array, 1 * unit) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - actual = np.maximum(0 * unit, data_array) - assert_equal_with_units(expected, actual) + actual = np.maximum(1 * unit, data_array) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize("property", ("T", "imag", "real")) def test_numpy_properties(self, property, dtype): @@ -2466,7 +2463,8 @@ def test_numpy_properties(self, property, dtype): ) actual = getattr(data_array, property) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -2481,16 +2479,86 @@ def test_numpy_methods(self, func, dtype): 
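# The test_units.py hunks in this region all apply one recurring pattern,
# sketched here for orientation (strip_units, attach_units, extract_units,
# assert_units_equal, method() and function() are helpers from
# xarray/tests/test_units.py itself, as seen elsewhere in this diff):
#
#     units = extract_units(data_array)                  # remember the units
#     expected = attach_units(func(strip_units(data_array)), units)
#     actual = func(data_array)                          # same op on quantities
#     assert_units_equal(expected, actual)               # compare units first
#     xr.testing.assert_identical(expected, actual)      # then values/metadata
#
# Splitting the old assert_equal_with_units into these two checks lets a
# failure report whether the units or the values diverged.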
expected = attach_units(strip_units(data_array), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) + + def test_item(self, dtype): + array = np.arange(10).astype(dtype) * unit_registry.m + data_array = xr.DataArray(data=array) + + func = method("item", 2) + + expected = func(strip_units(data_array)) * unit_registry.m + actual = func(data_array) + + np.testing.assert_allclose(expected, actual) + + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "func", + ( + method("searchsorted", 5), + pytest.param( + function("searchsorted", 5), + marks=pytest.mark.xfail( + reason="xarray does not implement __array_function__" + ), + ), + ), + ids=repr, + ) + def test_searchsorted(self, func, unit, error, dtype): + array = np.arange(10).astype(dtype) * unit_registry.m + data_array = xr.DataArray(data=array) + + scalar_types = (int, float) + args = list(value * unit for value in func.args) + kwargs = { + key: (value * unit if isinstance(value, scalar_types) else value) + for key, value in func.kwargs.items() + } + + if error is not None: + with pytest.raises(error): + func(data_array, *args, **kwargs) + + return + + units = extract_units(data_array) + expected_units = extract_units(func(array, *args, **kwargs)) + stripped_args = [strip_units(convert_units(value, units)) for value in args] + stripped_kwargs = { + key: strip_units(convert_units(value, units)) + for key, value in kwargs.items() + } + expected = attach_units( + func(strip_units(data_array), *stripped_args, **stripped_kwargs), + expected_units, + ) + actual = func(data_array, *args, **kwargs) + + assert_units_equal(expected, actual) + np.testing.assert_allclose(expected, actual) @pytest.mark.parametrize( "func", ( method("clip", min=3, max=8), pytest.param( - method("searchsorted", v=5), + function("clip", a_min=3, a_max=8), marks=pytest.mark.xfail( - reason="searchsorted somehow requires a undocumented `keys` argument" + reason="xarray does not implement __array_function__" ), ), ), @@ -2513,28 +2581,32 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): data_array = xr.DataArray(data=array) scalar_types = (int, float) + args = list(value * unit for value in func.args) kwargs = { key: (value * unit if isinstance(value, scalar_types) else value) for key, value in func.kwargs.items() } if error is not None: with pytest.raises(error): - func(data_array, **kwargs) + func(data_array, *args, **kwargs) return units = extract_units(data_array) - expected_units = extract_units(func(array, **kwargs)) + expected_units = extract_units(func(array, *args, **kwargs)) + stripped_args = [strip_units(convert_units(value, units)) for value in args] stripped_kwargs = { key: strip_units(convert_units(value, units)) for key, value in kwargs.items() } expected = attach_units( - func(strip_units(data_array), **stripped_kwargs), expected_units + func(strip_units(data_array), *stripped_args, **stripped_kwargs), + expected_units, ) - actual = func(data_array, **kwargs) + actual = func(data_array, *args, **kwargs) - assert_equal_with_units(expected, actual) + 
assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr @@ -2551,15 +2623,13 @@ def test_missing_value_detection(self, func, dtype): ) * unit_registry.degK ) - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * unit_registry.m - - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + data_array = xr.DataArray(data=array) expected = func(strip_units(data_array)) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose units in data") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) @@ -2576,7 +2646,8 @@ def test_missing_value_filling(self, func, dtype): ) actual = func(data_array, dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2586,12 +2657,7 @@ def test_missing_value_filling(self, func, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="fillna converts to value's unit"), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -2629,7 +2695,8 @@ def test_fillna(self, fill_value, unit, error, dtype): ) actual = func(data_array, value=value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) def test_dropna(self, dtype): array = ( @@ -2643,18 +2710,13 @@ def test_dropna(self, dtype): expected = attach_units(strip_units(data_array).dropna(dim="x"), units) actual = data_array.dropna(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", ( - pytest.param( - 1, - id="no_unit", - marks=pytest.mark.xfail( - reason="pint's isin implementation does not work well with mixed args" - ), - ), + pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), @@ -2677,22 +2739,11 @@ def test_isin(self, unit, dtype): ) & array.check(unit) actual = data_array.isin(values) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( - "variant", - ( - pytest.param( - "masking", - marks=pytest.mark.xfail(reason="array(nan) is not a quantity"), - ), - "replacing_scalar", - "replacing_array", - pytest.param( - "dropping", - marks=pytest.mark.xfail(reason="array(nan) is not a quantity"), - ), - ), + "variant", ("masking", "replacing_scalar", "replacing_array", "dropping") ) @pytest.mark.parametrize( "unit,error", @@ -2742,22 +2793,24 @@ def test_where(self, variant, unit, error, dtype): ) actual = data_array.where(**kwargs) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="interpolate strips units") - def 
test_interpolate_na(self, dtype): + @pytest.mark.xfail(reason="uses numpy.vectorize") + def test_interpolate_na(self): array = ( np.array([-1.03, 0.1, 1.4, np.nan, 2.3, np.nan, np.nan, 9.1]) * unit_registry.m ) x = np.arange(len(array)) - data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype) + data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") units = extract_units(data_array) expected = attach_units(strip_units(data_array).interpolate_na(dim="x"), units) actual = data_array.interpolate_na(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2767,18 +2820,8 @@ def test_interpolate_na(self, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="depends on reindex"), - ), - pytest.param( - unit_registry.m, - None, - id="identical_unit", - marks=pytest.mark.xfail(reason="depends on reindex"), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit",), + pytest.param(unit_registry.m, None, id="identical_unit",), ), ) def test_combine_first(self, unit, error, dtype): @@ -2807,7 +2850,8 @@ def test_combine_first(self, unit, error, dtype): ) actual = data_array.combine_first(other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2829,7 +2873,17 @@ def test_combine_first(self, unit, error, dtype): "coords", ), ) - @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) + @pytest.mark.parametrize( + "func", + ( + method("equals"), + pytest.param( + method("identical"), + marks=pytest.mark.skip(reason="the behavior of identical is undecided"), + ), + ), + ids=repr, + ) def test_comparisons(self, func, variation, unit, dtype): def is_compatible(a, b): a = a if a is not None else 1 @@ -2903,7 +2957,8 @@ def test_broadcast_like(self, unit, dtype): ) actual = arr1.broadcast_like(arr2) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2950,7 +3005,6 @@ def test_broadcast_equals(self, unit, dtype): method("reset_coords", names="x2"), method("copy"), method("astype", np.float32), - method("item", 1), ), ids=repr, ) @@ -2978,7 +3032,8 @@ def test_content_manipulation(self, func, dtype): expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", (pytest.param(method("copy", data=np.arange(20))),), ids=repr @@ -3004,7 +3059,9 @@ def test_content_manipulation_with_units(self, func, unit, dtype): ) actual = func(data_array, **kwargs) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "indices", @@ -3024,7 +3081,8 @@ def test_isel(self, indices, dtype): ) actual = data_array.isel(x=indices) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't 
support units") @pytest.mark.parametrize( @@ -3067,7 +3125,9 @@ def test_sel(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3110,7 +3170,9 @@ def test_loc(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.loc[{"x": values}] - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3153,7 +3215,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.drop_sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "shape", @@ -3181,7 +3245,9 @@ def test_squeeze(self, shape, dtype): strip_units(data_array).squeeze(), extract_units(data_array) ) actual = data_array.squeeze() - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) # try squeezing the dimensions separately names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) @@ -3190,7 +3256,9 @@ def test_squeeze(self, shape, dtype): strip_units(data_array).squeeze(dim=name), extract_units(data_array) ) actual = data_array.squeeze(dim=name) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3212,49 +3280,42 @@ def test_head_tail_thin(self, func, dtype): ) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex"), ), + ids=repr, ) - def test_interp(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - new_coords = (np.arange(10) + 0.5) * unit - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, + def test_interp_reindex(self, variant, func, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) + array = np.linspace(1, 2, 10).astype(dtype) * data_unit + y = np.arange(10) * coord_unit - if error is not None: - with pytest.raises(error): - data_array.interp(x=new_coords) - - return + x = np.arange(10) + new_x = np.arange(10) + 0.5 + data_array = xr.DataArray(array, coords={"x": x, "y": ("x", y)}, dims="x") units = extract_units(data_array) - expected = attach_units( - 
strip_units(data_array).interp( - x=strip_units(convert_units(new_coords, {None: unit_registry.m})) - ), - units, - ) - actual = data_array.interp(x=new_coords) + expected = attach_units(func(strip_units(data_array), x=new_x), units) + actual = func(data_array, x=new_x) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) - @pytest.mark.xfail(reason="indexes strip units") + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( @@ -3267,79 +3328,66 @@ def test_interp(self, unit, error): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_interp_like(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, - } - - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - other = xr.DataArray( - data=np.empty((20, 10)) * unit_registry.degK, - coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), - ) + @pytest.mark.parametrize( + "func", (method("interp"), method("reindex")), ids=repr, + ) + def test_interp_reindex_indexing(self, func, unit, error, dtype): + array = np.linspace(1, 2, 10).astype(dtype) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(10) + 0.5) * unit + data_array = xr.DataArray(array, coords={"x": x}, dims="x") if error is not None: with pytest.raises(error): - data_array.interp_like(other) + func(data_array, x=new_x) return units = extract_units(data_array) expected = attach_units( - strip_units(data_array).interp_like( - strip_units(convert_units(other, units)) + func( + strip_units(data_array), + x=strip_units(convert_units(new_x, {None: unit_registry.m})), ), units, ) - actual = data_array.interp_like(other) + actual = func(data_array, x=new_x) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex_like"), ), + ids=repr, ) - def test_reindex(self, unit, error, dtype): - array = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - new_coords = (np.arange(10) + 0.5) * unit - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, + def test_interp_reindex_like(self, variant, func, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - func = method("reindex") - - if error is not None: - with pytest.raises(error): - func(data_array, x=new_coords) + array = np.linspace(1, 2, 10).astype(dtype) * data_unit + coord = np.arange(10) * coord_unit - return + x = np.arange(10) + new_x = np.arange(-2, 2) + 0.5 + data_array = xr.DataArray(array, coords={"x": x, "y": ("x", 
coord)}, dims="x") + other = xr.DataArray(np.empty_like(new_x), coords={"x": new_x}, dims="x") - expected = attach_units( - func( - strip_units(data_array), - x=strip_units(convert_units(new_coords, {None: unit_registry.m})), - ), - {None: unit_registry.degK}, - ) - actual = func(data_array, x=new_coords) + units = extract_units(data_array) + expected = attach_units(func(strip_units(data_array), other), units) + actual = func(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3354,38 +3402,35 @@ def test_reindex(self, unit, error, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex_like(self, unit, error, dtype): - array = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, - } + @pytest.mark.parametrize( + "func", (method("interp_like"), method("reindex_like")), ids=repr, + ) + def test_interp_reindex_like_indexing(self, func, unit, error, dtype): + array = np.linspace(1, 2, 10).astype(dtype) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(-2, 2) + 0.5) * unit - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - other = xr.DataArray( - data=np.empty((20, 10)) * unit_registry.degK, - coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), - ) + data_array = xr.DataArray(array, coords={"x": x}, dims="x") + other = xr.DataArray(np.empty_like(new_x), {"x": new_x}, dims="x") if error is not None: with pytest.raises(error): - data_array.reindex_like(other) + func(data_array, other) return units = extract_units(data_array) expected = attach_units( - strip_units(data_array).reindex_like( - strip_units(convert_units(other, units)) + func( + strip_units(data_array), + strip_units(convert_units(other, {None: unit_registry.m})), ), units, ) - actual = data_array.reindex_like(other) + actual = func(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3407,7 +3452,8 @@ def test_stacking_stacked(self, func, dtype): expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m}) actual = func(stacked) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") def test_to_unstacked_dataset(self, dtype): @@ -3430,7 +3476,8 @@ def test_to_unstacked_dataset(self, dtype): ).rename({elem.magnitude: elem for elem in x}) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3438,9 +3485,7 @@ def test_to_unstacked_dataset(self, dtype): method("transpose", "y", "x", "z"), method("stack", a=("x", "y")), method("set_index", x="x2"), - pytest.param( - method("shift", x=2), marks=pytest.mark.xfail(reason="strips units") - ), + method("shift", x=2), method("roll", x=2, roll_coords=False), method("sortby", "x2"), ), @@ -3466,7 +3511,8 @@ def test_stacking_reordering(self, func, dtype): expected = attach_units(func(strip_units(data_array)), {None: unit_registry.m}) actual = func(data_array) - 
assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3476,16 +3522,13 @@ def test_stacking_reordering(self, func, dtype): method("integrate", dim="x"), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), - ), - method("reduce", func=np.sum, dim="x"), - pytest.param( - lambda x: x.dot(x), - id="method_dot", marks=pytest.mark.xfail( - reason="pint does not implement the dot method" + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", ), ), + method("reduce", func=np.sum, dim="x"), + pytest.param(lambda x: x.dot(x), id="method_dot"), ), ids=repr, ) @@ -3512,7 +3555,8 @@ def test_computation(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3522,7 +3566,9 @@ def test_computation(self, func, dtype): method("coarsen", y=2), pytest.param( method("rolling", y=3), - marks=pytest.mark.xfail(reason="rolling strips units"), + marks=pytest.mark.xfail( + reason="numpy.lib.stride_tricks.as_strided converts to ndarray" + ), ), pytest.param( method("rolling_exp", y=3), @@ -3545,7 +3591,8 @@ def test_computation_objects(self, func, dtype): expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) def test_resample(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m @@ -3559,7 +3606,8 @@ def test_resample(self, dtype): expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3569,7 +3617,10 @@ def test_resample(self, dtype): method("last"), pytest.param( method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", + ), ), ), ids=repr, @@ -3598,7 +3649,8 @@ def test_grouped_operations(self, func, dtype): ) actual = func(data_array.groupby("y")) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) class TestDataset: diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 78e3848b8fb..3003e0d66f3 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2213,6 +2213,10 @@ def test_full_like(self): assert expect.dtype == bool assert_identical(expect, full_like(orig, True, dtype=bool)) + # raise error on non-scalar fill_value + with raises_regex(ValueError, "must be scalar"): + full_like(orig, [1.0, 2.0]) + @requires_dask def test_full_like_dask(self): orig = Variable( diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index 32051bb6843..96983c83aab 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -129,7 +129,7 @@ def show_versions(file=sys.stdout): ("sphinx", lambda mod: mod.__version__), ] - deps_blob = list() 
+ deps_blob = [] for (modname, ver_f) in deps: try: if modname in sys.modules:
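# To round out the truncated hunk above, a self-contained sketch of the loop
# with which show_versions builds deps_blob (the deps list here is
# illustrative, not the full list from the file, and the real function
# handles more failure modes):

import importlib
import sys

deps = [("numpy", lambda mod: mod.__version__), ("pandas", lambda mod: mod.__version__)]

deps_blob = []  # list literal, as the change above prefers over list()
for modname, ver_f in deps:
    try:
        # reuse an already-imported module, otherwise import it fresh
        mod = sys.modules[modname] if modname in sys.modules else importlib.import_module(modname)
    except ImportError:
        deps_blob.append((modname, None))  # dependency not installed
    else:
        try:
            deps_blob.append((modname, ver_f(mod)))
        except AttributeError:
            deps_blob.append((modname, "installed"))  # no usable version attribute

print(deps_blob)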