forked from pydata/xarray

Merge remote-tracking branch 'upstream/master' into missing_value
* upstream/master:
  cfgrib is now part of conda-forge (pydata#2992)
  Add fill_value for concat and auto_combine (pydata#2964)
  Remove deprecated pytest.config usages (pydata#2988)
  Add transpose_coords option to DataArray.transpose (pydata#2556)
  Fix rolling.construct() example (pydata#2967)
  Implement load_dataset() and load_dataarray() (pydata#2917)
dcherian committed May 29, 2019
2 parents 15d94f3 + ae1239c commit df17e1c
Showing 21 changed files with 341 additions and 95 deletions.
3 changes: 1 addition & 2 deletions ci/requirements-py36.yml
@@ -24,12 +24,11 @@ dependencies:
- bottleneck
- zarr
- pseudonetcdf>=3.0.1
- eccodes
- cfgrib>=0.9.2
- cdms2
- pynio
- iris>=1.10
- pydap
- lxml
- pip:
- cfgrib>=0.9.2
- mypy==0.660
3 changes: 1 addition & 2 deletions ci/requirements-py37.yml
@@ -25,9 +25,8 @@ dependencies:
- bottleneck
- zarr
- pseudonetcdf>=3.0.1
- cfgrib>=0.9.2
- lxml
- eccodes
- pydap
- pip:
- cfgrib>=0.9.2
- mypy==0.650
20 changes: 20 additions & 0 deletions conftest.py
@@ -1,9 +1,29 @@
"""Configuration for pytest."""

import pytest


def pytest_addoption(parser):
"""Add command-line flags for pytest."""
parser.addoption("--run-flaky", action="store_true",
help="runs flaky tests")
parser.addoption("--run-network-tests", action="store_true",
help="runs tests requiring a network connection")


def pytest_collection_modifyitems(config, items):

if not config.getoption("--run-flaky"):
skip_flaky = pytest.mark.skip(
reason="set --run-flaky option to run flaky tests")
for item in items:
if "flaky" in item.keywords:
item.add_marker(skip_flaky)

if not config.getoption("--run-network-tests"):
skip_network = pytest.mark.skip(
reason="set --run-network-tests option to run tests requiring an"
"internet connection")
for item in items:
if "network" in item.keywords:
item.add_marker(skip_network)
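
(Illustrative usage, not part of the commit: the test module below is hypothetical and only demonstrates the ``flaky`` and ``network`` markers that this conftest.py looks for in ``item.keywords``.)

    # test_example.py -- hypothetical tests exercising the custom markers
    import pytest


    @pytest.mark.flaky
    def test_sometimes_fails():
        assert True


    @pytest.mark.network
    def test_requires_internet():
        assert True

Running plain ``pytest`` skips both tests; ``pytest --run-flaky --run-network-tests`` enables them.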
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -460,6 +460,7 @@ Dataset methods
:toctree: generated/

open_dataset
load_dataset
open_mfdataset
open_rasterio
open_zarr
@@ -487,6 +488,7 @@ DataArray methods
:toctree: generated/

open_dataarray
load_dataarray
DataArray.to_dataset
DataArray.to_netcdf
DataArray.to_pandas
21 changes: 18 additions & 3 deletions doc/whats-new.rst
@@ -27,8 +27,21 @@ Enhancements
- Character arrays' character dimension name decoding and encoding handled by
``var.encoding['char_dim_name']`` (:issue:`2895`)
By `James McCreight <https://github.com/jmccreight>`_.
- :py:meth:`DataArray.transpose` now accepts a keyword argument
``transpose_coords`` which enables transposition of coordinates in the
same way as :py:meth:`Dataset.transpose`. :py:meth:`DataArray.groupby`,
:py:meth:`DataArray.groupby_bins`, and :py:meth:`DataArray.resample` now
accept a keyword argument ``restore_coord_dims`` which keeps the order
of the dimensions of multi-dimensional coordinates intact (:issue:`1856`).
A short example follows this list.
By `Peter Hausamann <http://github.com/phausamann>`_.
- Clean up Python 2 compatibility in code (:issue:`2950`)
By `Guido Imperiale <https://github.com/crusaderky>`_.
- Implement ``load_dataset()`` and ``load_dataarray()`` as alternatives to
``open_dataset()`` and ``open_dataarray()`` to open, load into memory,
and close files, returning the Dataset or DataArray. These functions are
helpful for avoiding file-lock errors when trying to write to files opened
using ``open_dataset()`` or ``open_dataarray()``. (:issue:`2887`)
By `Dan Nowacki <https://github.com/dnowacki-usgs>`_.
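
(Illustrative sketch of the ``transpose_coords`` entry above, not part of the changelog diff; the array and the 2-D coordinate name ``xy`` are invented.)

    import numpy as np
    import xarray as xr

    # a DataArray with a two-dimensional (non-dimension) coordinate 'xy'
    da = xr.DataArray(np.zeros((2, 3)), dims=('x', 'y'),
                      coords={'xy': (('x', 'y'), np.arange(6).reshape(2, 3))})

    # transpose the data and, with transpose_coords=True, the 2-D coordinate too
    transposed = da.transpose('y', 'x', transpose_coords=True)
    print(transposed.coords['xy'].dims)  # ('y', 'x')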

Bug fixes
~~~~~~~~~
@@ -43,6 +56,8 @@ Bug fixes
By `Martin Pletcher <https://github.com/pletchm>`_.
- Increased support for `missing_value` (:issue:`2871`)
By `Deepak Cherian <https://github.com/dcherian>`_.
- Removed usages of `pytest.config`, which is deprecated (:issue:`2988`)
By `Maximilian Roos <https://github.com/max-sixty>`_.

.. _whats-new.0.12.1:

@@ -155,9 +170,9 @@ Other enhancements
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
- Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`).
By `Kevin Squire <https://github.com/kmsquire>`_.
- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
added to remove the original zarr chunk encoding.
By `Lily Wang <https://github.com/lilyminium>`_.

2 changes: 1 addition & 1 deletion xarray/__init__.py
@@ -17,7 +17,7 @@
from .core.options import set_options

from .backends.api import (open_dataset, open_dataarray, open_mfdataset,
save_mfdataset)
save_mfdataset, load_dataset, load_dataarray)
from .backends.rasterio_ import open_rasterio
from .backends.zarr import open_zarr

57 changes: 55 additions & 2 deletions xarray/backends/api.py
@@ -185,12 +185,64 @@ def _finalize_store(write, store):
store.close()


def load_dataset(filename_or_obj, **kwargs):
    """Open, load into memory, and close a Dataset from a file or file-like
    object.

    This is a thin wrapper around :py:meth:`~xarray.open_dataset`. It differs
    from `open_dataset` in that it loads the Dataset into memory, closes the
    file, and returns the Dataset. In contrast, `open_dataset` keeps the file
    handle open and lazy loads its contents. All parameters are passed directly
    to `open_dataset`. See that documentation for further details.

    Returns
    -------
    dataset : Dataset
        The newly created Dataset.

    See Also
    --------
    open_dataset
    """
    if 'cache' in kwargs:
        raise TypeError('cache has no effect in this context')

    with open_dataset(filename_or_obj, **kwargs) as ds:
        return ds.load()


def load_dataarray(filename_or_obj, **kwargs):
    """Open, load into memory, and close a DataArray from a file or file-like
    object containing a single data variable.

    This is a thin wrapper around :py:meth:`~xarray.open_dataarray`. It differs
    from `open_dataarray` in that it loads the DataArray into memory, closes
    the file, and returns the DataArray. In contrast, `open_dataarray` keeps
    the file handle open and lazy loads its contents. All parameters are passed
    directly to `open_dataarray`. See that documentation for further details.

    Returns
    -------
    dataarray : DataArray
        The newly created DataArray.

    See Also
    --------
    open_dataarray
    """
    if 'cache' in kwargs:
        raise TypeError('cache has no effect in this context')

    with open_dataarray(filename_or_obj, **kwargs) as da:
        return da.load()
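
(Illustrative usage, not part of the diff: the file name ``data.nc`` is hypothetical.)

    import xarray as xr

    # load_dataset reads everything into memory and closes the file right away,
    # so the same path can be overwritten without hitting a file-lock error.
    ds = xr.load_dataset('data.nc')
    ds.attrs['processed'] = True
    ds.to_netcdf('data.nc')

With ``open_dataset`` the file handle stays open for lazy loading, so writing back to the same path before the handle is closed can fail; ``load_dataarray`` behaves the same way for single-variable files.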


def open_dataset(filename_or_obj, group=None, decode_cf=True,
mask_and_scale=None, decode_times=True, autoclose=None,
concat_characters=True, decode_coords=True, engine=None,
chunks=None, lock=None, cache=None, drop_variables=None,
backend_kwargs=None, use_cftime=None):
"""Load and decode a dataset from a file or file-like object.
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
@@ -406,7 +458,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
concat_characters=True, decode_coords=True, engine=None,
chunks=None, lock=None, cache=None, drop_variables=None,
backend_kwargs=None, use_cftime=None):
"""Open an DataArray from a netCDF file containing a single data variable.
"""Open an DataArray from a file or file-like object containing a single
data variable.
This is designed to read netCDF files with only one data variable. If
multiple variables are present then a ValueError is raised.
55 changes: 35 additions & 20 deletions xarray/core/combine.py
@@ -4,7 +4,7 @@

import pandas as pd

from . import utils
from . import utils, dtypes
from .alignment import align
from .merge import merge
from .variable import IndexVariable, Variable, as_variable
@@ -14,7 +14,7 @@

def concat(objs, dim=None, data_vars='all', coords='different',
compat='equals', positions=None, indexers=None, mode=None,
concat_over=None):
concat_over=None, fill_value=dtypes.NA):
"""Concatenate xarray objects along a new or existing dimension.
Parameters
@@ -66,6 +66,8 @@ def concat(objs, dim=None, data_vars='all', coords='different',
List of integer arrays which specifies the integer positions to which
to assign each dataset along the concatenated dimension. If not
supplied, objects are concatenated in the provided order.
fill_value : scalar, optional
Value to use for newly missing values
indexers, mode, concat_over : deprecated
Returns
@@ -117,7 +119,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
else:
raise TypeError('can only concatenate xarray Dataset and DataArray '
'objects, got %s' % type(first_obj))
return f(objs, dim, data_vars, coords, compat, positions)
return f(objs, dim, data_vars, coords, compat, positions, fill_value)


def _calc_concat_dim_coord(dim):
@@ -212,7 +214,8 @@ def process_subset_opt(opt, subset):
return concat_over, equals


def _dataset_concat(datasets, dim, data_vars, coords, compat, positions):
def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
fill_value=dtypes.NA):
"""
Concatenate a sequence of datasets along a new or existing dimension
"""
@@ -225,7 +228,8 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions):
dim, coord = _calc_concat_dim_coord(dim)
# Make sure we're working on a copy (we'll be loading variables)
datasets = [ds.copy() for ds in datasets]
datasets = align(*datasets, join='outer', copy=False, exclude=[dim])
datasets = align(*datasets, join='outer', copy=False, exclude=[dim],
fill_value=fill_value)

concat_over, equals = _calc_concat_over(datasets, dim, data_vars, coords)

@@ -317,7 +321,7 @@ def ensure_common_dims(vars):


def _dataarray_concat(arrays, dim, data_vars, coords, compat,
positions):
positions, fill_value=dtypes.NA):
arrays = list(arrays)

if data_vars != 'all':
@@ -336,14 +340,15 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat,
datasets.append(arr._to_temp_dataset())

ds = _dataset_concat(datasets, dim, data_vars, coords, compat,
positions)
positions, fill_value)
result = arrays[0]._from_temp_dataset(ds, name)

result.name = result_name(arrays)
return result


def _auto_concat(datasets, dim=None, data_vars='all', coords='different'):
def _auto_concat(datasets, dim=None, data_vars='all', coords='different',
fill_value=dtypes.NA):
if len(datasets) == 1 and dim is None:
# There is nothing more to combine, so kick out early.
return datasets[0]
@@ -366,7 +371,8 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'):
'supply the ``concat_dim`` argument '
'explicitly')
dim, = concat_dims
return concat(datasets, dim=dim, data_vars=data_vars, coords=coords)
return concat(datasets, dim=dim, data_vars=data_vars,
coords=coords, fill_value=fill_value)


_CONCAT_DIM_DEFAULT = utils.ReprObject('<inferred>')
@@ -442,7 +448,8 @@ def _check_shape_tile_ids(combined_tile_ids):


def _combine_nd(combined_ids, concat_dims, data_vars='all',
coords='different', compat='no_conflicts'):
coords='different', compat='no_conflicts',
fill_value=dtypes.NA):
"""
Concatenates and merges an N-dimensional structure of datasets.
@@ -472,13 +479,14 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all',
dim=concat_dim,
data_vars=data_vars,
coords=coords,
compat=compat)
compat=compat,
fill_value=fill_value)
combined_ds = list(combined_ids.values())[0]
return combined_ds


def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars,
coords, compat):
coords, compat, fill_value=dtypes.NA):
# Group into lines of datasets which must be combined along dim
# need to sort by _new_tile_id first for groupby to work
# TODO remove all these sorted OrderedDicts once python >= 3.6 only
@@ -490,7 +498,8 @@ def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars,
combined_ids = OrderedDict(sorted(group))
datasets = combined_ids.values()
new_combined_ids[new_id] = _auto_combine_1d(datasets, dim, compat,
data_vars, coords)
data_vars, coords,
fill_value)
return new_combined_ids


@@ -500,18 +509,20 @@ def vars_as_keys(ds):

def _auto_combine_1d(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
compat='no_conflicts',
data_vars='all', coords='different'):
data_vars='all', coords='different',
fill_value=dtypes.NA):
# This is just the old auto_combine function (which only worked along 1D)
if concat_dim is not None:
dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim
sorted_datasets = sorted(datasets, key=vars_as_keys)
grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
concatenated = [_auto_concat(list(ds_group), dim=dim,
data_vars=data_vars, coords=coords)
data_vars=data_vars, coords=coords,
fill_value=fill_value)
for id, ds_group in grouped_by_vars]
else:
concatenated = datasets
merged = merge(concatenated, compat=compat)
merged = merge(concatenated, compat=compat, fill_value=fill_value)
return merged


@@ -521,7 +532,7 @@ def _new_tile_id(single_id_ds_pair):


def _auto_combine(datasets, concat_dims, compat, data_vars, coords,
infer_order_from_coords, ids):
infer_order_from_coords, ids, fill_value=dtypes.NA):
"""
Calls logic to decide concatenation order before concatenating.
"""
@@ -550,12 +561,14 @@ def _auto_combine(datasets, concat_dims, compat, data_vars, coords,

# Repeatedly concatenate then merge along each dimension
combined = _combine_nd(combined_ids, concat_dims, compat=compat,
data_vars=data_vars, coords=coords)
data_vars=data_vars, coords=coords,
fill_value=fill_value)
return combined


def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
compat='no_conflicts', data_vars='all', coords='different'):
compat='no_conflicts', data_vars='all', coords='different',
fill_value=dtypes.NA):
"""Attempt to auto-magically combine the given datasets into one.
This method attempts to combine a list of datasets into a single entity by
inspecting metadata and using a combination of concat and merge.
@@ -596,6 +609,8 @@ def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
Details are in the documentation of concat
coords : {'minimal', 'different', 'all' or list of str}, optional
Details are in the documentation of concat
fill_value : scalar, optional
Value to use for newly missing values
Returns
-------
@@ -622,4 +637,4 @@ def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
return _auto_combine(datasets, concat_dims=concat_dims, compat=compat,
data_vars=data_vars, coords=coords,
infer_order_from_coords=infer_order_from_coords,
ids=False)
ids=False, fill_value=fill_value)
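
(Illustrative sketch of the new ``fill_value`` argument, not part of the diff; the datasets below are invented.)

    import xarray as xr

    ds1 = xr.Dataset({'a': (('t', 'x'), [[1, 2]])}, coords={'t': [0], 'x': [10, 20]})
    ds2 = xr.Dataset({'a': (('t', 'x'), [[3, 4]])}, coords={'t': [1], 'x': [20, 30]})

    # Concatenating along 't' outer-joins the 'x' coordinate; the gaps this
    # introduces (x=30 is absent from ds1, x=10 from ds2) are filled with 0
    # instead of the default NaN. auto_combine forwards fill_value the same way.
    combined = xr.concat([ds1, ds2], dim='t', fill_value=0)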