Skip to content

Commit

Permalink
Implement load_dataset() and load_dataarray() (#2917)
Browse files Browse the repository at this point in the history
* Partial fix for #2841 to improve formatting.

Updates formatting to use .format() instead of % operator. Changed all instances of % to .format() and added test for using tuple as key, which errored using % operator.

* Revert "Partial fix for #2841 to improve formatting."

This reverts commit f17f3ad.

* Implement load_dataset() and load_dataarray()

BUG: Fixes #2887 by adding @shoyer's solution for load_dataset and load_dataarray: wrappers around open_dataset and open_dataarray that open, load, and close the file and return the Dataset/DataArray
TST: Add tests for sequentially opening and writing to files using new functions
DOC: Add to whats-new.rst. Also a tiny change to the open_dataset docstring

Update docstrings and check for cache in kwargs

Undeprecate load_dataset

Add to api.rst, fix whats-new.rst typo, raise error instead of warning
  • Loading branch information
dnowacki-usgs authored and shoyer committed May 16, 2019
1 parent 4e778f0 commit bd78b7f
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 17 deletions.
2 changes: 2 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ Dataset methods
:toctree: generated/

open_dataset
load_dataset
open_mfdataset
open_rasterio
open_zarr
Expand Down Expand Up @@ -487,6 +488,7 @@ DataArray methods
:toctree: generated/

open_dataarray
load_dataarray
DataArray.to_dataset
DataArray.to_netcdf
DataArray.to_pandas
Expand Down
12 changes: 9 additions & 3 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ Enhancements
By `James McCreight <https://github.com/jmccreight>`_.
- Clean up Python 2 compatibility in code (:issue:`2950`)
By `Guido Imperiale <https://github.com/crusaderky>`_.
- Implement ``load_dataset()`` and ``load_dataarray()`` as alternatives to
``open_dataset()`` and ``open_dataarray()`` to open, load into memory,
and close files, returning the Dataset or DataArray. These functions are
helpful for avoiding file-lock errors when trying to write to files opened
using ``open_dataset()`` or ``open_dataarray()``. (:issue:`2887`)
By `Dan Nowacki <https://github.com/dnowacki-usgs>`_.

Bug fixes
~~~~~~~~~
Expand Down Expand Up @@ -153,9 +159,9 @@ Other enhancements
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
- Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`).
By `Kevin Squire <https://github.com/kmsquire>`_.
- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
added to remove the original zarr chunk encoding.
By `Lily Wang <https://github.com/lilyminium>`_.

Expand Down
2 changes: 1 addition & 1 deletion xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from .core.options import set_options

from .backends.api import (open_dataset, open_dataarray, open_mfdataset,
save_mfdataset)
save_mfdataset, load_dataset, load_dataarray)
from .backends.rasterio_ import open_rasterio
from .backends.zarr import open_zarr

Expand Down
57 changes: 55 additions & 2 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,64 @@ def _finalize_store(write, store):
store.close()


def load_dataset(filename_or_obj, **kwargs):
    """Read a Dataset fully into memory from a file or file-like object.

    Convenience wrapper around :py:meth:`~xarray.open_dataset`. Where
    `open_dataset` keeps the file handle open and lazy loads its contents,
    this function loads the Dataset into memory, closes the file, and then
    returns the Dataset. Every argument is forwarded unchanged to
    `open_dataset`; see that documentation for further details.

    Returns
    -------
    dataset : Dataset
        The newly created Dataset.

    Raises
    ------
    TypeError
        If ``cache`` is supplied as a keyword argument.

    See Also
    --------
    open_dataset
    """
    # Reject ``cache`` up front, before any file is opened.
    cache_requested = 'cache' in kwargs
    if cache_requested:
        raise TypeError('cache has no effect in this context')

    # Leaving the ``with`` block closes the dataset once its data is loaded.
    with open_dataset(filename_or_obj, **kwargs) as opened:
        in_memory = opened.load()
    return in_memory


def load_dataarray(filename_or_obj, **kwargs):
    """Open, load into memory, and close a DataArray from a file or file-like
    object containing a single data variable.

    This is a thin wrapper around :py:meth:`~xarray.open_dataarray`. It differs
    from `open_dataarray` in that it loads the DataArray into memory, closes
    the file, and returns the DataArray. In contrast, `open_dataarray` keeps
    the file handle open and lazy loads its contents. All parameters are passed
    directly to `open_dataarray`. See that documentation for further details.

    Returns
    -------
    dataarray : DataArray
        The newly created DataArray.

    Raises
    ------
    TypeError
        If ``cache`` is supplied as a keyword argument.

    See Also
    --------
    open_dataarray
    """
    # Reject ``cache`` up front, before any file is opened.
    if 'cache' in kwargs:
        raise TypeError('cache has no effect in this context')

    # Leaving the ``with`` block closes the file once the data is loaded.
    with open_dataarray(filename_or_obj, **kwargs) as da:
        return da.load()


def open_dataset(filename_or_obj, group=None, decode_cf=True,
mask_and_scale=None, decode_times=True, autoclose=None,
concat_characters=True, decode_coords=True, engine=None,
chunks=None, lock=None, cache=None, drop_variables=None,
backend_kwargs=None, use_cftime=None):
"""Load and decode a dataset from a file or file-like object.
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
Expand Down Expand Up @@ -406,7 +458,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
concat_characters=True, decode_coords=True, engine=None,
chunks=None, lock=None, cache=None, drop_variables=None,
backend_kwargs=None, use_cftime=None):
"""Open an DataArray from a netCDF file containing a single data variable.
"""Open a DataArray from a file or file-like object containing a single
data variable.
This is designed to read netCDF files with only one data variable. If
multiple variables are present then a ValueError is raised.
Expand Down
19 changes: 18 additions & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import xarray as xr
from xarray import (
DataArray, Dataset, backends, open_dataarray, open_dataset, open_mfdataset,
save_mfdataset)
save_mfdataset, load_dataset, load_dataarray)
from xarray.backends.common import robust_getitem
from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding
from xarray.backends.pydap_ import PydapDataStore
Expand Down Expand Up @@ -2641,6 +2641,23 @@ def test_save_mfdataset_compute_false_roundtrip(self):
with open_mfdataset([tmp1, tmp2]) as actual:
assert_identical(actual, original)

def test_load_dataset(self):
    # Write a dataset, read it back with load_dataset, then overwrite the
    # same path.
    with create_tmp_file() as path:
        written = Dataset({'foo': ('x', np.random.randn(10))})
        written.to_netcdf(path)
        loaded = load_dataset(path)
        # this would fail if we used open_dataset instead of load_dataset
        loaded.to_netcdf(path)

def test_load_dataarray(self):
    # Write a single-variable dataset, read it back with load_dataarray,
    # then overwrite the same path.
    with create_tmp_file() as path:
        written = Dataset({'foo': ('x', np.random.randn(10))})
        written.to_netcdf(path)
        loaded = load_dataarray(path)
        # this would fail if we used open_dataarray instead of
        # load_dataarray
        loaded.to_netcdf(path)


@requires_scipy_or_netCDF4
@requires_pydap
Expand Down
15 changes: 5 additions & 10 deletions xarray/tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir,
github_url='https://github.com/pydata/xarray-data',
branch='master', **kws):
"""
Load a dataset from the online repository (requires internet).
Open a dataset from the online repository (requires internet).
If a local copy is found then always use that to avoid network traffic.
Expand Down Expand Up @@ -91,17 +91,12 @@ def open_dataset(name, cache=True, cache_dir=_default_cache_dir,

def load_dataset(*args, **kwargs):
    """
    Open, load into memory, and close a dataset from the online repository
    (requires internet).

    All positional and keyword arguments are forwarded unchanged to
    `open_dataset` in this module.

    See Also
    --------
    open_dataset
    """
    # Use the dataset as a context manager so it is closed once its data is
    # loaded; a bare ``open_dataset(...).load()`` would return without
    # closing it.
    with open_dataset(*args, **kwargs) as ds:
        return ds.load()

0 comments on commit bd78b7f

Please sign in to comment.