forked from pydata/xarray

Merge remote-tracking branch 'upstream/master' into missing_value
* upstream/master:
  cfgrib is now part of conda-forge (pydata#2992)
  Add fill_value for concat and auto_combine (pydata#2964)
  Remove deprecated pytest.config usages (pydata#2988)
  Add transpose_coords option to DataArray.transpose (pydata#2556)
  Fix rolling.construct() example (pydata#2967)
  Implement load_dataset() and load_dataarray() (pydata#2917)
dcherian committed May 29, 2019
2 parents 15d94f3 + ae1239c commit df17e1c
Showing 21 changed files with 341 additions and 95 deletions.
3 changes: 1 addition & 2 deletions ci/requirements-py36.yml
@@ -24,12 +24,11 @@ dependencies:
- bottleneck
- zarr
- pseudonetcdf>=3.0.1
- eccodes
- cfgrib>=0.9.2
- cdms2
- pynio
- iris>=1.10
- pydap
- lxml
- pip:
- cfgrib>=0.9.2
- mypy==0.660
3 changes: 1 addition & 2 deletions ci/requirements-py37.yml
@@ -25,9 +25,8 @@ dependencies:
- bottleneck
- zarr
- pseudonetcdf>=3.0.1
- cfgrib>=0.9.2
- lxml
- eccodes
- pydap
- pip:
- cfgrib>=0.9.2
- mypy==0.650
20 changes: 20 additions & 0 deletions conftest.py
@@ -1,9 +1,29 @@
"""Configuration for pytest."""

import pytest


def pytest_addoption(parser):
"""Add command-line flags for pytest."""
parser.addoption("--run-flaky", action="store_true",
help="runs flaky tests")
parser.addoption("--run-network-tests", action="store_true",
help="runs tests requiring a network connection")


def pytest_collection_modifyitems(config, items):

if not config.getoption("--run-flaky"):
skip_flaky = pytest.mark.skip(
reason="set --run-flaky option to run flaky tests")
for item in items:
if "flaky" in item.keywords:
item.add_marker(skip_flaky)

if not config.getoption("--run-network-tests"):
skip_network = pytest.mark.skip(
reason="set --run-network-tests option to run tests requiring an"
"internet connection")
for item in items:
if "network" in item.keywords:
item.add_marker(skip_network)
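
(Illustrative usage, not part of the commit: the test module below is hypothetical and only demonstrates the ``flaky`` and ``network`` markers that this conftest.py looks for in ``item.keywords``.)

    # test_example.py -- hypothetical tests exercising the custom markers
    import pytest


    @pytest.mark.flaky
    def test_sometimes_fails():
        assert True


    @pytest.mark.network
    def test_requires_internet():
        assert True

Running plain ``pytest`` skips both tests; ``pytest --run-flaky --run-network-tests`` enables them.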
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -460,6 +460,7 @@ Dataset methods
:toctree: generated/

open_dataset
load_dataset
open_mfdataset
open_rasterio
open_zarr
@@ -487,6 +488,7 @@ DataArray methods
:toctree: generated/

open_dataarray
load_dataarray
DataArray.to_dataset
DataArray.to_netcdf
DataArray.to_pandas
21 changes: 18 additions & 3 deletions doc/whats-new.rst
@@ -27,8 +27,21 @@ Enhancements
- Character arrays' character dimension name decoding and encoding handled by
``var.encoding['char_dim_name']`` (:issue:`2895`)
By `James McCreight <https://github.com/jmccreight>`_.
- :py:meth:`DataArray.transpose` now accepts a keyword argument
``transpose_coords`` which enables transposition of coordinates in the
same way as :py:meth:`Dataset.transpose`. :py:meth:`DataArray.groupby`,
:py:meth:`DataArray.groupby_bins`, and :py:meth:`DataArray.resample` now
accept a keyword argument ``restore_coord_dims`` which keeps the order
of the dimensions of multi-dimensional coordinates intact (:issue:`1856`).
A short example follows this list.
By `Peter Hausamann <http://github.com/phausamann>`_.
- Clean up Python 2 compatibility in code (:issue:`2950`)
By `Guido Imperiale <https://github.com/crusaderky>`_.
- Implement ``load_dataset()`` and ``load_dataarray()`` as alternatives to
``open_dataset()`` and ``open_dataarray()`` to open, load into memory,
and close files, returning the Dataset or DataArray. These functions are
helpful for avoiding file-lock errors when trying to write to files opened
using ``open_dataset()`` or ``open_dataarray()``. (:issue:`2887`)
By `Dan Nowacki <https://github.com/dnowacki-usgs>`_.
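
(Illustrative sketch of the ``transpose_coords`` entry above, not part of the changelog diff; the array and the 2-D coordinate name ``xy`` are invented.)

    import numpy as np
    import xarray as xr

    # a DataArray with a two-dimensional (non-dimension) coordinate 'xy'
    da = xr.DataArray(np.zeros((2, 3)), dims=('x', 'y'),
                      coords={'xy': (('x', 'y'), np.arange(6).reshape(2, 3))})

    # transpose the data and, with transpose_coords=True, the 2-D coordinate too
    transposed = da.transpose('y', 'x', transpose_coords=True)
    print(transposed.coords['xy'].dims)  # ('y', 'x')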

Bug fixes
~~~~~~~~~
@@ -43,6 +56,8 @@ Bug fixes
By `Martin Pletcher <https://github.com/pletchm>`_.
- Increased support for `missing_value` (:issue:`2871`)
By `Deepak Cherian <https://github.com/dcherian>`_.
- Removed usages of `pytest.config`, which is deprecated (:issue:`2988`)
By `Maximilian Roos <https://github.com/max-sixty>`_.

.. _whats-new.0.12.1:

@@ -155,9 +170,9 @@ Other enhancements
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
- Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`).
By `Kevin Squire <https://github.com/kmsquire>`_.
- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=``
parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for
backwards compatibility. The ``overwrite_encoded_chunks`` parameter is
added to remove the original zarr chunk encoding.
By `Lily Wang <https://github.com/lilyminium>`_.

2 changes: 1 addition & 1 deletion xarray/__init__.py
@@ -17,7 +17,7 @@
from .core.options import set_options

from .backends.api import (open_dataset, open_dataarray, open_mfdataset,
save_mfdataset)
save_mfdataset, load_dataset, load_dataarray)
from .backends.rasterio_ import open_rasterio
from .backends.zarr import open_zarr

57 changes: 55 additions & 2 deletions xarray/backends/api.py
@@ -185,12 +185,64 @@ def _finalize_store(write, store):
store.close()


def load_dataset(filename_or_obj, **kwargs):
    """Open, load into memory, and close a Dataset from a file or file-like
    object.

    This is a thin wrapper around :py:meth:`~xarray.open_dataset`. It differs
    from `open_dataset` in that it loads the Dataset into memory, closes the
    file, and returns the Dataset. In contrast, `open_dataset` keeps the file
    handle open and lazy loads its contents. All parameters are passed directly
    to `open_dataset`. See that documentation for further details.

    Returns
    -------
    dataset : Dataset
        The newly created Dataset.

    See Also
    --------
    open_dataset
    """
    if 'cache' in kwargs:
        raise TypeError('cache has no effect in this context')

    with open_dataset(filename_or_obj, **kwargs) as ds:
        return ds.load()


def load_dataarray(filename_or_obj, **kwargs):
    """Open, load into memory, and close a DataArray from a file or file-like
    object containing a single data variable.

    This is a thin wrapper around :py:meth:`~xarray.open_dataarray`. It differs
    from `open_dataarray` in that it loads the DataArray into memory, closes
    the file, and returns the DataArray. In contrast, `open_dataarray` keeps
    the file handle open and lazy loads its contents. All parameters are passed
    directly to `open_dataarray`. See that documentation for further details.

    Returns
    -------
    dataarray : DataArray
        The newly created DataArray.

    See Also
    --------
    open_dataarray
    """
    if 'cache' in kwargs:
        raise TypeError('cache has no effect in this context')

    with open_dataarray(filename_or_obj, **kwargs) as da:
        return da.load()
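
(Illustrative usage, not part of the diff: the file name ``data.nc`` is hypothetical.)

    import xarray as xr

    # load_dataset reads everything into memory and closes the file right away,
    # so the same path can be overwritten without hitting a file-lock error.
    ds = xr.load_dataset('data.nc')
    ds.attrs['processed'] = True
    ds.to_netcdf('data.nc')

With ``open_dataset`` the file handle stays open for lazy loading, so writing back to the same path before the handle is closed can fail; ``load_dataarray`` behaves the same way for single-variable files.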


def open_dataset(filename_or_obj, group=None, decode_cf=True,
mask_and_scale=None, decode_times=True, autoclose=None,
concat_characters=True, decode_coords=True, engine=None,
chunks=None, lock=None, cache=None, drop_variables=None,
backend_kwargs=None, use_cftime=None):
"""Load and decode a dataset from a file or file-like object.
"""Open and decode a dataset from a file or file-like object.
Parameters
----------
@@ -406,7 +458,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True,
concat_characters=True, decode_coords=True, engine=None,
chunks=None, lock=None, cache=None, drop_variables=None,
backend_kwargs=None, use_cftime=None):
"""Open an DataArray from a netCDF file containing a single data variable.
"""Open an DataArray from a file or file-like object containing a single
data variable.
This is designed to read netCDF files with only one data variable. If
multiple variables are present then a ValueError is raised.
55 changes: 35 additions & 20 deletions xarray/core/combine.py
@@ -4,7 +4,7 @@

import pandas as pd

from . import utils
from . import utils, dtypes
from .alignment import align
from .merge import merge
from .variable import IndexVariable, Variable, as_variable
@@ -14,7 +14,7 @@

def concat(objs, dim=None, data_vars='all', coords='different',
compat='equals', positions=None, indexers=None, mode=None,
concat_over=None):
concat_over=None, fill_value=dtypes.NA):
"""Concatenate xarray objects along a new or existing dimension.
Parameters
@@ -66,6 +66,8 @@ def concat(objs, dim=None, data_vars='all', coords='different',
List of integer arrays which specifies the integer positions to which
to assign each dataset along the concatenated dimension. If not
supplied, objects are concatenated in the provided order.
fill_value : scalar, optional
Value to use for newly missing values
indexers, mode, concat_over : deprecated
Returns
@@ -117,7 +119,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
else:
raise TypeError('can only concatenate xarray Dataset and DataArray '
'objects, got %s' % type(first_obj))
return f(objs, dim, data_vars, coords, compat, positions)
return f(objs, dim, data_vars, coords, compat, positions, fill_value)


def _calc_concat_dim_coord(dim):
@@ -212,7 +214,8 @@ def process_subset_opt(opt, subset):
return concat_over, equals


def _dataset_concat(datasets, dim, data_vars, coords, compat, positions):
def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
fill_value=dtypes.NA):
"""
Concatenate a sequence of datasets along a new or existing dimension
"""
@@ -225,7 +228,8 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions):
dim, coord = _calc_concat_dim_coord(dim)
# Make sure we're working on a copy (we'll be loading variables)
datasets = [ds.copy() for ds in datasets]
datasets = align(*datasets, join='outer', copy=False, exclude=[dim])
datasets = align(*datasets, join='outer', copy=False, exclude=[dim],
fill_value=fill_value)

concat_over, equals = _calc_concat_over(datasets, dim, data_vars, coords)

@@ -317,7 +321,7 @@ def ensure_common_dims(vars):


def _dataarray_concat(arrays, dim, data_vars, coords, compat,
positions):
positions, fill_value=dtypes.NA):
arrays = list(arrays)

if data_vars != 'all':
@@ -336,14 +340,15 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat,
datasets.append(arr._to_temp_dataset())

ds = _dataset_concat(datasets, dim, data_vars, coords, compat,
positions)
positions, fill_value)
result = arrays[0]._from_temp_dataset(ds, name)

result.name = result_name(arrays)
return result


def _auto_concat(datasets, dim=None, data_vars='all', coords='different'):
def _auto_concat(datasets, dim=None, data_vars='all', coords='different',
fill_value=dtypes.NA):
if len(datasets) == 1 and dim is None:
# There is nothing more to combine, so kick out early.
return datasets[0]
@@ -366,7 +371,8 @@ def _auto_concat(datasets, dim=None, data_vars='all', coords='different'):
'supply the ``concat_dim`` argument '
'explicitly')
dim, = concat_dims
return concat(datasets, dim=dim, data_vars=data_vars, coords=coords)
return concat(datasets, dim=dim, data_vars=data_vars,
coords=coords, fill_value=fill_value)


_CONCAT_DIM_DEFAULT = utils.ReprObject('<inferred>')
@@ -442,7 +448,8 @@ def _check_shape_tile_ids(combined_tile_ids):


def _combine_nd(combined_ids, concat_dims, data_vars='all',
coords='different', compat='no_conflicts'):
coords='different', compat='no_conflicts',
fill_value=dtypes.NA):
"""
Concatenates and merges an N-dimensional structure of datasets.
@@ -472,13 +479,14 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all',
dim=concat_dim,
data_vars=data_vars,
coords=coords,
compat=compat)
compat=compat,
fill_value=fill_value)
combined_ds = list(combined_ids.values())[0]
return combined_ds


def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars,
coords, compat):
coords, compat, fill_value=dtypes.NA):
# Group into lines of datasets which must be combined along dim
# need to sort by _new_tile_id first for groupby to work
# TODO remove all these sorted OrderedDicts once python >= 3.6 only
@@ -490,7 +498,8 @@ def _auto_combine_all_along_first_dim(combined_ids, dim, data_vars,
combined_ids = OrderedDict(sorted(group))
datasets = combined_ids.values()
new_combined_ids[new_id] = _auto_combine_1d(datasets, dim, compat,
data_vars, coords)
data_vars, coords,
fill_value)
return new_combined_ids


@@ -500,18 +509,20 @@ def vars_as_keys(ds):

def _auto_combine_1d(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
compat='no_conflicts',
data_vars='all', coords='different'):
data_vars='all', coords='different',
fill_value=dtypes.NA):
# This is just the old auto_combine function (which only worked along 1D)
if concat_dim is not None:
dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim
sorted_datasets = sorted(datasets, key=vars_as_keys)
grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
concatenated = [_auto_concat(list(ds_group), dim=dim,
data_vars=data_vars, coords=coords)
data_vars=data_vars, coords=coords,
fill_value=fill_value)
for id, ds_group in grouped_by_vars]
else:
concatenated = datasets
merged = merge(concatenated, compat=compat)
merged = merge(concatenated, compat=compat, fill_value=fill_value)
return merged


@@ -521,7 +532,7 @@ def _new_tile_id(single_id_ds_pair):


def _auto_combine(datasets, concat_dims, compat, data_vars, coords,
infer_order_from_coords, ids):
infer_order_from_coords, ids, fill_value=dtypes.NA):
"""
Calls logic to decide concatenation order before concatenating.
"""
@@ -550,12 +561,14 @@ def _auto_combine(datasets, concat_dims, compat, data_vars, coords,

# Repeatedly concatenate then merge along each dimension
combined = _combine_nd(combined_ids, concat_dims, compat=compat,
data_vars=data_vars, coords=coords)
data_vars=data_vars, coords=coords,
fill_value=fill_value)
return combined


def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
compat='no_conflicts', data_vars='all', coords='different'):
compat='no_conflicts', data_vars='all', coords='different',
fill_value=dtypes.NA):
"""Attempt to auto-magically combine the given datasets into one.
This method attempts to combine a list of datasets into a single entity by
inspecting metadata and using a combination of concat and merge.
@@ -596,6 +609,8 @@ def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
Details are in the documentation of concat
coords : {'minimal', 'different', 'all' or list of str}, optional
Details are in the documentation of concat
fill_value : scalar, optional
Value to use for newly missing values
Returns
-------
@@ -622,4 +637,4 @@ def auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
return _auto_combine(datasets, concat_dims=concat_dims, compat=compat,
data_vars=data_vars, coords=coords,
infer_order_from_coords=infer_order_from_coords,
ids=False)
ids=False, fill_value=fill_value)
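
(Illustrative sketch of the new ``fill_value`` argument, not part of the diff; the datasets below are invented.)

    import xarray as xr

    ds1 = xr.Dataset({'a': (('t', 'x'), [[1, 2]])}, coords={'t': [0], 'x': [10, 20]})
    ds2 = xr.Dataset({'a': (('t', 'x'), [[3, 4]])}, coords={'t': [1], 'x': [20, 30]})

    # Concatenating along 't' outer-joins the 'x' coordinate; the gaps this
    # introduces (x=30 is absent from ds1, x=10 from ds2) are filled with 0
    # instead of the default NaN. auto_combine forwards fill_value the same way.
    combined = xr.concat([ds1, ds2], dim='t', fill_value=0)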