Skip to content

Commit

Permalink
open_mfdataset, concat now support the 'join' kwarg.
Browse files Browse the repository at this point in the history
Closes #1354
  • Loading branch information
dcherian committed Jul 12, 2019
1 parent 8f0d9e5 commit ec7fced
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 27 deletions.
11 changes: 7 additions & 4 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
compat='no_conflicts', preprocess=None, engine=None,
lock=None, data_vars='all', coords='different',
combine='_old_auto', autoclose=None, parallel=False,
**kwargs):
join='outer', **kwargs):
"""Open multiple files as a single dataset.
If combine='by_coords' then the function ``combine_by_coords`` is used to
Expand Down Expand Up @@ -703,6 +703,8 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
parallel : bool, optional
If True, the open and preprocess steps of this function will be
performed in parallel using ``dask.delayed``. Default is False.
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
Passed on to align.
**kwargs : optional
Additional arguments passed on to :py:func:`xarray.open_dataset`.
Expand Down Expand Up @@ -788,18 +790,19 @@ def open_mfdataset(paths, chunks=None, concat_dim='_not_supplied',
# Remove this after deprecation cycle from #2616 is complete
combined = auto_combine(datasets, concat_dim=concat_dim,
compat=compat, data_vars=data_vars,
coords=coords)
coords=coords, join=join)
elif combine == 'nested':
# Combined nested list by successive concat and merge operations
# along each dimension, using structure given by "ids"
combined = _nested_combine(datasets, concat_dims=concat_dim,
compat=compat, data_vars=data_vars,
coords=coords, ids=ids)
coords=coords, ids=ids, join=join)
elif combine == 'by_coords':
# Redo ordering from coordinates, ignoring how they were ordered
# previously
combined = combine_by_coords(datasets, compat=compat,
data_vars=data_vars, coords=coords)
data_vars=data_vars, coords=coords,
join=join)
else:
raise ValueError("{} is an invalid option for the keyword argument"
" ``combine``".format(combine))
Expand Down
47 changes: 30 additions & 17 deletions xarray/core/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def _check_shape_tile_ids(combined_tile_ids):

def _combine_nd(combined_ids, concat_dims, data_vars='all',
coords='different', compat='no_conflicts',
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):
"""
Combines an N-dimensional structure of datasets into one by applying a
series of either concat and merge operations along each dimension.
Expand Down Expand Up @@ -177,13 +177,14 @@ def _combine_nd(combined_ids, concat_dims, data_vars='all',
data_vars=data_vars,
coords=coords,
compat=compat,
fill_value=fill_value)
fill_value=fill_value,
join=join)
(combined_ds,) = combined_ids.values()
return combined_ds


def _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat,
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):

# Group into lines of datasets which must be combined along dim
# need to sort by _new_tile_id first for groupby to work
Expand All @@ -197,12 +198,13 @@ def _combine_all_along_first_dim(combined_ids, dim, data_vars, coords, compat,
combined_ids = OrderedDict(sorted(group))
datasets = combined_ids.values()
new_combined_ids[new_id] = _combine_1d(datasets, dim, compat,
data_vars, coords, fill_value)
data_vars, coords, fill_value,
join)
return new_combined_ids


def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all',
coords='different', fill_value=dtypes.NA):
coords='different', fill_value=dtypes.NA, join='outer'):
"""
Applies either concat or merge to 1D list of datasets depending on value
of concat_dim
Expand All @@ -222,7 +224,8 @@ def _combine_1d(datasets, concat_dim, compat='no_conflicts', data_vars='all',
else:
raise
else:
combined = merge(datasets, compat=compat, fill_value=fill_value)
combined = merge(datasets, compat=compat, fill_value=fill_value,
join=join)

return combined

Expand All @@ -233,7 +236,7 @@ def _new_tile_id(single_id_ds_pair):


def _nested_combine(datasets, concat_dims, compat, data_vars, coords, ids,
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):

if len(datasets) == 0:
return Dataset()
Expand All @@ -254,12 +257,13 @@ def _nested_combine(datasets, concat_dims, compat, data_vars, coords, ids,
# Apply series of concatenate or merge operations along each dimension
combined = _combine_nd(combined_ids, concat_dims, compat=compat,
data_vars=data_vars, coords=coords,
fill_value=fill_value)
fill_value=fill_value, join=join)
return combined


def combine_nested(datasets, concat_dim, compat='no_conflicts',
data_vars='all', coords='different', fill_value=dtypes.NA):
data_vars='all', coords='different', fill_value=dtypes.NA,
join='outer'):
"""
Explicitly combine an N-dimensional grid of datasets into one by using a
succession of concat and merge operations along each dimension of the grid.
Expand Down Expand Up @@ -312,6 +316,8 @@ def combine_nested(datasets, concat_dim, compat='no_conflicts',
Details are in the documentation of concat
fill_value : scalar, optional
Value to use for newly missing values
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
How to combine objects with different indexes.
Returns
-------
Expand Down Expand Up @@ -383,15 +389,15 @@ def combine_nested(datasets, concat_dim, compat='no_conflicts',
# The IDs argument tells _manual_combine that datasets aren't yet sorted
return _nested_combine(datasets, concat_dims=concat_dim, compat=compat,
data_vars=data_vars, coords=coords, ids=False,
fill_value=fill_value)
fill_value=fill_value, join=join)


def vars_as_keys(ds):
return tuple(sorted(ds))


def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',
coords='different', fill_value=dtypes.NA):
coords='different', fill_value=dtypes.NA, join='outer'):
"""
Attempt to auto-magically combine the given datasets into one by using
dimension coordinates.
Expand Down Expand Up @@ -439,6 +445,8 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',
Details are in the documentation of concat
fill_value : scalar, optional
Value to use for newly missing values
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
How to combine objects with different indexes.
Returns
-------
Expand Down Expand Up @@ -523,7 +531,8 @@ def combine_by_coords(datasets, compat='no_conflicts', data_vars='all',


def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',
data_vars='all', coords='different', fill_value=dtypes.NA):
data_vars='all', coords='different', fill_value=dtypes.NA,
join='outer'):
"""
Attempt to auto-magically combine the given datasets into one.
Expand Down Expand Up @@ -571,6 +580,8 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',
Details are in the documentation of concat
fill_value : scalar, optional
Value to use for newly missing values
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
How to combine objects with different indexes.
Returns
-------
Expand Down Expand Up @@ -626,7 +637,8 @@ def auto_combine(datasets, concat_dim='_not_supplied', compat='no_conflicts',

return _old_auto_combine(datasets, concat_dim=concat_dim,
compat=compat, data_vars=data_vars,
coords=coords, fill_value=fill_value)
coords=coords, fill_value=fill_value,
join=join)


def _dimension_coords_exist(datasets):
Expand Down Expand Up @@ -667,7 +679,7 @@ def _requires_concat_and_merge(datasets):
def _old_auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,
compat='no_conflicts',
data_vars='all', coords='different',
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):
if concat_dim is not None:
dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim

Expand All @@ -676,16 +688,17 @@ def _old_auto_combine(datasets, concat_dim=_CONCAT_DIM_DEFAULT,

concatenated = [_auto_concat(list(datasets), dim=dim,
data_vars=data_vars, coords=coords,
fill_value=fill_value)
fill_value=fill_value, join=join)
for vars, datasets in grouped]
else:
concatenated = datasets
merged = merge(concatenated, compat=compat, fill_value=fill_value)
merged = merge(concatenated, compat=compat, fill_value=fill_value,
join=join)
return merged


def _auto_concat(datasets, dim=None, data_vars='all', coords='different',
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):
if len(datasets) == 1 and dim is None:
# There is nothing more to combine, so kick out early.
return datasets[0]
Expand Down
14 changes: 8 additions & 6 deletions xarray/core/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

def concat(objs, dim=None, data_vars='all', coords='different',
compat='equals', positions=None, indexers=None, mode=None,
concat_over=None, fill_value=dtypes.NA):
concat_over=None, fill_value=dtypes.NA, join='outer'):
"""Concatenate xarray objects along a new or existing dimension.
Parameters
Expand Down Expand Up @@ -65,6 +65,8 @@ def concat(objs, dim=None, data_vars='all', coords='different',
supplied, objects are concatenated in the provided order.
fill_value : scalar, optional
Value to use for newly missing values
join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
How to combine objects with different indexes.
indexers, mode, concat_over : deprecated
Returns
Expand Down Expand Up @@ -116,7 +118,7 @@ def concat(objs, dim=None, data_vars='all', coords='different',
else:
raise TypeError('can only concatenate xarray Dataset and DataArray '
'objects, got %s' % type(first_obj))
return f(objs, dim, data_vars, coords, compat, positions, fill_value)
return f(objs, dim, data_vars, coords, compat, positions, fill_value, join)


def _calc_concat_dim_coord(dim):
Expand Down Expand Up @@ -212,7 +214,7 @@ def process_subset_opt(opt, subset):


def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
fill_value=dtypes.NA):
fill_value=dtypes.NA, join='outer'):
"""
Concatenate a sequence of datasets along a new or existing dimension
"""
Expand All @@ -225,7 +227,7 @@ def _dataset_concat(datasets, dim, data_vars, coords, compat, positions,
dim, coord = _calc_concat_dim_coord(dim)
# Make sure we're working on a copy (we'll be loading variables)
datasets = [ds.copy() for ds in datasets]
datasets = align(*datasets, join='outer', copy=False, exclude=[dim],
datasets = align(*datasets, join=join, copy=False, exclude=[dim],
fill_value=fill_value)

concat_over, equals = _calc_concat_over(datasets, dim, data_vars, coords)
Expand Down Expand Up @@ -318,7 +320,7 @@ def ensure_common_dims(vars):


def _dataarray_concat(arrays, dim, data_vars, coords, compat,
positions, fill_value=dtypes.NA):
positions, fill_value=dtypes.NA, join='outer'):
arrays = list(arrays)

if data_vars != 'all':
Expand All @@ -337,5 +339,5 @@ def _dataarray_concat(arrays, dim, data_vars, coords, compat,
datasets.append(arr._to_temp_dataset())

ds = _dataset_concat(datasets, dim, data_vars, coords, compat,
positions, fill_value=fill_value)
positions, fill_value=fill_value, join=join)
return arrays[0]._from_temp_dataset(ds, name)

0 comments on commit ec7fced

Please sign in to comment.