forked from pydata/xarray

Merge remote-tracking branch 'upstream/main' into whats-new-2023.02.0
* upstream/main:
  Update apply_ufunc output_sizes error message (pydata#7509)
  Zarr: drop "source" and "original_shape" from encoding (pydata#7500)
  [pre-commit.ci] pre-commit autoupdate (pydata#7507)
dcherian committed Feb 7, 2023
2 parents 777b509 + d2a68d0 commit c7c20d7
Showing 63 changed files with 57 additions and 222 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -16,13 +16,13 @@ repos:
files: ^xarray/
- repo: https://github.com/charliermarsh/ruff-pre-commit
# Ruff version.
rev: 'v0.0.237'
rev: 'v0.0.241'
hooks:
- id: ruff
args: ["--fix"]
# https://github.com/python/black#version-control-integration
- repo: https://github.com/psf/black
rev: 22.12.0
rev: 23.1.0
hooks:
- id: black
- id: black-jupyter
@@ -31,7 +31,7 @@ repos:
hooks:
- id: blackdoc
exclude: "generate_aggregations.py"
additional_dependencies: ["black==22.12.0"]
additional_dependencies: ["black==23.1.0"]
- id: blackdoc-autoupdate-black
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.991
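Most of the Python hunks below only delete a blank line at the top of a function or class body. That is consistent with the black bump above: black 23.1.0's 2023 stable style removes empty lines at the beginning of a block. A minimal before/after sketch (illustrative function, not taken from this diff):

# Before the bump, black 22.12.0 left a blank line at the top of a block alone:
def scale(values, factor=2.0):

    return [v * factor for v in values]


# After the bump, black 23.1.0's stable style removes empty lines at the
# start of a block, which is what most hunks in this commit amount to:
def scale_reformatted(values, factor=2.0):
    return [v * factor for v in values]


print(scale([1, 2, 3]) == scale_reformatted([1, 2, 3]))  # True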
1 change: 0 additions & 1 deletion doc/conf.py
@@ -436,7 +436,6 @@ def update_videos(app: Sphinx):

items = []
for video in videos:

authors = " | ".join(video["authors"])
item = f"""
.. grid-item-card:: {" ".join(video["title"].split())}
1 change: 0 additions & 1 deletion doc/examples/apply_ufunc_vectorize_1d.ipynb
@@ -663,7 +663,6 @@
"\n",
"\n",
"def xr_interp(data, dim, newdim):\n",
"\n",
" interped = xr.apply_ufunc(\n",
" interp1d_np_gufunc, # first the function\n",
" data, # now arguments in the order expected by 'interp1_np'\n",
9 changes: 9 additions & 0 deletions doc/whats-new.rst
@@ -24,6 +24,10 @@ This release brings a major upgrade to :py:func:`xarray.concat`, many bug fixes,
and a bump in supported dependency versions. Thanks to our 11 contributors:
Aron Gergely, Deepak Cherian, Illviljan, James Bourbeau, Joe Hamman,
Justus Magin, Hauke Schulz, Kai Mühlbauer, Ken Mankoff, Spencer Clark, Tom Nicholas.
This release brings a major upgrade to :py:func:`xarray.concat`, bug fixes and
a bump in supported dependency versions. Thanks to our 9 contributors:
Aron Gergely, Deepak Cherian, Illviljan, James Bourbeau, Joe Hamman,
Justus Magin, Kai Mühlbauer, Ken Mankoff, Spencer Clark.

Breaking changes
~~~~~~~~~~~~~~~~
@@ -58,6 +62,11 @@ Bug fixes
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_ and `Scott Chamberlin <https://github.com/scottcha>`_.
- Handle ``keep_attrs`` option in binary operators of :py:meth:`Dataset` (:issue:`7390`, :pull:`7391`).
By `Aron Gergely <https://github.com/arongergely>`_.
- Improve error message when using dask in :py:func:`apply_ufunc` with ``output_sizes`` not supplied. (:pull:`7509`)
By `Tom Nicholas <https://github.com/TomNicholas>`_.
- :py:func:`xarray.Dataset.to_zarr` now drops variable encodings that have been added by xarray during reading
a dataset. (:issue:`7129`, :pull:`7500`).
By `Hauke Schulz <https://github.com/observingClouds>`_.

Documentation
~~~~~~~~~~~~~
1 change: 0 additions & 1 deletion xarray/backends/cfgrib_.py
@@ -119,7 +119,6 @@ def open_dataset(
squeeze=True,
time_dims=("time", "step"),
):

filename_or_obj = _normalize_path(filename_or_obj)
store = CfGribDataStore(
filename_or_obj,
1 change: 0 additions & 1 deletion xarray/backends/h5netcdf_.py
@@ -401,7 +401,6 @@ def open_dataset(
phony_dims=None,
decode_vlen_strings=True,
):

filename_or_obj = _normalize_path(filename_or_obj)
store = H5NetCDFStore.open(
filename_or_obj,
1 change: 0 additions & 1 deletion xarray/backends/netCDF4_.py
@@ -573,7 +573,6 @@ def open_dataset(
lock=None,
autoclose=False,
):

filename_or_obj = _normalize_path(filename_or_obj)
store = NetCDF4DataStore.open(
filename_or_obj,
1 change: 0 additions & 1 deletion xarray/backends/pseudonetcdf_.py
@@ -156,7 +156,6 @@ def open_dataset(
lock=None,
**format_kwargs,
):

filename_or_obj = _normalize_path(filename_or_obj)
store = PseudoNetCDFDataStore.open(
filename_or_obj, lock=lock, mode=mode, **format_kwargs
1 change: 0 additions & 1 deletion xarray/backends/pydap_.py
@@ -178,7 +178,6 @@ def open_dataset(
verify=None,
user_charset=None,
):

store = PydapDataStore.open(
url=filename_or_obj,
application=application,
2 changes: 0 additions & 2 deletions xarray/backends/scipy_.py
@@ -266,7 +266,6 @@ class ScipyBackendEntrypoint(BackendEntrypoint):
url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.ScipyBackendEntrypoint.html"

def guess_can_open(self, filename_or_obj):

magic_number = try_read_magic_number_from_file_or_path(filename_or_obj)
if magic_number is not None and magic_number.startswith(b"\x1f\x8b"):
with gzip.open(filename_or_obj) as f:
@@ -296,7 +295,6 @@ def open_dataset(
mmap=None,
lock=None,
):

filename_or_obj = _normalize_path(filename_or_obj)
store = ScipyDataStore(
filename_or_obj, mode=mode, format=format, group=group, mmap=mmap, lock=lock
6 changes: 5 additions & 1 deletion xarray/backends/zarr.py
@@ -230,6 +230,7 @@ def extract_zarr_variable_encoding(
"""
encoding = variable.encoding.copy()

safe_to_drop = {"source", "original_shape"}
valid_encodings = {
"chunks",
"compressor",
@@ -238,6 +239,10 @@
"write_empty_chunks",
}

for k in safe_to_drop:
if k in encoding:
del encoding[k]

if raise_on_invalid:
invalid = [k for k in encoding if k not in valid_encodings]
if invalid:
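The added lines drop the "source" and "original_shape" keys, bookkeeping that xarray attaches while reading a dataset, from a variable's encoding before it is validated for Zarr. A rough sketch of the round-trip this unblocks, assuming a working netCDF backend and zarr are installed (file names are illustrative):

import numpy as np
import xarray as xr

# Round-trip sketch: reading a file back in can leave bookkeeping entries
# such as "source" or "original_shape" in the encodings.
xr.Dataset({"a": ("x", np.arange(4))}).to_netcdf("example.nc")
ds = xr.open_dataset("example.nc")

# With this change those read-side keys are dropped before the Zarr encoding
# is validated, so writing back out no longer trips over them or requires
# clearing each variable's encoding by hand.
ds.to_zarr("example.zarr", mode="w")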
@@ -882,7 +887,6 @@ def open_dataset(
stacklevel=3,
zarr_version=None,
):

filename_or_obj = _normalize_path(filename_or_obj)
store = ZarrStore.open_group(
filename_or_obj,
2 changes: 0 additions & 2 deletions xarray/convert.py
@@ -115,10 +115,8 @@ def set_cdms2_attrs(var, attrs):

# Curvilinear and unstructured grids
if dataarray.name not in dataarray.coords:

cdms2_axes = {}
for coord_name in set(dataarray.coords.keys()) - set(dataarray.dims):

coord_array = dataarray.coords[coord_name].to_cdms2()

cdms2_axis_cls = (
1 change: 0 additions & 1 deletion xarray/core/accessor_dt.py
@@ -201,7 +201,6 @@ def _strftime(values, date_format):


class TimeAccessor(Generic[T_DataArray]):

__slots__ = ("_obj",)

def __init__(self, obj: T_DataArray) -> None:
2 changes: 0 additions & 2 deletions xarray/core/alignment.py
@@ -938,7 +938,6 @@ def reindex_like(


def _get_broadcast_dims_map_common_coords(args, exclude):

common_coords = {}
dims_map = {}
for arg in args:
@@ -954,7 +953,6 @@ def _get_broadcast_dims_map_common_coords(args, exclude):
def _broadcast_helper(
arg: T_DataWithCoords, exclude, dims_map, common_coords
) -> T_DataWithCoords:

from xarray.core.dataarray import DataArray
from xarray.core.dataset import Dataset

7 changes: 0 additions & 7 deletions xarray/core/combine.py
@@ -53,7 +53,6 @@ def _infer_tile_ids_from_nested_list(entry, current_pos):


def _ensure_same_types(series, dim):

if series.dtype == object:
types = set(series.map(type))
if len(types) > 1:
@@ -80,17 +79,14 @@ def _ensure_same_types(series, dim):


def _infer_concat_order_from_coords(datasets):

concat_dims = []
tile_ids = [() for ds in datasets]

# All datasets have same variables because they've been grouped as such
ds0 = datasets[0]
for dim in ds0.dims:

# Check if dim is a coordinate dimension
if dim in ds0:

# Need to read coordinate values to do ordering
indexes = [ds._indexes.get(dim) for ds in datasets]
if any(index is None for index in indexes):
@@ -105,7 +101,6 @@ def _infer_concat_order_from_coords(datasets):
# If dimension coordinate values are same on every dataset then
# should be leaving this dimension alone (it's just a "bystander")
if not all(index.equals(indexes[0]) for index in indexes[1:]):

# Infer order datasets should be arranged in along this dim
concat_dims.append(dim)

@@ -261,7 +256,6 @@ def _combine_all_along_first_dim(
join: JoinOptions = "outer",
combine_attrs: CombineAttrsOptions = "drop",
):

# Group into lines of datasets which must be combined along dim
# need to sort by _new_tile_id first for groupby to work
# TODO: is the sorted need?
@@ -345,7 +339,6 @@ def _nested_combine(
join: JoinOptions = "outer",
combine_attrs: CombineAttrsOptions = "drop",
):

if len(datasets) == 0:
return Dataset()

1 change: 0 additions & 1 deletion xarray/core/common.py
@@ -1747,7 +1747,6 @@ def ones_like(
def get_chunksizes(
variables: Iterable[Variable],
) -> Mapping[Any, tuple[int, ...]]:

chunks: dict[Any, tuple[int, ...]] = {}
for v in variables:
if hasattr(v._data, "chunks"):
6 changes: 3 additions & 3 deletions xarray/core/computation.py
@@ -156,7 +156,6 @@ def to_gufunc_string(self, exclude_dims=frozenset()):

# enumerate input_core_dims contained in exclude_dims to make them unique
if exclude_dims:

exclude_dims = [self.dims_map[dim] for dim in exclude_dims]

counter = Counter()
@@ -555,7 +554,6 @@ def apply_groupby_func(func, *args):
def unified_dim_sizes(
variables: Iterable[Variable], exclude_dims: AbstractSet = frozenset()
) -> dict[Hashable, int]:

dim_sizes: dict[Hashable, int] = {}

for var in variables:
@@ -725,7 +723,9 @@ def apply_variable_ufunc(
dask_gufunc_kwargs["output_sizes"] = output_sizes_renamed

for key in signature.all_output_core_dims:
if key not in signature.all_input_core_dims and key not in output_sizes:
if (
key not in signature.all_input_core_dims or key in exclude_dims
) and key not in output_sizes:
raise ValueError(
f"dimension '{key}' in 'output_core_dims' needs corresponding (dim, size) in 'output_sizes'"
)
1 change: 0 additions & 1 deletion xarray/core/dataarray.py
@@ -395,7 +395,6 @@ def __init__(

# try to fill in arguments from data if they weren't supplied
if coords is None:

if isinstance(data, DataArray):
coords = data.coords
elif isinstance(data, pd.Series):
1 change: 0 additions & 1 deletion xarray/core/dataset.py
@@ -6768,7 +6768,6 @@ def shift(
fill_value: Any = xrdtypes.NA,
**shifts_kwargs: int,
) -> T_Dataset:

"""Shift this dataset by an offset along one or more dimensions.
Only data variables are moved; coordinates stay in place. This is
1 change: 0 additions & 1 deletion xarray/core/duck_array_ops.py
@@ -492,7 +492,6 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):

# Convert np.NaT to np.nan
elif array.dtype.kind in "mM":

# Convert to specified timedelta units.
if datetime_unit:
array = array / np.timedelta64(1, datetime_unit)
2 changes: 0 additions & 2 deletions xarray/core/groupby.py
@@ -46,7 +46,6 @@


def check_reduce_dims(reduce_dims, dimensions):

if reduce_dims is not ...:
if is_scalar(reduce_dims):
reduce_dims = [reduce_dims]
@@ -1208,7 +1207,6 @@ class DataArrayGroupBy( # type: ignore[misc]


class DatasetGroupByBase(GroupBy["Dataset"], DatasetGroupbyArithmetic):

__slots__ = ()
_dims: Frozen[Hashable, int] | None

2 changes: 0 additions & 2 deletions xarray/core/indexing.py
@@ -1092,7 +1092,6 @@ def _logical_any(args):


def _masked_result_drop_slice(key, data=None):

key = (k for k in key if not isinstance(k, slice))
chunks_hint = getattr(data, "chunks", None)

@@ -1345,7 +1344,6 @@ def __init__(self, array):
self.array = array

def __getitem__(self, key):

if not isinstance(key, VectorizedIndexer):
# if possible, short-circuit when keys are effectively slice(None)
# This preserves dask name and passes lazy array equivalence checks
2 changes: 1 addition & 1 deletion xarray/core/merge.py
@@ -174,7 +174,7 @@ def _assert_prioritized_valid(
indexes: dict[int, Index] = {}

for name, elements_list in grouped.items():
for (_, index) in elements_list:
for _, index in elements_list:
if index is not None:
grouped_by_index[id(index)].append(name)
indexes[id(index)] = index
1 change: 0 additions & 1 deletion xarray/core/missing.py
@@ -80,7 +80,6 @@ class NumpyInterpolator(BaseInterpolator):
"""

def __init__(self, xi, yi, method="linear", fill_value=None, period=None):

if method != "linear":
raise ValueError("only method `linear` is valid for the NumpyInterpolator")

1 change: 0 additions & 1 deletion xarray/core/parallel.py
@@ -33,7 +33,6 @@ def assert_chunks_compatible(a: Dataset, b: Dataset):
def check_result_variables(
result: DataArray | Dataset, expected: Mapping[str, Any], kind: str
):

if kind == "coords":
nice_str = "coordinate"
elif kind == "data_vars":
2 changes: 0 additions & 2 deletions xarray/core/resample.py
@@ -41,7 +41,6 @@ def __init__(
resample_dim: Hashable | None = None,
**kwargs,
) -> None:

if dim == resample_dim:
raise ValueError(
f"Proxy resampling dimension ('{resample_dim}') "
@@ -57,7 +56,6 @@ def _flox_reduce(
keep_attrs: bool | None = None,
**kwargs,
) -> T_Xarray:

from xarray.core.dataarray import DataArray

kwargs.setdefault("method", "cohorts")
6 changes: 3 additions & 3 deletions xarray/core/rolling.py
@@ -132,7 +132,8 @@ def _reduce_method( # type: ignore[misc]
name: str, fillna: Any, rolling_agg_func: Callable | None = None
) -> Callable[..., T_Xarray]:
"""Constructs reduction methods built on a numpy reduction function (e.g. sum),
a bottleneck reduction function (e.g. move_sum), or a Rolling reduction (_mean)."""
a bottleneck reduction function (e.g. move_sum), or a Rolling reduction (_mean).
"""
if rolling_agg_func:
array_agg_func = None
else:
@@ -141,7 +142,6 @@ def _reduce_method( # type: ignore[misc]
bottleneck_move_func = getattr(bottleneck, "move_" + name, None)

def method(self, keep_attrs=None, **kwargs):

keep_attrs = self._get_keep_attrs(keep_attrs)

return self._numpy_or_bottleneck_reduce(
@@ -272,7 +272,7 @@ def __iter__(self) -> Iterator[tuple[DataArray, DataArray]]:
starts = stops - window0
starts[: window0 - offset] = 0

for (label, start, stop) in zip(self.window_labels, starts, stops):
for label, start, stop in zip(self.window_labels, starts, stops):
window = self.obj.isel({dim0: slice(start, stop)})

counts = window.count(dim=[dim0])
(Diff view truncated here; the remaining files of the 63 changed are not shown.)