forked from pydata/xarray

Merge remote-tracking branch 'upstream/main' into whats-new-2023.02.0
* upstream/main:
  Update apply_ufunc output_sizes error message (pydata#7509)
  Zarr: drop "source" and "original_shape" from encoding (pydata#7500)
  [pre-commit.ci] pre-commit autoupdate (pydata#7507)
dcherian committed Feb 7, 2023
2 parents 777b509 + d2a68d0 commit c7c20d7
Showing 63 changed files with 57 additions and 222 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -16,13 +16,13 @@ repos:
files: ^xarray/
- repo: https://github.com/charliermarsh/ruff-pre-commit
# Ruff version.
rev: 'v0.0.237'
rev: 'v0.0.241'
hooks:
- id: ruff
args: ["--fix"]
# https://github.com/python/black#version-control-integration
- repo: https://github.com/psf/black
rev: 22.12.0
rev: 23.1.0
hooks:
- id: black
- id: black-jupyter
@@ -31,7 +31,7 @@ repos:
hooks:
- id: blackdoc
exclude: "generate_aggregations.py"
additional_dependencies: ["black==22.12.0"]
additional_dependencies: ["black==23.1.0"]
- id: blackdoc-autoupdate-black
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.991
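Most of the Python hunks below only delete a blank line at the top of a function or class body. That is consistent with the black bump above: black 23.1.0's 2023 stable style removes empty lines at the beginning of a block. A minimal before/after sketch (illustrative function, not taken from this diff):

# Before the bump, black 22.12.0 left a blank line at the top of a block alone:
def scale(values, factor=2.0):

    return [v * factor for v in values]


# After the bump, black 23.1.0's stable style removes empty lines at the
# start of a block, which is what most hunks in this commit amount to:
def scale_reformatted(values, factor=2.0):
    return [v * factor for v in values]


print(scale([1, 2, 3]) == scale_reformatted([1, 2, 3]))  # True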
1 change: 0 additions & 1 deletion doc/conf.py
@@ -436,7 +436,6 @@ def update_videos(app: Sphinx):

items = []
for video in videos:

authors = " | ".join(video["authors"])
item = f"""
.. grid-item-card:: {" ".join(video["title"].split())}
1 change: 0 additions & 1 deletion doc/examples/apply_ufunc_vectorize_1d.ipynb
@@ -663,7 +663,6 @@
"\n",
"\n",
"def xr_interp(data, dim, newdim):\n",
"\n",
" interped = xr.apply_ufunc(\n",
" interp1d_np_gufunc, # first the function\n",
" data, # now arguments in the order expected by 'interp1_np'\n",
9 changes: 9 additions & 0 deletions doc/whats-new.rst
@@ -24,6 +24,10 @@ This release brings a major upgrade to :py:func:`xarray.concat`, many bug fixes,
and a bump in supported dependency versions. Thanks to our 11 contributors:
Aron Gergely, Deepak Cherian, Illviljan, James Bourbeau, Joe Hamman,
Justus Magin, Hauke Schulz, Kai Mühlbauer, Ken Mankoff, Spencer Clark, Tom Nicholas.
This release brings a major upgrade to :py:func:`xarray.concat`, bug fixes and
a bump in supported dependency versions. Thanks to our 9 contributors:
Aron Gergely, Deepak Cherian, Illviljan, James Bourbeau, Joe Hamman,
Justus Magin, Kai Mühlbauer, Ken Mankoff, Spencer Clark.

Breaking changes
~~~~~~~~~~~~~~~~
@@ -58,6 +62,11 @@ Bug fixes
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_ and `Scott Chamberlin <https://github.com/scottcha>`_.
- Handle ``keep_attrs`` option in binary operators of :py:meth:`Dataset` (:issue:`7390`, :pull:`7391`).
By `Aron Gergely <https://github.com/arongergely>`_.
- Improve error message when using dask in :py:func:`apply_ufunc` with ``output_sizes`` not supplied. (:pull:`7509`)
By `Tom Nicholas <https://github.com/TomNicholas>`_.
- :py:func:`xarray.Dataset.to_zarr` now drops variable encodings that have been added by xarray during reading
a dataset. (:issue:`7129`, :pull:`7500`).
By `Hauke Schulz <https://github.com/observingClouds>`_.

Documentation
~~~~~~~~~~~~~
1 change: 0 additions & 1 deletion xarray/backends/cfgrib_.py
@@ -119,7 +119,6 @@ def open_dataset(
squeeze=True,
time_dims=("time", "step"),
):

filename_or_obj = _normalize_path(filename_or_obj)
store = CfGribDataStore(
filename_or_obj,
1 change: 0 additions & 1 deletion xarray/backends/h5netcdf_.py
@@ -401,7 +401,6 @@ def open_dataset(
phony_dims=None,
decode_vlen_strings=True,
):

filename_or_obj = _normalize_path(filename_or_obj)
store = H5NetCDFStore.open(
filename_or_obj,
1 change: 0 additions & 1 deletion xarray/backends/netCDF4_.py
@@ -573,7 +573,6 @@ def open_dataset(
lock=None,
autoclose=False,
):

filename_or_obj = _normalize_path(filename_or_obj)
store = NetCDF4DataStore.open(
filename_or_obj,
1 change: 0 additions & 1 deletion xarray/backends/pseudonetcdf_.py
@@ -156,7 +156,6 @@ def open_dataset(
lock=None,
**format_kwargs,
):

filename_or_obj = _normalize_path(filename_or_obj)
store = PseudoNetCDFDataStore.open(
filename_or_obj, lock=lock, mode=mode, **format_kwargs
1 change: 0 additions & 1 deletion xarray/backends/pydap_.py
@@ -178,7 +178,6 @@ def open_dataset(
verify=None,
user_charset=None,
):

store = PydapDataStore.open(
url=filename_or_obj,
application=application,
2 changes: 0 additions & 2 deletions xarray/backends/scipy_.py
@@ -266,7 +266,6 @@ class ScipyBackendEntrypoint(BackendEntrypoint):
url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.ScipyBackendEntrypoint.html"

def guess_can_open(self, filename_or_obj):

magic_number = try_read_magic_number_from_file_or_path(filename_or_obj)
if magic_number is not None and magic_number.startswith(b"\x1f\x8b"):
with gzip.open(filename_or_obj) as f:
@@ -296,7 +295,6 @@ def open_dataset(
mmap=None,
lock=None,
):

filename_or_obj = _normalize_path(filename_or_obj)
store = ScipyDataStore(
filename_or_obj, mode=mode, format=format, group=group, mmap=mmap, lock=lock
6 changes: 5 additions & 1 deletion xarray/backends/zarr.py
@@ -230,6 +230,7 @@ def extract_zarr_variable_encoding(
"""
encoding = variable.encoding.copy()

safe_to_drop = {"source", "original_shape"}
valid_encodings = {
"chunks",
"compressor",
@@ -238,6 +239,10 @@
"write_empty_chunks",
}

for k in safe_to_drop:
if k in encoding:
del encoding[k]

if raise_on_invalid:
invalid = [k for k in encoding if k not in valid_encodings]
if invalid:
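The added lines drop the "source" and "original_shape" keys, bookkeeping that xarray attaches while reading a dataset, from a variable's encoding before it is validated for Zarr. A rough sketch of the round-trip this unblocks, assuming a working netCDF backend and zarr are installed (file names are illustrative):

import numpy as np
import xarray as xr

# Round-trip sketch: reading a file back in can leave bookkeeping entries
# such as "source" or "original_shape" in the encodings.
xr.Dataset({"a": ("x", np.arange(4))}).to_netcdf("example.nc")
ds = xr.open_dataset("example.nc")

# With this change those read-side keys are dropped before the Zarr encoding
# is validated, so writing back out no longer trips over them or requires
# clearing each variable's encoding by hand.
ds.to_zarr("example.zarr", mode="w")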
@@ -882,7 +887,6 @@ def open_dataset(
stacklevel=3,
zarr_version=None,
):

filename_or_obj = _normalize_path(filename_or_obj)
store = ZarrStore.open_group(
filename_or_obj,
2 changes: 0 additions & 2 deletions xarray/convert.py
@@ -115,10 +115,8 @@ def set_cdms2_attrs(var, attrs):

# Curvilinear and unstructured grids
if dataarray.name not in dataarray.coords:

cdms2_axes = {}
for coord_name in set(dataarray.coords.keys()) - set(dataarray.dims):

coord_array = dataarray.coords[coord_name].to_cdms2()

cdms2_axis_cls = (
1 change: 0 additions & 1 deletion xarray/core/accessor_dt.py
@@ -201,7 +201,6 @@ def _strftime(values, date_format):


class TimeAccessor(Generic[T_DataArray]):

__slots__ = ("_obj",)

def __init__(self, obj: T_DataArray) -> None:
2 changes: 0 additions & 2 deletions xarray/core/alignment.py
@@ -938,7 +938,6 @@ def reindex_like(


def _get_broadcast_dims_map_common_coords(args, exclude):

common_coords = {}
dims_map = {}
for arg in args:
@@ -954,7 +953,6 @@ def _get_broadcast_dims_map_common_coords(args, exclude):
def _broadcast_helper(
arg: T_DataWithCoords, exclude, dims_map, common_coords
) -> T_DataWithCoords:

from xarray.core.dataarray import DataArray
from xarray.core.dataset import Dataset

7 changes: 0 additions & 7 deletions xarray/core/combine.py
@@ -53,7 +53,6 @@ def _infer_tile_ids_from_nested_list(entry, current_pos):


def _ensure_same_types(series, dim):

if series.dtype == object:
types = set(series.map(type))
if len(types) > 1:
@@ -80,17 +79,14 @@ def _ensure_same_types(series, dim):


def _infer_concat_order_from_coords(datasets):

concat_dims = []
tile_ids = [() for ds in datasets]

# All datasets have same variables because they've been grouped as such
ds0 = datasets[0]
for dim in ds0.dims:

# Check if dim is a coordinate dimension
if dim in ds0:

# Need to read coordinate values to do ordering
indexes = [ds._indexes.get(dim) for ds in datasets]
if any(index is None for index in indexes):
@@ -105,7 +101,6 @@ def _infer_concat_order_from_coords(datasets):
# If dimension coordinate values are same on every dataset then
# should be leaving this dimension alone (it's just a "bystander")
if not all(index.equals(indexes[0]) for index in indexes[1:]):

# Infer order datasets should be arranged in along this dim
concat_dims.append(dim)

@@ -261,7 +256,6 @@ def _combine_all_along_first_dim(
join: JoinOptions = "outer",
combine_attrs: CombineAttrsOptions = "drop",
):

# Group into lines of datasets which must be combined along dim
# need to sort by _new_tile_id first for groupby to work
# TODO: is the sorted need?
@@ -345,7 +339,6 @@ def _nested_combine(
join: JoinOptions = "outer",
combine_attrs: CombineAttrsOptions = "drop",
):

if len(datasets) == 0:
return Dataset()

1 change: 0 additions & 1 deletion xarray/core/common.py
@@ -1747,7 +1747,6 @@ def ones_like(
def get_chunksizes(
variables: Iterable[Variable],
) -> Mapping[Any, tuple[int, ...]]:

chunks: dict[Any, tuple[int, ...]] = {}
for v in variables:
if hasattr(v._data, "chunks"):
6 changes: 3 additions & 3 deletions xarray/core/computation.py
@@ -156,7 +156,6 @@ def to_gufunc_string(self, exclude_dims=frozenset()):

# enumerate input_core_dims contained in exclude_dims to make them unique
if exclude_dims:

exclude_dims = [self.dims_map[dim] for dim in exclude_dims]

counter = Counter()
@@ -555,7 +554,6 @@ def apply_groupby_func(func, *args):
def unified_dim_sizes(
variables: Iterable[Variable], exclude_dims: AbstractSet = frozenset()
) -> dict[Hashable, int]:

dim_sizes: dict[Hashable, int] = {}

for var in variables:
@@ -725,7 +723,9 @@ def apply_variable_ufunc(
dask_gufunc_kwargs["output_sizes"] = output_sizes_renamed

for key in signature.all_output_core_dims:
if key not in signature.all_input_core_dims and key not in output_sizes:
if (
key not in signature.all_input_core_dims or key in exclude_dims
) and key not in output_sizes:
raise ValueError(
f"dimension '{key}' in 'output_core_dims' needs corresponding (dim, size) in 'output_sizes'"
)
1 change: 0 additions & 1 deletion xarray/core/dataarray.py
@@ -395,7 +395,6 @@ def __init__(

# try to fill in arguments from data if they weren't supplied
if coords is None:

if isinstance(data, DataArray):
coords = data.coords
elif isinstance(data, pd.Series):
1 change: 0 additions & 1 deletion xarray/core/dataset.py
@@ -6768,7 +6768,6 @@ def shift(
fill_value: Any = xrdtypes.NA,
**shifts_kwargs: int,
) -> T_Dataset:

"""Shift this dataset by an offset along one or more dimensions.
Only data variables are moved; coordinates stay in place. This is
1 change: 0 additions & 1 deletion xarray/core/duck_array_ops.py
@@ -492,7 +492,6 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):

# Convert np.NaT to np.nan
elif array.dtype.kind in "mM":

# Convert to specified timedelta units.
if datetime_unit:
array = array / np.timedelta64(1, datetime_unit)
2 changes: 0 additions & 2 deletions xarray/core/groupby.py
@@ -46,7 +46,6 @@


def check_reduce_dims(reduce_dims, dimensions):

if reduce_dims is not ...:
if is_scalar(reduce_dims):
reduce_dims = [reduce_dims]
@@ -1208,7 +1207,6 @@ class DataArrayGroupBy( # type: ignore[misc]


class DatasetGroupByBase(GroupBy["Dataset"], DatasetGroupbyArithmetic):

__slots__ = ()
_dims: Frozen[Hashable, int] | None

2 changes: 0 additions & 2 deletions xarray/core/indexing.py
@@ -1092,7 +1092,6 @@ def _logical_any(args):


def _masked_result_drop_slice(key, data=None):

key = (k for k in key if not isinstance(k, slice))
chunks_hint = getattr(data, "chunks", None)

@@ -1345,7 +1344,6 @@ def __init__(self, array):
self.array = array

def __getitem__(self, key):

if not isinstance(key, VectorizedIndexer):
# if possible, short-circuit when keys are effectively slice(None)
# This preserves dask name and passes lazy array equivalence checks
2 changes: 1 addition & 1 deletion xarray/core/merge.py
@@ -174,7 +174,7 @@ def _assert_prioritized_valid(
indexes: dict[int, Index] = {}

for name, elements_list in grouped.items():
for (_, index) in elements_list:
for _, index in elements_list:
if index is not None:
grouped_by_index[id(index)].append(name)
indexes[id(index)] = index
1 change: 0 additions & 1 deletion xarray/core/missing.py
@@ -80,7 +80,6 @@ class NumpyInterpolator(BaseInterpolator):
"""

def __init__(self, xi, yi, method="linear", fill_value=None, period=None):

if method != "linear":
raise ValueError("only method `linear` is valid for the NumpyInterpolator")

1 change: 0 additions & 1 deletion xarray/core/parallel.py
@@ -33,7 +33,6 @@ def assert_chunks_compatible(a: Dataset, b: Dataset):
def check_result_variables(
result: DataArray | Dataset, expected: Mapping[str, Any], kind: str
):

if kind == "coords":
nice_str = "coordinate"
elif kind == "data_vars":
2 changes: 0 additions & 2 deletions xarray/core/resample.py
@@ -41,7 +41,6 @@ def __init__(
resample_dim: Hashable | None = None,
**kwargs,
) -> None:

if dim == resample_dim:
raise ValueError(
f"Proxy resampling dimension ('{resample_dim}') "
@@ -57,7 +56,6 @@ def _flox_reduce(
keep_attrs: bool | None = None,
**kwargs,
) -> T_Xarray:

from xarray.core.dataarray import DataArray

kwargs.setdefault("method", "cohorts")
6 changes: 3 additions & 3 deletions xarray/core/rolling.py
@@ -132,7 +132,8 @@ def _reduce_method( # type: ignore[misc]
name: str, fillna: Any, rolling_agg_func: Callable | None = None
) -> Callable[..., T_Xarray]:
"""Constructs reduction methods built on a numpy reduction function (e.g. sum),
a bottleneck reduction function (e.g. move_sum), or a Rolling reduction (_mean)."""
a bottleneck reduction function (e.g. move_sum), or a Rolling reduction (_mean).
"""
if rolling_agg_func:
array_agg_func = None
else:
@@ -141,7 +142,6 @@ def _reduce_method( # type: ignore[misc]
bottleneck_move_func = getattr(bottleneck, "move_" + name, None)

def method(self, keep_attrs=None, **kwargs):

keep_attrs = self._get_keep_attrs(keep_attrs)

return self._numpy_or_bottleneck_reduce(
@@ -272,7 +272,7 @@ def __iter__(self) -> Iterator[tuple[DataArray, DataArray]]:
starts = stops - window0
starts[: window0 - offset] = 0

for (label, start, stop) in zip(self.window_labels, starts, stops):
for label, start, stop in zip(self.window_labels, starts, stops):
window = self.obj.isel({dim0: slice(start, stop)})

counts = window.count(dim=[dim0])
(Diff view truncated here; the remaining files of the 63 changed are not shown.)