diff --git a/.binder/environment.yml b/.binder/environment.yml index 99a7d9f2494..053b12dfc86 100644 --- a/.binder/environment.yml +++ b/.binder/environment.yml @@ -2,11 +2,10 @@ name: xarray-examples channels: - conda-forge dependencies: - - python=3.9 + - python=3.10 - boto3 - bottleneck - cartopy - - cdms2 - cfgrib - cftime - coveralls @@ -25,7 +24,7 @@ dependencies: - numpy - packaging - pandas - - pint + - pint>=0.22 - pip - pooch - pydap @@ -38,5 +37,4 @@ dependencies: - toolz - xarray - zarr - - pip: - - numbagg + - numbagg diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 7ee197aeda3..028cb3ac817 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -67,13 +67,7 @@ jobs: run: | echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV - if [[ "${{matrix.python-version}}" == "3.11" ]]; then - if [[ ${{matrix.os}} == windows* ]]; then - echo "CONDA_ENV_FILE=ci/requirements/environment-windows-py311.yml" >> $GITHUB_ENV - else - echo "CONDA_ENV_FILE=ci/requirements/environment-py311.yml" >> $GITHUB_ENV - fi - elif [[ ${{ matrix.os }} == windows* ]] ; + if [[ ${{ matrix.os }} == windows* ]] ; then echo "CONDA_ENV_FILE=ci/requirements/environment-windows.yml" >> $GITHUB_ENV elif [[ "${{ matrix.env }}" != "" ]] ; diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5626f450ec0..e8482bc4461 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ ci: autoupdate_schedule: monthly repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -18,24 +18,24 @@ repos: files: ^xarray/ - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.0.292' + rev: 'v0.1.4' hooks: - id: ruff args: ["--fix"] # https://github.com/python/black#version-control-integration - repo: https://github.com/psf/black - rev: 23.9.1 + rev: 23.10.1 hooks: - id: black-jupyter - repo: https://github.com/keewis/blackdoc - rev: v0.3.8 + rev: v0.3.9 hooks: - id: blackdoc exclude: "generate_aggregations.py" - additional_dependencies: ["black==23.9.1"] + additional_dependencies: ["black==23.10.1"] - id: blackdoc-autoupdate-black - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.5.1 + rev: v1.6.1 hooks: - id: mypy # Copied from setup.cfg diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml index 4645be08b83..c16c174ff96 100644 --- a/ci/requirements/all-but-dask.yml +++ b/ci/requirements/all-but-dask.yml @@ -3,13 +3,11 @@ channels: - conda-forge - nodefaults dependencies: - - python=3.10 - black - aiobotocore - boto3 - bottleneck - cartopy - - cdms2 - cftime - coveralls - flox @@ -26,9 +24,8 @@ dependencies: - numpy - packaging - pandas - - pint<0.21 + - pint>=0.22 - pip - - pseudonetcdf - pydap - pytest - pytest-cov diff --git a/ci/requirements/environment-py311.yml b/ci/requirements/environment-py311.yml deleted file mode 100644 index 0b9817daef3..00000000000 --- a/ci/requirements/environment-py311.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: xarray-tests -channels: - - conda-forge - - nodefaults -dependencies: - - aiobotocore - - boto3 - - bottleneck - - cartopy - # - cdms2 - - cftime - - dask-core - - distributed - - flox - - fsspec!=2021.7.0 - - h5netcdf - - h5py - - hdf5 - - hypothesis - - iris - - lxml # Optional dep of pydap - - matplotlib-base - - nc-time-axis - - netcdf4 - - numba - - numbagg - - numexpr - - numpy - - packaging - - pandas - - pint<0.21 - - 
pip - - pooch - - pre-commit - - pseudonetcdf - - pydap - - pytest - - pytest-cov - - pytest-env - - pytest-xdist - - pytest-timeout - - rasterio - - scipy - - seaborn - - sparse - - toolz - - typing_extensions - - zarr diff --git a/ci/requirements/environment-windows-py311.yml b/ci/requirements/environment-windows-py311.yml deleted file mode 100644 index 8c36c5a9fd4..00000000000 --- a/ci/requirements/environment-windows-py311.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: xarray-tests -channels: - - conda-forge -dependencies: - - boto3 - - bottleneck - - cartopy - # - cdms2 # Not available on Windows - - cftime - - dask-core - - distributed - - flox - - fsspec!=2021.7.0 - - h5netcdf - - h5py - - hdf5 - - hypothesis - - iris - - lxml # Optional dep of pydap - - matplotlib-base - - nc-time-axis - - netcdf4 - # - numba - # - numbagg - - numpy - - packaging - - pandas - - pint<0.21 - - pip - - pre-commit - - pseudonetcdf - - pydap - - pytest - - pytest-cov - - pytest-env - - pytest-xdist - - pytest-timeout - - rasterio - - scipy - - seaborn - # - sparse - - toolz - - typing_extensions - - zarr diff --git a/ci/requirements/environment-windows.yml b/ci/requirements/environment-windows.yml index efa9ccb5a9a..2a5a4bc86a5 100644 --- a/ci/requirements/environment-windows.yml +++ b/ci/requirements/environment-windows.yml @@ -5,7 +5,6 @@ dependencies: - boto3 - bottleneck - cartopy - # - cdms2 # Not available on Windows - cftime - dask-core - distributed @@ -25,10 +24,9 @@ dependencies: - numpy - packaging - pandas - - pint<0.21 + - pint>=0.22 - pip - pre-commit - - pseudonetcdf - pydap - pytest - pytest-cov diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 6e93ab7a946..0aa5a6bc2f1 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -7,7 +7,6 @@ dependencies: - boto3 - bottleneck - cartopy - - cdms2 - cftime - dask-core - distributed @@ -29,11 +28,10 @@ dependencies: - opt_einsum - packaging - pandas - - pint<0.21 + - pint>=0.22 - pip - pooch - pre-commit - - pseudonetcdf - pydap - pytest - pytest-cov diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml index 8400270ce1b..7d0f29c0960 100644 --- a/ci/requirements/min-all-deps.yml +++ b/ci/requirements/min-all-deps.yml @@ -11,7 +11,6 @@ dependencies: - boto3=1.24 - bottleneck=1.3 - cartopy=0.20 - - cdms2=3.1 - cftime=1.6 - coveralls - dask-core=2022.7 @@ -35,9 +34,8 @@ dependencies: - numpy=1.22 - packaging=21.3 - pandas=1.4 - - pint=0.19 + - pint=0.22 - pip - - pseudonetcdf=3.2 - pydap=3.3 - pytest - pytest-cov diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index c96b0aa5c3b..374fe41fde5 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -591,20 +591,6 @@ backends.H5netcdfBackendEntrypoint.guess_can_open backends.H5netcdfBackendEntrypoint.open_dataset - backends.PseudoNetCDFDataStore.close - backends.PseudoNetCDFDataStore.get_attrs - backends.PseudoNetCDFDataStore.get_dimensions - backends.PseudoNetCDFDataStore.get_encoding - backends.PseudoNetCDFDataStore.get_variables - backends.PseudoNetCDFDataStore.open - backends.PseudoNetCDFDataStore.open_store_variable - backends.PseudoNetCDFDataStore.ds - - backends.PseudoNetCDFBackendEntrypoint.description - backends.PseudoNetCDFBackendEntrypoint.url - backends.PseudoNetCDFBackendEntrypoint.guess_can_open - backends.PseudoNetCDFBackendEntrypoint.open_dataset - backends.PydapDataStore.close backends.PydapDataStore.get_attrs backends.PydapDataStore.get_dimensions diff --git a/doc/api.rst b/doc/api.rst 
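The dependency changes above converge on ``pint>=0.22`` across all environments. A minimal sketch of the duck-array integration that pin exercises, assuming only that ``pint``, ``numpy``, and ``xarray`` are importable (the variable names are illustrative)::

    import numpy as np
    import pint
    import xarray as xr

    ureg = pint.UnitRegistry()

    # A pint Quantity can back a DataArray directly; reductions keep the units.
    da = xr.DataArray(ureg.Quantity(np.arange(4.0), "m"), dims="x")
    print(da.sum().data)  # 6.0 meter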
index 96b4864804f..24c3aee7d47 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -557,6 +557,7 @@ Datetimelike properties DataArray.dt.seconds DataArray.dt.microseconds DataArray.dt.nanoseconds + DataArray.dt.total_seconds **Timedelta methods**: @@ -602,7 +603,7 @@ Dataset methods Dataset.as_numpy Dataset.from_dataframe Dataset.from_dict - Dataset.to_array + Dataset.to_dataarray Dataset.to_dataframe Dataset.to_dask_dataframe Dataset.to_dict @@ -627,11 +628,9 @@ DataArray methods load_dataarray open_dataarray DataArray.as_numpy - DataArray.from_cdms2 DataArray.from_dict DataArray.from_iris DataArray.from_series - DataArray.to_cdms2 DataArray.to_dask_dataframe DataArray.to_dataframe DataArray.to_dataset @@ -1116,7 +1115,6 @@ arguments for the ``load_store`` and ``dump_to_store`` Dataset methods: backends.NetCDF4DataStore backends.H5NetCDFStore - backends.PseudoNetCDFDataStore backends.PydapDataStore backends.ScipyDataStore backends.ZarrStore @@ -1132,7 +1130,6 @@ used filetypes in the xarray universe. backends.NetCDF4BackendEntrypoint backends.H5netcdfBackendEntrypoint - backends.PseudoNetCDFBackendEntrypoint backends.PydapBackendEntrypoint backends.ScipyBackendEntrypoint backends.StoreBackendEntrypoint diff --git a/doc/conf.py b/doc/conf.py index 06900618401..9f3a70717f6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -327,6 +327,7 @@ "sparse": ("https://sparse.pydata.org/en/latest/", None), "cubed": ("https://tom-e-white.com/cubed/", None), "datatree": ("https://xarray-datatree.readthedocs.io/en/latest/", None), + "xarray-tutorial": ("https://tutorial.xarray.dev/", None), # "opt_einsum": ("https://dgasmith.github.io/opt_einsum/", None), } diff --git a/doc/getting-started-guide/faq.rst b/doc/getting-started-guide/faq.rst index e0e44dc7781..7f99fa77e3a 100644 --- a/doc/getting-started-guide/faq.rst +++ b/doc/getting-started-guide/faq.rst @@ -168,18 +168,11 @@ integration with Cartopy_. .. _Iris: https://scitools-iris.readthedocs.io/en/stable/ .. _Cartopy: https://scitools.org.uk/cartopy/docs/latest/ -`UV-CDAT`__ is another Python library that implements in-memory netCDF-like -variables and `tools for working with climate data`__. - -__ https://uvcdat.llnl.gov/ -__ https://drclimate.wordpress.com/2014/01/02/a-beginners-guide-to-scripting-with-uv-cdat/ - We think the design decisions we have made for xarray (namely, basing it on pandas) make it a faster and more flexible data analysis tool. That said, Iris -and CDAT have some great domain specific functionality, and xarray includes -methods for converting back and forth between xarray and these libraries. See -:py:meth:`~xarray.DataArray.to_iris` and :py:meth:`~xarray.DataArray.to_cdms2` -for more details. +has some great domain specific functionality, and xarray includes +methods for converting back and forth between xarray and Iris. See +:py:meth:`~xarray.DataArray.to_iris` for more details. What other projects leverage xarray? ------------------------------------ diff --git a/doc/getting-started-guide/installing.rst b/doc/getting-started-guide/installing.rst index e8c498b6664..357d7ae0d4d 100644 --- a/doc/getting-started-guide/installing.rst +++ b/doc/getting-started-guide/installing.rst @@ -38,9 +38,6 @@ For netCDF and IO - `cftime `__: recommended if you want to encode/decode datetimes for non-standard calendars or dates before year 1678 or after year 2262. -- `PseudoNetCDF `__: recommended - for accessing CAMx, GEOS-Chem (bpch), NOAA ARL files, ICARTT files - (ffi1001) and many other. 
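The FAQ rewrite above keeps only the Iris interop; a minimal round-trip sketch, assuming the optional ``iris`` package is installed (the array contents are illustrative)::

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.zeros((2, 3)),
        dims=("lat", "lon"),
        name="tas",
        attrs={"units": "K"},
    )
    cube = da.to_iris()                  # xarray -> iris.cube.Cube
    back = xr.DataArray.from_iris(cube)  # ... and back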
- `iris `__: for conversion to and from iris' Cube objects diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 8cc4e9939f2..97b0872fdc4 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -36,7 +36,7 @@ How do I ... * - rename a variable, dimension or coordinate - :py:meth:`Dataset.rename`, :py:meth:`DataArray.rename`, :py:meth:`Dataset.rename_vars`, :py:meth:`Dataset.rename_dims`, * - convert a DataArray to Dataset or vice versa - - :py:meth:`DataArray.to_dataset`, :py:meth:`Dataset.to_array`, :py:meth:`Dataset.to_stacked_array`, :py:meth:`DataArray.to_unstacked_dataset` + - :py:meth:`DataArray.to_dataset`, :py:meth:`Dataset.to_dataarray`, :py:meth:`Dataset.to_stacked_array`, :py:meth:`DataArray.to_unstacked_dataset` * - extract variables that have certain attributes - :py:meth:`Dataset.filter_by_attrs` * - extract the underlying array (e.g. NumPy or Dask arrays) diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 9656a2ba973..1aeb393f3af 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -876,17 +876,20 @@ and then calling ``to_zarr`` with ``compute=False`` to write only metadata ds.to_zarr(path, compute=False) Now, a Zarr store with the correct variable shapes and attributes exists that -can be filled out by subsequent calls to ``to_zarr``. The ``region`` provides a -mapping from dimension names to Python ``slice`` objects indicating where the -data should be written (in index space, not coordinate space), e.g., +can be filled out by subsequent calls to ``to_zarr``. ``region`` can be +specified as ``"auto"``, which opens the existing store and determines the +correct alignment of the new data with the existing coordinates, or as an +explicit mapping from dimension names to Python ``slice`` objects indicating +where the data should be written (in index space, not label space), e.g., .. ipython:: python # For convenience, we'll slice a single dataset, but in the real use-case # we would create them separately possibly even from separate processes. ds = xr.Dataset({"foo": ("x", np.arange(30))}) - ds.isel(x=slice(0, 10)).to_zarr(path, region={"x": slice(0, 10)}) - ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": slice(10, 20)}) + # Any of the following region specifications are valid + ds.isel(x=slice(0, 10)).to_zarr(path, region="auto") + ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": "auto"}) ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)}) Concurrent writes with ``region`` are safe as long as they modify distinct @@ -1308,27 +1311,6 @@ We recommend installing PyNIO via conda:: .. _PyNIO backend is deprecated: https://github.com/pydata/xarray/issues/4491 .. _PyNIO is no longer maintained: https://github.com/NCAR/pynio/issues/53 -.. _io.PseudoNetCDF: - -Formats supported by PseudoNetCDF ---------------------------------- - -Xarray can also read CAMx, BPCH, ARL PACKED BIT, and many other file -formats supported by PseudoNetCDF_, if PseudoNetCDF is installed. -PseudoNetCDF can also provide Climate Forecasting Conventions to -CMAQ files. In addition, PseudoNetCDF can automatically register custom -readers that subclass PseudoNetCDF.PseudoNetCDFFile. PseudoNetCDF can -identify readers either heuristically, or by a format specified via a key in -`backend_kwargs`. - -To use PseudoNetCDF to read such files, supply -``engine='pseudonetcdf'`` to :py:func:`open_dataset`. - -Add ``backend_kwargs={'format': ''}`` where `` -options are listed on the PseudoNetCDF page. - -.. 
_PseudoNetCDF: https://github.com/barronh/PseudoNetCDF
-
 CSV and other formats supported by pandas
 -----------------------------------------
 
diff --git a/doc/user-guide/reshaping.rst b/doc/user-guide/reshaping.rst
index d0b72322218..14b343549e2 100644
--- a/doc/user-guide/reshaping.rst
+++ b/doc/user-guide/reshaping.rst
@@ -59,11 +59,11 @@ use :py:meth:`~xarray.DataArray.squeeze`
 Converting between datasets and arrays
 --------------------------------------
 
-To convert from a Dataset to a DataArray, use :py:meth:`~xarray.Dataset.to_array`:
+To convert from a Dataset to a DataArray, use :py:meth:`~xarray.Dataset.to_dataarray`:
 
 .. ipython:: python
 
-    arr = ds.to_array()
+    arr = ds.to_dataarray()
     arr
 
 This method broadcasts all data variables in the dataset against each other,
@@ -77,7 +77,7 @@ To convert back from a DataArray to a Dataset, use
 
     arr.to_dataset(dim="variable")
 
-The broadcasting behavior of ``to_array`` means that the resulting array
+The broadcasting behavior of ``to_dataarray`` means that the resulting array
 includes the union of data variable dimensions:
 
 .. ipython:: python
@@ -88,7 +88,7 @@ includes the union of data variable dimensions:
     ds2
 
     # the resulting array has 6 elements
-    ds2.to_array()
+    ds2.to_dataarray()
 
 Otherwise, the result could not be represented as an orthogonal array.
 
@@ -161,8 +161,8 @@ arrays as inputs.
 For datasets with only one variable, we only need ``stack``
 and ``unstack``, but combining multiple variables in a
 :py:class:`xarray.Dataset` is more complicated. If the variables in the dataset
 have matching numbers of dimensions, we can call
-:py:meth:`~xarray.Dataset.to_array` and then stack along the the new coordinate.
-But :py:meth:`~xarray.Dataset.to_array` will broadcast the dataarrays together,
+:py:meth:`~xarray.Dataset.to_dataarray` and then stack along the new coordinate.
+But :py:meth:`~xarray.Dataset.to_dataarray` will broadcast the dataarrays together,
 which will effectively tile the lower dimensional variable along the missing
 dimensions. The method :py:meth:`xarray.Dataset.to_stacked_array` allows
 combining variables of differing dimensions without this wasteful copying while
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 8f4aaf5a9ca..6f523e85ac3 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -24,9 +24,20 @@ New Features
 
 - Use `opt_einsum `_ for :py:func:`xarray.dot` by default if installed.
   By `Deepak Cherian `_. (:issue:`7764`, :pull:`8373`).
+- Add ``DataArray.dt.total_seconds()`` method to match the Pandas API. (:pull:`8435`).
+  By `Ben Mares `_.
+- Allow passing ``region="auto"`` in :py:meth:`Dataset.to_zarr` to automatically infer the
+  region to write in the original store. Also implement automatic transpose when dimension
+  order does not match the original store. (:issue:`7702`, :issue:`8421`, :pull:`8434`).
+  By `Sam Levang `_.
+- Allow the usage of h5py drivers (e.g. ros3) via h5netcdf (:pull:`8360`).
+  By `Ezequiel Cimadevilla `_.
 
 Breaking changes
 ~~~~~~~~~~~~~~~~
+- Drop support for `cdms2 `_. Please use
+  `xcdat `_ instead (:pull:`8441`).
+  By `Justus Magin `_.
+- Bump minimum tested pint version to ``>=0.22``. By `Deepak Cherian `_.
 
 Deprecations
 ~~~~~~~~~~~~
-
+- The PseudoNetCDF backend has been removed. By `Deepak Cherian `_.
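The ``region="auto"`` entry in the changelog above pairs with the ``io.rst`` example earlier in this diff; a condensed sketch of the workflow, assuming a writable local path (the store name is illustrative)::

    import numpy as np
    import xarray as xr

    path = "store.zarr"
    ds = xr.Dataset({"foo": ("x", np.arange(30))}, coords={"x": np.arange(30)})

    # Initialize the store with metadata, then fill it in independent slices.
    ds.to_zarr(path, compute=False)
    ds.isel(x=slice(0, 10)).to_zarr(path, region="auto")
    ds.isel(x=slice(10, 20)).to_zarr(path, region={"x": "auto"})
    ds.isel(x=slice(20, 30)).to_zarr(path, region={"x": slice(20, 30)})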
- Supplying dimension-ordered sequences to :py:meth:`DataArray.chunk` & :py:meth:`Dataset.chunk` is deprecated in favor of supplying a dictionary of dimensions, or a single ``int`` or ``"auto"`` argument covering all @@ -53,6 +65,15 @@ Deprecations :py:meth:`DataArray.resample`, and :py:meth:`Dataset.resample` among others (:issue:`8394`, :pull:`8415`). By `Spencer Clark `_. +- Rename :py:meth:`Dataset.to_array` to :py:meth:`Dataset.to_dataarray` for + consistency with :py:meth:`DataArray.to_dataset` & + :py:func:`open_dataarray` functions. This is a "soft" deprecation — the + existing methods work and don't raise any warnings, given the relatively small + benefits of the change. + By `Maximilian Roos `_. +- Finally remove ``keep_attrs`` kwarg from :py:meth:`DataArray.resample` and + :py:meth:`Dataset.resample`. These were deprecated a long time ago. + By `Deepak Cherian `_. Bug fixes ~~~~~~~~~ @@ -64,6 +85,12 @@ Bug fixes ``"right"`` to xarray's implementation of resample for data indexed by a :py:class:`CFTimeIndex` (:pull:`8393`). By `Spencer Clark `_. +- Fix to once again support date offset strings as input to the loffset + parameter of resample and test this functionality (:pull:`8422`, :issue:`8399`). + By `Katelyn FitzGerald `_. +- Fix a bug where :py:meth:`DataArray.to_dataset` silently drops a variable + if a coordinate with the same name already exists (:pull:`8433`, :issue:`7823`). + By `András Gunyhó `_. Documentation ~~~~~~~~~~~~~ @@ -4529,7 +4556,7 @@ Enhancements - New PseudoNetCDF backend for many Atmospheric data formats including GEOS-Chem, CAMx, NOAA arlpacked bit and many others. See - :ref:`io.PseudoNetCDF` for more details. + ``io.PseudoNetCDF`` for more details. By `Barron Henderson `_. - The :py:class:`Dataset` constructor now aligns :py:class:`DataArray` @@ -6718,7 +6745,7 @@ Backwards incompatible changes Enhancements ~~~~~~~~~~~~ -- New ``xray.Dataset.to_array`` and enhanced +- New ``xray.Dataset.to_dataarray`` and enhanced ``xray.DataArray.to_dataset`` methods make it easy to switch back and forth between arrays and datasets: @@ -6729,8 +6756,8 @@ Enhancements coords={"c": 42}, attrs={"Conventions": "None"}, ) - ds.to_array() - ds.to_array().to_dataset(dim="variable") + ds.to_dataarray() + ds.to_dataarray().to_dataset(dim="variable") - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: diff --git a/pyproject.toml b/pyproject.toml index b16063e0370..3975468d50e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,7 +88,6 @@ module = [ "affine.*", "bottleneck.*", "cartopy.*", - "cdms2.*", "cf_units.*", "cfgrib.*", "cftime.*", @@ -109,7 +108,6 @@ module = [ "opt_einsum.*", "pandas.*", "pooch.*", - "PseudoNetCDF.*", "pydap.*", "pytest.*", "scipy.*", diff --git a/xarray/backends/__init__.py b/xarray/backends/__init__.py index cf27998b6fb..0044593b4ea 100644 --- a/xarray/backends/__init__.py +++ b/xarray/backends/__init__.py @@ -13,10 +13,6 @@ from xarray.backends.memory import InMemoryDataStore from xarray.backends.netCDF4_ import NetCDF4BackendEntrypoint, NetCDF4DataStore from xarray.backends.plugins import list_engines, refresh_engines -from xarray.backends.pseudonetcdf_ import ( - PseudoNetCDFBackendEntrypoint, - PseudoNetCDFDataStore, -) from xarray.backends.pydap_ import PydapBackendEntrypoint, PydapDataStore from xarray.backends.pynio_ import NioDataStore from xarray.backends.scipy_ import ScipyBackendEntrypoint, ScipyDataStore @@ -37,10 +33,8 @@ "ScipyDataStore", "H5NetCDFStore", "ZarrStore", - 
"PseudoNetCDFDataStore", "H5netcdfBackendEntrypoint", "NetCDF4BackendEntrypoint", - "PseudoNetCDFBackendEntrypoint", "PydapBackendEntrypoint", "ScipyBackendEntrypoint", "StoreBackendEntrypoint", diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 27e155872de..3e6d00a8059 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -27,6 +27,7 @@ _normalize_path, ) from xarray.backends.locks import _get_scheduler +from xarray.backends.zarr import open_zarr from xarray.core import indexing from xarray.core.combine import ( _infer_concat_order_from_positions, @@ -59,7 +60,7 @@ T_NetcdfEngine = Literal["netcdf4", "scipy", "h5netcdf"] T_Engine = Union[ T_NetcdfEngine, - Literal["pydap", "pynio", "pseudonetcdf", "zarr"], + Literal["pydap", "pynio", "zarr"], type[BackendEntrypoint], str, # no nice typing support for custom backends None, @@ -78,7 +79,6 @@ "pydap": backends.PydapDataStore.open, "h5netcdf": backends.H5NetCDFStore.open, "pynio": backends.NioDataStore, - "pseudonetcdf": backends.PseudoNetCDFDataStore.open, "zarr": backends.ZarrStore.open_group, } @@ -420,7 +420,7 @@ def open_dataset( scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF). engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \ - "pseudonetcdf", "zarr", None}, installed backend \ + "zarr", None}, installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, with a preference for @@ -452,8 +452,7 @@ def open_dataset( taken from variable attributes (if they exist). If the `_FillValue` or `missing_value` attribute contains multiple values a warning will be issued and all array values matching one of the multiple values will - be replaced by NA. mask_and_scale defaults to True except for the - pseudonetcdf backend. This keyword may not be supported by all the backends. + be replaced by NA. This keyword may not be supported by all the backends. decode_times : bool, optional If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers. @@ -523,7 +522,7 @@ def open_dataset( relevant when using dask or another form of parallelism. By default, appropriate locks are chosen to safely read and write files with the currently active dask scheduler. Supported by "netcdf4", "h5netcdf", - "scipy", "pynio", "pseudonetcdf". + "scipy", "pynio". See engine open function for kwargs accepted by each specific engine. @@ -628,7 +627,7 @@ def open_dataarray( scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF). engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \ - "pseudonetcdf", "zarr", None}, installed backend \ + "zarr", None}, installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, with a preference for @@ -658,8 +657,7 @@ def open_dataarray( taken from variable attributes (if they exist). If the `_FillValue` or `missing_value` attribute contains multiple values a warning will be issued and all array values matching one of the multiple values will - be replaced by NA. mask_and_scale defaults to True except for the - pseudonetcdf backend. This keyword may not be supported by all the backends. 
+ be replaced by NA. This keyword may not be supported by all the backends. decode_times : bool, optional If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers. @@ -729,7 +727,7 @@ def open_dataarray( relevant when using dask or another form of parallelism. By default, appropriate locks are chosen to safely read and write files with the currently active dask scheduler. Supported by "netcdf4", "h5netcdf", - "scipy", "pynio", "pseudonetcdf". + "scipy", "pynio". See engine open function for kwargs accepted by each specific engine. @@ -869,7 +867,7 @@ def open_mfdataset( You can find the file-name from which each dataset was loaded in ``ds.encoding["source"]``. engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", \ - "pseudonetcdf", "zarr", None}, installed backend \ + "zarr", None}, installed backend \ or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, with a preference for @@ -1446,10 +1444,63 @@ def save_mfdataset( ) -def _validate_region(ds, region): +def _auto_detect_region(ds_new, ds_orig, dim): + # Create a mapping array of coordinates to indices on the original array + coord = ds_orig[dim] + da_map = DataArray(np.arange(coord.size), coords={dim: coord}) + + try: + da_idxs = da_map.sel({dim: ds_new[dim]}) + except KeyError as e: + if "not all values found" in str(e): + raise KeyError( + f"Not all values of coordinate '{dim}' in the new array were" + " found in the original store. Writing to a zarr region slice" + " requires that no dimensions or metadata are changed by the write." + ) + else: + raise e + + if (da_idxs.diff(dim) != 1).any(): + raise ValueError( + f"The auto-detected region of coordinate '{dim}' for writing new data" + " to the original store had non-contiguous indices. Writing to a zarr" + " region slice requires that the new data constitute a contiguous subset" + " of the original store." 
+ ) + + dim_slice = slice(da_idxs.values[0], da_idxs.values[-1] + 1) + + return dim_slice + + +def _auto_detect_regions(ds, region, open_kwargs): + ds_original = open_zarr(**open_kwargs) + for key, val in region.items(): + if val == "auto": + region[key] = _auto_detect_region(ds, ds_original, key) + return region + + +def _validate_and_autodetect_region( + ds, region, mode, open_kwargs +) -> tuple[dict[str, slice], bool]: + if region == "auto": + region = {dim: "auto" for dim in ds.dims} + if not isinstance(region, dict): raise TypeError(f"``region`` must be a dict, got {type(region)}") + if any(v == "auto" for v in region.values()): + region_was_autodetected = True + if mode != "r+": + raise ValueError( + f"``mode`` must be 'r+' when using ``region='auto'``, got {mode}" + ) + region = _auto_detect_regions(ds, region, open_kwargs) + else: + region_was_autodetected = False + for k, v in region.items(): if k not in ds.dims: raise ValueError( @@ -1481,6 +1532,8 @@ def _validate_region(ds, region): f".drop_vars({non_matching_vars!r})" ) + return region, region_was_autodetected + def _validate_datatypes_for_zarr_append(zstore, dataset): """If variable exists in the store, confirm dtype of the data to append is compatible with @@ -1532,7 +1585,7 @@ def to_zarr( compute: Literal[True] = True, consolidated: bool | None = None, append_dim: Hashable | None = None, - region: Mapping[str, slice] | None = None, + region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, zarr_version: int | None = None, @@ -1556,7 +1609,7 @@ def to_zarr( compute: Literal[False], consolidated: bool | None = None, append_dim: Hashable | None = None, - region: Mapping[str, slice] | None = None, + region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, zarr_version: int | None = None, @@ -1578,7 +1631,7 @@ def to_zarr( compute: bool = True, consolidated: bool | None = None, append_dim: Hashable | None = None, - region: Mapping[str, slice] | None = None, + region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, zarr_version: int | None = None, @@ -1643,7 +1696,20 @@ def to_zarr( _validate_dataset_names(dataset) if region is not None: - _validate_region(dataset, region) + open_kwargs = dict( + store=store, + synchronizer=synchronizer, + group=group, + consolidated=consolidated, + storage_options=storage_options, + zarr_version=zarr_version, + ) + region, region_was_autodetected = _validate_and_autodetect_region( + dataset, region, mode, open_kwargs + ) + # drop indices to avoid potential race condition with auto region + if region_was_autodetected: + dataset = dataset.drop_vars(dataset.indexes) if append_dim is not None and append_dim in region: raise ValueError( f"cannot list the same dimension in both ``append_dim`` and " diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 19748084625..a68a44b5f6f 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -140,6 +140,8 @@ def open( invalid_netcdf=None, phony_dims=None, decode_vlen_strings=True, + driver=None, + driver_kwds=None, ): import h5netcdf @@ -161,7 +163,10 @@ def open( kwargs = { "invalid_netcdf": invalid_netcdf, "decode_vlen_strings": decode_vlen_strings, + "driver": driver, } + if driver_kwds is not None: + kwargs.update(driver_kwds) 
if phony_dims is not None: kwargs["phony_dims"] = phony_dims @@ -397,6 +402,8 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti invalid_netcdf=None, phony_dims=None, decode_vlen_strings=True, + driver=None, + driver_kwds=None, ) -> Dataset: filename_or_obj = _normalize_path(filename_or_obj) store = H5NetCDFStore.open( @@ -407,6 +414,8 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti invalid_netcdf=invalid_netcdf, phony_dims=phony_dims, decode_vlen_strings=decode_vlen_strings, + driver=driver, + driver_kwds=driver_kwds, ) store_entrypoint = StoreBackendEntrypoint() diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py deleted file mode 100644 index 71cdd3199e0..00000000000 --- a/xarray/backends/pseudonetcdf_.py +++ /dev/null @@ -1,187 +0,0 @@ -from __future__ import annotations - -from collections.abc import Iterable -from typing import TYPE_CHECKING, Any - -import numpy as np - -from xarray.backends.common import ( - BACKEND_ENTRYPOINTS, - AbstractDataStore, - BackendArray, - BackendEntrypoint, - _normalize_path, -) -from xarray.backends.file_manager import CachingFileManager -from xarray.backends.locks import HDF5_LOCK, NETCDFC_LOCK, combine_locks, ensure_lock -from xarray.backends.store import StoreBackendEntrypoint -from xarray.core import indexing -from xarray.core.utils import Frozen, FrozenDict, close_on_error -from xarray.core.variable import Variable - -if TYPE_CHECKING: - import os - from io import BufferedIOBase - - from xarray.core.dataset import Dataset - -# psuedonetcdf can invoke netCDF libraries internally -PNETCDF_LOCK = combine_locks([HDF5_LOCK, NETCDFC_LOCK]) - - -class PncArrayWrapper(BackendArray): - def __init__(self, variable_name, datastore): - self.datastore = datastore - self.variable_name = variable_name - array = self.get_array() - self.shape = array.shape - self.dtype = np.dtype(array.dtype) - - def get_array(self, needs_lock=True): - ds = self.datastore._manager.acquire(needs_lock) - return ds.variables[self.variable_name] - - def __getitem__(self, key): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem - ) - - def _getitem(self, key): - with self.datastore.lock: - array = self.get_array(needs_lock=False) - return array[key] - - -class PseudoNetCDFDataStore(AbstractDataStore): - """Store for accessing datasets via PseudoNetCDF""" - - @classmethod - def open(cls, filename, lock=None, mode=None, **format_kwargs): - from PseudoNetCDF import pncopen - - keywords = {"kwargs": format_kwargs} - # only include mode if explicitly passed - if mode is not None: - keywords["mode"] = mode - - if lock is None: - lock = PNETCDF_LOCK - - manager = CachingFileManager(pncopen, filename, lock=lock, **keywords) - return cls(manager, lock) - - def __init__(self, manager, lock=None): - self._manager = manager - self.lock = ensure_lock(lock) - - @property - def ds(self): - return self._manager.acquire() - - def open_store_variable(self, name, var): - data = indexing.LazilyIndexedArray(PncArrayWrapper(name, self)) - attrs = {k: getattr(var, k) for k in var.ncattrs()} - return Variable(var.dimensions, data, attrs) - - def get_variables(self): - return FrozenDict( - (k, self.open_store_variable(k, v)) for k, v in self.ds.variables.items() - ) - - def get_attrs(self): - return Frozen({k: getattr(self.ds, k) for k in self.ds.ncattrs()}) - - def get_dimensions(self): - return Frozen(self.ds.dimensions) - - def get_encoding(self): - 
return { - "unlimited_dims": { - k for k in self.ds.dimensions if self.ds.dimensions[k].isunlimited() - } - } - - def close(self): - self._manager.close() - - -class PseudoNetCDFBackendEntrypoint(BackendEntrypoint): - """ - Backend for netCDF-like data formats in the air quality field - based on the PseudoNetCDF package. - - It can open: - - CAMx - - RACM2 box-model outputs - - Kinetic Pre-Processor outputs - - ICARTT Data files (ffi1001) - - CMAQ Files - - GEOS-Chem Binary Punch/NetCDF files - - and many more - - This backend is not selected by default for any files, so make - sure to specify ``engine="pseudonetcdf"`` in ``open_dataset``. - - For more information about the underlying library, visit: - https://pseudonetcdf.readthedocs.io - - See Also - -------- - backends.PseudoNetCDFDataStore - """ - - description = ( - "Open many atmospheric science data formats using PseudoNetCDF in Xarray" - ) - url = "https://docs.xarray.dev/en/stable/generated/xarray.backends.PseudoNetCDFBackendEntrypoint.html" - - # *args and **kwargs are not allowed in open_backend_dataset_ kwargs, - # unless the open_dataset_parameters are explicitly defined like this: - open_dataset_parameters = ( - "filename_or_obj", - "mask_and_scale", - "decode_times", - "concat_characters", - "decode_coords", - "drop_variables", - "use_cftime", - "decode_timedelta", - "mode", - "lock", - ) - - def open_dataset( - self, - filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, - mask_and_scale=False, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables: str | Iterable[str] | None = None, - use_cftime=None, - decode_timedelta=None, - mode=None, - lock=None, - **format_kwargs, - ) -> Dataset: - filename_or_obj = _normalize_path(filename_or_obj) - store = PseudoNetCDFDataStore.open( - filename_or_obj, lock=lock, mode=mode, **format_kwargs - ) - - store_entrypoint = StoreBackendEntrypoint() - with close_on_error(store): - ds = store_entrypoint.open_dataset( - store, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - ) - return ds - - -BACKEND_ENTRYPOINTS["pseudonetcdf"] = ("PseudoNetCDF", PseudoNetCDFBackendEntrypoint) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 2b41fa5224e..6632e40cf6f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -320,14 +320,19 @@ def encode_zarr_variable(var, needs_copy=True, name=None): return var -def _validate_existing_dims(var_name, new_var, existing_var, region, append_dim): +def _validate_and_transpose_existing_dims( + var_name, new_var, existing_var, region, append_dim +): if new_var.dims != existing_var.dims: - raise ValueError( - f"variable {var_name!r} already exists with different " - f"dimension names {existing_var.dims} != " - f"{new_var.dims}, but changing variable " - f"dimensions is not supported by to_zarr()." - ) + if set(existing_var.dims) == set(new_var.dims): + new_var = new_var.transpose(*existing_var.dims) + else: + raise ValueError( + f"variable {var_name!r} already exists with different " + f"dimension names {existing_var.dims} != " + f"{new_var.dims}, but changing variable " + f"dimensions is not supported by to_zarr()." 
+ ) existing_sizes = {} for dim, size in existing_var.sizes.items(): @@ -344,9 +349,14 @@ def _validate_existing_dims(var_name, new_var, existing_var, region, append_dim) f"variable {var_name!r} already exists with different " f"dimension sizes: {existing_sizes} != {new_sizes}. " f"to_zarr() only supports changing dimension sizes when " - f"explicitly appending, but append_dim={append_dim!r}." + f"explicitly appending, but append_dim={append_dim!r}. " + f"If you are attempting to write to a subset of the " + f"existing store without changing dimension sizes, " + f"consider using the region argument in to_zarr()." ) + return new_var + def _put_attrs(zarr_obj, attrs): """Raise a more informative error message for invalid attrs.""" @@ -616,7 +626,7 @@ def store( for var_name in existing_variable_names: new_var = variables_encoded[var_name] existing_var = existing_vars[var_name] - _validate_existing_dims( + new_var = _validate_and_transpose_existing_dims( var_name, new_var, existing_var, diff --git a/xarray/convert.py b/xarray/convert.py index 5863352ae41..aeb746f4a9c 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -3,7 +3,6 @@ from collections import Counter import numpy as np -import pandas as pd from xarray.coding.times import CFDatetimeCoder, CFTimedeltaCoder from xarray.conventions import decode_cf @@ -12,7 +11,6 @@ from xarray.core.dtypes import get_fill_value from xarray.core.pycompat import array_type -cdms2_ignored_attrs = {"name", "tileIndex"} iris_forbidden_keys = { "standard_name", "long_name", @@ -60,92 +58,6 @@ def _filter_attrs(attrs, ignored_attrs): return {k: v for k, v in attrs.items() if k not in ignored_attrs} -def from_cdms2(variable): - """Convert a cdms2 variable into an DataArray""" - values = np.asarray(variable) - name = variable.id - dims = variable.getAxisIds() - coords = {} - for axis in variable.getAxisList(): - coords[axis.id] = DataArray( - np.asarray(axis), - dims=[axis.id], - attrs=_filter_attrs(axis.attributes, cdms2_ignored_attrs), - ) - grid = variable.getGrid() - if grid is not None: - ids = [a.id for a in grid.getAxisList()] - for axis in grid.getLongitude(), grid.getLatitude(): - if axis.id not in variable.getAxisIds(): - coords[axis.id] = DataArray( - np.asarray(axis[:]), - dims=ids, - attrs=_filter_attrs(axis.attributes, cdms2_ignored_attrs), - ) - attrs = _filter_attrs(variable.attributes, cdms2_ignored_attrs) - dataarray = DataArray(values, dims=dims, coords=coords, name=name, attrs=attrs) - return decode_cf(dataarray.to_dataset())[dataarray.name] - - -def to_cdms2(dataarray, copy=True): - """Convert a DataArray into a cdms2 variable""" - # we don't want cdms2 to be a hard dependency - import cdms2 - - def set_cdms2_attrs(var, attrs): - for k, v in attrs.items(): - setattr(var, k, v) - - # 1D axes - axes = [] - for dim in dataarray.dims: - coord = encode(dataarray.coords[dim]) - axis = cdms2.createAxis(coord.values, id=dim) - set_cdms2_attrs(axis, coord.attrs) - axes.append(axis) - - # Data - var = encode(dataarray) - cdms2_var = cdms2.createVariable( - var.values, axes=axes, id=dataarray.name, mask=pd.isnull(var.values), copy=copy - ) - - # Attributes - set_cdms2_attrs(cdms2_var, var.attrs) - - # Curvilinear and unstructured grids - if dataarray.name not in dataarray.coords: - cdms2_axes = {} - for coord_name in set(dataarray.coords.keys()) - set(dataarray.dims): - coord_array = dataarray.coords[coord_name].to_cdms2() - - cdms2_axis_cls = ( - cdms2.coord.TransientAxis2D - if coord_array.ndim - else cdms2.auxcoord.TransientAuxAxis1D - ) - 
cdms2_axis = cdms2_axis_cls(coord_array) - if cdms2_axis.isLongitude(): - cdms2_axes["lon"] = cdms2_axis - elif cdms2_axis.isLatitude(): - cdms2_axes["lat"] = cdms2_axis - - if "lon" in cdms2_axes and "lat" in cdms2_axes: - if len(cdms2_axes["lon"].shape) == 2: - cdms2_grid = cdms2.hgrid.TransientCurveGrid( - cdms2_axes["lat"], cdms2_axes["lon"] - ) - else: - cdms2_grid = cdms2.gengrid.AbstractGenericGrid( - cdms2_axes["lat"], cdms2_axes["lon"] - ) - for axis in cdms2_grid.getAxisList(): - cdms2_var.setAxis(cdms2_var.getAxisIds().index(axis.id), axis) - cdms2_var.setGrid(cdms2_grid) - - return cdms2_var - - def _pick_attrs(attrs, keys): """Return attrs with keys in keys list""" return {k: v for k, v in attrs.items() if k in keys} diff --git a/xarray/core/_typed_ops.py b/xarray/core/_typed_ops.py index 9b79ed46a9c..ceab91ad991 100644 --- a/xarray/core/_typed_ops.py +++ b/xarray/core/_typed_ops.py @@ -83,6 +83,10 @@ def __eq__(self, other: DsCompatible) -> Self: # type:ignore[override] def __ne__(self, other: DsCompatible) -> Self: # type:ignore[override] return self._binary_op(other, nputils.array_ne) + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment] + def __radd__(self, other: DsCompatible) -> Self: return self._binary_op(other, operator.add, reflexive=True) @@ -291,6 +295,10 @@ def __eq__(self, other: DaCompatible) -> Self: # type:ignore[override] def __ne__(self, other: DaCompatible) -> Self: # type:ignore[override] return self._binary_op(other, nputils.array_ne) + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment] + def __radd__(self, other: DaCompatible) -> Self: return self._binary_op(other, operator.add, reflexive=True) @@ -643,6 +651,10 @@ def __ne__(self, other: VarCompatible) -> Self: def __ne__(self, other: VarCompatible) -> Self | T_DataArray: return self._binary_op(other, nputils.array_ne) + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment] + def __radd__(self, other: VarCompatible) -> Self: return self._binary_op(other, operator.add, reflexive=True) @@ -851,6 +863,10 @@ def __eq__(self, other: GroupByCompatible) -> Dataset: # type:ignore[override] def __ne__(self, other: GroupByCompatible) -> Dataset: # type:ignore[override] return self._binary_op(other, nputils.array_ne) + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment] + def __radd__(self, other: GroupByCompatible) -> Dataset: return self._binary_op(other, operator.add, reflexive=True) @@ -973,6 +989,10 @@ def __eq__(self, other: T_Xarray) -> T_Xarray: # type:ignore[override] def __ne__(self, other: T_Xarray) -> T_Xarray: # type:ignore[override] return self._binary_op(other, nputils.array_ne) + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment] + def __radd__(self, other: T_Xarray) -> T_Xarray: return self._binary_op(other, operator.add, reflexive=True) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 0d4a402cd19..b57c2f3857c 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -74,6 +74,8 @@ def _access_through_series(values, 
name):
     if name == "season":
         months = values_as_series.dt.month.values
         field_values = _season_from_months(months)
+    elif name == "total_seconds":
+        field_values = values_as_series.dt.total_seconds().values
     elif name == "isocalendar":
         # special NaT-handling can be removed when
         # https://github.com/pandas-dev/pandas/issues/54657 is resolved
@@ -574,6 +576,13 @@ class TimedeltaAccessor(TimeAccessor[T_DataArray]):
     43200, 64800])
     Coordinates:
       * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
+    >>> ts.dt.total_seconds()
+    <xarray.DataArray (time: 20)>
+    array([ 86400., 108000., 129600., 151200., 172800., 194400., 216000.,
+           237600., 259200., 280800., 302400., 324000., 345600., 367200.,
+           388800., 410400., 432000., 453600., 475200., 496800.])
+    Coordinates:
+      * time     (time) timedelta64[ns] 1 days 00:00:00 ... 5 days 18:00:00
     """
 
     @property
@@ -596,6 +605,11 @@ def nanoseconds(self) -> T_DataArray:
         """Number of nanoseconds (>= 0 and less than 1 microsecond) for each element"""
         return self._date_field("nanoseconds", np.int64)
 
+    # Not defined as a property in order to match the Pandas API
+    def total_seconds(self) -> T_DataArray:
+        """Total duration of each element expressed in seconds."""
+        return self._date_field("total_seconds", np.float64)
+
 
 class CombinedDatetimelikeAccessor(
     DatetimeAccessor[T_DataArray], TimedeltaAccessor[T_DataArray]
diff --git a/xarray/core/common.py b/xarray/core/common.py
index ab8a4d84261..fa0fa9aec0f 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -860,7 +860,6 @@ def _resample(
         base: int | None,
         offset: pd.Timedelta | datetime.timedelta | str | None,
         origin: str | DatetimeLike,
-        keep_attrs: bool | None,
         loffset: datetime.timedelta | str | None,
         restore_coord_dims: bool | None,
         **indexer_kwargs: str,
@@ -989,13 +988,6 @@ def _resample(
         from xarray.core.pdcompat import _convert_base_to_offset
         from xarray.core.resample import RESAMPLE_DIM
 
-        if keep_attrs is not None:
-            warnings.warn(
-                "Passing ``keep_attrs`` to ``resample`` has no effect and will raise an"
-                " error in xarray 0.20. Pass ``keep_attrs`` directly to the applied"
-                " function, e.g. ``resample(...).mean(keep_attrs=True)``."
-            )
-
         # note: the second argument (now 'skipna') use to be 'dim'
         if (
             (skipna is not None and not isinstance(skipna, bool))
@@ -1173,7 +1165,7 @@ def _dataset_indexer(dim: Hashable) -> DataArray:
             var for var in cond if dim not in cond[var].dims
         )
         keepany = cond_wdim.any(dim=(d for d in cond.dims.keys() if d != dim))
-        return keepany.to_array().any("variable")
+        return keepany.to_dataarray().any("variable")
 
     _get_indexer = (
         _dataarray_indexer if isinstance(cond, DataArray) else _dataset_indexer
diff --git a/xarray/core/computation.py b/xarray/core/computation.py
index f506bc97a2c..0c5c9d6d5cb 100644
--- a/xarray/core/computation.py
+++ b/xarray/core/computation.py
@@ -1141,9 +1141,13 @@ def apply_ufunc(
     numba.guvectorize
     dask.array.apply_gufunc
     xarray.map_blocks
+
+    :ref:`dask.automatic-parallelization`
+        User guide describing :py:func:`apply_ufunc` and :py:func:`map_blocks`.
+
+    :doc:`xarray-tutorial:advanced/apply_ufunc/apply_ufunc`
+        Advanced Tutorial on applying numpy functions using :py:func:`apply_ufunc`
+
     References
     ----------
     .. [1] https://numpy.org/doc/stable/reference/ufuncs.html
@@ -1599,7 +1603,9 @@ def cross(
     >>> ds_a = xr.Dataset(dict(x=("dim_0", [1]), y=("dim_0", [2]), z=("dim_0", [3])))
     >>> ds_b = xr.Dataset(dict(x=("dim_0", [4]), y=("dim_0", [5]), z=("dim_0", [6])))
     >>> c = xr.cross(
-    ...     ds_a.to_array("cartesian"), ds_b.to_array("cartesian"), dim="cartesian"
+    ...     
ds_a.to_dataarray("cartesian"), + ... ds_b.to_dataarray("cartesian"), + ... dim="cartesian", ... ) >>> c.to_dataset(dim="cartesian") diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 96520cacabf..b417470fdc0 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -56,7 +56,6 @@ ReprObject, _default, either_dict_or_kwargs, - emit_user_level_warning, ) from xarray.core.variable import ( IndexVariable, @@ -81,10 +80,6 @@ from dask.delayed import Delayed except ImportError: Delayed = None # type: ignore - try: - from cdms2 import Variable as cdms2_Variable - except ImportError: - cdms2_Variable = None try: from iris.cube import Cube as iris_Cube except ImportError: @@ -579,9 +574,24 @@ def subset(dim, label): array.attrs = {} return as_variable(array) - variables = {label: subset(dim, label) for label in self.get_index(dim)} - variables.update({k: v for k, v in self._coords.items() if k != dim}) + variables_from_split = { + label: subset(dim, label) for label in self.get_index(dim) + } coord_names = set(self._coords) - {dim} + + ambiguous_vars = set(variables_from_split) & coord_names + if ambiguous_vars: + rename_msg_fmt = ", ".join([f"{v}=..." for v in sorted(ambiguous_vars)]) + raise ValueError( + f"Splitting along the dimension {dim!r} would produce the variables " + f"{tuple(sorted(ambiguous_vars))} which are also existing coordinate " + f"variables. Use DataArray.rename({rename_msg_fmt}) or " + f"DataArray.assign_coords({dim}=...) to resolve this ambiguity." + ) + + variables = variables_from_split | { + k: v for k, v in self._coords.items() if k != dim + } indexes = filter_indexes_from_coords(self._indexes, coord_names) dataset = Dataset._construct_direct( variables, coord_names, indexes=indexes, attrs=self.attrs @@ -1431,6 +1441,12 @@ def isel( Dataset.isel DataArray.sel + :doc:`xarray-tutorial:intermediate/indexing/indexing` + Tutorial material on indexing with Xarray objects + + :doc:`xarray-tutorial:fundamentals/02.1_indexing_Basic` + Tutorial material on basics of indexing + Examples -------- >>> da = xr.DataArray(np.arange(25).reshape(5, 5), dims=("x", "y")) @@ -1563,6 +1579,12 @@ def sel( Dataset.sel DataArray.isel + :doc:`xarray-tutorial:intermediate/indexing/indexing` + Tutorial material on indexing with Xarray objects + + :doc:`xarray-tutorial:fundamentals/02.1_indexing_Basic` + Tutorial material on basics of indexing + Examples -------- >>> da = xr.DataArray( @@ -2196,6 +2218,9 @@ def interp( scipy.interpolate.interp1d scipy.interpolate.interpn + :doc:`xarray-tutorial:fundamentals/02.2_manipulating_dimensions` + Tutorial material on manipulating data resolution using :py:func:`~xarray.DataArray.interp` + Examples -------- >>> da = xr.DataArray( @@ -4387,47 +4412,6 @@ def from_series(cls, series: pd.Series, sparse: bool = False) -> DataArray: result.name = series.name return result - def to_cdms2(self) -> cdms2_Variable: - """Convert this array into a cdms2.Variable - - .. deprecated:: 2023.06.0 - The `cdms2`_ library has been deprecated. Please consider using the - `xcdat`_ library instead. - - .. _cdms2: https://github.com/CDAT/cdms - .. _xcdat: https://github.com/xCDAT/xcdat - """ - from xarray.convert import to_cdms2 - - emit_user_level_warning( - "The cdms2 library has been deprecated." 
- " Please consider using the xcdat library instead.", - DeprecationWarning, - ) - - return to_cdms2(self) - - @classmethod - def from_cdms2(cls, variable: cdms2_Variable) -> Self: - """Convert a cdms2.Variable into an xarray.DataArray - - .. deprecated:: 2023.06.0 - The `cdms2`_ library has been deprecated. Please consider using the - `xcdat`_ library instead. - - .. _cdms2: https://github.com/CDAT/cdms - .. _xcdat: https://github.com/xCDAT/xcdat - """ - from xarray.convert import from_cdms2 - - emit_user_level_warning( - "The cdms2 library has been deprecated." - " Please consider using the xcdat library instead.", - DeprecationWarning, - ) - - return from_cdms2(variable) - def to_iris(self) -> iris_Cube: """Convert this array into a iris.cube.Cube""" from xarray.convert import to_iris @@ -5461,6 +5445,9 @@ def map_blocks( dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks xarray.DataArray.map_blocks + :doc:`xarray-tutorial:advanced/map_blocks/map_blocks` + Advanced Tutorial on map_blocks with dask + Examples -------- Calculate an anomaly from climatology using ``.groupby()``. Using @@ -6676,10 +6663,20 @@ def groupby( -------- :ref:`groupby` Users guide explanation of how to group and bin data. + + :doc:`xarray-tutorial:intermediate/01-high-level-computation-patterns` + Tutorial on :py:func:`~xarray.DataArray.Groupby` for windowed computation + + :doc:`xarray-tutorial:fundamentals/03.2_groupby_with_xarray` + Tutorial on :py:func:`~xarray.DataArray.Groupby` demonstrating reductions, transformation and comparison with :py:func:`~xarray.DataArray.resample` + DataArray.groupby_bins Dataset.groupby core.groupby.DataArrayGroupBy + DataArray.coarsen pandas.DataFrame.groupby + Dataset.resample + DataArray.resample """ from xarray.core.groupby import ( DataArrayGroupBy, @@ -6814,6 +6811,13 @@ def weighted(self, weights: DataArray) -> DataArrayWeighted: See Also -------- Dataset.weighted + + :ref:`comput.weighted` + User guide on weighted array reduction using :py:func:`~xarray.DataArray.weighted` + + :doc:`xarray-tutorial:fundamentals/03.4_weighted` + Tutorial on Weighted Reduction using :py:func:`~xarray.DataArray.weighted` + """ from xarray.core.weighted import DataArrayWeighted @@ -6955,6 +6959,16 @@ def coarsen( -------- core.rolling.DataArrayCoarsen Dataset.coarsen + + :ref:`reshape.coarsen` + User guide describing :py:func:`~xarray.DataArray.coarsen` + + :ref:`compute.coarsen` + User guide on block arrgragation :py:func:`~xarray.DataArray.coarsen` + + :doc:`xarray-tutorial:fundamentals/03.3_windowed` + Tutorial on windowed computation using :py:func:`~xarray.DataArray.coarsen` + """ from xarray.core.rolling import DataArrayCoarsen @@ -6976,7 +6990,6 @@ def resample( base: int | None = None, offset: pd.Timedelta | datetime.timedelta | str | None = None, origin: str | DatetimeLike = "start_day", - keep_attrs: bool | None = None, loffset: datetime.timedelta | str | None = None, restore_coord_dims: bool | None = None, **indexer_kwargs: str, @@ -7098,7 +7111,6 @@ def resample( base=base, offset=offset, origin=origin, - keep_attrs=keep_attrs, loffset=loffset, restore_coord_dims=restore_coord_dims, **indexer_kwargs, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1ca092505fc..21ef85d60a6 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1502,7 +1502,7 @@ def __array__(self, dtype=None): "cannot directly convert an xarray.Dataset into a " "numpy array. 
Instead, create an xarray.DataArray " "first, either with indexing on the Dataset or by " - "invoking the `to_array()` method." + "invoking the `to_dataarray()` method." ) @property @@ -2305,7 +2305,7 @@ def to_zarr( compute: Literal[True] = True, consolidated: bool | None = None, append_dim: Hashable | None = None, - region: Mapping[str, slice] | None = None, + region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, zarr_version: int | None = None, @@ -2328,7 +2328,7 @@ def to_zarr( compute: Literal[False], consolidated: bool | None = None, append_dim: Hashable | None = None, - region: Mapping[str, slice] | None = None, + region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, zarr_version: int | None = None, @@ -2349,7 +2349,7 @@ def to_zarr( compute: bool = True, consolidated: bool | None = None, append_dim: Hashable | None = None, - region: Mapping[str, slice] | None = None, + region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None = None, safe_chunks: bool = True, storage_options: dict[str, str] | None = None, zarr_version: int | None = None, @@ -2411,7 +2411,7 @@ def to_zarr( append_dim : hashable, optional If set, the dimension along which the data will be appended. All other dimensions on overridden variables must remain the same size. - region : dict, optional + region : dict or "auto", optional Optional mapping from dimension names to integer slices along dataset dimensions to indicate the region of existing zarr array(s) in which to write this dataset's data. For example, @@ -2419,6 +2419,12 @@ def to_zarr( that values should be written to the region ``0:1000`` along ``x`` and ``10000:11000`` along ``y``. + Can also specify ``"auto"``, in which case the existing store will be + opened and the region inferred by matching the new data's coordinates. + ``"auto"`` can be used as a single string, which will automatically infer + the region for all dimensions, or as dictionary values for specific + dimensions mixed together with explicit slices for other dimensions. + Two restrictions apply to the use of ``region``: - If ``region`` is set, _all_ variables in a dataset must have at @@ -2902,6 +2908,13 @@ def isel( -------- Dataset.sel DataArray.isel + + :doc:`xarray-tutorial:intermediate/indexing/indexing` + Tutorial material on indexing with Xarray objects + + :doc:`xarray-tutorial:fundamentals/02.1_indexing_Basic` + Tutorial material on basics of indexing + """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "isel") if any(is_fancy_indexer(idx) for idx in indexers.values()): @@ -3049,6 +3062,13 @@ def sel( -------- Dataset.isel DataArray.sel + + :doc:`xarray-tutorial:intermediate/indexing/indexing` + Tutorial material on indexing with Xarray objects + + :doc:`xarray-tutorial:fundamentals/02.1_indexing_Basic` + Tutorial material on basics of indexing + """ indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "sel") query_results = map_index_queries( @@ -3801,6 +3821,9 @@ def interp( scipy.interpolate.interp1d scipy.interpolate.interpn + :doc:`xarray-tutorial:fundamentals/02.2_manipulating_dimensions` + Tutorial material on manipulating data resolution using :py:func:`~xarray.Dataset.interp` + Examples -------- >>> ds = xr.Dataset( @@ -4393,16 +4416,18 @@ def swap_dims( # rename_dims() method that only renames dimensions. 
dims_dict = either_dict_or_kwargs(dims_dict, dims_kwargs, "swap_dims") - for k, v in dims_dict.items(): - if k not in self.dims: + for current_name, new_name in dims_dict.items(): + if current_name not in self.dims: raise ValueError( - f"cannot swap from dimension {k!r} because it is " + f"cannot swap from dimension {current_name!r} because it is " f"not one of the dimensions of this dataset {tuple(self.dims)}" ) - if v in self.variables and self.variables[v].dims != (k,): + if new_name in self.variables and self.variables[new_name].dims != ( + current_name, + ): raise ValueError( - f"replacement dimension {v!r} is not a 1D " - f"variable along the old dimension {k!r}" + f"replacement dimension {new_name!r} is not a 1D " + f"variable along the old dimension {current_name!r}" ) result_dims = {dims_dict.get(dim, dim) for dim in self.dims} @@ -4412,24 +4437,24 @@ def swap_dims( variables: dict[Hashable, Variable] = {} indexes: dict[Hashable, Index] = {} - for k, v in self.variables.items(): - dims = tuple(dims_dict.get(dim, dim) for dim in v.dims) + for current_name, current_variable in self.variables.items(): + dims = tuple(dims_dict.get(dim, dim) for dim in current_variable.dims) var: Variable - if k in result_dims: - var = v.to_index_variable() + if current_name in result_dims: + var = current_variable.to_index_variable() var.dims = dims - if k in self._indexes: - indexes[k] = self._indexes[k] - variables[k] = var + if current_name in self._indexes: + indexes[current_name] = self._indexes[current_name] + variables[current_name] = var else: index, index_vars = create_default_index_implicit(var) indexes.update({name: index for name in index_vars}) variables.update(index_vars) coord_names.update(index_vars) else: - var = v.to_base_variable() + var = current_variable.to_base_variable() var.dims = dims - variables[k] = var + variables[current_name] = var return self._replace_with_new_dims(variables, coord_names, indexes=indexes) @@ -5241,7 +5266,7 @@ def to_stacked_array( """Combine variables of differing dimensionality into a DataArray without broadcasting. - This method is similar to Dataset.to_array but does not broadcast the + This method is similar to Dataset.to_dataarray but does not broadcast the variables. Parameters @@ -5270,7 +5295,7 @@ def to_stacked_array( See Also -------- - Dataset.to_array + Dataset.to_dataarray Dataset.stack DataArray.to_unstacked_dataset @@ -7000,7 +7025,7 @@ def assign( return data - def to_array( + def to_dataarray( self, dim: Hashable = "variable", name: Hashable | None = None ) -> DataArray: """Convert this dataset into an xarray.DataArray @@ -7037,6 +7062,12 @@ def to_array( return DataArray._construct_direct(variable, coords, name, indexes) + def to_array( + self, dim: Hashable = "variable", name: Hashable | None = None + ) -> DataArray: + """Deprecated version of to_dataarray""" + return self.to_dataarray(dim=dim, name=name) + def _normalize_dim_order( self, dim_order: Sequence[Hashable] | None = None ) -> dict[Hashable, int]: @@ -8650,6 +8681,10 @@ def map_blocks( dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks xarray.DataArray.map_blocks + :doc:`xarray-tutorial:advanced/map_blocks/map_blocks` + Advanced Tutorial on map_blocks with dask + + Examples -------- Calculate an anomaly from climatology using ``.groupby()``. Using @@ -10035,10 +10070,18 @@ def groupby( -------- :ref:`groupby` Users guide explanation of how to group and bin data. 
+
+        :doc:`xarray-tutorial:intermediate/01-high-level-computation-patterns`
+            Tutorial on :py:func:`~xarray.Dataset.groupby` for windowed computation.
+
+        :doc:`xarray-tutorial:fundamentals/03.2_groupby_with_xarray`
+            Tutorial on :py:func:`~xarray.Dataset.groupby` demonstrating reductions, transformations, and comparison with :py:func:`~xarray.Dataset.resample`.
+
         Dataset.groupby_bins
         DataArray.groupby
         core.groupby.DatasetGroupBy
         pandas.DataFrame.groupby
+        Dataset.coarsen
         Dataset.resample
         DataArray.resample
         """
@@ -10176,6 +10219,13 @@ def weighted(self, weights: DataArray) -> DatasetWeighted:
         See Also
         --------
         DataArray.weighted
+
+        :ref:`comput.weighted`
+            User guide on weighted array reduction using :py:func:`~xarray.Dataset.weighted`
+
+        :doc:`xarray-tutorial:fundamentals/03.4_weighted`
+            Tutorial on weighted reduction using :py:func:`~xarray.Dataset.weighted`
+
         """
         from xarray.core.weighted import DatasetWeighted
@@ -10252,6 +10302,16 @@ def coarsen(
         --------
         core.rolling.DatasetCoarsen
         DataArray.coarsen
+
+        :ref:`reshape.coarsen`
+            User guide describing :py:func:`~xarray.Dataset.coarsen`
+
+        :ref:`compute.coarsen`
+            User guide on block aggregation using :py:func:`~xarray.Dataset.coarsen`
+
+        :doc:`xarray-tutorial:fundamentals/03.3_windowed`
+            Tutorial on windowed computation using :py:func:`~xarray.Dataset.coarsen`
+
         """
         from xarray.core.rolling import DatasetCoarsen
@@ -10273,7 +10333,6 @@ def resample(
         base: int | None = None,
         offset: pd.Timedelta | datetime.timedelta | str | None = None,
         origin: str | DatetimeLike = "start_day",
-        keep_attrs: bool | None = None,
         loffset: datetime.timedelta | str | None = None,
         restore_coord_dims: bool | None = None,
         **indexer_kwargs: str,
@@ -10350,7 +10409,6 @@ def resample(
             base=base,
             offset=offset,
             origin=origin,
-            keep_attrs=keep_attrs,
             loffset=loffset,
             restore_coord_dims=restore_coord_dims,
             **indexer_kwargs,
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index 788e1efa80b..8c81d3e6a96 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -251,6 +251,10 @@ def to_dataarray(self) -> DataArray:
             data=self.data, dims=(self.name,), coords=self.coords, name=self.name
         )

+    def to_array(self) -> DataArray:
+        """Deprecated version of to_dataarray."""
+        return self.to_dataarray()
+

 T_Group = Union["T_DataArray", "IndexVariable", _DummyGroup]

diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py
index 5e3f2f57397..7241faa1c61 100644
--- a/xarray/core/resample_cftime.py
+++ b/xarray/core/resample_cftime.py
@@ -151,7 +151,10 @@ def first_items(self, index: CFTimeIndex):
                 f"Got {self.loffset}."
) - labels = labels + pd.to_timedelta(self.loffset) + if isinstance(self.loffset, datetime.timedelta): + labels = labels + self.loffset + else: + labels = labels + to_offset(self.loffset) # check binner fits data if index[0] < datetime_bins[0]: diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index b85092982e3..8f21fe37072 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -852,6 +852,7 @@ def __init__( Returns ------- coarsen + """ self.obj = obj self.windows = windows diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 07ba0be6a8c..fec695f83d7 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -67,7 +67,6 @@ def _importorskip( has_netCDF4, requires_netCDF4 = _importorskip("netCDF4") has_h5netcdf, requires_h5netcdf = _importorskip("h5netcdf") has_pynio, requires_pynio = _importorskip("Nio") -has_pseudonetcdf, requires_pseudonetcdf = _importorskip("PseudoNetCDF") has_cftime, requires_cftime = _importorskip("cftime") has_dask, requires_dask = _importorskip("dask") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") @@ -95,6 +94,10 @@ def _importorskip( requires_pandas_version_two = pytest.mark.skipif( not has_pandas_version_two, reason="requires pandas 2.0.0" ) +has_h5netcdf_ros3 = _importorskip("h5netcdf", "1.3.0") +requires_h5netcdf_ros3 = pytest.mark.skipif( + not has_h5netcdf_ros3[0], reason="requires h5netcdf 1.3.0" +) # change some global options for tests set_options(warn_for_unclosed_files=True) diff --git a/xarray/tests/test_accessor_dt.py b/xarray/tests/test_accessor_dt.py index 58fd5f3af84..387929d3fe9 100644 --- a/xarray/tests/test_accessor_dt.py +++ b/xarray/tests/test_accessor_dt.py @@ -6,6 +6,7 @@ import xarray as xr from xarray.tests import ( + assert_allclose, assert_array_equal, assert_chunks_equal, assert_equal, @@ -100,6 +101,19 @@ def test_field_access(self, field) -> None: assert expected.dtype == actual.dtype assert_identical(expected, actual) + def test_total_seconds(self) -> None: + # Subtract a value in the middle of the range to ensure that some values + # are negative + delta = self.data.time - np.datetime64("2000-01-03") + actual = delta.dt.total_seconds() + expected = xr.DataArray( + np.arange(-48, 52, dtype=np.float64) * 3600, + name="total_seconds", + coords=[self.data.time], + ) + # This works with assert_identical when pandas is >=1.5.0. 
+ assert_allclose(expected, actual) + @pytest.mark.parametrize( "field, pandas_field", [ diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 73352c3f7e1..80b6951dbff 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -69,9 +69,9 @@ requires_dask, requires_fsspec, requires_h5netcdf, + requires_h5netcdf_ros3, requires_iris, requires_netCDF4, - requires_pseudonetcdf, requires_pydap, requires_pynio, requires_scipy, @@ -3449,6 +3449,34 @@ def test_write_inconsistent_chunks(self) -> None: assert actual["y"].encoding["chunksizes"] == (100, 50) +@requires_h5netcdf_ros3 +class TestH5NetCDFDataRos3Driver(TestCommon): + engine: T_NetcdfEngine = "h5netcdf" + test_remote_dataset: str = ( + "https://www.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc" + ) + + def test_get_variable_list(self) -> None: + with open_dataset( + self.test_remote_dataset, + engine="h5netcdf", + backend_kwargs={"driver": "ros3"}, + ) as actual: + assert "Temperature" in list(actual) + + def test_get_variable_list_empty_driver_kwds(self) -> None: + driver_kwds = { + "secret_id": b"", + "secret_key": b"", + } + backend_kwargs = {"driver": "ros3", "driver_kwds": driver_kwds} + + with open_dataset( + self.test_remote_dataset, engine="h5netcdf", backend_kwargs=backend_kwargs + ) as actual: + assert "Temperature" in list(actual) + + @pytest.fixture(params=["scipy", "netcdf4", "h5netcdf", "pynio", "zarr"]) def readengine(request): return request.param @@ -4440,226 +4468,6 @@ def test_weakrefs(self) -> None: assert_identical(actual, expected) -@requires_pseudonetcdf -@pytest.mark.filterwarnings("ignore:IOAPI_ISPH is assumed to be 6370000") -class TestPseudoNetCDFFormat: - def open(self, path, **kwargs): - return open_dataset(path, engine="pseudonetcdf", **kwargs) - - @contextlib.contextmanager - def roundtrip( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False - ): - if save_kwargs is None: - save_kwargs = {} - if open_kwargs is None: - open_kwargs = {} - with create_tmp_file(allow_cleanup_failure=allow_cleanup_failure) as path: - self.save(data, path, **save_kwargs) - with self.open(path, **open_kwargs) as ds: - yield ds - - def test_ict_format(self) -> None: - """ - Open a CAMx file and test data variables - """ - stdattr = { - "fill_value": -9999.0, - "missing_value": -9999, - "scale": 1, - "llod_flag": -8888, - "llod_value": "N/A", - "ulod_flag": -7777, - "ulod_value": "N/A", - } - - def myatts(**attrs): - outattr = stdattr.copy() - outattr.update(attrs) - return outattr - - input = { - "coords": {}, - "attrs": { - "fmt": "1001", - "n_header_lines": 29, - "PI_NAME": "Henderson, Barron", - "ORGANIZATION_NAME": "U.S. 
EPA", - "SOURCE_DESCRIPTION": "Example file with artificial data", - "MISSION_NAME": "JUST_A_TEST", - "VOLUME_INFO": "1, 1", - "SDATE": "2018, 04, 27", - "WDATE": "2018, 04, 27", - "TIME_INTERVAL": "0", - "INDEPENDENT_VARIABLE_DEFINITION": "Start_UTC", - "INDEPENDENT_VARIABLE": "Start_UTC", - "INDEPENDENT_VARIABLE_UNITS": "Start_UTC", - "ULOD_FLAG": "-7777", - "ULOD_VALUE": "N/A", - "LLOD_FLAG": "-8888", - "LLOD_VALUE": ("N/A, N/A, N/A, N/A, 0.025"), - "OTHER_COMMENTS": ( - "www-air.larc.nasa.gov/missions/etc/" + "IcarttDataFormat.htm" - ), - "REVISION": "R0", - "R0": "No comments for this revision.", - "TFLAG": "Start_UTC", - }, - "dims": {"POINTS": 4}, - "data_vars": { - "Start_UTC": { - "data": [43200.0, 46800.0, 50400.0, 50400.0], - "dims": ("POINTS",), - "attrs": myatts(units="Start_UTC", standard_name="Start_UTC"), - }, - "lat": { - "data": [41.0, 42.0, 42.0, 42.0], - "dims": ("POINTS",), - "attrs": myatts(units="degrees_north", standard_name="lat"), - }, - "lon": { - "data": [-71.0, -72.0, -73.0, -74.0], - "dims": ("POINTS",), - "attrs": myatts(units="degrees_east", standard_name="lon"), - }, - "elev": { - "data": [5.0, 15.0, 20.0, 25.0], - "dims": ("POINTS",), - "attrs": myatts(units="meters", standard_name="elev"), - }, - "TEST_ppbv": { - "data": [1.2345, 2.3456, 3.4567, 4.5678], - "dims": ("POINTS",), - "attrs": myatts(units="ppbv", standard_name="TEST_ppbv"), - }, - "TESTM_ppbv": { - "data": [2.22, -9999.0, -7777.0, -8888.0], - "dims": ("POINTS",), - "attrs": myatts( - units="ppbv", standard_name="TESTM_ppbv", llod_value=0.025 - ), - }, - }, - } - chkfile = Dataset.from_dict(input) - with open_example_dataset( - "example.ict", engine="pseudonetcdf", backend_kwargs={"format": "ffi1001"} - ) as ictfile: - assert_identical(ictfile, chkfile) - - def test_ict_format_write(self) -> None: - fmtkw = {"format": "ffi1001"} - with open_example_dataset( - "example.ict", engine="pseudonetcdf", backend_kwargs=fmtkw - ) as expected: - with self.roundtrip( - expected, save_kwargs=fmtkw, open_kwargs={"backend_kwargs": fmtkw} - ) as actual: - assert_identical(expected, actual) - - def test_uamiv_format_read(self) -> None: - """ - Open a CAMx file and test data variables - """ - - camxfile = open_example_dataset( - "example.uamiv", engine="pseudonetcdf", backend_kwargs={"format": "uamiv"} - ) - data = np.arange(20, dtype="f").reshape(1, 1, 4, 5) - expected = xr.Variable( - ("TSTEP", "LAY", "ROW", "COL"), - data, - dict(units="ppm", long_name="O3".ljust(16), var_desc="O3".ljust(80)), - ) - actual = camxfile.variables["O3"] - assert_allclose(expected, actual) - - data = np.array([[[2002154, 0]]], dtype="i") - expected = xr.Variable( - ("TSTEP", "VAR", "DATE-TIME"), - data, - dict( - long_name="TFLAG".ljust(16), - var_desc="TFLAG".ljust(80), - units="DATE-TIME".ljust(16), - ), - ) - actual = camxfile.variables["TFLAG"] - assert_allclose(expected, actual) - camxfile.close() - - @requires_dask - def test_uamiv_format_mfread(self) -> None: - """ - Open a CAMx file and test data variables - """ - - camxfile = open_example_mfdataset( - ["example.uamiv", "example.uamiv"], - engine="pseudonetcdf", - concat_dim="TSTEP", - combine="nested", - backend_kwargs={"format": "uamiv"}, - ) - - data1 = np.arange(20, dtype="f").reshape(1, 1, 4, 5) - data = np.concatenate([data1] * 2, axis=0) - expected = xr.Variable( - ("TSTEP", "LAY", "ROW", "COL"), - data, - dict(units="ppm", long_name="O3".ljust(16), var_desc="O3".ljust(80)), - ) - actual = camxfile.variables["O3"] - assert_allclose(expected, actual) - - data = 
np.array([[[2002154, 0]]], dtype="i").repeat(2, 0) - attrs = dict( - long_name="TFLAG".ljust(16), - var_desc="TFLAG".ljust(80), - units="DATE-TIME".ljust(16), - ) - dims = ("TSTEP", "VAR", "DATE-TIME") - expected = xr.Variable(dims, data, attrs) - actual = camxfile.variables["TFLAG"] - assert_allclose(expected, actual) - camxfile.close() - - @pytest.mark.xfail(reason="Flaky; see GH3711") - def test_uamiv_format_write(self) -> None: - fmtkw = {"format": "uamiv"} - - expected = open_example_dataset( - "example.uamiv", engine="pseudonetcdf", backend_kwargs=fmtkw - ) - with self.roundtrip( - expected, - save_kwargs=fmtkw, - open_kwargs={"backend_kwargs": fmtkw}, - allow_cleanup_failure=True, - ) as actual: - assert_identical(expected, actual) - - expected.close() - - def save(self, dataset, path, **save_kwargs): - import PseudoNetCDF as pnc - - pncf = pnc.PseudoNetCDFFile() - pncf.dimensions = { - k: pnc.PseudoNetCDFDimension(pncf, k, v) for k, v in dataset.dims.items() - } - pncf.variables = { - k: pnc.PseudoNetCDFVariable( - pncf, k, v.dtype.char, v.dims, values=v.data[...], **v.attrs - ) - for k, v in dataset.variables.items() - } - for pk, pv in dataset.attrs.items(): - setattr(pncf, pk, pv) - - pnc.pncwrite(pncf, path, **save_kwargs) - - class TestEncodingInvalid: def test_extract_nc4_variable_encoding(self) -> None: var = xr.Variable(("x",), [1, 2, 3], {}, {"foo": "bar"}) @@ -5431,3 +5239,198 @@ def test_raise_writing_to_nczarr(self, mode) -> None: def test_pickle_open_mfdataset_dataset(): ds = open_example_mfdataset(["bears.nc"]) assert_identical(ds, pickle.loads(pickle.dumps(ds))) + + +@requires_zarr +class TestZarrRegionAuto: + def test_zarr_region_auto_all(self, tmp_path): + x = np.arange(0, 50, 10) + y = np.arange(0, 20, 2) + data = np.ones((5, 10)) + ds = xr.Dataset( + { + "test": xr.DataArray( + data, + dims=("x", "y"), + coords={"x": x, "y": y}, + ) + } + ) + ds.to_zarr(tmp_path / "test.zarr") + + ds_region = 1 + ds.isel(x=slice(2, 4), y=slice(6, 8)) + ds_region.to_zarr(tmp_path / "test.zarr", region="auto") + + ds_updated = xr.open_zarr(tmp_path / "test.zarr") + + expected = ds.copy() + expected["test"][2:4, 6:8] += 1 + assert_identical(ds_updated, expected) + + def test_zarr_region_auto_mixed(self, tmp_path): + x = np.arange(0, 50, 10) + y = np.arange(0, 20, 2) + data = np.ones((5, 10)) + ds = xr.Dataset( + { + "test": xr.DataArray( + data, + dims=("x", "y"), + coords={"x": x, "y": y}, + ) + } + ) + ds.to_zarr(tmp_path / "test.zarr") + + ds_region = 1 + ds.isel(x=slice(2, 4), y=slice(6, 8)) + ds_region.to_zarr( + tmp_path / "test.zarr", region={"x": "auto", "y": slice(6, 8)} + ) + + ds_updated = xr.open_zarr(tmp_path / "test.zarr") + + expected = ds.copy() + expected["test"][2:4, 6:8] += 1 + assert_identical(ds_updated, expected) + + def test_zarr_region_auto_noncontiguous(self, tmp_path): + x = np.arange(0, 50, 10) + y = np.arange(0, 20, 2) + data = np.ones((5, 10)) + ds = xr.Dataset( + { + "test": xr.DataArray( + data, + dims=("x", "y"), + coords={"x": x, "y": y}, + ) + } + ) + ds.to_zarr(tmp_path / "test.zarr") + + ds_region = 1 + ds.isel(x=[0, 2, 3], y=[5, 6]) + with pytest.raises(ValueError): + ds_region.to_zarr(tmp_path / "test.zarr", region={"x": "auto", "y": "auto"}) + + def test_zarr_region_auto_new_coord_vals(self, tmp_path): + x = np.arange(0, 50, 10) + y = np.arange(0, 20, 2) + data = np.ones((5, 10)) + ds = xr.Dataset( + { + "test": xr.DataArray( + data, + dims=("x", "y"), + coords={"x": x, "y": y}, + ) + } + ) + ds.to_zarr(tmp_path / "test.zarr") + + x = 
np.arange(5, 55, 10) + y = np.arange(0, 20, 2) + data = np.ones((5, 10)) + ds = xr.Dataset( + { + "test": xr.DataArray( + data, + dims=("x", "y"), + coords={"x": x, "y": y}, + ) + } + ) + + ds_region = 1 + ds.isel(x=slice(2, 4), y=slice(6, 8)) + with pytest.raises(KeyError): + ds_region.to_zarr(tmp_path / "test.zarr", region={"x": "auto", "y": "auto"}) + + def test_zarr_region_index_write(self, tmp_path): + from xarray.backends.zarr import ZarrStore + + x = np.arange(0, 50, 10) + y = np.arange(0, 20, 2) + data = np.ones((5, 10)) + ds = xr.Dataset( + { + "test": xr.DataArray( + data, + dims=("x", "y"), + coords={"x": x, "y": y}, + ) + } + ) + + ds_region = 1 + ds.isel(x=slice(2, 4), y=slice(6, 8)) + + ds.to_zarr(tmp_path / "test.zarr") + + with patch.object( + ZarrStore, + "set_variables", + side_effect=ZarrStore.set_variables, + autospec=True, + ) as mock: + ds_region.to_zarr(tmp_path / "test.zarr", region="auto", mode="r+") + + # should write the data vars but never the index vars with auto mode + for call in mock.call_args_list: + written_variables = call.args[1].keys() + assert "test" in written_variables + assert "x" not in written_variables + assert "y" not in written_variables + + def test_zarr_region_append(self, tmp_path): + x = np.arange(0, 50, 10) + y = np.arange(0, 20, 2) + data = np.ones((5, 10)) + ds = xr.Dataset( + { + "test": xr.DataArray( + data, + dims=("x", "y"), + coords={"x": x, "y": y}, + ) + } + ) + ds.to_zarr(tmp_path / "test.zarr") + + x_new = np.arange(40, 70, 10) + data_new = np.ones((3, 10)) + ds_new = xr.Dataset( + { + "test": xr.DataArray( + data_new, + dims=("x", "y"), + coords={"x": x_new, "y": y}, + ) + } + ) + + # Don't allow auto region detection in append mode due to complexities in + # implementing the overlap logic and lack of safety with parallel writes + with pytest.raises(ValueError): + ds_new.to_zarr( + tmp_path / "test.zarr", mode="a", append_dim="x", region="auto" + ) + + +@requires_zarr +def test_zarr_region_transpose(tmp_path): + x = np.arange(0, 50, 10) + y = np.arange(0, 20, 2) + data = np.ones((5, 10)) + ds = xr.Dataset( + { + "test": xr.DataArray( + data, + dims=("x", "y"), + coords={"x": x, "y": y}, + ) + } + ) + ds.to_zarr(tmp_path / "test.zarr") + + ds_region = 1 + ds.isel(x=[0], y=[0]).transpose() + ds_region.to_zarr( + tmp_path / "test.zarr", region={"x": slice(0, 1), "y": slice(0, 1)} + ) diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index f6ca6ef853e..9bdab8a6d7c 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -260,7 +260,7 @@ def test_timedelta_offset() -> None: xr.testing.assert_identical(timedelta_result, string_result) -@pytest.mark.parametrize("loffset", ["12h", datetime.timedelta(hours=-12)]) +@pytest.mark.parametrize("loffset", ["MS", "12h", datetime.timedelta(hours=-12)]) def test_resample_loffset_cftimeindex(loffset) -> None: datetimeindex = pd.date_range("2000-01-01", freq="6h", periods=10) da_datetimeindex = xr.DataArray(np.arange(10), [("time", datetimeindex)]) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 11d0d38594d..92415631748 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -1070,10 +1070,10 @@ def test_concat_fill_value(self, fill_value) -> None: def test_concat_join_kwarg(self) -> None: ds1 = Dataset( {"a": (("x", "y"), [[0]])}, coords={"x": [0], "y": [0]} - ).to_array() + ).to_dataarray() ds2 = Dataset( {"a": (("x", "y"), [[0]])}, coords={"x": 
[1], "y": [0.0001]} - ).to_array() + ).to_dataarray() expected: dict[JoinOptions, Any] = {} expected["outer"] = Dataset( @@ -1101,7 +1101,7 @@ def test_concat_join_kwarg(self) -> None: for join in expected: actual = concat([ds1, ds2], join=join, dim="x") - assert_equal(actual, expected[join].to_array()) + assert_equal(actual, expected[join].to_dataarray()) def test_concat_combine_attrs_kwarg(self) -> None: da1 = DataArray([0], coords=[("x", [0])], attrs={"b": 42}) @@ -1224,7 +1224,7 @@ def test_concat_preserve_coordinate_order() -> None: def test_concat_typing_check() -> None: ds = Dataset({"foo": 1}, {"bar": 2}) - da = Dataset({"foo": 3}, {"bar": 4}).to_array(dim="foo") + da = Dataset({"foo": 3}, {"bar": 4}).to_dataarray(dim="foo") # concatenate a list of non-homogeneous types must raise TypeError with pytest.raises( diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 1c2511427ac..c2a77c97d85 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -608,11 +608,11 @@ def test_to_dataset_roundtrip(self): v = self.lazy_array expected = u.assign_coords(x=u["x"]) - self.assertLazyAndEqual(expected, v.to_dataset("x").to_array("x")) + self.assertLazyAndEqual(expected, v.to_dataset("x").to_dataarray("x")) def test_merge(self): def duplicate_and_merge(array): - return xr.merge([array, array.rename("bar")]).to_array() + return xr.merge([array, array.rename("bar")]).to_dataarray() expected = duplicate_and_merge(self.eager_array) actual = duplicate_and_merge(self.lazy_array) @@ -1306,12 +1306,12 @@ def test_map_blocks_kwargs(obj): assert_identical(actual, expected) -def test_map_blocks_to_array(map_ds): +def test_map_blocks_to_dataarray(map_ds): with raise_if_dask_computes(): - actual = xr.map_blocks(lambda x: x.to_array(), map_ds) + actual = xr.map_blocks(lambda x: x.to_dataarray(), map_ds) - # to_array does not preserve name, so cannot use assert_identical - assert_equal(actual, map_ds.to_array()) + # to_dataarray does not preserve name, so cannot use assert_identical + assert_equal(actual, map_ds.to_dataarray()) @pytest.mark.parametrize( @@ -1376,8 +1376,8 @@ def test_map_blocks_template_convert_object(): assert_identical(actual, template) ds = da.to_dataset() - func = lambda x: x.to_array().isel(x=[1]) - template = ds.to_array().isel(x=[1, 5, 9]) + func = lambda x: x.to_dataarray().isel(x=[1]) + template = ds.to_dataarray().isel(x=[1, 5, 9]) with raise_if_dask_computes(): actual = xr.map_blocks(func, ds, template=template) assert_identical(actual, template) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 26537766f4d..44b9790f0b7 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -12,7 +12,6 @@ import numpy as np import pandas as pd import pytest -from packaging.version import Version # remove once numpy 2.0 is the oldest supported version try: @@ -31,7 +30,6 @@ set_options, ) from xarray.coding.times import CFDatetimeCoder -from xarray.convert import from_cdms2 from xarray.core import dtypes from xarray.core.common import full_like from xarray.core.coordinates import Coordinates @@ -3663,111 +3661,6 @@ def test_to_masked_array(self) -> None: ma = da.to_masked_array() assert len(ma.mask) == N - @pytest.mark.skipif( - Version(np.__version__) > Version("1.24") or sys.version_info[:2] > (3, 10), - reason="cdms2 is unmaintained and does not support newer `numpy` or python versions", - ) - def test_to_and_from_cdms2_classic(self) -> None: - """Classic with 1D axes""" - pytest.importorskip("cdms2") 
- - original = DataArray( - np.arange(6).reshape(2, 3), - [ - ("distance", [-2, 2], {"units": "meters"}), - ("time", pd.date_range("2000-01-01", periods=3)), - ], - name="foo", - attrs={"baz": 123}, - ) - expected_coords = [ - IndexVariable("distance", [-2, 2]), - IndexVariable("time", [0, 1, 2]), - ] - with pytest.deprecated_call(match=".*cdms2"): - actual = original.to_cdms2() - assert_array_equal(actual.asma(), original) - assert actual.id == original.name - assert tuple(actual.getAxisIds()) == original.dims - for axis, coord in zip(actual.getAxisList(), expected_coords): - assert axis.id == coord.name - assert_array_equal(axis, coord.values) - assert actual.baz == original.attrs["baz"] - - component_times = actual.getAxis(1).asComponentTime() - assert len(component_times) == 3 - assert str(component_times[0]) == "2000-1-1 0:0:0.0" - - with pytest.deprecated_call(match=".*cdms2"): - roundtripped = DataArray.from_cdms2(actual) - assert_identical(original, roundtripped) - - back = from_cdms2(actual) - assert original.dims == back.dims - assert original.coords.keys() == back.coords.keys() - for coord_name in original.coords.keys(): - assert_array_equal(original.coords[coord_name], back.coords[coord_name]) - - @pytest.mark.skipif( - Version(np.__version__) > Version("1.24") or sys.version_info[:2] > (3, 10), - reason="cdms2 is unmaintained and does not support newer `numpy` or python versions", - ) - def test_to_and_from_cdms2_sgrid(self) -> None: - """Curvilinear (structured) grid - - The rectangular grid case is covered by the classic case - """ - pytest.importorskip("cdms2") - - lonlat = np.mgrid[:3, :4] - lon = DataArray(lonlat[1], dims=["y", "x"], name="lon") - lat = DataArray(lonlat[0], dims=["y", "x"], name="lat") - x = DataArray(np.arange(lon.shape[1]), dims=["x"], name="x") - y = DataArray(np.arange(lon.shape[0]), dims=["y"], name="y") - original = DataArray( - lonlat.sum(axis=0), - dims=["y", "x"], - coords=dict(x=x, y=y, lon=lon, lat=lat), - name="sst", - ) - with pytest.deprecated_call(): - actual = original.to_cdms2() - assert tuple(actual.getAxisIds()) == original.dims - assert_array_equal(original.coords["lon"], actual.getLongitude().asma()) - assert_array_equal(original.coords["lat"], actual.getLatitude().asma()) - - back = from_cdms2(actual) - assert original.dims == back.dims - assert set(original.coords.keys()) == set(back.coords.keys()) - assert_array_equal(original.coords["lat"], back.coords["lat"]) - assert_array_equal(original.coords["lon"], back.coords["lon"]) - - @pytest.mark.skipif( - Version(np.__version__) > Version("1.24") or sys.version_info[:2] > (3, 10), - reason="cdms2 is unmaintained and does not support newer `numpy` or python versions", - ) - def test_to_and_from_cdms2_ugrid(self) -> None: - """Unstructured grid""" - pytest.importorskip("cdms2") - - lon = DataArray(np.random.uniform(size=5), dims=["cell"], name="lon") - lat = DataArray(np.random.uniform(size=5), dims=["cell"], name="lat") - cell = DataArray(np.arange(5), dims=["cell"], name="cell") - original = DataArray( - np.arange(5), dims=["cell"], coords={"lon": lon, "lat": lat, "cell": cell} - ) - with pytest.deprecated_call(match=".*cdms2"): - actual = original.to_cdms2() - assert tuple(actual.getAxisIds()) == original.dims - assert_array_equal(original.coords["lon"], actual.getLongitude().getValue()) - assert_array_equal(original.coords["lat"], actual.getLatitude().getValue()) - - back = from_cdms2(actual) - assert set(original.dims) == set(back.dims) - assert set(original.coords.keys()) == 
set(back.coords.keys()) - assert_array_equal(original.coords["lat"], back.coords["lat"]) - assert_array_equal(original.coords["lon"], back.coords["lon"]) - def test_to_dataset_whole(self) -> None: unnamed = DataArray([1, 2], dims="x") with pytest.raises(ValueError, match=r"unable to convert unnamed"): @@ -3793,15 +3686,23 @@ def test_to_dataset_whole(self) -> None: actual = named.to_dataset("bar") def test_to_dataset_split(self) -> None: - array = DataArray([1, 2, 3], coords=[("x", list("abc"))], attrs={"a": 1}) - expected = Dataset({"a": 1, "b": 2, "c": 3}, attrs={"a": 1}) + array = DataArray( + [[1, 2], [3, 4], [5, 6]], + coords=[("x", list("abc")), ("y", [0.0, 0.1])], + attrs={"a": 1}, + ) + expected = Dataset( + {"a": ("y", [1, 2]), "b": ("y", [3, 4]), "c": ("y", [5, 6])}, + coords={"y": [0.0, 0.1]}, + attrs={"a": 1}, + ) actual = array.to_dataset("x") assert_identical(expected, actual) with pytest.raises(TypeError): array.to_dataset("x", name="foo") - roundtripped = actual.to_array(dim="x") + roundtripped = actual.to_dataarray(dim="x") assert_identical(array, roundtripped) array = DataArray([1, 2, 3], dims="x") @@ -3818,10 +3719,55 @@ def test_to_dataset_retains_keys(self) -> None: array = DataArray([1, 2, 3], coords=[("x", dates)], attrs={"a": 1}) # convert to dateset and back again - result = array.to_dataset("x").to_array(dim="x") + result = array.to_dataset("x").to_dataarray(dim="x") assert_equal(array, result) + def test_to_dataset_coord_value_is_dim(self) -> None: + # github issue #7823 + + array = DataArray( + np.zeros((3, 3)), + coords={ + # 'a' is both a coordinate value and the name of a coordinate + "x": ["a", "b", "c"], + "a": [1, 2, 3], + }, + ) + + with pytest.raises( + ValueError, + match=( + re.escape("dimension 'x' would produce the variables ('a',)") + + ".*" + + re.escape("DataArray.rename(a=...) or DataArray.assign_coords(x=...)") + ), + ): + array.to_dataset("x") + + # test error message formatting when there are multiple ambiguous + # values/coordinates + array2 = DataArray( + np.zeros((3, 3, 2)), + coords={ + "x": ["a", "b", "c"], + "a": [1, 2, 3], + "b": [0.0, 0.1], + }, + ) + + with pytest.raises( + ValueError, + match=( + re.escape("dimension 'x' would produce the variables ('a', 'b')") + + ".*" + + re.escape( + "DataArray.rename(a=..., b=...) 
or DataArray.assign_coords(x=...)" + ) + ), + ): + array2.to_dataset("x") + def test__title_for_slice(self) -> None: array = DataArray( np.ones((4, 3, 2)), diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c5b4103c527..ff7703a1cf5 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4569,7 +4569,7 @@ def test_squeeze_drop(self) -> None: selected = data.squeeze(drop=True) assert_identical(data, selected) - def test_to_array(self) -> None: + def test_to_dataarray(self) -> None: ds = Dataset( {"a": 1, "b": ("x", [1, 2, 3])}, coords={"c": 42}, @@ -4579,10 +4579,10 @@ def test_to_array(self) -> None: coords = {"c": 42, "variable": ["a", "b"]} dims = ("variable", "x") expected = DataArray(data, coords, dims, attrs=ds.attrs) - actual = ds.to_array() + actual = ds.to_dataarray() assert_identical(expected, actual) - actual = ds.to_array("abc", name="foo") + actual = ds.to_dataarray("abc", name="foo") expected = expected.rename({"variable": "abc"}).rename("foo") assert_identical(expected, actual) diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 4cb832fcdfb..b166992deb1 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -600,19 +600,19 @@ def test_groupby_grouping_errors() -> None: with pytest.raises( ValueError, match=r"None of the data falls within bins with edges" ): - dataset.to_array().groupby_bins("x", bins=[0.1, 0.2, 0.3]) + dataset.to_dataarray().groupby_bins("x", bins=[0.1, 0.2, 0.3]) with pytest.raises(ValueError, match=r"All bin edges are NaN."): dataset.groupby_bins("x", bins=[np.nan, np.nan, np.nan]) with pytest.raises(ValueError, match=r"All bin edges are NaN."): - dataset.to_array().groupby_bins("x", bins=[np.nan, np.nan, np.nan]) + dataset.to_dataarray().groupby_bins("x", bins=[np.nan, np.nan, np.nan]) with pytest.raises(ValueError, match=r"Failed to group data."): dataset.groupby(dataset.foo * np.nan) with pytest.raises(ValueError, match=r"Failed to group data."): - dataset.to_array().groupby(dataset.foo * np.nan) + dataset.to_dataarray().groupby(dataset.foo * np.nan) def test_groupby_reduce_dimension_error(array) -> None: @@ -1776,11 +1776,6 @@ def test_resample_keep_attrs(self): expected = DataArray([1, 1, 1], [("time", times[::4])], attrs=array.attrs) assert_identical(result, expected) - with pytest.warns( - UserWarning, match="Passing ``keep_attrs`` to ``resample`` has no effect." - ): - array.resample(time="1D", keep_attrs=True) - def test_resample_skipna(self): times = pd.date_range("2000-01-01", freq="6h", periods=10) array = DataArray(np.ones(10), [("time", times)]) @@ -2138,11 +2133,6 @@ def test_resample_by_mean_with_keep_attrs(self): expected = ds.attrs assert expected == actual - with pytest.warns( - UserWarning, match="Passing ``keep_attrs`` to ``resample`` has no effect." 
- ): - ds.resample(time="1D", keep_attrs=True) - def test_resample_loffset(self): times = pd.date_range("2000-01-01", freq="6h", periods=10) ds = Dataset( diff --git a/xarray/tests/test_plugins.py b/xarray/tests/test_plugins.py index 441f16f4dca..1af255d30bb 100644 --- a/xarray/tests/test_plugins.py +++ b/xarray/tests/test_plugins.py @@ -15,7 +15,6 @@ from xarray.tests import ( has_h5netcdf, has_netCDF4, - has_pseudonetcdf, has_pydap, has_pynio, has_scipy, @@ -222,7 +221,6 @@ def test_lazy_import() -> None: blacklisted = [ "h5netcdf", "netCDF4", - "PseudoNetCDF", "pydap", "Nio", "scipy", @@ -279,7 +277,6 @@ def test_list_engines() -> None: assert ("scipy" in engines) == has_scipy assert ("h5netcdf" in engines) == has_h5netcdf assert ("netcdf4" in engines) == has_netCDF4 - assert ("pseudonetcdf" in engines) == has_pseudonetcdf assert ("pydap" in engines) == has_pydap assert ("zarr" in engines) == has_zarr assert ("pynio" in engines) == has_pynio diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 3b213db0b88..cb7b723a208 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -631,7 +631,7 @@ def test_rolling_construct(self, center: bool, window: int) -> None: ds_rolling_mean = ds_rolling.construct("window", stride=2, fill_value=0.0).mean( "window" ) - assert (ds_rolling_mean.isnull().sum() == 0).to_array(dim="vars").all() + assert (ds_rolling_mean.isnull().sum() == 0).to_dataarray(dim="vars").all() assert (ds_rolling_mean["x"] == 0.0).sum() >= 0 @pytest.mark.parametrize("center", (True, False)) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 489836b70fd..5b75c10631a 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -578,7 +578,7 @@ def setUp(self): def test_to_dataset_roundtrip(self): x = self.sp_xr - assert_equal(x, x.to_dataset("x").to_array("x")) + assert_equal(x, x.to_dataset("x").to_dataarray("x")) def test_align(self): a1 = xr.DataArray( @@ -830,7 +830,7 @@ def test_reindex(self): @pytest.mark.xfail def test_merge(self): x = self.sp_xr - y = xr.merge([x, x.rename("bar")]).to_array() + y = xr.merge([x, x.rename("bar")]).to_dataarray() assert isinstance(y, sparse.SparseArray) @pytest.mark.xfail diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index e25b403216e..af86c18668f 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2,12 +2,10 @@ import functools import operator -import sys import numpy as np import pandas as pd import pytest -from packaging import version import xarray as xr from xarray.core import dtypes, duck_array_ops @@ -1513,10 +1511,6 @@ def test_dot_dataarray(dtype): class TestVariable: - @pytest.mark.skipif( - (sys.version_info >= (3, 11)) and sys.platform.startswith("win"), - reason="fails for some reason on win and 3.11, GH7971", - ) @pytest.mark.parametrize( "func", ( @@ -1539,13 +1533,6 @@ class TestVariable: ids=repr, ) def test_aggregation(self, func, dtype): - if ( - func.name == "prod" - and dtype.kind == "f" - and version.parse(pint.__version__) < version.parse("0.19") - ): - pytest.xfail(reason="nanprod is not by older `pint` versions") - array = np.linspace(0, 1, 10).astype(dtype) * ( unit_registry.m if func.name != "cumprod" else unit_registry.dimensionless ) @@ -2348,10 +2335,6 @@ def test_repr(self, func, variant, dtype): # warnings or errors, but does not check the result func(data_array) - @pytest.mark.skipif( - (sys.version_info >= (3, 11)) and sys.platform.startswith("win"), - reason="fails for some 
reason on win and 3.11, GH7971", - ) @pytest.mark.parametrize( "func", ( @@ -2404,13 +2387,6 @@ def test_repr(self, func, variant, dtype): ids=repr, ) def test_aggregation(self, func, dtype): - if ( - func.name == "prod" - and dtype.kind == "f" - and version.parse(pint.__version__) < version.parse("0.19") - ): - pytest.xfail(reason="nanprod is not by older `pint` versions") - array = np.arange(10).astype(dtype) * ( unit_registry.m if func.name != "cumprod" else unit_registry.dimensionless ) @@ -2429,10 +2405,6 @@ def test_aggregation(self, func, dtype): assert_units_equal(expected, actual) assert_allclose(expected, actual) - @pytest.mark.skipif( - (sys.version_info >= (3, 11)) and sys.platform.startswith("win"), - reason="fails for some reason on win and 3.11, GH7971", - ) @pytest.mark.parametrize( "func", ( @@ -4085,10 +4057,6 @@ def test_repr(self, func, variant, dtype): # warnings or errors, but does not check the result func(ds) - @pytest.mark.skipif( - (sys.version_info >= (3, 11)) and sys.platform.startswith("win"), - reason="fails for some reason on win and 3.11, GH7971", - ) @pytest.mark.parametrize( "func", ( @@ -4110,13 +4078,6 @@ def test_repr(self, func, variant, dtype): ids=repr, ) def test_aggregation(self, func, dtype): - if ( - func.name == "prod" - and dtype.kind == "f" - and version.parse(pint.__version__) < version.parse("0.19") - ): - pytest.xfail(reason="nanprod is not by older `pint` versions") - unit_a, unit_b = ( (unit_registry.Pa, unit_registry.degK) if func.name != "cumprod" @@ -5647,10 +5608,6 @@ def test_merge(self, variant, unit, error, dtype): @requires_dask class TestPintWrappingDask: - @pytest.mark.skipif( - version.parse(pint.__version__) <= version.parse("0.21"), - reason="pint didn't support dask properly before 0.21", - ) def test_duck_array_ops(self): import dask.array diff --git a/xarray/util/generate_ops.py b/xarray/util/generate_ops.py index 5859934f646..f9aa69d983b 100644 --- a/xarray/util/generate_ops.py +++ b/xarray/util/generate_ops.py @@ -116,6 +116,10 @@ def {method}(self) -> Self: template_other_unary = """ def {method}(self, *args: Any, **kwargs: Any) -> Self: return self._unary_op({func}, *args, **kwargs)""" +unhashable = """ + # When __eq__ is defined but __hash__ is not, then an object is unhashable, + # and it should be declared as follows: + __hash__: None # type:ignore[assignment]""" # For some methods we override return type `bool` defined by base class `object`. # We need to add "# type: ignore[override]" @@ -152,6 +156,7 @@ def binops( template_binop, extras | {"type_ignore": _type_ignore(type_ignore_eq)}, ), + ([(None, None)], unhashable, extras), (BINOPS_REFLEXIVE, template_reflexive, extras), ] @@ -185,6 +190,7 @@ def binops_overload( "overload_type_ignore": _type_ignore(type_ignore_eq), }, ), + ([(None, None)], unhashable, extras), (BINOPS_REFLEXIVE, template_reflexive, extras), ] diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index e4984def498..4b7f28cb34b 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -107,7 +107,6 @@ def show_versions(file=sys.stdout): ("zarr", lambda mod: mod.__version__), ("cftime", lambda mod: mod.__version__), ("nc_time_axis", lambda mod: mod.__version__), - ("PseudoNetCDF", lambda mod: mod.__version__), ("iris", lambda mod: mod.__version__), ("bottleneck", lambda mod: mod.__version__), ("dask", lambda mod: mod.__version__),
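Finally, a minimal sketch (not part of the patch; the dataset is illustrative) of the central rename these test updates exercise, ``Dataset.to_array`` becoming ``Dataset.to_dataarray``, with the old name retained as a deprecated wrapper:

import xarray as xr

ds = xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [4, 5, 6])})

# New spelling: stack the data variables along a new "variable" dimension.
da = ds.to_dataarray(dim="variable")
assert da.dims == ("variable", "x")

# Old spelling still works, but is now a thin wrapper documented as deprecated.
assert ds.to_array(dim="variable").equals(da)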