diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 24995a389c4..ec0cca59545 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -1,4 +1,4 @@ -# How to issue an xarray release in 17 easy steps +# How to issue an xarray release in 20 easy steps Time required: about an hour. @@ -23,9 +23,9 @@ upstream https://github.com/pydata/xarray (push) git log v{0.X.Y-1}.. --format=%aN | sort -u | perl -pe 's/\n/$1, /' ``` Add these into `whats-new.rst` somewhere :) - 2. Write a release summary: ~50 words describing the high level features. This + 3. Write a release summary: ~50 words describing the high level features. This will be used in the release emails, tweets, GitHub release notes, etc. - 3. Look over whats-new.rst and the docs. Make sure "What's New" is complete + 4. Look over whats-new.rst and the docs. Make sure "What's New" is complete (check the date!) and add the release summary at the top. Things to watch out for: - Important new features should be highlighted towards the top. @@ -34,46 +34,46 @@ upstream https://github.com/pydata/xarray (push) due to a bad merge. Check for these before a release by using git diff, e.g., `git diff v{0.X.Y-1} whats-new.rst` where {0.X.Y-1} is the previous release. - 4. If possible, open a PR with the release summary and whatsnew changes. - 4. After merging, again ensure your master branch is synced to upstream: + 5. If possible, open a PR with the release summary and whatsnew changes. + 6. After merging, again ensure your master branch is synced to upstream: ```sh git pull upstream master ``` - 4. If you have any doubts, run the full test suite one final time! + 7. If you have any doubts, run the full test suite one final time! ```sh pytest ``` - 5. Check that the ReadTheDocs build is passing. - 6. On the master branch, commit the release in git: + 8. Check that the ReadTheDocs build is passing. + 9. On the master branch, commit the release in git: ```s git commit -am 'Release v{0.X.Y}' ``` - 7. Tag the release: +10. Tag the release: ```sh git tag -a v{0.X.Y} -m 'v{0.X.Y}' ``` - 8. Build source and binary wheels for PyPI: +11. Build source and binary wheels for PyPI: ```sh git clean -xdf # this deletes all uncommitted changes! python setup.py bdist_wheel sdist ``` - 9. Use twine to check the package build: +12. Use twine to check the package build: ```sh twine check dist/xarray-{0.X.Y}* ``` -10. Use twine to register and upload the release on PyPI. Be careful, you can't +13. Use twine to register and upload the release on PyPI. Be careful, you can't take this back! ```sh twine upload dist/xarray-{0.X.Y}* ``` You will need to be listed as a package owner at https://pypi.python.org/pypi/xarray for this to work. -11. Push your changes to master: +14. Push your changes to master: ```sh git push upstream master git push upstream --tags ``` -12. Update the stable branch (used by ReadTheDocs) and switch back to master: +15. Update the stable branch (used by ReadTheDocs) and switch back to master: ```sh git checkout stable git rebase master @@ -83,7 +83,7 @@ upstream https://github.com/pydata/xarray (push) It's OK to force push to 'stable' if necessary. (We also update the stable branch with `git cherry-pick` for documentation only fixes that apply the current released version.) -13. Add a section for the next release {0.X.Y+1} to doc/whats-new.rst: +16. Add a section for the next release {0.X.Y+1} to doc/whats-new.rst: ``` .. 
_whats-new.{0.X.Y+1}: @@ -109,19 +109,19 @@ upstream https://github.com/pydata/xarray (push) Internal Changes ~~~~~~~~~~~~~~~~ ``` -14. Commit your changes and push to master again: +17. Commit your changes and push to master again: ```sh git commit -am 'New whatsnew section' git push upstream master ``` You're done pushing to master! -15. Issue the release on GitHub. Click on "Draft a new release" at +18. Issue the release on GitHub. Click on "Draft a new release" at https://github.com/pydata/xarray/releases. Type in the version number and paste the release summary in the notes. -16. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ +19. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ and switch your new release tag (at the bottom) from "Inactive" to "Active". It should now build automatically. -17. Issue the release announcement to mailing lists & Twitter. For bug fix releases, I +20. Issue the release announcement to mailing lists & Twitter. For bug fix releases, I usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py index 527093cf5bc..93d12754365 100755 --- a/ci/min_deps_check.py +++ b/ci/min_deps_check.py @@ -23,9 +23,21 @@ "pytest-env", } -POLICY_MONTHS = {"python": 42, "numpy": 24, "pandas": 12, "scipy": 12} -POLICY_MONTHS_DEFAULT = 6 - +POLICY_MONTHS = {"python": 42, "numpy": 24, "setuptools": 42} +POLICY_MONTHS_DEFAULT = 12 +POLICY_OVERRIDE = { + # dask < 2.9 has trouble with nan-reductions + # TODO remove this special case and the matching note in installing.rst + # after January 2021. + "dask": (2, 9), + "distributed": (2, 9), + # setuptools-scm doesn't work with setuptools < 36.7 (Nov 2017). + # The conda metadata is malformed for setuptools < 38.4 (Jan 2018) + # (it's missing a timestamp which prevents this tool from working). + # TODO remove this special case and the matching note in installing.rst + # after July 2021. 
+ "setuptools": (38, 4), +} has_errors = False @@ -151,6 +163,11 @@ def process_pkg( policy_minor = minor policy_published_actual = published + try: + policy_major, policy_minor = POLICY_OVERRIDE[pkg] + except KeyError: + pass + if (req_major, req_minor) < (policy_major, policy_minor): status = "<" elif (req_major, req_minor) > (policy_major, policy_minor): diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 6caebc46cdf..be1b073cf1e 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -13,14 +13,18 @@ dependencies: - ipython - iris>=2.3 - jupyter_client + - matplotlib-base=3.3.0 - nbsphinx - netcdf4>=1.5 - numba - numpy>=1.17 - - pandas>=1.0 + # FIXME https://github.com/pydata/xarray/issues/4287 + # - pandas>=1.0 + - pandas=1.0 - rasterio>=1.1 - seaborn - setuptools - - sphinx>=2.3 + - sphinx=3.1 - sphinx_rtd_theme>=0.4 + - sphinx-autosummary-accessors - zarr>=2.4 diff --git a/ci/requirements/py36-bare-minimum.yml b/ci/requirements/py36-bare-minimum.yml index 00fef672855..aaba5366f67 100644 --- a/ci/requirements/py36-bare-minimum.yml +++ b/ci/requirements/py36-bare-minimum.yml @@ -10,4 +10,4 @@ dependencies: - pytest-env - numpy=1.15 - pandas=0.25 - - setuptools=41.2 + - setuptools=38.4 diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index c11c52bd19f..2a977449033 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -43,7 +43,7 @@ dependencies: - rasterio=1.0 - scipy=1.3 - seaborn=0.9 - - setuptools=41.2 + - setuptools=38.4 # - sparse # See py36-min-nep18.yml - toolz=0.10 - zarr=2.3 diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index a9f12abfeae..17aae6932ac 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -15,6 +15,6 @@ dependencies: - pytest - pytest-cov - pytest-env - - scipy=1.2 - - setuptools=41.2 + - scipy=1.3 + - setuptools=38.4 - sparse=0.8 diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index efef4259b74..6aca90860d2 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -665,13 +665,10 @@ backends.NetCDF4DataStore.encode backends.NetCDF4DataStore.encode_attribute backends.NetCDF4DataStore.encode_variable - backends.NetCDF4DataStore.get backends.NetCDF4DataStore.get_attrs backends.NetCDF4DataStore.get_dimensions backends.NetCDF4DataStore.get_encoding backends.NetCDF4DataStore.get_variables - backends.NetCDF4DataStore.items - backends.NetCDF4DataStore.keys backends.NetCDF4DataStore.load backends.NetCDF4DataStore.open backends.NetCDF4DataStore.open_store_variable @@ -685,27 +682,20 @@ backends.NetCDF4DataStore.store backends.NetCDF4DataStore.store_dataset backends.NetCDF4DataStore.sync - backends.NetCDF4DataStore.values - backends.NetCDF4DataStore.attrs backends.NetCDF4DataStore.autoclose - backends.NetCDF4DataStore.dimensions backends.NetCDF4DataStore.ds backends.NetCDF4DataStore.format backends.NetCDF4DataStore.is_remote backends.NetCDF4DataStore.lock - backends.NetCDF4DataStore.variables backends.H5NetCDFStore.close backends.H5NetCDFStore.encode backends.H5NetCDFStore.encode_attribute backends.H5NetCDFStore.encode_variable - backends.H5NetCDFStore.get backends.H5NetCDFStore.get_attrs backends.H5NetCDFStore.get_dimensions backends.H5NetCDFStore.get_encoding backends.H5NetCDFStore.get_variables - backends.H5NetCDFStore.items - backends.H5NetCDFStore.keys backends.H5NetCDFStore.load backends.H5NetCDFStore.open_store_variable backends.H5NetCDFStore.prepare_variable 
@@ -718,39 +708,25 @@ backends.H5NetCDFStore.store backends.H5NetCDFStore.store_dataset backends.H5NetCDFStore.sync - backends.H5NetCDFStore.values - backends.H5NetCDFStore.attrs - backends.H5NetCDFStore.dimensions backends.H5NetCDFStore.ds - backends.H5NetCDFStore.variables backends.PydapDataStore.close - backends.PydapDataStore.get backends.PydapDataStore.get_attrs backends.PydapDataStore.get_dimensions backends.PydapDataStore.get_encoding backends.PydapDataStore.get_variables - backends.PydapDataStore.items - backends.PydapDataStore.keys backends.PydapDataStore.load backends.PydapDataStore.open backends.PydapDataStore.open_store_variable - backends.PydapDataStore.values - backends.PydapDataStore.attrs - backends.PydapDataStore.dimensions - backends.PydapDataStore.variables backends.ScipyDataStore.close backends.ScipyDataStore.encode backends.ScipyDataStore.encode_attribute backends.ScipyDataStore.encode_variable - backends.ScipyDataStore.get backends.ScipyDataStore.get_attrs backends.ScipyDataStore.get_dimensions backends.ScipyDataStore.get_encoding backends.ScipyDataStore.get_variables - backends.ScipyDataStore.items - backends.ScipyDataStore.keys backends.ScipyDataStore.load backends.ScipyDataStore.open_store_variable backends.ScipyDataStore.prepare_variable @@ -763,11 +739,7 @@ backends.ScipyDataStore.store backends.ScipyDataStore.store_dataset backends.ScipyDataStore.sync - backends.ScipyDataStore.values - backends.ScipyDataStore.attrs - backends.ScipyDataStore.dimensions backends.ScipyDataStore.ds - backends.ScipyDataStore.variables backends.FileManager.acquire backends.FileManager.acquire_context diff --git a/doc/api.rst b/doc/api.rst index 72a6dd4d97a..5e8a2be0ed4 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -572,7 +572,9 @@ Universal functions With recent versions of numpy, dask and xarray, NumPy ufuncs are now supported directly on all xarray and dask objects. This obviates the need for the ``xarray.ufuncs`` module, which should not be used for new code - unless compatibility with versions of NumPy prior to v1.13 is required. + unless compatibility with versions of NumPy prior to v1.13 is + required. They will be removed once support for NumPy prior to + v1.17 is dropped. These functions are copied from NumPy, but extended to work on NumPy arrays, dask arrays and all xarray objects. You can find them in the ``xarray.ufuncs`` diff --git a/doc/computation.rst b/doc/computation.rst index 3660aed93ed..dcfe270a942 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -188,9 +188,16 @@ a value when aggregating: r = arr.rolling(y=3, center=True, min_periods=2) r.mean() +From version 0.17, xarray supports multidimensional rolling, + +.. ipython:: python + + r = arr.rolling(x=2, y=3, min_periods=2) + r.mean() + .. tip:: - Note that rolling window aggregations are faster and use less memory when bottleneck_ is installed. This only applies to numpy-backed xarray objects. + Note that rolling window aggregations are faster and use less memory when bottleneck_ is installed. This only applies to numpy-backed xarray objects with 1d-rolling. .. _bottleneck: https://github.com/pydata/bottleneck/ @@ -227,9 +234,9 @@ windowed rolling, convolution, short-time FFT etc. .. 
ipython:: python # rolling with 2-point stride - rolling_da = r.construct("window_dim", stride=2) + rolling_da = r.construct(x="x_win", y="y_win", stride=2) rolling_da - rolling_da.mean("window_dim", skipna=False) + rolling_da.mean(["x_win", "y_win"], skipna=False) Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view of the original array, it is memory efficient. @@ -238,7 +245,7 @@ You can also use ``construct`` to compute a weighted rolling sum: .. ipython:: python weight = xr.DataArray([0.25, 0.5, 0.25], dims=["window"]) - arr.rolling(y=3).construct("window").dot(weight) + arr.rolling(y=3).construct(y="window").dot(weight) .. note:: numpy's Nan-aggregation functions such as ``nansum`` copy the original array. diff --git a/doc/conf.py b/doc/conf.py index d3d126cb33f..2f97c884ff5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,11 +20,7 @@ import sys from contextlib import suppress -# --------- autosummary templates ------------------ -# TODO: eventually replace this with a sphinx.ext.auto_accessor module -import sphinx -from sphinx.ext.autodoc import AttributeDocumenter, Documenter, MethodDocumenter -from sphinx.util import rpartition +import sphinx_autosummary_accessors # make sure the source version is preferred (#3567) root = pathlib.Path(__file__).absolute().parent.parent @@ -53,14 +49,14 @@ matplotlib.use("Agg") try: - import rasterio + import rasterio # noqa: F401 except ImportError: allowed_failures.update( ["gallery/plot_rasterio_rgb.py", "gallery/plot_rasterio.py"] ) try: - import cartopy + import cartopy # noqa: F401 except ImportError: allowed_failures.update( [ @@ -88,6 +84,7 @@ "IPython.sphinxext.ipython_directive", "IPython.sphinxext.ipython_console_highlighting", "nbsphinx", + "sphinx_autosummary_accessors", ] extlinks = { @@ -116,7 +113,7 @@ numpydoc_show_class_members = False # Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] +templates_path = ["_templates", sphinx_autosummary_accessors.templates_path] # The suffix of source filenames. source_suffix = ".rst" @@ -275,14 +272,14 @@ # -- Options for LaTeX output --------------------------------------------- -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - # 'preamble': '', -} +# latex_elements = { +# # The paper size ('letterpaper' or 'a4paper'). +# # 'papersize': 'letterpaper', +# # The font size ('10pt', '11pt' or '12pt'). +# # 'pointsize': '10pt', +# # Additional stuff for the LaTeX preamble. +# # 'preamble': '', +# } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, @@ -364,113 +361,3 @@ "dask": ("https://docs.dask.org/en/latest", None), "cftime": ("https://unidata.github.io/cftime", None), } - - -# --------- autosummary templates ------------------ -# TODO: eventually replace this with a sphinx.ext.auto_accessor module -class AccessorDocumenter(MethodDocumenter): - """ - Specialized Documenter subclass for accessors. 
- """ - - objtype = "accessor" - directivetype = "method" - - # lower than MethodDocumenter so this is not chosen for normal methods - priority = 0.6 - - def format_signature(self): - # this method gives an error/warning for the accessors, therefore - # overriding it (accessor has no arguments) - return "" - - -class AccessorLevelDocumenter(Documenter): - """ - Specialized Documenter subclass for objects on accessor level (methods, - attributes). - """ - - # This is the simple straightforward version - # modname is None, base the last elements (eg 'hour') - # and path the part before (eg 'Series.dt') - # def resolve_name(self, modname, parents, path, base): - # modname = 'pandas' - # mod_cls = path.rstrip('.') - # mod_cls = mod_cls.split('.') - # - # return modname, mod_cls + [base] - - def resolve_name(self, modname, parents, path, base): - if modname is None: - if path: - mod_cls = path.rstrip(".") - else: - mod_cls = None - # if documenting a class-level object without path, - # there must be a current class, either from a parent - # auto directive ... - mod_cls = self.env.temp_data.get("autodoc:class") - # ... or from a class directive - if mod_cls is None: - mod_cls = self.env.temp_data.get("py:class") - # ... if still None, there's no way to know - if mod_cls is None: - return None, [] - # HACK: this is added in comparison to ClassLevelDocumenter - # mod_cls still exists of class.accessor, so an extra - # rpartition is needed - modname, accessor = rpartition(mod_cls, ".") - modname, cls = rpartition(modname, ".") - parents = [cls, accessor] - # if the module name is still missing, get it like above - if not modname: - modname = self.env.temp_data.get("autodoc:module") - if not modname: - if sphinx.__version__ > "1.3": - modname = self.env.ref_context.get("py:module") - else: - modname = self.env.temp_data.get("py:module") - # ... 
else, it stays None, which means invalid - return modname, parents + [base] - - -class AccessorAttributeDocumenter(AccessorLevelDocumenter, AttributeDocumenter): - - objtype = "accessorattribute" - directivetype = "attribute" - - # lower than AttributeDocumenter so this is not chosen for normal attributes - priority = 0.6 - - -class AccessorMethodDocumenter(AccessorLevelDocumenter, MethodDocumenter): - - objtype = "accessormethod" - directivetype = "method" - - # lower than MethodDocumenter so this is not chosen for normal methods - priority = 0.6 - - -class AccessorCallableDocumenter(AccessorLevelDocumenter, MethodDocumenter): - """ - This documenter lets us removes .__call__ from the method signature for - callable accessors like Series.plot - """ - - objtype = "accessorcallable" - directivetype = "method" - - # lower than MethodDocumenter; otherwise the doc build prints warnings - priority = 0.5 - - def format_name(self): - return MethodDocumenter.format_name(self).rstrip(".__call__") - - -def setup(app): - app.add_autodocumenter(AccessorDocumenter) - app.add_autodocumenter(AccessorAttributeDocumenter) - app.add_autodocumenter(AccessorMethodDocumenter) - app.add_autodocumenter(AccessorCallableDocumenter) diff --git a/doc/installing.rst b/doc/installing.rst index a25bf65e342..62e026e20a4 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -7,7 +7,7 @@ Required dependencies --------------------- - Python (3.6 or later) -- setuptools +- setuptools (38.4 or later) - `numpy `__ (1.15 or later) - `pandas `__ (0.25 or later) @@ -93,16 +93,16 @@ dependencies: - **Python:** 42 months (`NEP-29 `_) +- **setuptools:** 42 months (but no older than 38.4) - **numpy:** 24 months (`NEP-29 `_) -- **pandas:** 12 months -- **scipy:** 12 months +- **dask and dask.distributed:** 12 months (but no older than 2.9) - **sparse, pint** and other libraries that rely on `NEP-18 `_ for integration: very latest available versions only, until the technology will have matured. This extends to dask when used in conjunction with any of these libraries. numpy >=1.17. -- **all other libraries:** 6 months +- **all other libraries:** 12 months The above should be interpreted as *the minor version (X.Y) initially published no more than N months ago*. Patch versions (x.y.Z) are not pinned, and only the latest available diff --git a/doc/internals.rst b/doc/internals.rst index 46c117e312b..aa9e1dedc68 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -42,6 +42,38 @@ xarray objects via the (readonly) :py:attr:`Dataset.variables ` and :py:attr:`DataArray.variable ` attributes. +Duck arrays +----------- + +.. warning:: + + This is a experimental feature. + +xarray can wrap custom `duck array`_ objects as long as they define numpy's +``shape``, ``dtype`` and ``ndim`` properties and the ``__array__``, +``__array_ufunc__`` and ``__array_function__`` methods. + +In certain situations (e.g. when printing the collapsed preview of +variables of a ``Dataset``), xarray will display the repr of a `duck array`_ +in a single line, truncating it to a certain number of characters. If that +would drop too much information, the `duck array`_ may define a +``_repr_inline_`` method that takes ``max_width`` (number of characters) as an +argument: + +.. code:: python + + class MyDuckArray: + ... + + def _repr_inline_(self, max_width): + """ format to a single line with at most max_width characters """ + ... + + ... + +.. 
_duck array: https://numpy.org/neps/nep-0022-ndarray-duck-typing-overview.html + + Extending xarray ---------------- @@ -139,6 +171,11 @@ To help users keep things straight, please `let us know for an open source library. In the future, we will maintain a list of accessors and the libraries that implement them on this page. +To make documenting accessors with ``sphinx`` and ``sphinx.ext.autosummary`` +easier, you can use `sphinx-ext-autosummary`_. + +.. _sphinx-ext-autosummary: https://sphinx-autosummary-accessors.readthedocs.io/ + .. _zarr_encoding: Zarr Encoding Specification diff --git a/doc/roadmap.rst b/doc/roadmap.rst index 401dac779ad..2f7063434b8 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -224,6 +224,7 @@ Current core developers - Tom Nicholas - Guido Imperiale - Justus Magin +- Mathias Hauser NumFOCUS ~~~~~~~~ diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d85fa0dee14..3521e8215dd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -25,22 +25,57 @@ Breaking changes New Features ~~~~~~~~~~~~ +- :py:meth:`~xarray.DataArray.rolling` and :py:meth:`~xarray.Dataset.rolling` + now accept more than 1 dimension.(:pull:`4219`) + By `Keisuke Fujii `_. - Build :py:meth:`CFTimeIndex.__repr__` explicitly as :py:class:`pandas.Index`. Add ``calendar`` as a new property for :py:class:`CFTimeIndex` and show ``calendar`` and ``length`` in :py:meth:`CFTimeIndex.__repr__` (:issue:`2416`, :pull:`4092`) `Aaron Spring `_. +- Relaxed the :ref:`mindeps_policy` to support: + + - all versions of setuptools released in the last 42 months (but no older than 38.4) + - all versions of dask and dask.distributed released in the last 12 months (but no + older than 2.9) + - all versions of other packages released in the last 12 months + + All are up from 6 months (:issue:`4295`) + `Guido Imperiale `_. +- Use a wrapped array's ``_repr_inline_`` method to construct the collapsed ``repr`` + of :py:class:`DataArray` and :py:class:`Dataset` objects and + document the new method in :doc:`internals`. (:pull:`4248`). + By `Justus Magin `_. +- :py:meth:`~xarray.DataArray.to_dataframe` and :py:meth:`~xarray.Dataset.to_dataframe` + now accept a ``dim_order`` parameter allowing to specify the resulting dataframe's + dimensions order (:issue:`4331`, :pull:`4333`). + By `Thomas Zilio `_. Bug fixes ~~~~~~~~~ +- Fixed a bug in backend caused by basic installation of Dask (:issue:`4164`, :pull:`4318`) + `Sam Morley `_. +- Fixed inconsistencies between docstring and functionality for :py:meth:`DataArray.str.get` + and :py:meth:`DataArray.str.wrap` (:issue:`4334`). By `Mathias Hauser `_. +- Fixed overflow issue causing incorrect results in computing means of :py:class:`cftime.datetime` + arrays (:issue:`4341`). By `Spencer Clark `_. Documentation ~~~~~~~~~~~~~ +- update the docstring of :py:meth:`DataArray.copy` to remove incorrect mention of 'dataset' (:issue:`3606`) + By `Sander van Rijn `_. +- removed skipna argument from :py:meth:`DataArray.count`, :py:meth:`DataArray.any`, :py:meth:`DataArray.all`. (:issue:`755`) + By `Sander van Rijn `_ Internal Changes ~~~~~~~~~~~~~~~~ +- Fix ``pip install .`` when no ``.git`` directory exists; namely when the xarray source + directory has been rsync'ed by PyCharm Professional for a remote deployment over SSH. + By `Guido Imperiale `_ +- Only load resource files when running inside a Jupyter Notebook + (:issue:`4294`) By `Guido Imperiale `_ .. 
_whats-new.0.16.0: @@ -158,9 +193,10 @@ New Features Enhancements ~~~~~~~~~~~~ - Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp` - For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially - rather than interpolating in multidimensional space. (:issue:`2223`) + We performs independant interpolation sequentially rather than interpolating in + one large multidimensional space. (:issue:`2223`) By `Keisuke Fujii `_. +- :py:meth:`DataArray.interp` now support interpolations over chunked dimensions (:pull:`4155`). By `Alexandre Poux `_. - Major performance improvement for :py:meth:`Dataset.from_dataframe` when the dataframe has a MultiIndex (:pull:`4184`). By `Stephan Hoyer `_. diff --git a/readthedocs.yml b/readthedocs.yml index 88abb57ae43..072a4b5110c 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -7,6 +7,6 @@ conda: environment: ci/requirements/doc.yml sphinx: - fail_on_warning: false + fail_on_warning: true formats: [] diff --git a/requirements.txt b/requirements.txt index f73887ff5cc..3cbeb368c09 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,4 @@ numpy >= 1.15 pandas >= 0.25 -setuptools >= 41.2 +setuptools >= 38.4 diff --git a/setup.cfg b/setup.cfg index 42dc53bb882..93d55cbca75 100644 --- a/setup.cfg +++ b/setup.cfg @@ -69,16 +69,16 @@ classifiers = Topic :: Scientific/Engineering [options] -packages = xarray +packages = find: zip_safe = False # https://mypy.readthedocs.io/en/latest/installed_packages.html include_package_data = True python_requires = >=3.6 install_requires = numpy >= 1.15 pandas >= 0.25 - setuptools >= 41.2 # For pkg_resources + setuptools >= 38.4 # For pkg_resources setup_requires = - setuptools >= 41.2 + setuptools >= 38.4 setuptools_scm [options.package_data] @@ -138,6 +138,8 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-cftime.*] ignore_missing_imports = True +[mypy-cupy.*] +ignore_missing_imports = True [mypy-dask.*] ignore_missing_imports = True [mypy-distributed.*] @@ -195,4 +197,4 @@ ignore_errors = True test = pytest [pytest-watch] -nobeep = True \ No newline at end of file +nobeep = True diff --git a/setup.py b/setup.py index 76755a445f7..e7cd9bc18e2 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,11 @@ #!/usr/bin/env python from setuptools import setup -setup(use_scm_version=True) +try: + setup(use_scm_version=True) +except LookupError as e: + # .git has been removed, and this is not a package created by sdist + # This is the case e.g. 
of a remote deployment with PyCharm Professional + if not str(e).startswith("setuptools-scm was unable to detect version"): + raise + setup(version="999") diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 63c4c956f86..da619905ce6 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -1,8 +1,6 @@ import logging import time import traceback -import warnings -from collections.abc import Mapping import numpy as np @@ -74,18 +72,9 @@ def __array__(self, dtype=None): return np.asarray(self[key], dtype=dtype) -class AbstractDataStore(Mapping): +class AbstractDataStore: __slots__ = () - def __iter__(self): - return iter(self.variables) - - def __getitem__(self, key): - return self.variables[key] - - def __len__(self): - return len(self.variables) - def get_dimensions(self): # pragma: no cover raise NotImplementedError() @@ -125,38 +114,6 @@ def load(self): attributes = FrozenDict(self.get_attrs()) return variables, attributes - @property - def variables(self): # pragma: no cover - warnings.warn( - "The ``variables`` property has been deprecated and " - "will be removed in xarray v0.11.", - FutureWarning, - stacklevel=2, - ) - variables, _ = self.load() - return variables - - @property - def attrs(self): # pragma: no cover - warnings.warn( - "The ``attrs`` property has been deprecated and " - "will be removed in xarray v0.11.", - FutureWarning, - stacklevel=2, - ) - _, attrs = self.load() - return attrs - - @property - def dimensions(self): # pragma: no cover - warnings.warn( - "The ``dimensions`` property has been deprecated and " - "will be removed in xarray v0.11.", - FutureWarning, - stacklevel=2, - ) - return self.get_dimensions() - def close(self): pass diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py index 435690f2079..bb876a432c8 100644 --- a/xarray/backends/locks.py +++ b/xarray/backends/locks.py @@ -72,12 +72,15 @@ def _get_scheduler(get=None, collection=None) -> Optional[str]: dask.base.get_scheduler """ try: - import dask # noqa: F401 + # Fix for bug caused by dask installation that doesn't involve the toolz library + # Issue: 4164 + import dask + from dask.base import get_scheduler # noqa: F401 + + actual_get = get_scheduler(get, collection) except ImportError: return None - actual_get = dask.base.get_scheduler(get, collection) - try: from dask.distributed import Client diff --git a/xarray/core/accessor_str.py b/xarray/core/accessor_str.py index 5502ba72855..1f0c95af71e 100644 --- a/xarray/core/accessor_str.py +++ b/xarray/core/accessor_str.py @@ -90,7 +90,7 @@ def _apply(self, f, dtype=None): def len(self): """ - Compute the length of each element in the array. + Compute the length of each string in the array. Returns ------- @@ -104,9 +104,9 @@ def __getitem__(self, key): else: return self.get(key) - def get(self, i): + def get(self, i, default=""): """ - Extract element from indexable in each element in the array. + Extract character number `i` from each string in the array. Parameters ---------- @@ -120,12 +120,18 @@ def get(self, i): ------- items : array of objects """ - obj = slice(-1, None) if i == -1 else slice(i, i + 1) - return self._apply(lambda x: x[obj]) + s = slice(-1, None) if i == -1 else slice(i, i + 1) + + def f(x): + item = x[s] + + return item if item else default + + return self._apply(f) def slice(self, start=None, stop=None, step=None): """ - Slice substrings from each element in the array. + Slice substrings from each string in the array. 
Parameters ---------- @@ -359,7 +365,7 @@ def count(self, pat, flags=0): def startswith(self, pat): """ - Test if the start of each string element matches a pattern. + Test if the start of each string in the array matches a pattern. Parameters ---------- @@ -378,7 +384,7 @@ def startswith(self, pat): def endswith(self, pat): """ - Test if the end of each string element matches a pattern. + Test if the end of each string in the array matches a pattern. Parameters ---------- @@ -432,8 +438,7 @@ def pad(self, width, side="left", fillchar=" "): def center(self, width, fillchar=" "): """ - Filling left and right side of strings in the array with an - additional character. + Pad left and right side of each string in the array. Parameters ---------- @@ -451,8 +456,7 @@ def center(self, width, fillchar=" "): def ljust(self, width, fillchar=" "): """ - Filling right side of strings in the array with an additional - character. + Pad right side of each string in the array. Parameters ---------- @@ -470,7 +474,7 @@ def ljust(self, width, fillchar=" "): def rjust(self, width, fillchar=" "): """ - Filling left side of strings in the array with an additional character. + Pad left side of each string in the array. Parameters ---------- @@ -488,7 +492,7 @@ def rjust(self, width, fillchar=" "): def zfill(self, width): """ - Pad strings in the array by prepending '0' characters. + Pad each string in the array by prepending '0' characters. Strings in the array are padded with '0' characters on the left of the string to reach a total string length `width`. Strings @@ -508,7 +512,7 @@ def zfill(self, width): def contains(self, pat, case=True, flags=0, regex=True): """ - Test if pattern or regex is contained within a string of the array. + Test if pattern or regex is contained within each string of the array. Return boolean array based on whether a given pattern or regex is contained within a string of the array. @@ -554,7 +558,7 @@ def contains(self, pat, case=True, flags=0, regex=True): def match(self, pat, case=True, flags=0): """ - Determine if each string matches a regular expression. + Determine if each string in the array matches a regular expression. Parameters ---------- @@ -613,7 +617,7 @@ def strip(self, to_strip=None, side="both"): def lstrip(self, to_strip=None): """ - Remove leading and trailing characters. + Remove leading characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the array from the left side. @@ -633,7 +637,7 @@ def lstrip(self, to_strip=None): def rstrip(self, to_strip=None): """ - Remove leading and trailing characters. + Remove trailing characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the array from the right side. @@ -653,8 +657,7 @@ def rstrip(self, to_strip=None): def wrap(self, width, **kwargs): """ - Wrap long strings in the array to be formatted in paragraphs with - length less than a given width. + Wrap long strings in the array in paragraphs with length less than `width`. This method has the same keyword parameters and defaults as :class:`textwrap.TextWrapper`. 
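
To make the two behavioural changes in `xarray/core/accessor_str.py` above concrete: `str.get` now accepts a `default` that is returned for out-of-range positions, and `str.wrap` now actually forwards its keyword arguments to `textwrap.TextWrapper` (previously they were silently dropped, see :issue:`4334`). The snippet below is a usage sketch; the array values are made up and the expected results follow from the new code rather than from a released version.

```python
import xarray as xr

da = xr.DataArray(["hello world", "hi"])

# get: position 4 exists in "hello world" but not in "hi",
# so the second element falls back to the (new) default argument
da.str.get(4)                # -> ["o", ""]
da.str.get(4, default="-")   # -> ["o", "-"]

# wrap: keyword arguments are now passed through to textwrap.TextWrapper,
# so break_long_words=False keeps "hello" and "world" intact
da.str.wrap(3, break_long_words=False)
```
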
@@ -663,38 +666,20 @@ def wrap(self, width, **kwargs): ---------- width : int Maximum line-width - expand_tabs : bool, optional - If true, tab characters will be expanded to spaces (default: True) - replace_whitespace : bool, optional - If true, each whitespace character (as defined by - string.whitespace) remaining after tab expansion will be replaced - by a single space (default: True) - drop_whitespace : bool, optional - If true, whitespace that, after wrapping, happens to end up at the - beginning or end of a line is dropped (default: True) - break_long_words : bool, optional - If true, then words longer than width will be broken in order to - ensure that no lines are longer than width. If it is false, long - words will not be broken, and some lines may be longer than width. - (default: True) - break_on_hyphens : bool, optional - If true, wrapping will occur preferably on whitespace and right - after hyphens in compound words, as it is customary in English. If - false, only whitespaces will be considered as potentially good - places for line breaks, but you need to set break_long_words to - false if you want truly insecable words. (default: True) + **kwargs + keyword arguments passed into :class:`textwrap.TextWrapper`. Returns ------- wrapped : same type as values """ - tw = textwrap.TextWrapper(width=width) + tw = textwrap.TextWrapper(width=width, **kwargs) f = lambda x: "\n".join(tw.wrap(x)) return self._apply(f) def translate(self, table): """ - Map all characters in the string through the given mapping table. + Map characters of each string through the given mapping table. Parameters ---------- diff --git a/xarray/core/common.py b/xarray/core/common.py index c95df77313e..bc5035b682e 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -786,7 +786,7 @@ def rolling( self, dim: Mapping[Hashable, int] = None, min_periods: int = None, - center: bool = False, + center: Union[bool, Mapping[Hashable, bool]] = False, keep_attrs: bool = None, **window_kwargs: int, ): @@ -802,7 +802,7 @@ def rolling( Minimum number of observations in window required to have a value (otherwise result is NA). The default, None, is equivalent to setting min_periods equal to the size of the window. - center : boolean, default False + center : boolean, or a mapping, default False Set the labels at the center of the window. keep_attrs : bool, optional If True, the object's attributes (`attrs`) will be copied from diff --git a/xarray/core/computation.py b/xarray/core/computation.py index b2a5dae13e6..0c99c54ca3c 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -638,9 +638,8 @@ def func(*arrays): if data.ndim != len(dims): raise ValueError( "applied function returned data with unexpected " - "number of dimensions: {} vs {}, for dimensions {}".format( - data.ndim, len(dims), dims - ) + f"number of dimensions. Received {data.ndim} dimension(s) but " + f"expected {len(dims)} dimensions with names: {dims!r}" ) var = Variable(dims, data, fastpath=True) @@ -1000,9 +999,10 @@ def earth_mover_distance(first_samples, input_core_dims = ((),) * (len(args)) elif len(input_core_dims) != len(args): raise ValueError( - "input_core_dims must be None or a tuple with the length same to " - "the number of arguments. Given input_core_dims: {}, " - "number of args: {}.".format(input_core_dims, len(args)) + f"input_core_dims must be None or a tuple with the length same to " + f"the number of arguments. 
" + f"Given {len(input_core_dims)} input_core_dims: {input_core_dims}, " + f" but number of args is {len(args)}." ) if kwargs is None: @@ -1010,11 +1010,17 @@ def earth_mover_distance(first_samples, signature = _UFuncSignature(input_core_dims, output_core_dims) - if exclude_dims and not exclude_dims <= signature.all_core_dims: - raise ValueError( - "each dimension in `exclude_dims` must also be a " - "core dimension in the function signature" - ) + if exclude_dims: + if not isinstance(exclude_dims, set): + raise TypeError( + f"Expected exclude_dims to be a 'set'. Received '{type(exclude_dims).__name__}' instead." + ) + if not exclude_dims <= signature.all_core_dims: + raise ValueError( + f"each dimension in `exclude_dims` must also be a " + f"core dimension in the function signature. " + f"Please make {(exclude_dims - signature.all_core_dims)} a core dimension" + ) if kwargs: func = functools.partial(func, **kwargs) diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 87f646352eb..74474f4321e 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -32,69 +32,80 @@ def rolling_window(a, axis, window, center, fill_value): """ import dask.array as da + if not hasattr(axis, "__len__"): + axis = [axis] + window = [window] + center = [center] + orig_shape = a.shape - if axis < 0: - axis = a.ndim + axis depth = {d: 0 for d in range(a.ndim)} - depth[axis] = int(window / 2) - # For evenly sized window, we need to crop the first point of each block. - offset = 1 if window % 2 == 0 else 0 - - if depth[axis] > min(a.chunks[axis]): - raise ValueError( - "For window size %d, every chunk should be larger than %d, " - "but the smallest chunk size is %d. Rechunk your array\n" - "with a larger chunk size or a chunk size that\n" - "more evenly divides the shape of your array." - % (window, depth[axis], min(a.chunks[axis])) - ) - - # Although da.overlap pads values to boundaries of the array, - # the size of the generated array is smaller than what we want - # if center == False. - if center: - start = int(window / 2) # 10 -> 5, 9 -> 4 - end = window - 1 - start - else: - start, end = window - 1, 0 - pad_size = max(start, end) + offset - depth[axis] - drop_size = 0 - # pad_size becomes more than 0 when the overlapped array is smaller than - # needed. In this case, we need to enlarge the original array by padding - # before overlapping. - if pad_size > 0: - if pad_size < depth[axis]: - # overlapping requires each chunk larger than depth. If pad_size is - # smaller than the depth, we enlarge this and truncate it later. - drop_size = depth[axis] - pad_size - pad_size = depth[axis] - shape = list(a.shape) - shape[axis] = pad_size - chunks = list(a.chunks) - chunks[axis] = (pad_size,) - fill_array = da.full(shape, fill_value, dtype=a.dtype, chunks=chunks) - a = da.concatenate([fill_array, a], axis=axis) - + offset = [0] * a.ndim + drop_size = [0] * a.ndim + pad_size = [0] * a.ndim + for ax, win, cent in zip(axis, window, center): + if ax < 0: + ax = a.ndim + ax + depth[ax] = int(win / 2) + # For evenly sized window, we need to crop the first point of each block. + offset[ax] = 1 if win % 2 == 0 else 0 + + if depth[ax] > min(a.chunks[ax]): + raise ValueError( + "For window size %d, every chunk should be larger than %d, " + "but the smallest chunk size is %d. Rechunk your array\n" + "with a larger chunk size or a chunk size that\n" + "more evenly divides the shape of your array." 
+ % (win, depth[ax], min(a.chunks[ax])) + ) + + # Although da.overlap pads values to boundaries of the array, + # the size of the generated array is smaller than what we want + # if center == False. + if cent: + start = int(win / 2) # 10 -> 5, 9 -> 4 + end = win - 1 - start + else: + start, end = win - 1, 0 + pad_size[ax] = max(start, end) + offset[ax] - depth[ax] + drop_size[ax] = 0 + # pad_size becomes more than 0 when the overlapped array is smaller than + # needed. In this case, we need to enlarge the original array by padding + # before overlapping. + if pad_size[ax] > 0: + if pad_size[ax] < depth[ax]: + # overlapping requires each chunk larger than depth. If pad_size is + # smaller than the depth, we enlarge this and truncate it later. + drop_size[ax] = depth[ax] - pad_size[ax] + pad_size[ax] = depth[ax] + + # TODO maybe following two lines can be summarized. + a = da.pad( + a, [(p, 0) for p in pad_size], mode="constant", constant_values=fill_value + ) boundary = {d: fill_value for d in range(a.ndim)} # create overlap arrays ag = da.overlap.overlap(a, depth=depth, boundary=boundary) - # apply rolling func - def func(x, window, axis=-1): + def func(x, window, axis): x = np.asarray(x) - rolling = nputils._rolling_window(x, window, axis) - return rolling[(slice(None),) * axis + (slice(offset, None),)] - - chunks = list(a.chunks) - chunks.append(window) + index = [slice(None)] * x.ndim + for ax, win in zip(axis, window): + x = nputils._rolling_window(x, win, ax) + index[ax] = slice(offset[ax], None) + return x[tuple(index)] + + chunks = list(a.chunks) + window + new_axis = [a.ndim + i for i in range(len(axis))] out = ag.map_blocks( - func, dtype=a.dtype, new_axis=a.ndim, chunks=chunks, window=window, axis=axis + func, dtype=a.dtype, new_axis=new_axis, chunks=chunks, window=window, axis=axis ) # crop boundary. - index = (slice(None),) * axis + (slice(drop_size, drop_size + orig_shape[axis]),) - return out[index] + index = [slice(None)] * a.ndim + for ax in axis: + index[ax] = slice(drop_size[ax], drop_size[ax] + orig_shape[ax]) + return out[tuple(index)] def least_squares(lhs, rhs, rcond=None, skipna=False): diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index dbc4877fa1d..4ad29baee04 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -862,8 +862,8 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": """Returns a copy of this array. If `deep=True`, a deep copy is made of the data array. - Otherwise, a shallow copy is made, so each variable in the new - array's dataset is also a variable in this array's dataset. + Otherwise, a shallow copy is made, and the returned data array's + values are a new view of this data array's values. Use `data` to create a new object with the same structure as original but entirely new data. @@ -1027,7 +1027,7 @@ def isel( missing_dims : {"raise", "warn", "ignore"}, default "raise" What to do if dimensions that should be selected from are not present in the DataArray: - - "exception": raise an exception + - "raise": raise an exception - "warning": raise a warning, and ignore the missing dimensions - "ignore": ignore the missing dimensions **indexers_kwargs : {dim: indexer, ...}, optional @@ -2012,7 +2012,7 @@ def T(self) -> "DataArray": def drop_vars( self, names: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise" ) -> "DataArray": - """Drop variables from this DataArray. + """Returns an array with dropped variables. 
Parameters ---------- @@ -2026,7 +2026,7 @@ def drop_vars( Returns ------- dropped : Dataset - + New Dataset copied from `self` with variables removed. """ ds = self._to_temp_dataset().drop_vars(names, errors=errors) return self._from_temp_dataset(ds) @@ -2365,13 +2365,36 @@ def to_pandas(self) -> Union["DataArray", pd.Series, pd.DataFrame]: indexes = [self.get_index(dim) for dim in self.dims] return constructor(self.values, *indexes) - def to_dataframe(self, name: Hashable = None) -> pd.DataFrame: + def to_dataframe( + self, name: Hashable = None, dim_order: List[Hashable] = None + ) -> pd.DataFrame: """Convert this array and its coordinates into a tidy pandas.DataFrame. The DataFrame is indexed by the Cartesian product of index coordinates (in the form of a :py:class:`pandas.MultiIndex`). Other coordinates are included as columns in the DataFrame. + + Parameters + ---------- + name + Name to give to this array (required if unnamed). + dim_order + Hierarchical dimension order for the resulting dataframe. + Array content is transposed to this order and then written out as flat + vectors in contiguous order, so the last dimension in this list + will be contiguous in the resulting DataFrame. This has a major + influence on which operations are efficient on the resulting + dataframe. + + If provided, must include all dimensions of this DataArray. By default, + dimensions are sorted according to the DataArray dimensions order. + + Returns + ------- + result + DataArray as a pandas DataFrame. + """ if name is None: name = self.name @@ -2381,7 +2404,6 @@ def to_dataframe(self, name: Hashable = None) -> pd.DataFrame: "DataFrame: use the ``name`` parameter" ) - dims = dict(zip(self.dims, self.shape)) # By using a unique name, we can convert a DataArray into a DataFrame # even if it shares a name with one of its coordinates. # I would normally use unique_name = object() but that results in a @@ -2389,7 +2411,13 @@ def to_dataframe(self, name: Hashable = None) -> pd.DataFrame: # been able to debug (possibly a pandas bug?). unique_name = "__unique_name_identifier_z98xfz98xugfg73ho__" ds = self._to_dataset_whole(name=unique_name) - df = ds._to_dataframe(dims) + + if dim_order is None: + ordered_dims = dict(zip(self.dims, self.shape)) + else: + ordered_dims = ds._normalize_dim_order(dim_order=dim_order) + + df = ds._to_dataframe(ordered_dims) df.columns = [name if c == unique_name else c for c in df.columns] return df @@ -3312,24 +3340,21 @@ def map_blocks( This function cannot add a new chunked dimension. - obj: DataArray, Dataset - Passed to the function as its first argument, one block at a time. args: Sequence Passed to func after unpacking and subsetting any xarray objects by blocks. - xarray objects in args must be aligned with obj, otherwise an error is raised. + xarray objects in args must be aligned with this object, otherwise an error is raised. kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be subset to blocks. Passing dask collections in kwargs is not allowed. template: (optional) DataArray, Dataset xarray object representing the final result after compute is called. If not provided, - the function will be first run on mocked-up data, that looks like ``obj`` but + the function will be first run on mocked-up data, that looks like this object but has sizes 0, to determine properties of the returned object such as dtype, variable names, attributes, new dimensions and new indexes (if any). 
``template`` must be provided if the function changes the size of existing dimensions. When provided, ``attrs`` on variables in `template` are copied over to the result. Any ``attrs`` set by ``func`` will be ignored. - Returns ------- A single DataArray or Dataset with dask backend, reassembled from the outputs of the @@ -3341,7 +3366,7 @@ def map_blocks( subset to each block. In the more common case where ``func`` can work on numpy arrays, it is recommended to use ``apply_ufunc``. - If none of the variables in ``obj`` is backed by dask arrays, calling this function is + If none of the variables in this object is backed by dask arrays, calling this function is equivalent to calling ``func(obj, *args, **kwargs)``. See Also @@ -3361,9 +3386,12 @@ def map_blocks( ... clim = gb.mean(dim="time") ... return gb - clim >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") + >>> month = xr.DataArray(time.month, coords={"time": time}, dims=["time"]) >>> np.random.seed(123) >>> array = xr.DataArray( - ... np.random.rand(len(time)), dims="time", coords=[time] + ... np.random.rand(len(time)), + ... dims=["time"], + ... coords={"time": time, "month": month}, ... ).chunk() >>> array.map_blocks(calculate_anomaly, template=array).compute() @@ -3374,21 +3402,19 @@ def map_blocks( 0.07673453, 0.22865714, 0.19063865, -0.0590131 ]) Coordinates: * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + month (time) int64 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments to the function being applied in ``xr.map_blocks()``: >>> array.map_blocks( ... calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=array, - ... ) + ... ) # doctest: +ELLIPSIS - array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , - -0.11974531, 0.43791243, 0.14197797, -0.06191987, -0.15073425, - -0.19967375, 0.18619794, -0.05100474, -0.42989909, -0.09153273, - 0.24841842, -0.30708526, -0.31412523, 0.04197439, 0.0422506 , - 0.14482397, 0.35985481, 0.23487834, 0.12144652]) + dask.array Coordinates: - * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + month (time) int64 dask.array """ from .parallel import map_blocks @@ -3878,9 +3904,10 @@ def argmin( >>> array.isel(array.argmin(...)) array(-1) - >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]], - ... [[1, 3, 2], [2, -5, 1], [2, 3, 1]]], - ... dims=("x", "y", "z")) + >>> array = xr.DataArray( + ... [[[3, 2, 1], [3, 1, 2], [2, 1, 3]], [[1, 3, 2], [2, -5, 1], [2, 3, 1]]], + ... dims=("x", "y", "z"), + ... ) >>> array.min(dim="x") array([[ 1, 2, 1], @@ -3944,7 +3971,7 @@ def argmax( this is deprecated, in future will return a dict with indices for all dimensions; to return a dict with all dimensions now, pass '...'. axis : int, optional - Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments can be supplied. keep_attrs : bool, optional If True, the attributes (`attrs`) will be copied from the original @@ -3980,9 +4007,10 @@ def argmax( array(3) - >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]], - ... [[1, 3, 2], [2, 5, 1], [2, 3, 1]]], - ... dims=("x", "y", "z")) + >>> array = xr.DataArray( + ... [[[3, 2, 1], [3, 1, 2], [2, 1, 3]], [[1, 3, 2], [2, 5, 1], [2, 3, 1]]], + ... dims=("x", "y", "z"), + ... 
) >>> array.max(dim="x") array([[3, 3, 2], diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 216833ccce0..66c2110f963 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1923,7 +1923,7 @@ def isel( missing_dims : {"raise", "warn", "ignore"}, default "raise" What to do if dimensions that should be selected from are not present in the Dataset: - - "exception": raise an exception + - "raise": raise an exception - "warning": raise a warning, and ignore the missing dimensions - "ignore": ignore the missing dimensions **indexers_kwargs : {dim: indexer, ...}, optional @@ -4527,23 +4527,75 @@ def to_array(self, dim="variable", name=None): data, coords, dims, attrs=self.attrs, name=name, indexes=indexes ) - def _to_dataframe(self, ordered_dims): + def _normalize_dim_order( + self, dim_order: List[Hashable] = None + ) -> Dict[Hashable, int]: + """ + Check the validity of the provided dimensions if any and return the mapping + between dimension name and their size. + + Parameters + ---------- + dim_order + Dimension order to validate (default to the alphabetical order if None). + + Returns + ------- + result + Validated dimensions mapping. + + """ + if dim_order is None: + dim_order = list(self.dims) + elif set(dim_order) != set(self.dims): + raise ValueError( + "dim_order {} does not match the set of dimensions of this " + "Dataset: {}".format(dim_order, list(self.dims)) + ) + + ordered_dims = {k: self.dims[k] for k in dim_order} + + return ordered_dims + + def _to_dataframe(self, ordered_dims: Mapping[Hashable, int]): columns = [k for k in self.variables if k not in self.dims] data = [ self._variables[k].set_dims(ordered_dims).values.reshape(-1) for k in columns ] - index = self.coords.to_index(ordered_dims) + index = self.coords.to_index([*ordered_dims]) return pd.DataFrame(dict(zip(columns, data)), index=index) - def to_dataframe(self): + def to_dataframe(self, dim_order: List[Hashable] = None) -> pd.DataFrame: """Convert this dataset into a pandas.DataFrame. Non-index variables in this dataset form the columns of the - DataFrame. The DataFrame is be indexed by the Cartesian product of + DataFrame. The DataFrame is indexed by the Cartesian product of this dataset's indices. + + Parameters + ---------- + dim_order + Hierarchical dimension order for the resulting dataframe. All + arrays are transposed to this order and then written out as flat + vectors in contiguous order, so the last dimension in this list + will be contiguous in the resulting DataFrame. This has a major + influence on which operations are efficient on the resulting + dataframe. + + If provided, must include all dimensions of this dataset. By + default, dimensions are sorted alphabetically. + + Returns + ------- + result + Dataset as a pandas DataFrame. + """ - return self._to_dataframe(self.dims) + + ordered_dims = self._normalize_dim_order(dim_order=dim_order) + + return self._to_dataframe(ordered_dims=ordered_dims) def _set_sparse_data_from_dataframe( self, idx: pd.Index, arrays: List[Tuple[Hashable, np.ndarray]], dims: tuple @@ -4697,11 +4749,11 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): influence on which operations are efficient on the resulting dask dataframe. - If provided, must include all dimensions on this dataset. By + If provided, must include all dimensions of this dataset. By default, dimensions are sorted alphabetically. set_index : bool, optional If set_index=True, the dask DataFrame is indexed by this dataset's - coordinate. 
Since dask DataFrames to not support multi-indexes, + coordinate. Since dask DataFrames do not support multi-indexes, set_index only works if the dataset only contains one dimension. Returns @@ -4712,15 +4764,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): import dask.array as da import dask.dataframe as dd - if dim_order is None: - dim_order = list(self.dims) - elif set(dim_order) != set(self.dims): - raise ValueError( - "dim_order {} does not match the set of dimensions on this " - "Dataset: {}".format(dim_order, list(self.dims)) - ) - - ordered_dims = {k: self.dims[k] for k in dim_order} + ordered_dims = self._normalize_dim_order(dim_order=dim_order) columns = list(ordered_dims) columns.extend(k for k in self.coords if k not in self.dims) @@ -4747,6 +4791,8 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): df = dd.concat(series_list, axis=1) if set_index: + dim_order = [*ordered_dims] + if len(dim_order) == 1: (dim,) = dim_order df = df.set_index(dim) @@ -5776,8 +5822,6 @@ def map_blocks( This function cannot add a new chunked dimension. - obj: DataArray, Dataset - Passed to the function as its first argument, one block at a time. args: Sequence Passed to func after unpacking and subsetting any xarray objects by blocks. xarray objects in args must be aligned with obj, otherwise an error is raised. @@ -5786,7 +5830,7 @@ def map_blocks( subset to blocks. Passing dask collections in kwargs is not allowed. template: (optional) DataArray, Dataset xarray object representing the final result after compute is called. If not provided, - the function will be first run on mocked-up data, that looks like ``obj`` but + the function will be first run on mocked-up data, that looks like this object but has sizes 0, to determine properties of the returned object such as dtype, variable names, attributes, new dimensions and new indexes (if any). ``template`` must be provided if the function changes the size of existing dimensions. @@ -5805,7 +5849,7 @@ def map_blocks( subset to each block. In the more common case where ``func`` can work on numpy arrays, it is recommended to use ``apply_ufunc``. - If none of the variables in ``obj`` is backed by dask arrays, calling this function is + If none of the variables in this object is backed by dask arrays, calling this function is equivalent to calling ``func(obj, *args, **kwargs)``. See Also @@ -5825,20 +5869,22 @@ def map_blocks( ... clim = gb.mean(dim="time") ... return gb - clim >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") + >>> month = xr.DataArray(time.month, coords={"time": time}, dims=["time"]) >>> np.random.seed(123) >>> array = xr.DataArray( - ... np.random.rand(len(time)), dims="time", coords=[time] + ... np.random.rand(len(time)), + ... dims=["time"], + ... coords={"time": time, "month": month}, ... ).chunk() >>> ds = xr.Dataset({"a": array}) >>> ds.map_blocks(calculate_anomaly, template=ds).compute() - - array([ 0.12894847, 0.11323072, -0.0855964 , -0.09334032, 0.26848862, - 0.12382735, 0.22460641, 0.07650108, -0.07673453, -0.22865714, - -0.19063865, 0.0590131 , -0.12894847, -0.11323072, 0.0855964 , - 0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108, - 0.07673453, 0.22865714, 0.19063865, -0.0590131 ]) + + Dimensions: (time: 24) Coordinates: * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + month (time) int64 1 2 3 4 5 6 7 8 9 10 11 12 1 2 3 4 5 6 7 8 9 10 11 12 + Data variables: + a (time) float64 0.1289 0.1132 -0.0856 ... 
0.2287 0.1906 -0.05901 Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments to the function being applied in ``xr.map_blocks()``: @@ -5846,14 +5892,13 @@ def map_blocks( >>> ds.map_blocks( ... calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=ds, ... ) - - array([ 0.15361741, -0.25671244, -0.31600032, 0.008463 , 0.1766172 , - -0.11974531, 0.43791243, 0.14197797, -0.06191987, -0.15073425, - -0.19967375, 0.18619794, -0.05100474, -0.42989909, -0.09153273, - 0.24841842, -0.30708526, -0.31412523, 0.04197439, 0.0422506 , - 0.14482397, 0.35985481, 0.23487834, 0.12144652]) + + Dimensions: (time: 24) Coordinates: - * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 + month (time) int64 dask.array + Data variables: + a (time) float64 dask.array """ from .parallel import map_blocks @@ -5975,7 +6020,7 @@ def polyfit( skipna_da = np.any(da.isnull()) dims_to_stack = [dimname for dimname in da.dims if dimname != dim] - stacked_coords = {} + stacked_coords: Dict[Hashable, DataArray] = {} if dims_to_stack: stacked_dim = utils.get_temp_dimname(dims_to_stack, "stacked") rhs = da.transpose(dim, *dims_to_stack).stack( diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index df579d23544..377e7377b6a 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -4,9 +4,9 @@ accept or return xarray objects. """ import contextlib +import datetime import inspect import warnings -from distutils.version import LooseVersion from functools import partial import numpy as np @@ -14,21 +14,13 @@ from . import dask_array_compat, dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast -from .pycompat import dask_array_type +from .pycompat import cupy_array_type, dask_array_type try: import dask.array as dask_array except ImportError: dask_array = None # type: ignore -# TODO: remove after we stop supporting dask < 2.9.1 -try: - import dask - - dask_version = dask.__version__ -except ImportError: - dask_version = None - def _dask_or_eager_func( name, @@ -158,17 +150,23 @@ def trapz(y, x, axis): ) -def asarray(data): +def asarray(data, xp=np): return ( data if (isinstance(data, dask_array_type) or hasattr(data, "__array_function__")) - else np.asarray(data) + else xp.asarray(data) ) def as_shared_dtype(scalars_or_arrays): """Cast a arrays to a shared dtype using xarray's type promotion rules.""" - arrays = [asarray(x) for x in scalars_or_arrays] + + if any([isinstance(x, cupy_array_type) for x in scalars_or_arrays]): + import cupy as cp + + arrays = [asarray(x, xp=cp) for x in scalars_or_arrays] + else: + arrays = [asarray(x) for x in scalars_or_arrays] # Pass arrays directly instead of dtypes to result_type so scalars # get handled properly. 
# Note that result_type() safely gets the dtype from dask arrays without @@ -211,16 +209,6 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): lazy_equiv = lazy_array_equiv(arr1, arr2) if lazy_equiv is None: - # TODO: remove after we require dask >= 2.9.1 - sufficient_dask_version = ( - dask_version is not None and LooseVersion(dask_version) >= "2.9.1" - ) - if not sufficient_dask_version and any( - isinstance(arr, dask_array_type) for arr in [arr1, arr2] - ): - arr1 = np.array(arr1) - arr2 = np.array(arr2) - return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all()) else: return lazy_equiv @@ -483,8 +471,7 @@ def timedelta_to_numeric(value, datetime_unit="ns", dtype=float): def _to_pytimedelta(array, unit="us"): - index = pd.TimedeltaIndex(array.ravel(), unit=unit) - return index.to_pytimedelta().reshape(array.shape) + return array.astype(f"timedelta64[{unit}]").astype(datetime.timedelta) def np_timedelta64_to_float(array, datetime_unit): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 28eaae5f05b..9aa20f2b87e 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -261,6 +261,8 @@ def inline_variable_array_repr(var, max_width): return inline_dask_repr(var.data) elif isinstance(var._data, sparse_array_type): return inline_sparse_repr(var.data) + elif hasattr(var._data, "_repr_inline_"): + return var._data._repr_inline_(max_width) elif hasattr(var._data, "__array_function__"): return maybe_truncate(repr(var._data).replace("\n", " "), max_width) else: diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 400ef61502e..5521b33e2e4 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -1,18 +1,23 @@ import uuid from collections import OrderedDict -from functools import partial +from functools import lru_cache, partial from html import escape import pkg_resources from .formatting import inline_variable_array_repr, short_data_repr -CSS_FILE_PATH = "/".join(("static", "css", "style.css")) -CSS_STYLE = pkg_resources.resource_string("xarray", CSS_FILE_PATH).decode("utf8") +STATIC_FILES = ("static/html/icons-svg-inline.html", "static/css/style.css") -ICONS_SVG_PATH = "/".join(("static", "html", "icons-svg-inline.html")) -ICONS_SVG = pkg_resources.resource_string("xarray", ICONS_SVG_PATH).decode("utf8") +@lru_cache(None) +def _load_static_files(): + """Lazily load the resource files into memory the first time they are needed + """ + return [ + pkg_resources.resource_string("xarray", fname).decode("utf8") + for fname in STATIC_FILES + ] def short_data_repr_html(array): @@ -233,9 +238,10 @@ def _obj_repr(obj, header_components, sections): header = f"
<div class='xr-header'>{''.join(h for h in header_components)}</div>"
     sections = "".join(f"<li class='xr-section-item'>{s}</li>" for s in sections)
 
+    icons_svg, css_style = _load_static_files()
     return (
         "<div>"
-        f"{ICONS_SVG}<style>{CSS_STYLE}</style>"
+        f"{icons_svg}<style>{css_style}</style>"
         f"<pre class='xr-text-repr-fallback'>{escape(repr(obj))}</pre>"
         "<div class='xr-wrap' hidden>"