diff --git a/.binder/environment.yml b/.binder/environment.yml
index fee5ed07cf7..a60eb372831 100644
--- a/.binder/environment.yml
+++ b/.binder/environment.yml
@@ -16,7 +16,7 @@ dependencies:
   - h5py
   - hdf5
   - iris
-  - lxml  # Optional dep of pydap
+  - lxml # Optional dep of pydap
   - matplotlib
   - nc-time-axis
   - netcdf4
diff --git a/.codecov.yml b/.codecov.yml
index d0bec9539f8..6e08afff173 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -10,7 +10,7 @@ coverage:
         flags:
           - unittests
         paths:
-        - "!xarray/tests/"
+          - "!xarray/tests/"
       unittests:
         target: 90%
         flags:
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index bd72c5b9396..8a2d4fb61df 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -1,10 +1,10 @@
 version: 2
 updates:
-  - package-ecosystem: 'github-actions'
-    directory: '/'
+  - package-ecosystem: "github-actions"
+    directory: "/"
     schedule:
       # Check for updates once a week
-      interval: 'weekly'
+      interval: "weekly"
     groups:
       actions:
         patterns:
diff --git a/.github/workflows/benchmarks-last-release.yml b/.github/workflows/benchmarks-last-release.yml
index 5e36613368d..f9fc29d8d72 100644
--- a/.github/workflows/benchmarks-last-release.yml
+++ b/.github/workflows/benchmarks-last-release.yml
@@ -24,7 +24,7 @@ jobs:
       - name: Set up conda environment
         uses: mamba-org/setup-micromamba@v2
         with:
-          micromamba-version: '1.5.10-0'
+          micromamba-version: "1.5.10-0"
           environment-file: ${{env.CONDA_ENV_FILE}}
           environment-name: xarray-tests
           cache-environment: true
@@ -32,7 +32,7 @@
           create-args: >-
             asv

-      - name: 'Get Previous tag'
+      - name: "Get Previous tag"
         id: previoustag
         uses: "WyriHaximus/github-action-get-previous-tag@v1"
         # with:
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index e2cde27532d..0500187331e 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -27,7 +27,7 @@ jobs:
       - name: Set up conda environment
         uses: mamba-org/setup-micromamba@v2
         with:
-          micromamba-version: '1.5.10-0'
+          micromamba-version: "1.5.10-0"
           environment-file: ${{env.CONDA_ENV_FILE}}
           environment-name: xarray-tests
           cache-environment: true
@@ -38,7 +38,6 @@
             python-build
             mamba<=1.5.10

-
       - name: Run benchmarks
         shell: bash -l {0}
         id: benchmark
diff --git a/.github/workflows/hypothesis.yaml b/.github/workflows/hypothesis.yaml
index 2a904c06824..bf3a1be550d 100644
--- a/.github/workflows/hypothesis.yaml
+++ b/.github/workflows/hypothesis.yaml
@@ -37,12 +37,12 @@ jobs:
     runs-on: "ubuntu-latest"
     needs: detect-ci-trigger
     if: |
-        always()
-        && (
-            needs.detect-ci-trigger.outputs.triggered == 'false'
-            && ( (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
-            || contains( github.event.pull_request.labels.*.name, 'run-slow-hypothesis'))
-        )
+      always()
+      && (
+        needs.detect-ci-trigger.outputs.triggered == 'false'
+        && ( (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
+        || contains( github.event.pull_request.labels.*.name, 'run-slow-hypothesis'))
+      )
     defaults:
       run:
         shell: bash -l {0}
@@ -92,8 +92,8 @@
         if: success()
         id: status
         run: |
-          python -m pytest --hypothesis-show-statistics --run-slow-hypothesis properties/*.py \
-            --report-log output-${{ matrix.python-version }}-log.jsonl
+          python -m pytest --hypothesis-show-statistics --run-slow-hypothesis properties/*.py \
+          --report-log output-${{ matrix.python-version }}-log.jsonl

       # explicitly save the cache so it gets updated, also do this even if it fails.
       - name: Save cached hypothesis directory
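The hunk above is the command that runs xarray's property-based test suite (`properties/*.py`) under Hypothesis. For readers unfamiliar with that suite, here is a self-contained miniature in the same style — an illustrative sketch only, not a file from this patch:

```python
# Miniature property-based test in the style of the properties/*.py suite
# exercised by the workflow above; illustrative only, not part of the patch.
from hypothesis import given, strategies as st


@given(st.lists(st.integers()))
def test_sort_is_idempotent(xs):
    # Sorting an already-sorted list must not change it.
    assert sorted(sorted(xs)) == sorted(xs)


if __name__ == "__main__":
    test_sort_is_idempotent()  # Hypothesis generates and runs many cases
```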
diff --git a/.github/workflows/pypi-release.yaml b/.github/workflows/pypi-release.yaml
index decb8ff3ba3..e6e984ce400 100644
--- a/.github/workflows/pypi-release.yaml
+++ b/.github/workflows/pypi-release.yaml
@@ -5,7 +5,7 @@ on:
     - published
   push:
     tags:
-      - 'v*'
+      - "v*"

 jobs:
   build-artifacts:
@@ -93,7 +93,6 @@ jobs:
           repository_url: https://test.pypi.org/legacy/
           verbose: true

-
   upload-to-pypi:
     needs: test-built-dist
     if: github.event_name == 'release'
diff --git a/.github/workflows/upstream-dev-ci.yaml b/.github/workflows/upstream-dev-ci.yaml
index 05745f09fcf..55e72bfa065 100644
--- a/.github/workflows/upstream-dev-ci.yaml
+++ b/.github/workflows/upstream-dev-ci.yaml
@@ -43,12 +43,12 @@ jobs:
     env:
       ZARR_V3_EXPERIMENTAL_API: 1
     if: |
-        always()
-        && (
-            (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
-            || needs.detect-ci-trigger.outputs.triggered == 'true'
-            || contains( github.event.pull_request.labels.*.name, 'run-upstream')
-        )
+      always()
+      && (
+        (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
+        || needs.detect-ci-trigger.outputs.triggered == 'true'
+        || contains( github.event.pull_request.labels.*.name, 'run-upstream')
+      )
     defaults:
       run:
         shell: bash -l {0}
@@ -101,10 +101,10 @@ jobs:
     runs-on: ubuntu-latest
     needs: detect-ci-trigger
     if: |
-        always()
-        && (
-            contains( github.event.pull_request.labels.*.name, 'run-upstream')
-        )
+      always()
+      && (
+        contains( github.event.pull_request.labels.*.name, 'run-upstream')
+      )
     defaults:
       run:
         shell: bash -l {0}
diff --git a/.gitignore b/.gitignore
index 21011f0eaa7..fc4f6ae42d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -40,6 +40,7 @@ pip-log.txt
 .tox
 nosetests.xml
 .cache
+.prettier_cache
 .dmypy.json
 .mypy_cache
 .ropeproject/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2bdb1ecaa69..e2bffbfefde 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 # https://pre-commit.com/
 ci:
-    autoupdate_schedule: monthly
-    autoupdate_commit_msg: 'Update pre-commit hooks'
+  autoupdate_schedule: monthly
+  autoupdate_commit_msg: "Update pre-commit hooks"
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v5.0.0
@@ -11,9 +11,21 @@ repos:
       - id: check-yaml
       - id: debug-statements
       - id: mixed-line-ending
+  - repo: https://github.com/pre-commit/pygrep-hooks
+    rev: v1.10.0
+    hooks:
+      # - id: python-check-blanket-noqa # checked by ruff
+      # - id: python-check-blanket-type-ignore # checked by ruff
+      # - id: python-check-mock-methods # checked by ruff
+      - id: python-no-log-warn
+      # - id: python-use-type-annotations # too many false positives
+      - id: rst-backticks
+      - id: rst-directive-colons
+      - id: rst-inline-touching-normal
+      - id: text-unicode-replacement-char
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: 'v0.7.2'
+    rev: v0.7.2
     hooks:
       - id: ruff-format
       - id: ruff
@@ -24,6 +36,11 @@ repos:
       - id: blackdoc
         exclude: "generate_aggregations.py"
        additional_dependencies: ["black==24.8.0"]
+  - repo: https://github.com/rbubley/mirrors-prettier
+    rev: v3.3.3
+    hooks:
+      - id: prettier
+        args: [--cache-location=.prettier_cache/cache]
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v1.13.0
     hooks:
@@ -45,4 +62,4 @@ repos:
   - repo: https://github.com/citation-file-format/cff-converter-python
     rev: ebf0b5e44d67f8beaa1cd13a0d0393ea04c6058d
     hooks:
-    - id: validate-cff
+      - id: validate-cff
diff --git a/CITATION.cff b/CITATION.cff
index 2eee84b4714..53f1df3a721 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,94 +1,94 @@
 cff-version: 1.2.0
 message: "If you use this software, please cite it as below."
 authors:
-- family-names: "Hoyer"
-  given-names: "Stephan"
-  orcid: "https://orcid.org/0000-0002-5207-0380"
-- family-names: "Roos"
-  given-names: "Maximilian"
-- family-names: "Joseph"
-  given-names: "Hamman"
-  orcid: "https://orcid.org/0000-0001-7479-8439"
-- family-names: "Magin"
-  given-names: "Justus"
-  orcid: "https://orcid.org/0000-0002-4254-8002"
-- family-names: "Cherian"
-  given-names: "Deepak"
-  orcid: "https://orcid.org/0000-0002-6861-8734"
-- family-names: "Fitzgerald"
-  given-names: "Clark"
-  orcid: "https://orcid.org/0000-0003-3446-6389"
-- family-names: "Hauser"
-  given-names: "Mathias"
-  orcid: "https://orcid.org/0000-0002-0057-4878"
-- family-names: "Fujii"
-  given-names: "Keisuke"
-  orcid: "https://orcid.org/0000-0003-0390-9984"
-- family-names: "Maussion"
-  given-names: "Fabien"
-  orcid: "https://orcid.org/0000-0002-3211-506X"
-- family-names: "Imperiale"
-  given-names: "Guido"
-- family-names: "Clark"
-  given-names: "Spencer"
-  orcid: "https://orcid.org/0000-0001-5595-7895"
-- family-names: "Kleeman"
-  given-names: "Alex"
-- family-names: "Nicholas"
-  given-names: "Thomas"
-  orcid: "https://orcid.org/0000-0002-2176-0530"
-- family-names: "Kluyver"
-  given-names: "Thomas"
-  orcid: "https://orcid.org/0000-0003-4020-6364"
-- family-names: "Westling"
-  given-names: "Jimmy"
-- family-names: "Munroe"
-  given-names: "James"
-  orcid: "https://orcid.org/0000-0001-9098-6309"
-- family-names: "Amici"
-  given-names: "Alessandro"
-  orcid: "https://orcid.org/0000-0002-1778-4505"
-- family-names: "Barghini"
-  given-names: "Aureliana"
-- family-names: "Banihirwe"
-  given-names: "Anderson"
-  orcid: "https://orcid.org/0000-0001-6583-571X"
-- family-names: "Bell"
-  given-names: "Ray"
-  orcid: "https://orcid.org/0000-0003-2623-0587"
-- family-names: "Hatfield-Dodds"
-  given-names: "Zac"
-  orcid: "https://orcid.org/0000-0002-8646-8362"
-- family-names: "Abernathey"
-  given-names: "Ryan"
-  orcid: "https://orcid.org/0000-0001-5999-4917"
-- family-names: "Bovy"
-  given-names: "Benoît"
-- family-names: "Omotani"
-  given-names: "John"
-  orcid: "https://orcid.org/0000-0002-3156-8227"
-- family-names: "Mühlbauer"
-  given-names: "Kai"
-  orcid: "https://orcid.org/0000-0001-6599-1034"
-- family-names: "Roszko"
-  given-names: "Maximilian K."
-  orcid: "https://orcid.org/0000-0001-9424-2526"
-- family-names: "Wolfram"
-  given-names: "Phillip J."
- orcid: "https://orcid.org/0000-0001-5971-4241" -- family-names: "Henderson" - given-names: "Scott" - orcid: "https://orcid.org/0000-0003-0624-4965" -- family-names: "Awowale" - given-names: "Eniola Olufunke" -- family-names: "Scheick" - given-names: "Jessica" - orcid: "https://orcid.org/0000-0002-3421-4459" -- family-names: "Savoie" - given-names: "Matthew" - orcid: "https://orcid.org/0000-0002-8881-2550" -- family-names: "Littlejohns" - given-names: "Owen" + - family-names: "Hoyer" + given-names: "Stephan" + orcid: "https://orcid.org/0000-0002-5207-0380" + - family-names: "Roos" + given-names: "Maximilian" + - family-names: "Joseph" + given-names: "Hamman" + orcid: "https://orcid.org/0000-0001-7479-8439" + - family-names: "Magin" + given-names: "Justus" + orcid: "https://orcid.org/0000-0002-4254-8002" + - family-names: "Cherian" + given-names: "Deepak" + orcid: "https://orcid.org/0000-0002-6861-8734" + - family-names: "Fitzgerald" + given-names: "Clark" + orcid: "https://orcid.org/0000-0003-3446-6389" + - family-names: "Hauser" + given-names: "Mathias" + orcid: "https://orcid.org/0000-0002-0057-4878" + - family-names: "Fujii" + given-names: "Keisuke" + orcid: "https://orcid.org/0000-0003-0390-9984" + - family-names: "Maussion" + given-names: "Fabien" + orcid: "https://orcid.org/0000-0002-3211-506X" + - family-names: "Imperiale" + given-names: "Guido" + - family-names: "Clark" + given-names: "Spencer" + orcid: "https://orcid.org/0000-0001-5595-7895" + - family-names: "Kleeman" + given-names: "Alex" + - family-names: "Nicholas" + given-names: "Thomas" + orcid: "https://orcid.org/0000-0002-2176-0530" + - family-names: "Kluyver" + given-names: "Thomas" + orcid: "https://orcid.org/0000-0003-4020-6364" + - family-names: "Westling" + given-names: "Jimmy" + - family-names: "Munroe" + given-names: "James" + orcid: "https://orcid.org/0000-0001-9098-6309" + - family-names: "Amici" + given-names: "Alessandro" + orcid: "https://orcid.org/0000-0002-1778-4505" + - family-names: "Barghini" + given-names: "Aureliana" + - family-names: "Banihirwe" + given-names: "Anderson" + orcid: "https://orcid.org/0000-0001-6583-571X" + - family-names: "Bell" + given-names: "Ray" + orcid: "https://orcid.org/0000-0003-2623-0587" + - family-names: "Hatfield-Dodds" + given-names: "Zac" + orcid: "https://orcid.org/0000-0002-8646-8362" + - family-names: "Abernathey" + given-names: "Ryan" + orcid: "https://orcid.org/0000-0001-5999-4917" + - family-names: "Bovy" + given-names: "Benoît" + - family-names: "Omotani" + given-names: "John" + orcid: "https://orcid.org/0000-0002-3156-8227" + - family-names: "Mühlbauer" + given-names: "Kai" + orcid: "https://orcid.org/0000-0001-6599-1034" + - family-names: "Roszko" + given-names: "Maximilian K." + orcid: "https://orcid.org/0000-0001-9424-2526" + - family-names: "Wolfram" + given-names: "Phillip J." + orcid: "https://orcid.org/0000-0001-5971-4241" + - family-names: "Henderson" + given-names: "Scott" + orcid: "https://orcid.org/0000-0003-0624-4965" + - family-names: "Awowale" + given-names: "Eniola Olufunke" + - family-names: "Scheick" + given-names: "Jessica" + orcid: "https://orcid.org/0000-0002-3421-4459" + - family-names: "Savoie" + given-names: "Matthew" + orcid: "https://orcid.org/0000-0002-8881-2550" + - family-names: "Littlejohns" + given-names: "Owen" title: "xarray" abstract: "N-D labeled arrays and datasets in Python." 
 license: Apache-2.0
@@ -98,12 +98,12 @@ repository-code: "https://github.com/pydata/xarray"
 preferred-citation:
   type: article
   authors:
-  - family-names: "Hoyer"
-    given-names: "Stephan"
-    orcid: "https://orcid.org/0000-0002-5207-0380"
-  - family-names: "Joseph"
-    given-names: "Hamman"
-    orcid: "https://orcid.org/0000-0001-7479-8439"
+    - family-names: "Hoyer"
+      given-names: "Stephan"
+      orcid: "https://orcid.org/0000-0002-5207-0380"
+    - family-names: "Joseph"
+      given-names: "Hamman"
+      orcid: "https://orcid.org/0000-0001-7479-8439"
   doi: "10.5334/jors.148"
   journal: "Journal of Open Research Software"
   month: 4
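The CITATION.cff hunks above are the largest in the patch and are purely mechanical: prettier re-indents the `authors:` sequences, and earlier hunks swap single quotes for double quotes. Both transformations are semantic no-ops at the YAML level, which the following illustrative check makes concrete (it uses PyYAML and is not part of the patch; the snippets mirror layouts from the hunks above):

```python
# Illustrative check that prettier's YAML changes are semantic no-ops;
# requires PyYAML (not part of the patch).
import yaml

old_style = """\
authors:
- family-names: "Hoyer"
  given-names: "Stephan"
interval: 'weekly'
"""

new_style = """\
authors:
  - family-names: "Hoyer"
    given-names: "Stephan"
interval: "weekly"
"""

# Different indentation and quote style, identical parsed data.
assert yaml.safe_load(old_style) == yaml.safe_load(new_style)
print(yaml.safe_load(new_style))
```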
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 541fd2fa659..2f3ddd49ba8 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -8,19 +8,19 @@ In the interest of fostering an open and welcoming environment, we as contributo

 Examples of behavior that contributes to creating a positive environment include:

-* Using welcoming and inclusive language
-* Being respectful of differing viewpoints and experiences
-* Gracefully accepting constructive criticism
-* Focusing on what is best for the community
-* Showing empathy towards other community members
+- Using welcoming and inclusive language
+- Being respectful of differing viewpoints and experiences
+- Gracefully accepting constructive criticism
+- Focusing on what is best for the community
+- Showing empathy towards other community members

 Examples of unacceptable behavior by participants include:

-* The use of sexualized language or imagery and unwelcome sexual attention or advances
-* Trolling, insulting/derogatory comments, and personal or political attacks
-* Public or private harassment
-* Publishing others' private information, such as a physical or electronic address, without explicit permission
-* Other conduct which could reasonably be considered inappropriate in a professional setting
+- The use of sexualized language or imagery and unwelcome sexual attention or advances
+- Trolling, insulting/derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information, such as a physical or electronic address, without explicit permission
+- Other conduct which could reasonably be considered inappropriate in a professional setting

 ## Our Responsibilities
diff --git a/CORE_TEAM_GUIDE.md b/CORE_TEAM_GUIDE.md
index a093788fc81..24c137cc881 100644
--- a/CORE_TEAM_GUIDE.md
+++ b/CORE_TEAM_GUIDE.md
@@ -1,8 +1,8 @@
-> **_Note:_** This Core Team Member Guide was adapted from the [napari project's Core Developer Guide](https://napari.org/stable/developers/core_dev_guide.html) and the [Pandas maintainers guide](https://pandas.pydata.org/docs/development/maintaining.html).
+> **_Note:_** This Core Team Member Guide was adapted from the [napari project's Core Developer Guide](https://napari.org/stable/developers/core_dev_guide.html) and the [Pandas maintainers guide](https://pandas.pydata.org/docs/development/maintaining.html).

 # Core Team Member Guide

-Welcome, new core team member! We appreciate the quality of your work, and enjoy working with you!
+Welcome, new core team member! We appreciate the quality of your work, and enjoy working with you!
 Thank you for your numerous contributions to the project so far.

 By accepting the invitation to become a core team member you are **not required to commit to doing any more work** -
@@ -107,8 +107,7 @@ Here’s a typical workflow for triaging a newly opened issue or discussion:
 6. **Is this a usage question?**
-   We prefer that usage questions are asked on StackOverflow with the [`python-xarray` tag](https://stackoverflow.com/questions/tagged/python-xarray
-) or as a [GitHub discussion topic](https://github.com/pydata/xarray/discussions).
+   We prefer that usage questions are asked on StackOverflow with the [`python-xarray` tag](https://stackoverflow.com/questions/tagged/python-xarray) or as a [GitHub discussion topic](https://github.com/pydata/xarray/discussions).

    If it’s easy to answer, feel free to link to the relevant documentation section,
    let them know that in the future this kind of question should be on
    StackOverflow, and close the issue.
@@ -136,7 +135,7 @@ guidelines for how to do that.
 ### All contributors are treated the same

 You should now have gained the ability to merge or approve
-other contributors' pull requests. Merging contributions is a shared power:
+other contributors' pull requests. Merging contributions is a shared power:
 only merge contributions you yourself have carefully reviewed, and that are
 clear improvements for the project. When in doubt, and especially for more
 complex changes, wait until at least one other core team member has approved.
@@ -167,13 +166,13 @@ require the approval of another core team member before they can be merged.

 ### How to conduct a good review

-*Always* be kind to contributors. Contributors are often doing
+_Always_ be kind to contributors. Contributors are often doing
 volunteer work, for which we are tremendously grateful. Provide
 constructive criticism on ideas and implementations, and remind
 yourself of how it felt when your own work was being evaluated as a
 novice.

-``xarray`` strongly values mentorship in code review. New users
+`xarray` strongly values mentorship in code review. New users
 often need more handholding, having little to no git experience.
 Repeat yourself liberally, and, if you don’t recognize a contributor,
 point them to our development guide, or other GitHub
@@ -186,44 +185,44 @@ an abandoned pull request.

 When reviewing, focus on the following:

 1. **Usability and generality:** `xarray` is a user-facing package that strives to be accessible
-to both novice and advanced users, and new features should ultimately be
-accessible to everyone using the package. `xarray` targets the scientific user
-community broadly, and core features should be domain-agnostic and general purpose.
-Custom functionality is meant to be provided through our various types of interoperability.
+   to both novice and advanced users, and new features should ultimately be
+   accessible to everyone using the package. `xarray` targets the scientific user
+   community broadly, and core features should be domain-agnostic and general purpose.
+   Custom functionality is meant to be provided through our various types of interoperability.

 2. **Performance and benchmarks:** As `xarray` targets scientific applications that often involve
-large multidimensional datasets, high performance is a key value of `xarray`. While
-every new feature won't scale equally to all sizes of data, keeping in mind performance
-and our [benchmarks](https://github.com/pydata/xarray/tree/main/asv_bench) during a review may be important, and you may
-need to ask for benchmarks to be run and reported or new benchmarks to be added.
-You can run the CI benchmarking suite on any PR by tagging it with the ``run-benchmark`` label.
+   large multidimensional datasets, high performance is a key value of `xarray`. While
+   every new feature won't scale equally to all sizes of data, keeping in mind performance
+   and our [benchmarks](https://github.com/pydata/xarray/tree/main/asv_bench) during a review may be important, and you may
+   need to ask for benchmarks to be run and reported or new benchmarks to be added.
+   You can run the CI benchmarking suite on any PR by tagging it with the `run-benchmark` label.

 3. **APIs and stability:** Coding users and developers will make
-extensive use of our APIs. The foundation of a healthy ecosystem will be
-a fully capable and stable set of APIs, so as `xarray` matures it will
-very important to ensure our APIs are stable. Spending the extra time to consider names of public facing
-variables and methods, alongside function signatures, could save us considerable
-trouble in the future. We do our best to provide [deprecation cycles](https://docs.xarray.dev/en/stable/contributing.html#backwards-compatibility)
-when making backwards-incompatible changes.
+   extensive use of our APIs. The foundation of a healthy ecosystem will be
+   a fully capable and stable set of APIs, so as `xarray` matures it will
+   very important to ensure our APIs are stable. Spending the extra time to consider names of public facing
+   variables and methods, alongside function signatures, could save us considerable
+   trouble in the future. We do our best to provide [deprecation cycles](https://docs.xarray.dev/en/stable/contributing.html#backwards-compatibility)
+   when making backwards-incompatible changes.

 4. **Documentation and tutorials:** All new methods should have appropriate doc
-strings following [PEP257](https://peps.python.org/pep-0257/) and the
-[NumPy documentation guide](https://numpy.org/devdocs/dev/howto-docs.html#documentation-style).
-For any major new features, accompanying changes should be made to our
-[tutorials](https://tutorial.xarray.dev). These should not only
-illustrates the new feature, but explains it.
+   strings following [PEP257](https://peps.python.org/pep-0257/) and the
+   [NumPy documentation guide](https://numpy.org/devdocs/dev/howto-docs.html#documentation-style).
+   For any major new features, accompanying changes should be made to our
+   [tutorials](https://tutorial.xarray.dev). These should not only
+   illustrates the new feature, but explains it.

 5. **Implementations and algorithms:** You should understand the code being modified
-or added before approving it. (See [Merge Only Changes You Understand](#merge-only-changes-you-understand)
-below.) Implementations should do what they claim and be simple, readable, and efficient
-in that order.
+   or added before approving it. (See [Merge Only Changes You Understand](#merge-only-changes-you-understand)
+   below.) Implementations should do what they claim and be simple, readable, and efficient
+   in that order.

-6. **Tests:** All contributions *must* be tested, and each added line of code
-should be covered by at least one test. Good tests not only execute the code,
-but explore corner cases. It can be tempting not to review tests, but please
-do so.
+6. **Tests:** All contributions _must_ be tested, and each added line of code
+   should be covered by at least one test. Good tests not only execute the code,
+   but explore corner cases. It can be tempting not to review tests, but please
+   do so.

-Other changes may be *nitpicky*: spelling mistakes, formatting,
+Other changes may be _nitpicky_: spelling mistakes, formatting,
 etc. Do not insist contributors make these changes, but instead
 you should offer to make these changes by [pushing to their branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/committing-changes-to-a-pull-request-branch-created-from-a-fork),
 or using GitHub’s [suggestion](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/reviewing-changes-in-pull-requests/commenting-on-a-pull-request)
@@ -233,7 +232,7 @@ it gives the contributor a choice in whether to accept the changes.

 Unless you know that a contributor is experienced with git, don’t
 ask for a rebase when merge conflicts arise. Instead, rebase the
-branch yourself, force-push to their branch, and advise the contributor to force-pull. If the contributor is
+branch yourself, force-push to their branch, and advise the contributor to force-pull. If the contributor is
 no longer active, you may take over their branch by submitting a new pull
 request and closing the original, including a reference to the original pull
 request. In doing so, ensure you communicate that you are not throwing the
@@ -243,17 +242,17 @@ to the pull request using the `Co-authored-by`

 ### Merge only changes you understand

-*Long-term maintainability* is an important concern. Code doesn't
-merely have to *work*, but should be *understood* by multiple core
-developers. Changes will have to be made in the future, and the
+_Long-term maintainability_ is an important concern. Code doesn't
+merely have to _work_, but should be _understood_ by multiple core
+developers. Changes will have to be made in the future, and the
 original contributor may have moved on.

-Therefore, *do not merge a code change unless you understand it*. Ask
+Therefore, _do not merge a code change unless you understand it_. Ask
 for help freely: we can consult community members, or even external
 developers, for added insight where needed, and see this as a great
 learning opportunity.

 While we collectively "own" any patches (and bugs!) that become part
-of the code base, you are vouching for changes you merge. Please take
+of the code base, you are vouching for changes you merge. Please take
 that responsibility seriously. Feel free to ping other active maintainers with any questions you may have.
@@ -268,8 +267,8 @@ resources such as:

 - Our [philosophy and development roadmap](https://docs.xarray.dev/en/stable/roadmap.html).
 - [PEP8](https://peps.python.org/pep-0008/) for Python style.
 - [PEP257](https://peps.python.org/pep-0257/) and the
-  [NumPy documentation guide](https://numpy.org/devdocs/dev/howto-docs.html#documentation-style)
-  for docstring conventions.
+  [NumPy documentation guide](https://numpy.org/devdocs/dev/howto-docs.html#documentation-style)
+  for docstring conventions.
 - [`pre-commit`](https://pre-commit.com) hooks for autoformatting.
 - [`ruff`](https://github.com/astral-sh/ruff) autoformatting and linting.
 - [python-xarray](https://stackoverflow.com/questions/tagged/python-xarray) on Stack Overflow.
@@ -308,7 +307,7 @@ their approval and voting rights until they become active again.

 ## Contribute to this guide (!)

-This guide reflects the experience of the current core team members. We
+This guide reflects the experience of the current core team members. We
 may well have missed things that, by now, have become second nature—things
 that you, as a new team member, will spot more easily.
 Please ask the other core team members if you have any questions, and
@@ -316,6 +315,6 @@ submit a pull request with insights gained.

 ## Conclusion

-We are excited to have you on board! We look forward to your
-contributions to the code base and the community. Thank you in
+We are excited to have you on board! We look forward to your
+contributions to the code base and the community. Thank you in
 advance!
diff --git a/DATATREE_MIGRATION_GUIDE.md b/DATATREE_MIGRATION_GUIDE.md
index 8c573999c53..b1f5207c0b5 100644
--- a/DATATREE_MIGRATION_GUIDE.md
+++ b/DATATREE_MIGRATION_GUIDE.md
@@ -5,7 +5,7 @@ _15th October 2024_

 This guide is for previous users of the prototype `datatree.DataTree` class in the `xarray-contrib/datatree repository`. That repository has now been archived, and will not be maintained. This guide is intended to help smooth your transition to using the new, updated `xarray.DataTree` class.

 > [!IMPORTANT]
-> There are breaking changes! You should not expect that code written with `xarray-contrib/datatree` will work without any modifications. At the absolute minimum you will need to change the top-level import statement, but there are other changes too.
+> There are breaking changes! You should not expect that code written with `xarray-contrib/datatree` will work without any modifications. At the absolute minimum you will need to change the top-level import statement, but there are other changes too.

 We have made various changes compared to the prototype version. These can be split into three categories: data model changes, which affect the hierarchal structure itself; integration with xarray's IO backends; and minor API changes, which mostly consist of renaming methods to be more self-consistent.
@@ -28,6 +28,7 @@ Now xarray's backend entrypoint system has been generalized to include `open_dat
 This means we can now extend other xarray backends to support `open_datatree`! If you are the maintainer of an xarray backend we encourage you to add support for `open_datatree` and `open_groups`!

 Additionally:
+
 - A `group` kwarg has been added to `open_datatree` for choosing which group in the file should become the root group of the created tree.
 - Various performance improvements have been made, which should help when opening netCDF files and Zarr stores with large numbers of groups.
 - We anticipate further performance improvements being possible for datatree IO.
@@ -35,6 +36,7 @@ Additionally:
 ### API changes

 A number of other API changes have been made, which should only require minor modifications to your code:
+
 - The top-level import has changed, from `from datatree import DataTree, open_datatree` to `from xarray import DataTree, open_datatree`. Alternatively you can now just use the `import xarray as xr` namespace convention for everything datatree-related.
 - The `DataTree.ds` property has been changed to `DataTree.dataset`, though `DataTree.ds` remains as an alias for `DataTree.dataset`.
 - Similarly the `ds` kwarg in the `DataTree.__init__` constructor has been replaced by `dataset`, i.e. use `DataTree(dataset=)` instead of `DataTree(ds=...)`.
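For most users the migration described in the guide above reduces to the import change and the `ds=` → `dataset=` rename. A minimal sketch, assuming a current xarray release with `DataTree` included:

```python
# Before (prototype package, now archived):
#   from datatree import DataTree, open_datatree
# After: DataTree ships with xarray itself.
import xarray as xr

ds = xr.Dataset({"a": ("x", [1, 2, 3])})
tree = xr.DataTree(dataset=ds)  # the `ds=` kwarg was renamed to `dataset=`
print(tree)
```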
diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md
index 28f7b8e8775..289519c574c 100644
--- a/HOW_TO_RELEASE.md
+++ b/HOW_TO_RELEASE.md
@@ -13,26 +13,31 @@ upstream        https://github.com/pydata/xarray (push)

- 1. Ensure your main branch is synced to upstream:
-    ```sh
-    git switch main
-    git pull upstream main
-    ```
- 2. Add a list of contributors.
+1. Ensure your main branch is synced to upstream:
+   ```sh
+   git switch main
+   git pull upstream main
+   ```
+2. Add a list of contributors.
    First fetch all previous release tags so we can see the version number of the last release was:
+
    ```sh
    git fetch upstream --tags
    ```
+
    Then run
+
    ```sh
    python ci/release_contributors.py
    ```
+
    (needs `gitpython` and `toolz` / `cytoolz`) and copy the output.
- 3. Write a release summary: ~50 words describing the high level features. This
+
+3. Write a release summary: ~50 words describing the high level features. This
    will be used in the release emails, tweets, GitHub release notes, etc.
- 4. Look over whats-new.rst and the docs. Make sure "What's New" is complete
+4. Look over whats-new.rst and the docs. Make sure "What's New" is complete
    (check the date!) and add the release summary at the top.
    Things to watch out for:
    - Important new features should be highlighted towards the top.
@@ -41,77 +46,83 @@ upstream        https://github.com/pydata/xarray (push)
    due to a bad merge. Check for these before a release by using git diff,
    e.g., `git diff v{YYYY.MM.X-1} whats-new.rst` where {YYYY.MM.X-1} is the previous release.
- 5. Open a PR with the release summary and whatsnew changes; in particular the
+5. Open a PR with the release summary and whatsnew changes; in particular the
    release headline should get feedback from the team on what's important to include.
- 6. After merging, again ensure your main branch is synced to upstream:
-    ```sh
-    git pull upstream main
-    ```
- 7. If you have any doubts, run the full test suite one final time!
-    ```sh
-    pytest
-    ```
- 8. Check that the [ReadTheDocs build](https://readthedocs.org/projects/xray/) is passing on the `latest` build version (which is built from the `main` branch).
- 9. Issue the release on GitHub. Click on "Draft a new release" at
+6. After merging, again ensure your main branch is synced to upstream:
+   ```sh
+   git pull upstream main
+   ```
+7. If you have any doubts, run the full test suite one final time!
+   ```sh
+   pytest
+   ```
+8. Check that the [ReadTheDocs build](https://readthedocs.org/projects/xray/) is passing on the `latest` build version (which is built from the `main` branch).
+9. Issue the release on GitHub. Click on "Draft a new release" at
    <https://github.com/pydata/xarray/releases>. Type in the version number (with a "v")
    and paste the release summary in the notes.
- 10. This should automatically trigger an upload of the new build to PyPI via GitHub Actions.
+10. This should automatically trigger an upload of the new build to PyPI via GitHub Actions.
     Check this has run [here](https://github.com/pydata/xarray/actions/workflows/pypi-release.yaml),
     and that the version number you expect is displayed [on PyPI](https://pypi.org/project/xarray/)
 11. Add a section for the next release {YYYY.MM.X+1} to doc/whats-new.rst (we avoid doing
     this earlier so that it doesn't show up in the RTD build):
-    ```rst
-    .. _whats-new.YYYY.MM.X+1:
-    vYYYY.MM.X+1 (unreleased)
-    -----------------------
+    ```rst
+    .. _whats-new.YYYY.MM.X+1:
+
+    vYYYY.MM.X+1 (unreleased)
+    -----------------------

-    New Features
-    ~~~~~~~~~~~~
+    New Features
+    ~~~~~~~~~~~~

-    Breaking changes
-    ~~~~~~~~~~~~~~~~
+    Breaking changes
+    ~~~~~~~~~~~~~~~~

-    Deprecations
-    ~~~~~~~~~~~~
+    Deprecations
+    ~~~~~~~~~~~~

-    Bug fixes
-    ~~~~~~~~~
+    Bug fixes
+    ~~~~~~~~~

-    Documentation
-    ~~~~~~~~~~~~~
+    Documentation
+    ~~~~~~~~~~~~~

-    Internal Changes
-    ~~~~~~~~~~~~~~~~
+    Internal Changes
+    ~~~~~~~~~~~~~~~~

-    ```
+
+    ```
 12. Commit your changes and push to main again:
-    ```sh
-    git commit -am 'New whatsnew section'
-    git push upstream main
-    ```
+
+    ```sh
+    git commit -am 'New whatsnew section'
+    git push upstream main
+    ```
+
     You're done pushing to main!

 13. Update the version available on pyodide:
+
     - Open the PyPI page for [Xarray downloads](https://pypi.org/project/xarray/#files)
     - Edit [`pyodide/packages/xarray/meta.yaml`](https://github.com/pyodide/pyodide/blob/main/packages/xarray/meta.yaml) to update the
-      - version number
-      - link to the wheel (under "Built Distribution" on the PyPI page)
-      - SHA256 hash (Click "Show Hashes" next to the link to the wheel)
+      - version number
+      - link to the wheel (under "Built Distribution" on the PyPI page)
+      - SHA256 hash (Click "Show Hashes" next to the link to the wheel)
     - Open a pull request to pyodide

 14. Issue the release announcement to mailing lists & Twitter. For bug fix releases, I
     usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader
     list (no more than once every 3-6 months):
-    - pydata@googlegroups.com
-    - xarray@googlegroups.com
-    - numpy-discussion@scipy.org
-    - scipy-user@scipy.org
-    - pyaos@lists.johnny-lin.com
+
+    - pydata@googlegroups.com
+    - xarray@googlegroups.com
+    - numpy-discussion@scipy.org
+    - scipy-user@scipy.org
+    - pyaos@lists.johnny-lin.com

     Google search will turn up examples of prior release announcements (look for "ANN xarray").
diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
index ab256079c90..20c873540de 100644
--- a/asv_bench/asv.conf.json
+++ b/asv_bench/asv.conf.json
@@ -1,161 +1,161 @@
 {
-    // The version of the config file format. Do not change, unless
-    // you know what you are doing.
-    "version": 1,
-
-    // The name of the project being benchmarked
-    "project": "xarray",
-
-    // The project's homepage
-    "project_url": "https://docs.xarray.dev/",
-
-    // The URL or local path of the source code repository for the
-    // project being benchmarked
-    "repo": "..",
-
-    // List of branches to benchmark. If not provided, defaults to "master"
-    // (for git) or "default" (for mercurial).
-    "branches": ["main"], // for git
-    // "branches": ["default"],    // for mercurial
-
-    // The DVCS being used. If not set, it will be automatically
-    // determined from "repo" by looking at the protocol in the URL
-    // (if remote), or by looking for special directories, such as
-    // ".git" (if local).
-    "dvcs": "git",
-
-    // The tool to use to create environments. May be "conda",
-    // "virtualenv" or other value depending on the plugins in use.
-    // If missing or the empty string, the tool will be automatically
-    // determined by looking for tools on the PATH environment
-    // variable.
-    "environment_type": "mamba",
-    "conda_channels": ["conda-forge"],
-
-    // timeout in seconds for installing any dependencies in environment
-    // defaults to 10 min
-    "install_timeout": 600,
-
-    // the base URL to show a commit for the project.
-    "show_commit_url": "https://github.com/pydata/xarray/commit/",
-
-    // The Pythons you'd like to test against. If not provided, defaults
-    // to the current version of Python used to run `asv`.
-    "pythons": ["3.11"],
-
-    // The matrix of dependencies to test. Each key is the name of a
-    // package (in PyPI) and the values are version numbers. An empty
-    // list or empty string indicates to just test against the default
-    // (latest) version. null indicates that the package is to not be
-    // installed. If the package to be tested is only available from
-    // PyPi, and the 'environment_type' is conda, then you can preface
-    // the package name by 'pip+', and the package will be installed via
-    // pip (with all the conda available packages installed first,
-    // followed by the pip installed packages).
-    //
-    // "matrix": {
-    //     "numpy": ["1.6", "1.7"],
-    //     "six": ["", null],        // test with and without six installed
-    //     "pip+emcee": [""],   // emcee is only available for install with pip.
-    // },
-    "matrix": {
-        "setuptools_scm": [""],  // GH6609
-        "numpy": [""],
-        "pandas": [""],
-        "netcdf4": [""],
-        "scipy": [""],
-        "bottleneck": [""],
-        "dask": [""],
-        "distributed": [""],
-        "flox": [""],
-        "numpy_groupies": [""],
-        "sparse": [""],
-        "cftime": [""]
-    },
-    // fix for bad builds
-    // https://github.com/airspeed-velocity/asv/issues/1389#issuecomment-2076131185
-    "build_command": [
-        "python -m build",
-        "python -mpip wheel --no-deps --no-build-isolation --no-index -w {build_cache_dir} {build_dir}"
-    ],
-    // Combinations of libraries/python versions can be excluded/included
-    // from the set to test. Each entry is a dictionary containing additional
-    // key-value pairs to include/exclude.
-    //
-    // An exclude entry excludes entries where all values match. The
-    // values are regexps that should match the whole string.
-    //
-    // An include entry adds an environment. Only the packages listed
-    // are installed. The 'python' key is required. The exclude rules
-    // do not apply to includes.
-    //
-    // In addition to package names, the following keys are available:
-    //
-    // - python
-    //     Python version, as in the *pythons* variable above.
-    // - environment_type
-    //     Environment type, as above.
-    // - sys_platform
-    //     Platform, as in sys.platform. Possible values for the common
-    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
-    //
-    // "exclude": [
-    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
-    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
-    // ],
-    //
-    // "include": [
-    //     // additional env for python2.7
-    //     {"python": "2.7", "numpy": "1.8"},
-    //     // additional env if run on windows+conda
-    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
-    // ],
-
-    // The directory (relative to the current directory) that benchmarks are
-    // stored in. If not provided, defaults to "benchmarks"
-    "benchmark_dir": "benchmarks",
-
-    // The directory (relative to the current directory) to cache the Python
-    // environments in. If not provided, defaults to "env"
-    "env_dir": ".asv/env",
-
-    // The directory (relative to the current directory) that raw benchmark
-    // results are stored in. If not provided, defaults to "results".
-    "results_dir": ".asv/results",
-
-    // The directory (relative to the current directory) that the html tree
-    // should be written to. If not provided, defaults to "html".
-    "html_dir": ".asv/html",
-
-    // The number of characters to retain in the commit hashes.
-    // "hash_length": 8,
-
-    // `asv` will cache wheels of the recent builds in each
-    // environment, making them faster to install next time. This is
-    // number of builds to keep, per environment.
-    // "wheel_cache_size": 0
-
-    // The commits after which the regression search in `asv publish`
-    // should start looking for regressions. Dictionary whose keys are
-    // regexps matching to benchmark names, and values corresponding to
-    // the commit (exclusive) after which to start looking for
-    // regressions. The default is to start from the first commit
-    // with results. If the commit is `null`, regression detection is
-    // skipped for the matching benchmark.
-    //
-    // "regressions_first_commits": {
-    //     "some_benchmark": "352cdf",  // Consider regressions only after this commit
-    //     "another_benchmark": null,   // Skip regression detection altogether
-    // }
-
-    // The thresholds for relative change in results, after which `asv
-    // publish` starts reporting regressions. Dictionary of the same
-    // form as in ``regressions_first_commits``, with values
-    // indicating the thresholds. If multiple entries match, the
-    // maximum is taken. If no entry matches, the default is 5%.
-    //
-    // "regressions_thresholds": {
-    //     "some_benchmark": 0.01,     // Threshold of 1%
-    //     "another_benchmark": 0.5,   // Threshold of 50%
-    // }
+  // The version of the config file format. Do not change, unless
+  // you know what you are doing.
+  "version": 1,
+
+  // The name of the project being benchmarked
+  "project": "xarray",
+
+  // The project's homepage
+  "project_url": "https://docs.xarray.dev/",
+
+  // The URL or local path of the source code repository for the
+  // project being benchmarked
+  "repo": "..",
+
+  // List of branches to benchmark. If not provided, defaults to "master"
+  // (for git) or "default" (for mercurial).
+  "branches": ["main"], // for git
+  // "branches": ["default"],    // for mercurial
+
+  // The DVCS being used. If not set, it will be automatically
+  // determined from "repo" by looking at the protocol in the URL
+  // (if remote), or by looking for special directories, such as
+  // ".git" (if local).
+  "dvcs": "git",
+
+  // The tool to use to create environments. May be "conda",
+  // "virtualenv" or other value depending on the plugins in use.
+  // If missing or the empty string, the tool will be automatically
+  // determined by looking for tools on the PATH environment
+  // variable.
+  "environment_type": "mamba",
+  "conda_channels": ["conda-forge"],
+
+  // timeout in seconds for installing any dependencies in environment
+  // defaults to 10 min
+  "install_timeout": 600,
+
+  // the base URL to show a commit for the project.
+  "show_commit_url": "https://github.com/pydata/xarray/commit/",
+
+  // The Pythons you'd like to test against. If not provided, defaults
+  // to the current version of Python used to run `asv`.
+  "pythons": ["3.11"],
+
+  // The matrix of dependencies to test. Each key is the name of a
+  // package (in PyPI) and the values are version numbers. An empty
+  // list or empty string indicates to just test against the default
+  // (latest) version. null indicates that the package is to not be
+  // installed. If the package to be tested is only available from
+  // PyPi, and the 'environment_type' is conda, then you can preface
+  // the package name by 'pip+', and the package will be installed via
+  // pip (with all the conda available packages installed first,
+  // followed by the pip installed packages).
+  //
+  // "matrix": {
+  //     "numpy": ["1.6", "1.7"],
+  //     "six": ["", null],        // test with and without six installed
+  //     "pip+emcee": [""],   // emcee is only available for install with pip.
+  // },
+  "matrix": {
+    "setuptools_scm": [""], // GH6609
+    "numpy": [""],
+    "pandas": [""],
+    "netcdf4": [""],
+    "scipy": [""],
+    "bottleneck": [""],
+    "dask": [""],
+    "distributed": [""],
+    "flox": [""],
+    "numpy_groupies": [""],
+    "sparse": [""],
+    "cftime": [""]
+  },
+  // fix for bad builds
+  // https://github.com/airspeed-velocity/asv/issues/1389#issuecomment-2076131185
+  "build_command": [
+    "python -m build",
+    "python -mpip wheel --no-deps --no-build-isolation --no-index -w {build_cache_dir} {build_dir}"
+  ],
+  // Combinations of libraries/python versions can be excluded/included
+  // from the set to test. Each entry is a dictionary containing additional
+  // key-value pairs to include/exclude.
+  //
+  // An exclude entry excludes entries where all values match. The
+  // values are regexps that should match the whole string.
+  //
+  // An include entry adds an environment. Only the packages listed
+  // are installed. The 'python' key is required. The exclude rules
+  // do not apply to includes.
+  //
+  // In addition to package names, the following keys are available:
+  //
+  // - python
+  //     Python version, as in the *pythons* variable above.
+  // - environment_type
+  //     Environment type, as above.
+  // - sys_platform
+  //     Platform, as in sys.platform. Possible values for the common
+  //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+  //
+  // "exclude": [
+  //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+  //     {"environment_type": "conda", "six": null}, // don't run without six on conda
+  // ],
+  //
+  // "include": [
+  //     // additional env for python2.7
+  //     {"python": "2.7", "numpy": "1.8"},
+  //     // additional env if run on windows+conda
+  //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
+  // ],
+
+  // The directory (relative to the current directory) that benchmarks are
+  // stored in. If not provided, defaults to "benchmarks"
+  "benchmark_dir": "benchmarks",
+
+  // The directory (relative to the current directory) to cache the Python
+  // environments in. If not provided, defaults to "env"
+  "env_dir": ".asv/env",
+
+  // The directory (relative to the current directory) that raw benchmark
+  // results are stored in. If not provided, defaults to "results".
+  "results_dir": ".asv/results",
+
+  // The directory (relative to the current directory) that the html tree
+  // should be written to. If not provided, defaults to "html".
+  "html_dir": ".asv/html"
+
+  // The number of characters to retain in the commit hashes.
+  // "hash_length": 8,
+
+  // `asv` will cache wheels of the recent builds in each
+  // environment, making them faster to install next time. This is
+  // number of builds to keep, per environment.
+  // "wheel_cache_size": 0
+
+  // The commits after which the regression search in `asv publish`
+  // should start looking for regressions. Dictionary whose keys are
+  // regexps matching to benchmark names, and values corresponding to
+  // the commit (exclusive) after which to start looking for
+  // regressions. The default is to start from the first commit
+  // with results. If the commit is `null`, regression detection is
+  // skipped for the matching benchmark.
+  //
+  // "regressions_first_commits": {
+  //     "some_benchmark": "352cdf",  // Consider regressions only after this commit
+  //     "another_benchmark": null,   // Skip regression detection altogether
+  // }
+
+  // The thresholds for relative change in results, after which `asv
Dictionary of the same + // form as in ``regressions_first_commits``, with values + // indicating the thresholds. If multiple entries match, the + // maximum is taken. If no entry matches, the default is 5%. + // + // "regressions_thresholds": { + // "some_benchmark": 0.01, // Threshold of 1% + // "another_benchmark": 0.5, // Threshold of 50% + // } } diff --git a/asv_bench/benchmarks/README_CI.md b/asv_bench/benchmarks/README_CI.md index 9d86cc257ef..9c35e8a93b2 100644 --- a/asv_bench/benchmarks/README_CI.md +++ b/asv_bench/benchmarks/README_CI.md @@ -10,12 +10,12 @@ The `asv` suite can be run for any PR on GitHub Actions (check workflow `.github We use `asv continuous` to run the job, which runs a relative performance measurement. This means that there's no state to be saved and that regressions are only caught in terms of performance ratio (absolute numbers are available but they are not useful since we do not use stable hardware over time). `asv continuous` will: -* Compile `scikit-image` for _both_ commits. We use `ccache` to speed up the process, and `mamba` is used to create the build environments. -* Run the benchmark suite for both commits, _twice_ (since `processes=2` by default). -* Generate a report table with performance ratios: - * `ratio=1.0` -> performance didn't change. - * `ratio<1.0` -> PR made it slower. - * `ratio>1.0` -> PR made it faster. +- Compile `scikit-image` for _both_ commits. We use `ccache` to speed up the process, and `mamba` is used to create the build environments. +- Run the benchmark suite for both commits, _twice_ (since `processes=2` by default). +- Generate a report table with performance ratios: + - `ratio=1.0` -> performance didn't change. + - `ratio<1.0` -> PR made it slower. + - `ratio>1.0` -> PR made it faster. Due to the sensitivity of the test, we cannot guarantee that false positives are not produced. In practice, values between `(0.7, 1.5)` are to be considered part of the measurement noise. When in doubt, running the benchmark suite one more time will provide more information about the test being a false positive or not. @@ -30,12 +30,12 @@ Due to the sensitivity of the test, we cannot guarantee that false positives are The CI job will also generate an artifact. This is the `.asv/results` directory compressed in a zip file. Its contents include: -* `fv-xxxxx-xx/`. A directory for the machine that ran the suite. It contains three files: - * `.json`, `.json`: the benchmark results for each commit, with stats. - * `machine.json`: details about the hardware. -* `benchmarks.json`: metadata about the current benchmark suite. -* `benchmarks.log`: the CI logs for this run. -* This README. +- `fv-xxxxx-xx/`. A directory for the machine that ran the suite. It contains three files: + - `.json`, `.json`: the benchmark results for each commit, with stats. + - `machine.json`: details about the hardware. +- `benchmarks.json`: metadata about the current benchmark suite. +- `benchmarks.log`: the CI logs for this run. +- This README. 
 ## Re-running the analysis
diff --git a/ci/requirements/all-but-dask.yml b/ci/requirements/all-but-dask.yml
index 1b6db04671f..b7bf167188f 100644
--- a/ci/requirements/all-but-dask.yml
+++ b/ci/requirements/all-but-dask.yml
@@ -15,7 +15,7 @@ dependencies:
   - h5py
   - hdf5
   - hypothesis
-  - lxml  # Optional dep of pydap
+  - lxml # Optional dep of pydap
   - matplotlib-base
   - nc-time-axis
   - netcdf4
diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml
index 183aa28d703..a9a5fb2887e 100644
--- a/ci/requirements/doc.yml
+++ b/ci/requirements/doc.yml
@@ -14,7 +14,7 @@ dependencies:
   - hypothesis>=6.75.8
   - h5netcdf>=0.13
   - ipykernel
-  - ipywidgets  # silence nbsphinx warning
+  - ipywidgets # silence nbsphinx warning
   - ipython
   - iris>=2.3
   - jupyter_client
diff --git a/ci/requirements/min-all-deps.yml b/ci/requirements/min-all-deps.yml
index b5a9176a62b..92ce0001224 100644
--- a/ci/requirements/min-all-deps.yml
+++ b/ci/requirements/min-all-deps.yml
@@ -8,7 +8,7 @@ dependencies:
   # When upgrading python, numpy, or pandas, must also change
   # doc/user-guide/installing.rst, doc/user-guide/plotting.rst and setup.py.
   - python=3.10
-  - array-api-strict=1.0  # dependency for testing the array api compat
+  - array-api-strict=1.0 # dependency for testing the array api compat
   - boto3=1.28
   - bottleneck=1.3
   - cartopy=0.22
@@ -29,7 +29,7 @@ dependencies:
   - hdf5=1.12
   - hypothesis
   - iris=3.7
-  - lxml=4.9  # Optional dep of pydap
+  - lxml=4.9 # Optional dep of pydap
   - matplotlib-base=3.7
   - nc-time-axis=1.4
   # netcdf follows a 1.major.minor[.patch] convention
diff --git a/design_notes/flexible_indexes_notes.md b/design_notes/flexible_indexes_notes.md
index c53acfa62b7..76c618aa37c 100644
--- a/design_notes/flexible_indexes_notes.md
+++ b/design_notes/flexible_indexes_notes.md
@@ -43,7 +43,7 @@ Coordinates:
   * y        (y) float64 ...
 ```

-This refactoring would allow creating a geographic index for `lat` and `lon` and two simple indexes for `x` and `y` such that we could select data with either `da.sel(lon=..., lat=...)` or `da.sel(x=..., y=...)`.
+This refactoring would allow creating a geographic index for `lat` and `lon` and two simple indexes for `x` and `y` such that we could select data with either `da.sel(lon=..., lat=...)` or `da.sel(x=..., y=...)`.

 Refactoring the dimension -> index one-to-one relationship into many-to-many would also introduce some issues that we'll need to address, e.g., ambiguous cases like `da.sel(chi=..., drainage_area=...)` where multiple indexes may potentially return inconsistent positional indexers along a dimension.
@@ -305,16 +305,16 @@ Xarray also provides a number of Dataset/DataArray methods where indexes are use
 - `resample` (`CFTimeIndex` and a `DatetimeIntervalIndex`)
 - `DatetimeAccessor` & `TimedeltaAccessor` properties (`CFTimeIndex` and a `DatetimeIntervalIndex`)
 - `interp` & `interpolate_na`,
-  - with `IntervalIndex`, these become regridding operations. Should we support hooks for these operations?
+  - with `IntervalIndex`, these become regridding operations. Should we support hooks for these operations?
 - `differentiate`, `integrate`, `polyfit`
-  - raise an error if not a "simple" 1D index?
+  - raise an error if not a "simple" 1D index?
 - `pad`
 - `coarsen` has to make choices about output index labels.
 - `sortby`
 - `stack`/`unstack`
 - plotting
-  - `plot.pcolormesh` "infers" interval breaks along axes, which are really inferred `bounds` for the appropriate indexes.
-  - `plot.step` again uses `bounds`. In fact, we may even want `step` to be the default 1D plotting function if the axis has `bounds` attached.
+  - `plot.pcolormesh` "infers" interval breaks along axes, which are really inferred `bounds` for the appropriate indexes.
+  - `plot.step` again uses `bounds`. In fact, we may even want `step` to be the default 1D plotting function if the axis has `bounds` attached.

 It would be reasonable to first restrict those methods to the indexes that are currently available in Xarray, and maybe extend the `XarrayIndex` API later upon request when the opportunity arises.
@@ -379,7 +379,7 @@ Option A may be more reasonable for now.

 ## 6. Coordinate duck arrays

-Another opportunity of this refactoring is support for duck arrays as index coordinates. Decoupling coordinates and indexes would *de-facto* enable it.
+Another opportunity of this refactoring is support for duck arrays as index coordinates. Decoupling coordinates and indexes would _de-facto_ enable it.

 However, support for duck arrays in index-based operations such as data selection or alignment would probably require some protocol extension, e.g.,
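The hunk above stops right where the design note goes on to sketch the protocol extension it mentions. As a purely hypothetical illustration of the idea — the method name `_sel_` and its signature are assumptions made here for the sketch, not an API that xarray defines — a unit-aware duck array participating in label-based selection might look like:

```python
# Hypothetical sketch only: `_sel_` is an assumed protocol hook for
# illustration; xarray defines no such method.
import numpy as np


class UnitAwareArray:
    """Duck array that could back a coordinate while carrying units."""

    def __init__(self, values, units):
        self.values = np.asarray(values)
        self.units = units

    def __array__(self, dtype=None):
        return self.values if dtype is None else self.values.astype(dtype)

    def _sel_(self, label):
        # Translate a (value, units) label into a plain value that an
        # index could resolve positionally, validating units on the way.
        value, units = label
        if units != self.units:
            raise ValueError(f"expected units {self.units!r}, got {units!r}")
        return value


coord = UnitAwareArray([10.0, 20.0, 30.0], units="m")
print(coord._sel_((20.0, "m")))  # -> 20.0
```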
[Bins that vary in a dimension](https://flox.readthedocs.io/en/latest/user-stories/nD-bins.html) 2. [Overlapping groups](https://flox.readthedocs.io/en/latest/user-stories/overlaps.html) 3. [Rolling resampling](https://github.com/pydata/xarray/discussions/8361) @@ -65,13 +73,14 @@ Grouper objects will close the gap. ## Usage and impact - Grouper objects + 1. Will abstract useful factorization algorithms, and 2. Present a natural way to extend GroupBy to grouping by multiple variables: `ds.groupby(x=BinGrouper(...), t=Resampler(freq="M", ...)).mean()`. In addition, Grouper objects provide a nice interface to add often-requested grouping functionality -1. A new `SpaceResampler` would allow specifying resampling spatial dimensions. ([issue](https://github.com/pydata/xarray/issues/4008)) + +1. A new `SpaceResampler` would allow specifying resampling spatial dimensions. ([issue](https://github.com/pydata/xarray/issues/4008)) 2. `RollingTimeResampler` would allow rolling-like functionality that understands timestamps ([issue](https://github.com/pydata/xarray/issues/3216)) 3. A `QuantileBinGrouper` to abstract away `pd.cut` ([issue](https://github.com/pydata/xarray/discussions/7110)) 4. A `SeasonGrouper` and `SeasonResampler` would abstract away common annoyances with such calculations today @@ -86,6 +95,7 @@ In addition, Grouper objects provide a nice interface to add often-requested gro ## Backward Compatibility Xarray's existing grouping functionality will be exposed using two new Groupers: + 1. `UniqueGrouper` which uses `pandas.factorize` 2. `BinGrouper` which uses `pandas.cut` 3. `TimeResampler` which mimics pandas' `.resample` @@ -96,6 +106,7 @@ Similarly, `ds.groupby_bins('x', bins=np.arange(10, 2))` will be unchanged and i ## Detailed description All Grouper objects will subclass from a Grouper object + ```python import abc @@ -115,19 +126,22 @@ class CustomGrouper(Grouper): ``` ### The `factorize` method + Today, the `factorize` method takes as input the group variable and returns 4 variables (I propose to clean this up below): + 1. `codes`: An array of same shape as the `group` with int dtype. NaNs in `group` are coded by `-1` and ignored later. 2. `group_indices` is a list of index location of `group` elements that belong to a single group. 3. `unique_coord` is (usually) a `pandas.Index` object of all unique `group` members present in `group`. 4. `full_index` is a `pandas.Index` of all `group` members. This is different from `unique_coord` for binning and resampling, where not all groups in the output may be represented in the input `group`. For grouping by a categorical variable e.g. `['a', 'b', 'a', 'c']`, `full_index` and `unique_coord` are identical. -There is some redundancy here since `unique_coord` is always equal to or a subset of `full_index`. -We can clean this up (see Implementation below). + There is some redundancy here since `unique_coord` is always equal to or a subset of `full_index`. + We can clean this up (see Implementation below). ### The `weights` method (?) The proposed `weights` method is optional and unimplemented today. Groupers with `weights` will allow composing `weighted` and `groupby` ([issue](https://github.com/pydata/xarray/issues/3937)). 
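To ground this discussion, here is first a minimal sketch of the `factorize` contract described above, assuming `pandas.factorize` semantics (`factorize_unique` is an illustrative helper, not the actual implementation):

```python
import numpy as np
import pandas as pd


def factorize_unique(group: np.ndarray):
    # Hypothetical sketch: NaN/missing entries receive code -1 and are ignored later.
    codes, uniques = pd.factorize(group)
    # Index locations of the `group` elements that belong to each single group.
    group_indices = [np.flatnonzero(codes == i) for i in range(len(uniques))]
    unique_coord = pd.Index(uniques)
    # For categorical grouping, full_index equals unique_coord; binning and
    # resampling may instead return a larger full_index containing empty groups.
    full_index = unique_coord
    return codes, group_indices, unique_coord, full_index


codes, group_indices, unique_coord, full_index = factorize_unique(
    np.array(["a", "b", "c", "a", "a"])
)
# codes -> array([0, 1, 2, 0, 0]); unique_coord -> Index(['a', 'b', 'c'])
```

The boolean weights matrix introduced next is just the one-hot encoding of these `codes`.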
The `weights` method should return an appropriate array of weights such that the following property is satisfied + ```python gb_sum = ds.groupby(by).sum() @@ -136,18 +150,22 @@ weighted_sum = xr.dot(ds, weights) assert_identical(gb_sum, weighted_sum) ``` + For example, the boolean weights for `group=np.array(['a', 'b', 'c', 'a', 'a'])` should be + ``` [[1, 0, 0, 1, 1], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0]] ``` + This is the boolean "summarization matrix" referred to in the classic Iverson (1980, Section 4.3)[^2] and "nub sieve" in [various APLs](https://aplwiki.com/wiki/Nub_Sieve). > [!NOTE] > We can always construct `weights` automatically using `group_indices` from `factorize`, so this is not a required method. For a rolling resampling, windowed weights are possible + ``` [[0.5, 1, 0.5, 0, 0], [0, 0.25, 1, 1, 0], @@ -159,30 +177,40 @@ For a rolling resampling, windowed weights are possible Rechunking support is another optional extension point. In `flox` I experimented some with automatically rechunking to make a groupby more parallel-friendly ([example 1](https://flox.readthedocs.io/en/latest/generated/flox.rechunk_for_blockwise.html), [example 2](https://flox.readthedocs.io/en/latest/generated/flox.rechunk_for_cohorts.html)). A great example is for resampling-style groupby reductions, for which `codes` might look like + ``` 0001|11122|3333 ``` + where `|` represents chunk boundaries. A simple rechunking to + ``` 000|111122|3333 ``` + would make this resampling reduction an embarrassingly parallel blockwise problem. Similarly consider monthly-mean climatologies for which the month numbers might be + ``` 1 2 3 4 5 | 6 7 8 9 10 | 11 12 1 2 3 | 4 5 6 7 8 | 9 10 11 12 | ``` + A slight rechunking to + ``` 1 2 3 4 | 5 6 7 8 | 9 10 11 12 | 1 2 3 4 | 5 6 7 8 | 9 10 11 12 | ``` + allows us to reduce `1, 2, 3, 4` separately from `5,6,7,8` and `9, 10, 11, 12` while still being parallel friendly (see the [flox documentation](https://flox.readthedocs.io/en/latest/implementation.html#method-cohorts) for more). We could attempt to detect these patterns, or we could just have the Grouper take as input `chunks` and return a tuple of "nice" chunk sizes to rechunk to. + ```python def preferred_chunks(self, chunks: ChunksTuple) -> ChunksTuple: pass ``` + For monthly means, since the period of repetition of labels is 12, the Grouper might choose possible chunk sizes of `((2,),(3,),(4,),(6,))`. For resampling, the Grouper could choose to resample to a multiple or an even fraction of the resampling frequency. @@ -193,9 +221,9 @@ However, these objects do not appear to be extension points, unlike the Grouper Instead, Pandas' `ExtensionArray` has a [`factorize`](https://pandas.pydata.org/docs/reference/api/pandas.api.extensions.ExtensionArray.factorize.html) method. Composing rolling with time resampling is a common workload: + 1. Polars has [`group_by_dynamic`](https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.group_by_dynamic.html) which appears to be like the proposed `RollingResampler`. -2. scikit-downscale provides [`PaddedDOYGrouper`]( -https://github.com/pangeo-data/scikit-downscale/blob/e16944a32b44f774980fa953ea18e29a628c71b8/skdownscale/pointwise_models/groupers.py#L19) +2. 
scikit-downscale provides [`PaddedDOYGrouper`](https://github.com/pangeo-data/scikit-downscale/blob/e16944a32b44f774980fa953ea18e29a628c71b8/skdownscale/pointwise_models/groupers.py#L19) ## Implementation Proposal @@ -215,10 +243,12 @@ https://github.com/pangeo-data/scikit-downscale/blob/e16944a32b44f774980fa953ea1 One major design choice made here was to adopt the syntax `ds.groupby(x=BinGrouper(...))` instead of `ds.groupby(BinGrouper('x', ...))`. This allows reuse of Grouper objects, example + ```python grouper = BinGrouper(...) ds.groupby(x=grouper, y=grouper) ``` + but requires that all variables being grouped by (`x` and `y` above) are present in Dataset `ds`. This does not seem like a bad requirement. Importantly `Grouper` instances will be copied internally so that they can safely cache state that might be shared between `factorize` and `weights`. @@ -227,6 +257,7 @@ Today, it is possible to `ds.groupby(DataArray, ...)`. This syntax will still be ## Discussion This proposal builds on these discussions: + 1. https://github.com/xarray-contrib/flox/issues/191#issuecomment-1328898836 2. https://github.com/pydata/xarray/issues/6610 @@ -236,5 +267,6 @@ This document has been placed in the public domain. ## References and footnotes -[^1]: Wickham, H. (2011). The split-apply-combine strategy for data analysis. https://vita.had.co.nz/papers/plyr.html -[^2]: Iverson, K.E. (1980). Notation as a tool of thought. Commun. ACM 23, 8 (Aug. 1980), 444–465. https://doi.org/10.1145/358896.358899 +[^1]: Wickham, H. (2011). The split-apply-combine strategy for data analysis. https://vita.had.co.nz/papers/plyr.html + +[^2]: Iverson, K.E. (1980). Notation as a tool of thought. Commun. ACM 23, 8 (Aug. 1980), 444–465. https://doi.org/10.1145/358896.358899 diff --git a/design_notes/named_array_design_doc.md b/design_notes/named_array_design_doc.md index 0050471cd01..5dcf6e29257 100644 --- a/design_notes/named_array_design_doc.md +++ b/design_notes/named_array_design_doc.md @@ -38,51 +38,52 @@ The creation of named-array is intended to separate the `xarray.Variable` from X Since the new named-array is envisioned to contain the core features of Xarray's variable, existing code using Variable from Xarray should be able to switch to named-array with minimal changes. However, there are several potential issues related to backward compatibility: -* **API Changes**: as the Variable is decoupled from Xarray and moved into named-array, some changes to the API may be necessary. These changes might include differences in function signature, etc. These changes could break existing code that relies on the current API and associated utility functions (e.g. `as_variable()`). The `xarray.Variable` object will subclass `NamedArray`, and provide the existing interface for compatibility. +- **API Changes**: as the Variable is decoupled from Xarray and moved into named-array, some changes to the API may be necessary. These changes might include differences in function signature, etc. These changes could break existing code that relies on the current API and associated utility functions (e.g. `as_variable()`). The `xarray.Variable` object will subclass `NamedArray`, and provide the existing interface for compatibility. ## Detailed Description named-array aims to provide a lightweight, efficient array structure with named dimensions, or axes, that enables convenient broadcasting and indexing. 
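As a rough illustration of what "convenient broadcasting and indexing" means here, today's `xarray.Variable` (the object named-array would extract) already behaves this way, and a `NamedArray` would be expected to keep these semantics (a sketch, not a spec):

```python
import numpy as np
import xarray as xr

a = xr.Variable(dims=("x", "y"), data=np.arange(6).reshape(2, 3))
b = xr.Variable(dims=("y",), data=np.array([10, 20, 30]))

# Arithmetic broadcasts by dimension *name*, not by axis position.
print((a + b).dims)  # ('x', 'y')

# Positional indexing drops the leading axis; names track what remains.
print(a[0].dims)  # ('y',)
```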
The primary component of named-array is a standalone version of the xarray.Variable data structure, which was previously a part of the Xarray library. The xarray.Variable data structure in named-array will maintain the core features of its counterpart in Xarray, including: -* **Named Axes (Dimensions)**: Each axis of the array can be given a name, providing a descriptive and intuitive way to reference the dimensions of the array. +- **Named Axes (Dimensions)**: Each axis of the array can be given a name, providing a descriptive and intuitive way to reference the dimensions of the array. -* **Arbitrary Metadata (Attributes)**: named-array will support the attachment of arbitrary metadata to arrays as a dict, providing a mechanism to store additional information about the data that the array represents. +- **Arbitrary Metadata (Attributes)**: named-array will support the attachment of arbitrary metadata to arrays as a dict, providing a mechanism to store additional information about the data that the array represents. -* **Convenient Broadcasting and Indexing**: With named dimensions, broadcasting and indexing operations become more intuitive and less error-prone. +- **Convenient Broadcasting and Indexing**: With named dimensions, broadcasting and indexing operations become more intuitive and less error-prone. The named-array package is designed to be interoperable with other scientific Python libraries. It will follow established scientific Python community standards and use standard array protocols, as well as the new data-apis standard. This allows named-array to wrap multiple duck-array objects, including, but not limited to, NumPy, Dask, Sparse, Pint, CuPy, and Pytorch. ## Implementation -* **Decoupling**: making `variable.py` agnostic to Xarray internals by decoupling it from the rest of the library. This will make the code more modular and easier to maintain. However, this will also make the code more complex, as we will need to define a clear interface for how the functionality in `variable.py` interacts with the rest of the library, particularly the ExplicitlyIndexed subclasses used to enable lazy indexing of data on disk. -* **Move Xarray's internal lazy indexing classes to follow standard Array Protocols**: moving the lazy indexing classes like `ExplicitlyIndexed` to use standard array protocols will be a key step in decoupling. It will also potentially improve interoperability with other libraries that use these protocols, and prepare these classes [for eventual movement out](https://github.com/pydata/xarray/issues/5081) of the Xarray code base. However, this will also require significant changes to the code, and we will need to ensure that all existing functionality is preserved. - * Use [https://data-apis.org/array-api-compat/](https://data-apis.org/array-api-compat/) to handle compatibility issues? -* **Leave lazy indexing classes in Xarray for now** -* **Preserve support for Dask collection protocols**: named-array will preserve existing support for the dask collections protocol namely the __dask_***__ methods -* **Preserve support for ChunkManagerEntrypoint?** Opening variables backed by dask vs cubed arrays currently is [handled within Variable.chunk](https://github.com/pydata/xarray/blob/92c8b33eb464b09d6f8277265b16cae039ab57ee/xarray/core/variable.py#L1272C15-L1272C15). If we are preserving dask support it would be nice to preserve general chunked array type support, but this currently requires an entrypoint. 
+- **Decoupling**: making `variable.py` agnostic to Xarray internals by decoupling it from the rest of the library. This will make the code more modular and easier to maintain. However, this will also make the code more complex, as we will need to define a clear interface for how the functionality in `variable.py` interacts with the rest of the library, particularly the ExplicitlyIndexed subclasses used to enable lazy indexing of data on disk. +- **Move Xarray's internal lazy indexing classes to follow standard Array Protocols**: moving the lazy indexing classes like `ExplicitlyIndexed` to use standard array protocols will be a key step in decoupling. It will also potentially improve interoperability with other libraries that use these protocols, and prepare these classes [for eventual movement out](https://github.com/pydata/xarray/issues/5081) of the Xarray code base. However, this will also require significant changes to the code, and we will need to ensure that all existing functionality is preserved. + - Use [https://data-apis.org/array-api-compat/](https://data-apis.org/array-api-compat/) to handle compatibility issues? +- **Leave lazy indexing classes in Xarray for now** +- **Preserve support for Dask collection protocols**: named-array will preserve existing support for the dask collections protocol namely the **dask\_\*\*\*** methods +- **Preserve support for ChunkManagerEntrypoint?** Opening variables backed by dask vs cubed arrays currently is [handled within Variable.chunk](https://github.com/pydata/xarray/blob/92c8b33eb464b09d6f8277265b16cae039ab57ee/xarray/core/variable.py#L1272C15-L1272C15). If we are preserving dask support it would be nice to preserve general chunked array type support, but this currently requires an entrypoint. ### Plan 1. Create a new baseclass for `xarray.Variable` to its own module e.g. `xarray.core.base_variable` 2. Remove all imports of internal Xarray classes and utils from `base_variable.py`. `base_variable.Variable` should not depend on anything in xarray.core - * Will require moving the lazy indexing classes (subclasses of ExplicitlyIndexed) to be standards compliant containers.` - * an array-api compliant container that provides **array_namespace**` - * Support `.oindex` and `.vindex` for explicit indexing - * Potentially implement this by introducing a new compliant wrapper object? - * Delete the `NON_NUMPY_SUPPORTED_ARRAY_TYPES` variable which special-cases ExplicitlyIndexed and `pd.Index.` - * `ExplicitlyIndexed` class and subclasses should provide `.oindex` and `.vindex` for indexing by `Variable.__getitem__.`: `oindex` and `vindex` were proposed in [NEP21](https://numpy.org/neps/nep-0021-advanced-indexing.html), but have not been implemented yet - * Delete the ExplicitIndexer objects (`BasicIndexer`, `VectorizedIndexer`, `OuterIndexer`) - * Remove explicit support for `pd.Index`. When provided with a `pd.Index` object, Variable will coerce to an array using `np.array(pd.Index)`. For Xarray's purposes, Xarray can use `as_variable` to explicitly wrap these in PandasIndexingAdapter and pass them to `Variable.__init__`. + - Will require moving the lazy indexing classes (subclasses of ExplicitlyIndexed) to be standards compliant containers.` + - an array-api compliant container that provides **array_namespace**` + - Support `.oindex` and `.vindex` for explicit indexing + - Potentially implement this by introducing a new compliant wrapper object? 
+ - Delete the `NON_NUMPY_SUPPORTED_ARRAY_TYPES` variable which special-cases ExplicitlyIndexed and `pd.Index.` + - `ExplicitlyIndexed` class and subclasses should provide `.oindex` and `.vindex` for indexing by `Variable.__getitem__.`: `oindex` and `vindex` were proposed in [NEP21](https://numpy.org/neps/nep-0021-advanced-indexing.html), but have not been implemented yet + - Delete the ExplicitIndexer objects (`BasicIndexer`, `VectorizedIndexer`, `OuterIndexer`) + - Remove explicit support for `pd.Index`. When provided with a `pd.Index` object, Variable will coerce to an array using `np.array(pd.Index)`. For Xarray's purposes, Xarray can use `as_variable` to explicitly wrap these in PandasIndexingAdapter and pass them to `Variable.__init__`. 3. Define a minimal variable interface that the rest of Xarray can use: - 1. `dims`: tuple of dimension names - 2. `data`: numpy/dask/duck arrays` - 3. `attrs``: dictionary of attributes + + 1. `dims`: tuple of dimension names + 2. `data`: numpy/dask/duck arrays` + 3. `attrs``: dictionary of attributes 4. Implement basic functions & methods for manipulating these objects. These methods will be a cleaned-up subset (for now) of functionality on xarray.Variable, with adaptations inspired by the [Python array API](https://data-apis.org/array-api/2022.12/API_specification/index.html). 5. Existing Variable structures - 1. Keep Variable object which subclasses the new structure that adds the `.encoding` attribute and potentially other methods needed for easy refactoring. - 2. IndexVariable will remain in xarray.core.variable and subclass the new named-array data structure pending future deletion. + 1. Keep Variable object which subclasses the new structure that adds the `.encoding` attribute and potentially other methods needed for easy refactoring. + 2. IndexVariable will remain in xarray.core.variable and subclass the new named-array data structure pending future deletion. 6. Docstrings and user-facing APIs will need to be updated to reflect the changed methods on Variable objects. Further implementation details are in Appendix: [Implementation Details](#appendix-implementation-details). @@ -91,6 +92,7 @@ Further implementation details are in Appendix: [Implementation Details](#append Today's implementation Xarray's lazy indexing functionality uses three private objects: `*Indexer`, `*IndexingAdapter`, `*Array`. These objects are needed for two reason: + 1. We need to translate from Xarray (NamedArray) indexing rules to bare array indexing rules. - `*Indexer` objects track the type of indexing - basic, orthogonal, vectorized 2. Not all arrays support the same indexing rules, so we need `*Indexing` adapters @@ -99,6 +101,7 @@ These objects are needed for two reason: 1. These again support different types of indexing, so we have `explicit_indexing_adapter` that understands `*Indexer` objects. ### Goals + 1. We would like to keep the lazy indexing array objects, and backend array objects within Xarray. Thus NamedArray cannot treat these objects specially. 2. A key source of confusion (and coupling) is that both lazy indexing arrays and indexing adapters, both handle Indexer objects, and both subclass `ExplicitlyIndexedNDArrayMixin`. These are however conceptually different. @@ -131,8 +134,8 @@ We have identified the following milestones for the completion of this project: 5. 
Refactor the existing Xarray codebase to rely on the newly created package (named-array): This will help to demonstrate the usefulness of the new package, and also provide an example for others who may want to use it. 6. Expand tests, add documentation, and write a blog post: expanding the test suite will help to ensure that the code is reliable and that changes do not introduce bugs. Adding documentation will make it easier for others to understand and use the project. 7. Finally, we will write a series of blog posts on [xarray.dev](https://xarray.dev/) to promote the project and attract more contributors. - * Toward the end of the process, write a few blog posts that demonstrate the use of the newly available data structure - * pick the same example applications used by other implementations/applications (e.g. Pytorch, sklearn, and Levanter) to show how it can work. + - Toward the end of the process, write a few blog posts that demonstrate the use of the newly available data structure + - pick the same example applications used by other implementations/applications (e.g. Pytorch, sklearn, and Levanter) to show how it can work. ## Related Work @@ -141,35 +144,35 @@ We have identified the following milestones for the completion of this project: 3. [Levanter — Legible, Scalable, Reproducible Foundation Models with JAX](https://crfm.stanford.edu/2023/06/16/levanter-1_0-release.html) 4. [google/xarray-tensorstore](https://github.com/google/xarray-tensorstore) 5. [State of Torch Named Tensors · Issue #60832 · pytorch/pytorch · GitHub](https://github.com/pytorch/pytorch/issues/60832) - * Incomplete support: Many primitive operations result in errors, making it difficult to use NamedTensors in Practice. Users often have to resort to removing the names from tensors to avoid these errors. - * Lack of active development: the development of the NamedTensor feature in PyTorch is not currently active due a lack of bandwidth for resolving ambiguities in the design. - * Usability issues: the current form of NamedTensor is not user-friendly and sometimes raises errors, making it difficult for users to incorporate NamedTensors into their workflows. + - Incomplete support: Many primitive operations result in errors, making it difficult to use NamedTensors in Practice. Users often have to resort to removing the names from tensors to avoid these errors. + - Lack of active development: the development of the NamedTensor feature in PyTorch is not currently active due a lack of bandwidth for resolving ambiguities in the design. + - Usability issues: the current form of NamedTensor is not user-friendly and sometimes raises errors, making it difficult for users to incorporate NamedTensors into their workflows. 6. [Scikit-learn Enhancement Proposals (SLEPs) 8, 12, 14](https://github.com/scikit-learn/enhancement_proposals/pull/18) - * Some of the key points and limitations discussed in these proposals are: - * Inconsistency in feature name handling: Scikit-learn currently lacks a consistent and comprehensive way to handle and propagate feature names through its pipelines and estimators ([SLEP 8](https://github.com/scikit-learn/enhancement_proposals/pull/18),[SLEP 12](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep012/proposal.html)). 
- * Memory intensive for large feature sets: storing and propagating feature names can be memory intensive, particularly in cases where the entire "dictionary" becomes the features, such as in NLP use cases ([SLEP 8](https://github.com/scikit-learn/enhancement_proposals/pull/18),[GitHub issue #35](https://github.com/scikit-learn/enhancement_proposals/issues/35)) - * Sparse matrices: sparse data structures present a challenge for feature name propagation. For instance, the sparse data structure functionality in Pandas 1.0 only supports converting directly to the coordinate format (COO), which can be an issue with transformers such as the OneHotEncoder.transform that has been optimized to construct a CSR matrix ([SLEP 14](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep014/proposal.html)) - * New Data structures: the introduction of new data structures, such as "InputArray" or "DataArray" could lead to more burden for third-party estimator maintainers and increase the learning curve for users. Xarray's "DataArray" is mentioned as a potential alternative, but the proposal mentions that the conversion from a Pandas dataframe to a Dataset is not lossless ([SLEP 12](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep012/proposal.html),[SLEP 14](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep014/proposal.html),[GitHub issue #35](https://github.com/scikit-learn/enhancement_proposals/issues/35)). - * Dependency on other libraries: solutions that involve using Xarray and/or Pandas to handle feature names come with the challenge of managing dependencies. While a soft dependency approach is suggested, this means users would be able to have/enable the feature only if they have the dependency installed. Xarra-lite's integration with other scientific Python libraries could potentially help with this issue ([GitHub issue #35](https://github.com/scikit-learn/enhancement_proposals/issues/35)). + - Some of the key points and limitations discussed in these proposals are: + - Inconsistency in feature name handling: Scikit-learn currently lacks a consistent and comprehensive way to handle and propagate feature names through its pipelines and estimators ([SLEP 8](https://github.com/scikit-learn/enhancement_proposals/pull/18),[SLEP 12](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep012/proposal.html)). + - Memory intensive for large feature sets: storing and propagating feature names can be memory intensive, particularly in cases where the entire "dictionary" becomes the features, such as in NLP use cases ([SLEP 8](https://github.com/scikit-learn/enhancement_proposals/pull/18),[GitHub issue #35](https://github.com/scikit-learn/enhancement_proposals/issues/35)) + - Sparse matrices: sparse data structures present a challenge for feature name propagation. For instance, the sparse data structure functionality in Pandas 1.0 only supports converting directly to the coordinate format (COO), which can be an issue with transformers such as the OneHotEncoder.transform that has been optimized to construct a CSR matrix ([SLEP 14](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep014/proposal.html)) + - New Data structures: the introduction of new data structures, such as "InputArray" or "DataArray" could lead to more burden for third-party estimator maintainers and increase the learning curve for users. 
Xarray's "DataArray" is mentioned as a potential alternative, but the proposal mentions that the conversion from a Pandas dataframe to a Dataset is not lossless ([SLEP 12](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep012/proposal.html),[SLEP 14](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep014/proposal.html),[GitHub issue #35](https://github.com/scikit-learn/enhancement_proposals/issues/35)). + - Dependency on other libraries: solutions that involve using Xarray and/or Pandas to handle feature names come with the challenge of managing dependencies. While a soft dependency approach is suggested, this means users would be able to have/enable the feature only if they have the dependency installed. Xarra-lite's integration with other scientific Python libraries could potentially help with this issue ([GitHub issue #35](https://github.com/scikit-learn/enhancement_proposals/issues/35)). ## References and Previous Discussion -* [[Proposal] Expose Variable without Pandas dependency · Issue #3981 · pydata/xarray · GitHub](https://github.com/pydata/xarray/issues/3981) -* [https://github.com/pydata/xarray/issues/3981#issuecomment-985051449](https://github.com/pydata/xarray/issues/3981#issuecomment-985051449) -* [Lazy indexing arrays as a stand-alone package · Issue #5081 · pydata/xarray · GitHub](https://github.com/pydata/xarray/issues/5081) +- [[Proposal] Expose Variable without Pandas dependency · Issue #3981 · pydata/xarray · GitHub](https://github.com/pydata/xarray/issues/3981) +- [https://github.com/pydata/xarray/issues/3981#issuecomment-985051449](https://github.com/pydata/xarray/issues/3981#issuecomment-985051449) +- [Lazy indexing arrays as a stand-alone package · Issue #5081 · pydata/xarray · GitHub](https://github.com/pydata/xarray/issues/5081) ### Appendix: Engagement with the Community We plan to publicize this document on : -* [x] `Xarray dev call` -* [ ] `Scientific Python discourse` -* [ ] `Xarray Github` -* [ ] `Twitter` -* [ ] `Respond to NamedTensor and Scikit-Learn issues?` -* [ ] `Pangeo Discourse` -* [ ] `Numpy, SciPy email lists?` -* [ ] `Xarray blog` +- [x] `Xarray dev call` +- [ ] `Scientific Python discourse` +- [ ] `Xarray Github` +- [ ] `Twitter` +- [ ] `Respond to NamedTensor and Scikit-Learn issues?` +- [ ] `Pangeo Discourse` +- [ ] `Numpy, SciPy email lists?` +- [ ] `Xarray blog` Additionally, We plan on writing a series of blog posts to effectively showcase the implementation and potential of the newly available functionality. To illustrate this, we will use the same example applications as other established libraries (such as Pytorch, sklearn), providing practical demonstrations of how these new data structures can be leveraged. @@ -180,12 +183,12 @@ Questions: 1. Document Xarray indexing rules 2. Document use of .oindex and .vindex protocols 3. Do we use `.mean` and `.nanmean` or `.mean(skipna=...)`? - * Default behavior in named-array should mirror NumPy / the array API standard, not pandas. - * nanmean is not (yet) in the [array API](https://github.com/pydata/xarray/pull/7424#issuecomment-1373979208). There are a handful of other key functions (e.g., median) that are are also missing. I think that should be OK, as long as what we support is a strict superset of the array API. + - Default behavior in named-array should mirror NumPy / the array API standard, not pandas. 
+ - nanmean is not (yet) in the [array API](https://github.com/pydata/xarray/pull/7424#issuecomment-1373979208). There are a handful of other key functions (e.g., median) that are are also missing. I think that should be OK, as long as what we support is a strict superset of the array API. 4. What methods need to be exposed on Variable? - * `Variable.concat` classmethod: create two functions, one as the equivalent of `np.stack` and other for `np.concat` - * `.rolling_window` and `.coarsen_reshape` ? - * `named-array.apply_ufunc`: used in astype, clip, quantile, isnull, notnull` + - `Variable.concat` classmethod: create two functions, one as the equivalent of `np.stack` and other for `np.concat` + - `.rolling_window` and `.coarsen_reshape` ? + - `named-array.apply_ufunc`: used in astype, clip, quantile, isnull, notnull` #### methods to be preserved from xarray.Variable @@ -324,6 +327,7 @@ Questions: #### Attributes to be renamed from xarray.Variable ```python + ``` #### Attributes to be removed from xarray.Variable @@ -339,7 +343,7 @@ Questions: ### Appendix: Implementation Details -* Merge in VariableArithmetic's parent classes: AbstractArray, NdimSizeLenMixin with the new data structure.. +- Merge in VariableArithmetic's parent classes: AbstractArray, NdimSizeLenMixin with the new data structure.. ```python class VariableArithmetic( @@ -356,16 +360,17 @@ class VariableArithmetic( ``` -* Move over `_typed_ops.VariableOpsMixin` -* Build a list of utility functions used elsewhere : Which of these should become public API? - * `broadcast_variables`: `dataset.py`, `dataarray.py`,`missing.py` - * This could be just called "broadcast" in named-array. - * `Variable._getitem_with_mask` : `alignment.py` - * keep this method/function as private and inside Xarray. -* The Variable constructor will need to be rewritten to no longer accept tuples, encodings, etc. These details should be handled at the Xarray data structure level. -* What happens to `duck_array_ops?` -* What about Variable.chunk and "chunk managers"? - * Could this functionality be left in Xarray proper for now? Alternative array types like JAX also have some notion of "chunks" for parallel arrays, but the details differ in a number of ways from the Dask/Cubed. - * Perhaps variable.chunk/load methods should become functions defined in xarray that convert Variable objects. This is easy so long as xarray can reach in and replace .data - -* Utility functions like `as_variable` should be moved out of `base_variable.py` so they can convert BaseVariable objects to/from DataArray or Dataset containing explicitly indexed arrays. +- Move over `_typed_ops.VariableOpsMixin` +- Build a list of utility functions used elsewhere : Which of these should become public API? + - `broadcast_variables`: `dataset.py`, `dataarray.py`,`missing.py` + - This could be just called "broadcast" in named-array. + - `Variable._getitem_with_mask` : `alignment.py` + - keep this method/function as private and inside Xarray. +- The Variable constructor will need to be rewritten to no longer accept tuples, encodings, etc. These details should be handled at the Xarray data structure level. +- What happens to `duck_array_ops?` +- What about Variable.chunk and "chunk managers"? + + - Could this functionality be left in Xarray proper for now? Alternative array types like JAX also have some notion of "chunks" for parallel arrays, but the details differ in a number of ways from the Dask/Cubed. 
+ - Perhaps variable.chunk/load methods should become functions defined in xarray that convert Variable objects. This is easy so long as xarray can reach in and replace .data + +- Utility functions like `as_variable` should be moved out of `base_variable.py` so they can convert BaseVariable objects to/from DataArray or Dataset containing explicitly indexed arrays. diff --git a/doc/_static/style.css b/doc/_static/style.css index bd0b13c32a5..3419f89dcfc 100644 --- a/doc/_static/style.css +++ b/doc/_static/style.css @@ -1,10 +1,10 @@ table.colwidths-given { - table-layout: fixed; - width: 100%; + table-layout: fixed; + width: 100%; } table.docutils td { - white-space: unset; - word-wrap: break-word; + white-space: unset; + word-wrap: break-word; } .bd-header-announcement { @@ -13,11 +13,14 @@ table.docutils td { /* Reduce left and right margins */ -.container, .container-lg, .container-md, .container-sm, .container-xl { +.container, +.container-lg, +.container-md, +.container-sm, +.container-xl { max-width: 1350px !important; } - /* Copied from https://github.com/bokeh/bokeh/blob/branch-2.4/sphinx/source/bokeh/static/custom.css */ @@ -215,7 +218,7 @@ dt:target { https://github.com/pandas-dev/pandas-sphinx-theme/issues/6 */ main *:target::before { display: block; - content: ''; + content: ""; height: var(--navbar-height); margin-top: calc(-1 * var(--navbar-height)); } diff --git a/doc/combined.json b/doc/combined.json index 345462e055f..f37a0aa72b8 100644 --- a/doc/combined.json +++ b/doc/combined.json @@ -1,30 +1,18 @@ { - "version": 1, - "refs": { - ".zgroup": "{\"zarr_format\":2}", - "foo/.zarray": "{\"chunks\":[4,5],\"compressor\":null,\"dtype\":\"`_, such as ``sum()``, ``mean()``, ``min()``, ``max()``, and others, have a skipna argument that controls whether missing values (represented by NaN) should be skipped (True) or treated as NaN (False) when performing the calculation. - By default, ``skipna`` is set to `True`, so missing values are ignored when computing the result. However, you can set ``skipna`` to `False` if you want missing values to be treated as NaN and included in the calculation. + By default, ``skipna`` is set to ``True``, so missing values are ignored when computing the result. However, you can set ``skipna`` to ``False`` if you want missing values to be treated as NaN and included in the calculation. - On `plotting `_ an xarray dataset or array that contains missing values, xarray will simply leave the missing values as blank spaces in the plot. diff --git a/doc/getting-started-guide/quick-overview.rst b/doc/getting-started-guide/quick-overview.rst index b88ba971e7f..8b908f0cd70 100644 --- a/doc/getting-started-guide/quick-overview.rst +++ b/doc/getting-started-guide/quick-overview.rst @@ -46,7 +46,7 @@ Here are the key properties for a ``DataArray``: Indexing -------- -Xarray supports four kinds of indexing. Since we have assigned coordinate labels to the x dimension we can use label-based indexing along that dimension just like pandas. The four examples below all yield the same result (the value at `x=10`) but at varying levels of convenience and intuitiveness. +Xarray supports four kinds of indexing. Since we have assigned coordinate labels to the x dimension we can use label-based indexing along that dimension just like pandas. The four examples below all yield the same result (the value at ``x=10``) but at varying levels of convenience and intuitiveness. .. 
ipython:: python diff --git a/doc/internals/how-to-add-new-backend.rst b/doc/internals/how-to-add-new-backend.rst index a979abe34e2..1b0c62b5c81 100644 --- a/doc/internals/how-to-add-new-backend.rst +++ b/doc/internals/how-to-add-new-backend.rst @@ -114,7 +114,7 @@ The input of ``open_dataset`` method are one argument - ``filename_or_obj``: can be any object but usually it is a string containing a path or an instance of :py:class:`pathlib.Path`. -- ``drop_variables``: can be `None` or an iterable containing the variable +- ``drop_variables``: can be ``None`` or an iterable containing the variable names to be dropped when reading the data. If it makes sense for your backend, your ``open_dataset`` method @@ -362,7 +362,7 @@ grouped in three types of indexes This implies that the implementation of the method ``__getitem__`` can be tricky. In order to simplify this task, Xarray provides a helper function, :py:func:`~xarray.core.indexing.explicit_indexing_adapter`, that transforms -all the input ``indexer`` types (`basic`, `outer`, `vectorized`) in a tuple +all the input indexer types (basic, outer, vectorized) in a tuple which is interpreted correctly by your backend. This is an example ``BackendArray`` subclass implementation: @@ -469,8 +469,8 @@ combining multiple input list with ``itertools.product()``: The ``OUTER_1VECTOR`` indexing shall supports number, slices and at most one list. The behaviour with the list shall be the same of ``OUTER`` indexing. -If you support more complex indexing as `explicit indexing` or -`numpy indexing`, you can have a look to the implementation of Zarr backend and Scipy backend, +If you support more complex indexing as explicit indexing or +numpy indexing, you can have a look to the implementation of Zarr backend and Scipy backend, currently available in :py:mod:`~xarray.backends` module. .. _RST preferred_chunks: diff --git a/doc/internals/how-to-create-custom-index.rst b/doc/internals/how-to-create-custom-index.rst index 90b3412c2cb..688a80193e4 100644 --- a/doc/internals/how-to-create-custom-index.rst +++ b/doc/internals/how-to-create-custom-index.rst @@ -11,7 +11,7 @@ How to create a custom index introduced in v2022.06.0 and is still incomplete. API is subject to change without deprecation notice. However we encourage you to experiment and report issues that arise. -Xarray's built-in support for label-based indexing (e.g. `ds.sel(latitude=40, method="nearest")`) and alignment operations +Xarray's built-in support for label-based indexing (e.g. ``ds.sel(latitude=40, method="nearest")``) and alignment operations relies on :py:class:`pandas.Index` objects. Pandas Indexes are powerful and suitable for many applications but also have some limitations: @@ -67,7 +67,7 @@ Optional requirements --------------------- Pretty much everything else is optional. Depending on the method, in the absence -of a (re)implementation, an index will either raise a `NotImplementedError` +of a (re)implementation, an index will either raise a ``NotImplementedError`` or won't do anything specific (just drop, pass or copy itself from/to the resulting Dataset or DataArray). diff --git a/doc/internals/internal-design.rst b/doc/internals/internal-design.rst index 93009b002c4..cb86424b405 100644 --- a/doc/internals/internal-design.rst +++ b/doc/internals/internal-design.rst @@ -166,7 +166,7 @@ something interesting: var._data -You're looking at one of xarray's internal `Lazy Indexing Classes`. 
These powerful classes are hidden from the user, +You're looking at one of xarray's internal Lazy Indexing Classes. These powerful classes are hidden from the user, but provide important functionality. Calling the public :py:attr:`~xarray.Variable.data` property loads the underlying array into memory. diff --git a/doc/user-guide/computation.rst b/doc/user-guide/computation.rst index 768911490e9..ff12902cf56 100644 --- a/doc/user-guide/computation.rst +++ b/doc/user-guide/computation.rst @@ -50,7 +50,7 @@ Use :py:func:`~xarray.where` to conditionally switch between values: xr.where(arr > 0, "positive", "negative") -Use `@` to compute the :py:func:`~xarray.dot` product: +Use ``@`` to compute the :py:func:`~xarray.dot` product: .. ipython:: python @@ -207,8 +207,8 @@ for more. Aggregation =========== -Aggregation methods have been updated to take a `dim` argument instead of -`axis`. This allows for very intuitive syntax for aggregation methods that are +Aggregation methods have been updated to take a ``dim`` argument instead of +``axis``. This allows for very intuitive syntax for aggregation methods that are applied along particular dimension(s): .. ipython:: python @@ -552,7 +552,7 @@ best fitting coefficients along a given dimension and for a given order, out = a.polyfit(dim="x", deg=1, full=True) out -The method outputs a dataset containing the coefficients (and more if `full=True`). +The method outputs a dataset containing the coefficients (and more if ``full=True``). The inverse operation is done with :py:meth:`~xarray.polyval`, .. ipython:: python diff --git a/doc/user-guide/dask.rst b/doc/user-guide/dask.rst index d7fb7cbd41e..3ad84133d0b 100644 --- a/doc/user-guide/dask.rst +++ b/doc/user-guide/dask.rst @@ -381,7 +381,7 @@ In this case, automatic inference has worked so let's check that the result is a mapped.identical(ds.time) Note that we use ``.load(scheduler="single-threaded")`` to execute the computation. -This executes the Dask graph in `serial` using a for loop, but allows for printing to screen and other +This executes the Dask graph in serial using a for loop, but allows for printing to screen and other debugging techniques. We can easily see that our function is receiving blocks of shape 10x180x180 and the returned result is identical to ``ds.time`` as expected. diff --git a/doc/user-guide/data-structures.rst b/doc/user-guide/data-structures.rst index 2e5c527a703..68e7f840c9a 100644 --- a/doc/user-guide/data-structures.rst +++ b/doc/user-guide/data-structures.rst @@ -293,7 +293,7 @@ pressure that were made under various conditions: * they were made at two separate locations, which we will represent using their latitude and longitude; and * they were made using instruments by three different manufacturers, which we - will refer to as `'manufac1'`, `'manufac2'`, and `'manufac3'`. + will refer to as ``'manufac1'``, ``'manufac2'``, and ``'manufac3'``. .. ipython:: python @@ -369,7 +369,7 @@ dictionary-like attributes: ds.coords Finally, like data arrays, datasets also store arbitrary metadata in the form -of `attributes`: +of ``attributes``: .. ipython:: python @@ -539,7 +539,7 @@ parent is known as a "root" node (represented by the ``parent`` attribute pointing to ``None``). Nodes can have multiple children, but as each child node has at most one parent, there can only ever be one root node in a given tree. 
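A short sketch of the single-root invariant just described, using the public `DataTree` API (illustrative variable names and data):

```python
import xarray as xr

dt = xr.DataTree.from_dict(
    {
        "/": xr.Dataset({"a": 0}),
        "/child-node": xr.Dataset({"b": 1}),
    }
)

assert dt["child-node"].parent is dt  # each child has exactly one parent ...
assert dt.parent is None  # ... and the tree has a single parentless root
```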
-The overall structure is technically a `connected acyclic undirected rooted graph`, +The overall structure is technically a connected acyclic undirected rooted graph, otherwise known as a `"Tree" `_. :py:class:`~xarray.DataTree` objects can also optionally have a ``name`` as well as ``attrs``, @@ -590,7 +590,7 @@ We can add a second node to this tree, assigning it to the parent node ``dt``: More idiomatically you can create a tree from a dictionary of ``Datasets`` and -`DataTrees`. In this case we add a new node under ``dt["child-node"]`` by +``DataTrees``. In this case we add a new node under ``dt["child-node"]`` by providing the explicit path under ``"child-node"`` as the dictionary key: .. ipython:: python @@ -608,7 +608,7 @@ We have created a tree with three nodes in it: -Consistency checks are enforced. For instance, if we try to create a `cycle`, +Consistency checks are enforced. For instance, if we try to create a cycle, where the root node is also a child of a descendant, the constructor will raise an (:py:class:`~xarray.InvalidTreeError`): diff --git a/doc/user-guide/groupby.rst b/doc/user-guide/groupby.rst index 069c7e0cb10..f7d76edadf8 100644 --- a/doc/user-guide/groupby.rst +++ b/doc/user-guide/groupby.rst @@ -100,7 +100,7 @@ The binning is implemented via :func:`pandas.cut`, whose documentation details h the bins are assigned. As seen in the example above, by default, the bins are labeled with strings using set notation to precisely identify the bin limits. To override this behavior, you can specify the bin labels explicitly. Here we -choose `float` labels which identify the bin centers: +choose ``float`` labels which identify the bin centers: .. ipython:: python @@ -213,7 +213,7 @@ may be desirable: da.groupby_bins("lon", [0, 45, 50]).sum() -These methods group by `lon` values. It is also possible to groupby each +These methods group by ``lon`` values. It is also possible to groupby each cell in a grid, regardless of value, by stacking multiple dimensions, applying your function, and then unstacking the result: @@ -222,7 +222,7 @@ applying your function, and then unstacking the result: stacked = da.stack(gridcell=["ny", "nx"]) stacked.groupby("gridcell").sum(...).unstack("gridcell") -Alternatively, you can groupby both `lat` and `lon` at the :ref:`same time `. +Alternatively, you can groupby both ``lat`` and ``lon`` at the :ref:`same time `. .. _groupby.groupers: diff --git a/doc/user-guide/hierarchical-data.rst b/doc/user-guide/hierarchical-data.rst index 2d22110afa4..5f3a341323f 100644 --- a/doc/user-guide/hierarchical-data.rst +++ b/doc/user-guide/hierarchical-data.rst @@ -412,7 +412,7 @@ We can use :py:meth:`xarray.DataTree.match` for this: We can also subset trees by the contents of the nodes. :py:meth:`xarray.DataTree.filter` retains only the nodes of a tree that meet a certain condition. For example, we could recreate the Simpson's family tree with the ages of each individual, then filter for only the adults: -First lets recreate the tree but with an `age` data variable in every node: +First lets recreate the tree but with an ``age`` data variable in every node: .. ipython:: python diff --git a/doc/user-guide/indexing.rst b/doc/user-guide/indexing.rst index 0f575160113..e1d9cbd9a2b 100644 --- a/doc/user-guide/indexing.rst +++ b/doc/user-guide/indexing.rst @@ -92,8 +92,8 @@ fast. 
To do label based indexing, use the :py:attr:`~xarray.DataArray.loc` attri da.loc["2000-01-01":"2000-01-02", "IA"] In this example, the selected is a subpart of the array -in the range '2000-01-01':'2000-01-02' along the first coordinate `time` -and with 'IA' value from the second coordinate `space`. +in the range '2000-01-01':'2000-01-02' along the first coordinate ``time`` +and with 'IA' value from the second coordinate ``space``. You can perform any of the `label indexing operations supported by pandas`__, including indexing with individual, slices and lists/arrays of labels, as well as @@ -323,7 +323,7 @@ Vectorized Indexing ------------------- Like numpy and pandas, xarray supports indexing many array elements at once in a -`vectorized` manner. +vectorized manner. If you only provide integers, slices, or unlabeled arrays (array without dimension names, such as ``np.ndarray``, ``list``, but not @@ -630,7 +630,7 @@ Xarray's ``reindex``, ``reindex_like`` and ``align`` impose a ``DataArray`` or ``Dataset`` onto a new set of coordinates corresponding to dimensions. The original values are subset to the index labels still found in the new labels, and values corresponding to new labels not found in the original object are -in-filled with `NaN`. +in-filled with ``NaN``. Xarray operations that combine multiple objects generally automatically align their arguments to share the same indexes. However, manual alignment can be @@ -659,7 +659,7 @@ dimension: foo.reindex_like(baz) The opposite operation asks us to reindex to a larger shape, so we fill in -the missing values with `NaN`: +the missing values with ``NaN``: .. ipython:: python diff --git a/doc/user-guide/interpolation.rst b/doc/user-guide/interpolation.rst index 3bc055ae78e..f1199ec7af3 100644 --- a/doc/user-guide/interpolation.rst +++ b/doc/user-guide/interpolation.rst @@ -17,7 +17,7 @@ to our :ref:`indexing `. .. note:: - ``interp`` requires `scipy` installed. + ``interp`` requires ``scipy`` installed. Scalar and 1-dimensional interpolation diff --git a/doc/user-guide/io.rst b/doc/user-guide/io.rst index 7175933dcbc..6f0be112024 100644 --- a/doc/user-guide/io.rst +++ b/doc/user-guide/io.rst @@ -19,14 +19,14 @@ format (recommended). np.random.seed(123456) -You can read different types of files in `xr.open_dataset` by specifying the engine to be used: +You can read different types of files in ``xr.open_dataset`` by specifying the engine to be used: .. code:: python xr.open_dataset("example.nc", engine="netcdf4") The "engine" provides a set of instructions that tells xarray how -to read the data and pack them into a `dataset` (or `dataarray`). +to read the data and pack them into a ``Dataset`` (or ``Dataarray``). These instructions are stored in an underlying "backend". Xarray comes with several backends that cover many common data formats. @@ -677,7 +677,7 @@ from being overwritten. To override this behavior and overwrite an existing store, add ``mode='w'`` when invoking :py:meth:`~Dataset.to_zarr`. DataArrays can also be saved to disk using the :py:meth:`DataArray.to_zarr` method, -and loaded from disk using the :py:func:`open_dataarray` function with `engine='zarr'`. +and loaded from disk using the :py:func:`open_dataarray` function with ``engine='zarr'``. 
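A minimal roundtrip illustrating this (hypothetical store path; requires the `zarr` package to be installed):

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(4), dims="x", name="foo")
da.to_zarr("foo.zarr", mode="w")  # converted to a Dataset internally on save
roundtripped = xr.open_dataarray("foo.zarr", engine="zarr")
```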
Similar to :py:meth:`DataArray.to_netcdf`, :py:meth:`DataArray.to_zarr` will convert the ``DataArray`` to a ``Dataset`` before saving, and then convert back when loading, ensuring that the ``DataArray`` that is loaded is always exactly @@ -910,7 +910,7 @@ supersede the default chunking heuristics in zarr. Importantly, this logic applies to every array in the zarr store individually, including coordinate arrays. Therefore, if a dataset contains one or more dask arrays, it may still be desirable to specify a chunk size for the coordinate arrays -(for example, with a chunk size of `-1` to include the full coordinate). +(for example, with a chunk size of ``-1`` to include the full coordinate). To specify chunks manually using the ``encoding`` argument, provide a nested dictionary with the structure ``{'variable_or_coord_name': {'chunks': chunks_tuple}}``. @@ -1026,7 +1026,7 @@ Instead of creating a new copy of the dataset in the Zarr spec/format or downloading the files locally, Kerchunk reads through the data archive and extracts the byte range and compression information of each chunk and saves as a ``reference``. These references are then saved as ``json`` files or ``parquet`` (more efficient) -for later use. You can view some of these stored in the `references` +for later use. You can view some of these stored in the ``references`` directory `here `_. @@ -1039,7 +1039,7 @@ directory `here `_. Reading these data archives becomes really easy with ``kerchunk`` in combination with ``xarray``, especially when these archives are large in size. A single combined reference can refer to thousands of the original data files present in these archives. -You can view the whole dataset with from this `combined reference` using the above packages. +You can view the whole dataset with from this combined reference using the above packages. The following example shows opening a combined references generated from a ``.hdf`` file stored locally. diff --git a/doc/user-guide/plotting.rst b/doc/user-guide/plotting.rst index 2bc049f1e2d..42cbd1eb5b0 100644 --- a/doc/user-guide/plotting.rst +++ b/doc/user-guide/plotting.rst @@ -283,7 +283,7 @@ It is required to explicitly specify either Thus, we could have made the previous plot by specifying ``hue='lat'`` instead of ``x='time'``. If required, the automatic legend can be turned off using ``add_legend=False``. Alternatively, -``hue`` can be passed directly to :py:func:`xarray.plot.line` as `air.isel(lon=10, lat=[19,21,22]).plot.line(hue='lat')`. +``hue`` can be passed directly to :py:func:`xarray.plot.line` as ``air.isel(lon=10, lat=[19,21,22]).plot.line(hue='lat')``. ======================== diff --git a/doc/user-guide/time-series.rst b/doc/user-guide/time-series.rst index 82172aa8998..8ec5dfea6c1 100644 --- a/doc/user-guide/time-series.rst +++ b/doc/user-guide/time-series.rst @@ -84,8 +84,8 @@ Datetime indexing Xarray borrows powerful indexing machinery from pandas (see :ref:`indexing`). This allows for several useful and succinct forms of indexing, particularly for -`datetime64` data. For example, we support indexing with strings for single -items and with the `slice` object: +``datetime64`` data. For example, we support indexing with strings for single +items and with the ``slice`` object: .. 
ipython:: python @@ -226,7 +226,7 @@ resampling group: ds.resample(time="6h").reduce(np.mean) You can also resample on the time dimension while applying reducing along other dimensions at the same time -by specifying the `dim` keyword argument +by specifying the ``dim`` keyword argument .. code-block:: python diff --git a/doc/user-guide/weather-climate.rst b/doc/user-guide/weather-climate.rst index 5014f5a8641..5cc7b2e5af9 100644 --- a/doc/user-guide/weather-climate.rst +++ b/doc/user-guide/weather-climate.rst @@ -49,7 +49,7 @@ variable with the attribute, rather than with the dimensions. CF-compliant coordinate variables --------------------------------- -`MetPy`_ adds a ``metpy`` accessor that allows accessing coordinates with appropriate CF metadata using generic names ``x``, ``y``, ``vertical`` and ``time``. There is also a `cartopy_crs` attribute that provides projection information, parsed from the appropriate CF metadata, as a `Cartopy`_ projection object. See the `metpy documentation`_ for more information. +`MetPy`_ adds a ``metpy`` accessor that allows accessing coordinates with appropriate CF metadata using generic names ``x``, ``y``, ``vertical`` and ``time``. There is also a ``cartopy_crs`` attribute that provides projection information, parsed from the appropriate CF metadata, as a `Cartopy`_ projection object. See the `metpy documentation`_ for more information. .. _`MetPy`: https://unidata.github.io/MetPy/dev/index.html .. _`metpy documentation`: https://unidata.github.io/MetPy/dev/tutorials/xarray_tutorial.html#coordinates @@ -137,7 +137,7 @@ Conversion between non-standard calendar and to/from pandas DatetimeIndexes is facilitated with the :py:meth:`xarray.Dataset.convert_calendar` method (also available as :py:meth:`xarray.DataArray.convert_calendar`). Here, like elsewhere in xarray, the ``use_cftime`` argument controls which datetime backend is used in the output. The default (``None``) is to -use `pandas` when possible, i.e. when the calendar is standard and dates are within 1678 and 2262. +use ``pandas`` when possible, i.e. when the calendar is standard and dates are within 1678 and 2262. .. ipython:: python @@ -148,7 +148,7 @@ use `pandas` when possible, i.e. when the calendar is standard and dates are wit The data is unchanged, only the timestamps are modified. Further options are implemented for the special ``"360_day"`` calendar and for handling missing dates. There is also :py:meth:`xarray.Dataset.interp_calendar` (and :py:meth:`xarray.DataArray.interp_calendar`) -for `interpolating` data between calendars. +for interpolating data between calendars. For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 06bf664b3b1..0db776b2e7f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,7 +35,7 @@ New Features - Optimize ffill, bfill with dask when limit is specified (:pull:`9771`). By `Joseph Nowak `_, and - `Patrick Hoefler `. + `Patrick Hoefler `_. - Allow wrapping ``np.ndarray`` subclasses, e.g. ``astropy.units.Quantity`` (:issue:`9704`, :pull:`9760`). By `Sam Levang `_ and `Tien Vo `_. - Optimize :py:meth:`DataArray.polyfit` and :py:meth:`Dataset.polyfit` with dask, when used with @@ -67,7 +67,7 @@ Bug fixes By `Pascal Bourgault `_. - Fix CF decoding of ``grid_mapping`` to allow all possible formats, add tests (:issue:`9761`, :pull:`9765`). By `Kai Mühlbauer `_. 
-- Add `User-Agent` to request-headers when retrieving tutorial data (:issue:`9774`, :pull:`9782`) +- Add ``User-Agent`` to request-headers when retrieving tutorial data (:issue:`9774`, :pull:`9782`) By `Kai Mühlbauer `_. Documentation @@ -87,7 +87,7 @@ Internal Changes v2024.10.0 (Oct 24th, 2024) --------------------------- -This release brings official support for `xarray.DataTree`, and compatibility with zarr-python v3! +This release brings official support for ``xarray.DataTree``, and compatibility with zarr-python v3! Aside from these two huge features, it also improves support for vectorised interpolation and fixes various bugs. @@ -107,7 +107,7 @@ New Features `Tom Nicholas `_, `Justus Magin `_, and `Alfonso Ladino `_. -- A migration guide for users of the prototype `xarray-contrib/datatree repository `_ has been added, and can be found in the `DATATREE_MIGRATION_GUIDE.md` file in the repository root. +- A migration guide for users of the prototype `xarray-contrib/datatree repository `_ has been added, and can be found in the ``DATATREE_MIGRATION_GUIDE.md`` file in the repository root. By `Tom Nicholas `_. - Support for Zarr-Python 3 (:issue:`95515`, :pull:`9552`). By `Tom Augspurger `_, @@ -121,7 +121,7 @@ New Features - Implement handling of complex numbers (netcdf4/h5netcdf) and enums (h5netcdf) (:issue:`9246`, :issue:`3297`, :pull:`9509`). By `Kai Mühlbauer `_. - Fix passing missing arguments when opening hdf5 and netCDF4 datatrees - (:issue:`9427`, :pull: `9428`). + (:issue:`9427`, :pull:`9428`). By `Alfonso Ladino `_. Bug fixes @@ -139,7 +139,8 @@ Bug fixes the non-missing times could in theory be encoded with integers (:issue:`9488`, :pull:`9497`). By `Spencer Clark `_. -- Fix a few bugs affecting groupby reductions with `flox`. (:issue:`8090`, :issue:`9398`, :issue:`9648`). +- Fix a few bugs affecting groupby reductions with ``flox``. (:issue:`8090`, :issue:`9398`, :issue:`9648`). +- Fix a few bugs affecting groupby reductions with ``flox``. (:issue:`8090`, :issue:`9398`). By `Deepak Cherian `_. - Fix the safe_chunks validation option on the to_zarr method (:issue:`5511`, :pull:`9559`). By `Joseph Nowak @@ -188,7 +189,7 @@ Performance Binary operations after grouping by multiple arrays are not supported yet. (:issue:`1056`, :issue:`9332`, :issue:`324`, :pull:`9372`). By `Deepak Cherian `_. -- Allow data variable specific ``constant_values`` in the dataset ``pad`` function (:pull:`9353``). +- Allow data variable specific ``constant_values`` in the dataset ``pad`` function (:pull:`9353`). By `Tiago Sanona `_. - Speed up grouping by avoiding deep-copy of non-dimension coordinates (:issue:`9426`, :pull:`9393`) By `Deepak Cherian `_. @@ -220,7 +221,7 @@ Bug fixes - Fix bug with rechunking to a frequency when some periods contain no data (:issue:`9360`). By `Deepak Cherian `_. -- Fix bug causing `DataTree.from_dict` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`). +- Fix bug causing ``DataTree.from_dict`` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`). By `Tom Nicholas `_. - Fix resampling error with monthly, quarterly, or yearly frequencies with cftime when the time bins straddle the date "0001-01-01". For example, this @@ -234,6 +235,9 @@ Bug fixes - Fix deprecation warning that was raised when calling ``np.array`` on an ``xr.DataArray`` in NumPy 2.0 (:issue:`9312`, :pull:`9393`) By `Andrew Scherer `_. +- Fix passing missing arguments when opening hdf5 and netCDF4 datatrees + (:issue:`9427`, :pull:`9428`). + By `Alfonso Ladino `_.
- Fix support for using ``pandas.DateOffset``, ``pandas.Timedelta``, and ``datetime.timedelta`` objects as ``resample`` frequencies (:issue:`9408`, :pull:`9413`). @@ -249,7 +253,7 @@ Internal Changes v2024.07.0 (Jul 30, 2024) ------------------------- -This release extends the API for groupby operations with various `grouper objects `, and includes improvements to the documentation and numerous bugfixes. +This release extends the API for groupby operations with various `grouper objects `_, and includes improvements to the documentation and numerous bugfixes. Thanks to the 22 contributors to this release: Alfonso Ladino, ChrisCleaner, David Hoese, Deepak Cherian, Dieter Werthmüller, Illviljan, Jessica Scheick, Joel Jaeschke, Justus Magin, K. Arthur Endsley, Kai Mühlbauer, Mark Harfouche, Martin Raspaud, Mathijs Verhaegh, Maximilian Roos, Michael Niklas, Michał Górny, Moritz Schreiber, Pontus Lurcock, Spencer Clark, Stephan Hoyer and Tom Nicholas @@ -261,7 +265,7 @@ New Features By `Joel Jaeschke `_. - Introduce new :py:class:`groupers.UniqueGrouper`, :py:class:`groupers.BinGrouper`, and :py:class:`groupers.TimeResampler` objects as a step towards supporting grouping by - multiple variables. See the `docs ` and the `grouper design doc + multiple variables. See the `docs `_ and the `grouper design doc `_ for more. (:issue:`6610`, :pull:`8840`). By `Deepak Cherian `_. @@ -291,9 +295,9 @@ Breaking changes using the ``loffset`` parameter. (:pull:`9233`) By `Deepak Cherian `_. - The ``squeeze`` kwarg to ``groupby`` is now ignored. This has been the source of some - quite confusing behaviour and has been deprecated since v2024.01.0. `groupby`` behavior is now + quite confusing behaviour and has been deprecated since v2024.01.0. ``groupby`` behavior is now always consistent with the existing ``.groupby(..., squeeze=False)`` behavior. No errors will - be raised if `squeeze=False`. (:pull:`9280`) + be raised if ``squeeze=False``. (:pull:`9280`) By `Deepak Cherian `_. @@ -315,7 +319,7 @@ Bug fixes by py:meth:`DataArray.convert_calendar` to be indexed by a time index in certain circumstances (:issue:`9138`, :pull:`9192`). By `Mark Harfouche `_ and `Spencer Clark `_. -- Fix static typing of tolerance arguments by allowing `str` type (:issue:`8892`, :pull:`9194`). +- Fix static typing of tolerance arguments by allowing ``str`` type (:issue:`8892`, :pull:`9194`). By `Michael Niklas `_. - Dark themes are now properly detected for ``html[data-theme=dark]``-tags (:pull:`9200`). By `Dieter Werthmüller `_. @@ -357,7 +361,7 @@ Performance By `Deepak Cherian `_. - Small optimizations to help reduce indexing speed of datasets (:pull:`9002`). By `Mark Harfouche `_. -- Performance improvement in `open_datatree` method for Zarr, netCDF4 and h5netcdf backends (:issue:`8994`, :pull:`9014`). +- Performance improvement in ``open_datatree`` method for Zarr, netCDF4 and h5netcdf backends (:issue:`8994`, :pull:`9014`). By `Alfonso Ladino `_. @@ -406,13 +410,13 @@ New Features for example, will retain the object. However, one cannot do operations that are not possible on the ``ExtensionArray`` then, such as broadcasting. (:issue:`5287`, :issue:`8463`, :pull:`8723`) By `Ilan Gold `_. -- :py:func:`testing.assert_allclose`/:py:func:`testing.assert_equal` now accept a new argument `check_dims="transpose"`, controlling whether a transposed array is considered equal. 
(:issue:`5733`, :pull:`8991`) +- :py:func:`testing.assert_allclose`/:py:func:`testing.assert_equal` now accept a new argument ``check_dims="transpose"``, controlling whether a transposed array is considered equal. (:issue:`5733`, :pull:`8991`) By `Ignacio Martinez Vazquez `_. - Added the option to avoid automatically creating 1D pandas indexes in :py:meth:`Dataset.expand_dims()`, by passing the new kwarg - `create_index_for_new_dim=False`. (:pull:`8960`) + ``create_index_for_new_dim=False``. (:pull:`8960`) By `Tom Nicholas `_. - Avoid automatically re-creating 1D pandas indexes in :py:func:`concat()`. Also added option to avoid creating 1D indexes for - new dimension coordinates by passing the new kwarg `create_index_for_new_dim=False`. (:issue:`8871`, :pull:`8872`) + new dimension coordinates by passing the new kwarg ``create_index_for_new_dim=False``. (:issue:`8871`, :pull:`8872`) By `Tom Nicholas `_. Breaking changes @@ -449,7 +453,7 @@ Internal Changes ~~~~~~~~~~~~~~~~ - Enforces failures on CI when tests raise warnings from within xarray (:pull:`8974`) By `Maximilian Roos `_ -- Migrates ``formatting_html`` functionality for ``DataTree`` into ``xarray/core`` (:pull: `8930`) +- Migrates ``formatting_html`` functionality for ``DataTree`` into ``xarray/core`` (:pull:`8930`) By `Eni Awowale `_, `Julia Signell `_ and `Tom Nicholas `_. - Migrates ``datatree_mapping`` functionality into ``xarray/core`` (:pull:`8948`) @@ -462,7 +466,7 @@ Internal Changes `Tom Nicholas `_. - Migrates ``ops.py`` functionality into ``xarray/core/datatree_ops.py`` (:pull:`8976`) By `Matt Savoie `_ and `Tom Nicholas `_. -- Migrates ``iterator`` functionality into ``xarray/core`` (:pull: `8879`) +- Migrates ``iterator`` functionality into ``xarray/core`` (:pull:`8879`) By `Owen Littlejohns `_, `Matt Savoie `_ and `Tom Nicholas `_. - ``transpose``, ``set_dims``, ``stack`` & ``unstack`` now use a ``dim`` kwarg @@ -497,7 +501,7 @@ New Features By `Anderson Banihirwe `_. - Add the ``.vindex`` property to Explicitly Indexed Arrays for vectorized indexing functionality. (:issue:`8238`, :pull:`8780`) By `Anderson Banihirwe `_. -- Expand use of ``.oindex`` and ``.vindex`` properties. (:pull: `8790`) +- Expand use of ``.oindex`` and ``.vindex`` properties. (:pull:`8790`) By `Anderson Banihirwe `_ and `Deepak Cherian `_. - Allow creating :py:class:`xr.Coordinates` objects with no indexes (:pull:`8711`) By `Benoit Bovy `_ and `Tom Nicholas @@ -521,11 +525,11 @@ Bug fixes when used in :py:meth:`DataArray.expand_dims` and ::py:meth:`Dataset.expand_dims` (:pull:`8781`). By `Spencer Clark `_. -- CF conform handling of `_FillValue`/`missing_value` and `dtype` in - `CFMaskCoder`/`CFScaleOffsetCoder` (:issue:`2304`, :issue:`5597`, +- CF conform handling of ``_FillValue``/``missing_value`` and ``dtype`` in + ``CFMaskCoder``/``CFScaleOffsetCoder`` (:issue:`2304`, :issue:`5597`, :issue:`7691`, :pull:`8713`, see also discussion in :pull:`7654`). By `Kai Mühlbauer `_. -- Do not cast `_FillValue`/`missing_value` in `CFMaskCoder` if `_Unsigned` is provided +- Do not cast ``_FillValue``/``missing_value`` in ``CFMaskCoder`` if ``_Unsigned`` is provided (:issue:`8844`, :pull:`8852`). - Adapt handling of copy keyword argument for numpy >= 2.0dev (:issue:`8844`, :pull:`8851`, :pull:`8865`). @@ -545,7 +549,7 @@ Internal Changes - Migrates ``treenode`` functionality into ``xarray/core`` (:pull:`8757`) By `Matt Savoie `_ and `Tom Nicholas `_. -- Migrates ``datatree`` functionality into ``xarray/core``. 
(:pull: `8789`) +- Migrates ``datatree`` functionality into ``xarray/core``. (:pull:`8789`) By `Owen Littlejohns `_, `Matt Savoie `_ and `Tom Nicholas `_. @@ -576,10 +580,10 @@ New Features :py:meth:`NamedArray.broadcast_to` (:pull:`8380`) By `Anderson Banihirwe `_. - Xarray now defers to `flox's heuristics `_ - to set the default `method` for groupby problems. This only applies to ``flox>=0.9``. + to set the default ``method`` for groupby problems. This only applies to ``flox>=0.9``. By `Deepak Cherian `_. -- All `quantile` methods (e.g. :py:meth:`DataArray.quantile`) now use `numbagg` - for the calculation of nanquantiles (i.e., `skipna=True`) if it is installed. +- All ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`) now use ``numbagg`` + for the calculation of nanquantiles (i.e., ``skipna=True``) if it is installed. This is currently limited to the linear interpolation method (``method='linear'``). (:issue:`7377`, :pull:`8684`) By `Marco Wolsza `_. @@ -593,7 +597,7 @@ Breaking changes Deprecations ~~~~~~~~~~~~ -- The `dt.weekday_name` parameter wasn't functional on modern pandas versions and has been +- The ``dt.weekday_name`` parameter wasn't functional on modern pandas versions and has been removed. (:issue:`8610`, :pull:`8664`) By `Sam Coleman `_. @@ -627,7 +631,7 @@ Bug fixes Documentation ~~~~~~~~~~~~~ -- Fix `variables` arg typo in `Dataset.sortby()` docstring (:issue:`8663`, :pull:`8670`) +- Fix ``variables`` arg typo in ``Dataset.sortby()`` docstring (:issue:`8663`, :pull:`8670`) By `Tom Vo `_. - Fixed documentation where the use of the deprecated pandas frequency string prevented the documentation from being built. (:pull:`8638`) @@ -649,7 +653,7 @@ Internal Changes By `Matt Savoie `_ and `Tom Nicholas `_. - Refactor :py:meth:`xarray.core.indexing.DaskIndexingAdapter.__getitem__` to remove an unnecessary - rewrite of the indexer key (:issue: `8377`, :pull:`8758`) + rewrite of the indexer key (:issue:`8377`, :pull:`8758`) By `Anderson Banihirwe `_. .. _whats-new.2024.01.1: @@ -688,7 +692,7 @@ v2024.01.0 (17 Jan, 2024) ------------------------- This release brings support for weights in correlation and covariance functions, -a new `DataArray.cumulative` aggregation, improvements to `xr.map_blocks`, +a new ``DataArray.cumulative`` aggregation, improvements to ``xr.map_blocks``, an update to our minimum dependencies, and various bugfixes. Thanks to our 17 contributors to this release: @@ -743,7 +747,7 @@ Breaking changes Deprecations ~~~~~~~~~~~~ -- The `squeeze` kwarg to GroupBy is now deprecated. (:issue:`2157`, :pull:`8507`) +- The ``squeeze`` kwarg to GroupBy is now deprecated. (:issue:`2157`, :pull:`8507`) By `Deepak Cherian `_. Bug fixes @@ -753,7 +757,7 @@ Bug fixes By `Michael Niklas `_. - Reverse index output of bottleneck's rolling move_argmax/move_argmin functions (:issue:`8541`, :pull:`8552`). By `Kai Mühlbauer `_. -- Vendor `SerializableLock` from dask and use as default lock for netcdf4 backends (:issue:`8442`, :pull:`8571`). +- Vendor ``SerializableLock`` from dask and use as default lock for netcdf4 backends (:issue:`8442`, :pull:`8571`). By `Kai Mühlbauer `_. - Add tests and fixes for empty :py:class:`CFTimeIndex`, including broken html repr (:issue:`7298`, :pull:`8600`). By `Mathias Hauser `_. @@ -836,7 +840,7 @@ Deprecations from a mapping of dimension names to lengths to a set of dimension names. This is to increase consistency with :py:meth:`DataArray.dims`. To access a mapping of dimension names to lengths please use :py:meth:`Dataset.sizes`.
- The same change also applies to `DatasetGroupBy.dims`. + The same change also applies to ``DatasetGroupBy.dims``. (:issue:`8496`, :pull:`8500`) By `Tom Nicholas `_. - :py:meth:`Dataset.drop` & :py:meth:`DataArray.drop` are now deprecated, since pending deprecation for @@ -975,8 +979,8 @@ Bug fixes if a coordinate with the same name already exists (:pull:`8433`, :issue:`7823`). By `András Gunyhó `_. - Fix for :py:meth:`DataArray.to_zarr` & :py:meth:`Dataset.to_zarr` to close - the created zarr store when passing a path with `.zip` extension (:pull:`8425`). - By `Carl Andersson _`. + the created zarr store when passing a path with ``.zip`` extension (:pull:`8425`). + By `Carl Andersson `_. Documentation ~~~~~~~~~~~~~ @@ -1094,7 +1098,7 @@ Internal Changes v2023.09.0 (Sep 26, 2023) ------------------------- -This release continues work on the new :py:class:`xarray.Coordinates` object, allows to provide `preferred_chunks` when +This release continues work on the new :py:class:`xarray.Coordinates` object, allows providing ``preferred_chunks`` when reading from netcdf files, enables :py:func:`xarray.apply_ufunc` to handle missing core dimensions and fixes several bugs. Thanks to the 24 contributors to this release: Alexander Fischer, Amrest Chinkamol, Benoit Bovy, Darsh Ranjan, Deepak Cherian, @@ -1112,9 +1116,9 @@ New Features different collections of coordinates prior to assigning them to a Dataset or DataArray (:pull:`8102`) at once. By `Benoît Bovy `_. -- Provide `preferred_chunks` for data read from netcdf files (:issue:`1440`, :pull:`7948`). +- Provide ``preferred_chunks`` for data read from netcdf files (:issue:`1440`, :pull:`7948`). By `Martin Raspaud `_. -- Added `on_missing_core_dims` to :py:meth:`apply_ufunc` to allow for copying or +- Added ``on_missing_core_dims`` to :py:meth:`apply_ufunc` to allow for copying or dropping a :py:class:`Dataset`'s variables with missing core dimensions (:pull:`8138`). By `Maximilian Roos `_. @@ -1197,8 +1201,8 @@ Internal Changes By `András Gunyhó `_. - Refactor of encoding and decoding times/timedeltas to preserve nanosecond resolution in arrays that contain missing values (:pull:`7827`). By `Kai Mühlbauer `_. -- Transition ``.rolling_exp`` functions to use `.apply_ufunc` internally rather - than `.reduce`, as the start of a broader effort to move non-reducing +- Transition ``.rolling_exp`` functions to use ``.apply_ufunc`` internally rather - than ``.reduce``, as the start of a broader effort to move non-reducing functions away from ``.reduce`` (:pull:`8114`). By `Maximilian Roos `_. - Test range of fill_value's in test_interpolate_pd_compat (:issue:`8146`, :pull:`8189`). @@ -1305,7 +1309,7 @@ Internal Changes - :py:func:`as_variable` now consistently includes the variable name in any exceptions raised. (:pull:`7995`). By `Peter Hill `_ - :py:func:`encode_dataset_coordinates` now sorts coordinates automatically assigned to - `coordinates` attributes during serialization (:issue:`8026`, :pull:`8034`). + ``coordinates`` attributes during serialization (:issue:`8026`, :pull:`8034`). By `Ian Carroll `_. .. _whats-new.2023.07.0: @@ -1318,7 +1322,7 @@ This release brings improvements to the documentation on wrapping numpy-like arr Deprecations ~~~~~~~~~~~~ -- `hue_style` is being deprecated for scatter plots. (:issue:`7907`, :pull:`7925`). +- ``hue_style`` is being deprecated for scatter plots. (:issue:`7907`, :pull:`7925`). By `Jimmy Westling `_.
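A minimal sketch of the ``on_missing_core_dims`` option mentioned above; the dataset and variable names are invented:

.. code-block:: python

    import numpy as np
    import xarray as xr

    # "temp" carries the core dimension "x"; the scalar "label" does not.
    ds = xr.Dataset({"temp": ("x", np.arange(4.0)), "label": ((), "station-a")})

    # Apply np.negative over "x" and copy variables that lack the core
    # dimension through unchanged, instead of raising an error.
    out = xr.apply_ufunc(
        np.negative,
        ds,
        input_core_dims=[["x"]],
        output_core_dims=[["x"]],
        on_missing_core_dims="copy",
    )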
Bug fixes @@ -1435,9 +1439,9 @@ New Features - Add support for lshift and rshift binary operators (``<<``, ``>>``) on :py:class:`xr.DataArray` of type :py:class:`int` (:issue:`7727`, :pull:`7741`). By `Alan Brammer `_. -- Keyword argument `data='array'` to both :py:meth:`xarray.Dataset.to_dict` and +- Keyword argument ``data='array'`` to both :py:meth:`xarray.Dataset.to_dict` and :py:meth:`xarray.DataArray.to_dict` will now return data as the underlying array type. - Python lists are returned for `data='list'` or `data=True`. Supplying `data=False` only returns the schema without data. + Python lists are returned for ``data='list'`` or ``data=True``. Supplying ``data=False`` only returns the schema without data. ``encoding=True`` also returns the encoding dictionary for the underlying variable. (:issue:`1599`, :pull:`7739`). By `James McCreight `_. @@ -1453,7 +1457,7 @@ Performance Bug fixes ~~~~~~~~~ -- Fix `as_compatible_data` for masked float arrays, now always creates a copy when mask is present (:issue:`2377`, :pull:`7788`). +- Fix ``as_compatible_data`` for masked float arrays, now always creates a copy when mask is present (:issue:`2377`, :pull:`7788`). By `Max Hollmann `_. - Fix groupby binary ops when grouped array is subset relative to other. (:issue:`7797`). By `Deepak Cherian `_. @@ -1556,7 +1560,7 @@ Bug fixes (:issue:`7420`, :pull:`7441`). By `Justus Magin `_ and `Spencer Clark `_. -- Various `dtype` related fixes needed to support `pandas>=2.0` (:pull:`7724`) +- Various ``dtype`` related fixes needed to support ``pandas>=2.0`` (:pull:`7724`) By `Justus Magin `_. - Preserve boolean dtype within encoding (:issue:`7652`, :pull:`7720`). By `Kai Mühlbauer `_ @@ -1685,8 +1689,8 @@ Breaking changes Deprecations ~~~~~~~~~~~~ -- Following pandas, the `closed` parameters of :py:func:`cftime_range` and - :py:func:`date_range` are deprecated in favor of the `inclusive` parameters, +- Following pandas, the ``closed`` parameters of :py:func:`cftime_range` and + :py:func:`date_range` are deprecated in favor of the ``inclusive`` parameters, and will be removed in a future version of xarray (:issue:`6985`, :pull:`7373`). By `Spencer Clark `_. @@ -1746,7 +1750,7 @@ Bug fixes Internal Changes ~~~~~~~~~~~~~~~~ -- Add the pre-commit hook `absolufy-imports` to convert relative xarray imports to +- Add the pre-commit hook ``absolufy-imports`` to convert relative xarray imports to absolute imports (:pull:`7204`, :pull:`7370`). By `Jimmy Westling `_. @@ -1762,7 +1766,7 @@ Mick, Mike Taves, Sam Levang, Spencer Clark, Tom Nicholas, Wei Ji, templiert New Features ~~~~~~~~~~~~ -- Enable using `offset` and `origin` arguments in :py:meth:`DataArray.resample` +- Enable using ``offset`` and ``origin`` arguments in :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` (:issue:`7266`, :pull:`7284`). By `Spencer Clark `_. - Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`). @@ -1898,7 +1902,7 @@ Internal Changes ~~~~~~~~~~~~~~~~ - Doctests fail on any warnings (:pull:`7166`) By `Maximilian Roos `_. -- Improve import time by lazy loading ``dask.distributed`` (:pull: `7172`). +- Improve import time by lazy loading ``dask.distributed`` (:pull:`7172`). - Explicitly specify ``longdouble=False`` in :py:func:`cftime.date2num` when encoding times to preserve existing behavior and prevent future errors when it is eventually set to ``True`` by default in cftime (:pull:`7171`). By @@ -2022,7 +2026,7 @@ Bug fixes By `Michael Niklas `_.
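The ``data`` keyword to ``to_dict`` described above can be exercised as in this small sketch:

.. code-block:: python

    import xarray as xr

    ds = xr.Dataset({"a": ("x", [1, 2, 3])})

    as_arrays = ds.to_dict(data="array")  # values as the underlying arrays
    as_lists = ds.to_dict(data="list")    # values as nested Python lists
    schema_only = ds.to_dict(data=False)  # structure and metadata, no data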
- Fix side effects on index coordinate metadata after aligning objects. (:issue:`6852`, :pull:`6857`) By `Benoît Bovy `_. -- Make FacetGrid.set_titles send kwargs correctly using `handle.update(kwargs)`. (:issue:`6839`, :pull:`6843`) +- Make FacetGrid.set_titles send kwargs correctly using ``handle.update(kwargs)``. (:issue:`6839`, :pull:`6843`) By `Oliver Lopez `_. - Fix bug where index variables would be changed inplace. (:issue:`6931`, :pull:`6938`) By `Michael Niklas `_. @@ -2033,7 +2037,7 @@ Bug fixes By `Fabian Hofmann `_. - Fix step plots with ``hue`` arg. (:pull:`6944`) By `András Gunyhó `_. -- Avoid use of random numbers in `test_weighted.test_weighted_operations_nonequal_coords`. (:issue:`6504`, :pull:`6961`) +- Avoid use of random numbers in ``test_weighted.test_weighted_operations_nonequal_coords``. (:issue:`6504`, :pull:`6961`) By `Luke Conibear `_. - Fix multiple regression issues with :py:meth:`Dataset.set_index` and :py:meth:`Dataset.reset_index`. (:pull:`6992`) @@ -2170,12 +2174,12 @@ West, Thomas Nicholas, Thomas Vogt, Tom White, Xianxiang Li Known Regressions ~~~~~~~~~~~~~~~~~ -- `reset_coords(drop=True)` does not create indexes (:issue:`6607`) +- ``reset_coords(drop=True)`` does not create indexes (:issue:`6607`) New Features ~~~~~~~~~~~~ -- The `zarr` backend is now able to read NCZarr. +- The ``zarr`` backend is now able to read NCZarr. By `Mattia Almansi `_. - Add a weighted ``quantile`` method to :py:class:`~core.weighted.DatasetWeighted` and :py:class:`~core.weighted.DataArrayWeighted` (:pull:`6059`). @@ -2196,7 +2200,7 @@ New Features By `Tom Nicholas `_. - Add :py:meth:`core.groupby.DatasetGroupBy.cumsum` and :py:meth:`core.groupby.DataArrayGroupBy.cumsum`. By `Vladislav Skripniuk `_ and `Deepak Cherian `_. (:pull:`3147`, :pull:`6525`, :issue:`3141`) -- Expose `inline_array` kwarg from `dask.array.from_array` in :py:func:`open_dataset`, :py:meth:`Dataset.chunk`, +- Expose ``inline_array`` kwarg from ``dask.array.from_array`` in :py:func:`open_dataset`, :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`) - Expose the ``inline_array`` kwarg from :py:func:`dask.array.from_array` in :py:func:`open_dataset`, :py:meth:`Dataset.chunk`, :py:meth:`DataArray.chunk`, and :py:meth:`Variable.chunk`. (:pull:`6471`) @@ -2250,7 +2254,7 @@ Breaking changes Bug fixes ~~~~~~~~~ -- :py:meth:`Dataset.to_zarr` now allows to write all attribute types supported by `zarr-python`. +- :py:meth:`Dataset.to_zarr` now allows writing all attribute types supported by ``zarr-python``. By `Mattia Almansi `_. - Set ``skipna=None`` for all ``quantile`` methods (e.g. :py:meth:`Dataset.quantile`) and ensure it skips missing values for float dtypes (consistent with other methods). This should @@ -2283,7 +2287,7 @@ Bug fixes By `Spencer Clark `_. - Dark themes are now properly detected in Furo-themed Sphinx documents (:issue:`6500`, :pull:`6501`). By `Kevin Paul `_. -- :py:meth:`Dataset.isel`, :py:meth:`DataArray.isel` with `drop=True` works as intended with scalar :py:class:`DataArray` indexers. +- :py:meth:`Dataset.isel`, :py:meth:`DataArray.isel` with ``drop=True`` works as intended with scalar :py:class:`DataArray` indexers. (:issue:`6554`, :pull:`6579`) By `Michael Niklas `_. - Fixed silent overflow issue when decoding times encoded with 32-bit and below @@ -2344,9 +2348,9 @@ New Features :py:meth:`CFTimeIndex.shift` if ``shift_freq`` is between ``Day`` and ``Microsecond``. (:issue:`6134`, :pull:`6135`). By `Aaron Spring `_.
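For the weighted ``quantile`` method added above, usage looks roughly like this (random data, purely illustrative):

.. code-block:: python

    import numpy as np
    import xarray as xr

    rng = np.random.default_rng(0)
    da = xr.DataArray(rng.random(100), dims="x")
    weights = xr.DataArray(rng.random(100), dims="x")

    # Weighted median along "x".
    median = da.weighted(weights).quantile(0.5, dim="x")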
-- Enable providing more keyword arguments to the `pydap` backend when reading +- Enable providing more keyword arguments to the ``pydap`` backend when reading OpenDAP datasets (:issue:`6274`). - By `Jonas Gliß `. + By `Jonas Gliß `_. - Allow :py:meth:`DataArray.drop_duplicates` to drop duplicates along multiple dimensions at once, and add :py:meth:`Dataset.drop_duplicates`. (:pull:`6307`) By `Tom Nicholas `_. @@ -2366,14 +2370,14 @@ Bug fixes ~~~~~~~~~ - Variables which are chunked using dask in larger (but aligned) chunks than the target zarr chunk size - can now be stored using `to_zarr()` (:pull:`6258`) By `Tobias Kölling `_. + can now be stored using ``to_zarr()`` (:pull:`6258`) By `Tobias Kölling `_. - Multi-file datasets containing encoded :py:class:`cftime.datetime` objects can be read in parallel again (:issue:`6226`, :pull:`6249`, :pull:`6305`). By `Martin Bergemann `_ and `Stan West `_. Documentation ~~~~~~~~~~~~~ - Delete files of datasets saved to disk while building the documentation and enable - building on Windows via `sphinx-build` (:pull:`6237`). + building on Windows via ``sphinx-build`` (:pull:`6237`). By `Stan West `_. @@ -2390,7 +2394,7 @@ This is a bugfix release to resolve (:issue:`6216`, :pull:`6207`). Bug fixes ~~~~~~~~~ -- Add `packaging` as a dependency to Xarray (:issue:`6216`, :pull:`6207`). +- Add ``packaging`` as a dependency to Xarray (:issue:`6216`, :pull:`6207`). By `Sebastian Weigand `_ and `Joe Hamman `_. @@ -2449,7 +2453,7 @@ Bug fixes By `Cindy Chiao `_. - No longer raise an error for an all-nan-but-one argument to - :py:meth:`DataArray.interpolate_na` when using `method='nearest'` (:issue:`5994`, :pull:`6144`). + :py:meth:`DataArray.interpolate_na` when using ``method='nearest'`` (:issue:`5994`, :pull:`6144`). By `Michael Delgado `_. - `dt.season `_ can now handle NaN and NaT. (:pull:`5876`). By `Pierre Loicq `_. @@ -2618,7 +2622,7 @@ Deprecations - Deprecate :py:func:`open_rasterio` (:issue:`4697`, :pull:`5808`). By `Alan Snow `_. -- Set the default argument for `roll_coords` to `False` for :py:meth:`DataArray.roll` +- Set the default argument for ``roll_coords`` to ``False`` for :py:meth:`DataArray.roll` and :py:meth:`Dataset.roll`. (:pull:`5653`) By `Tom Nicholas `_. - :py:meth:`xarray.open_mfdataset` will now error instead of warn when a value for ``concat_dim`` is @@ -2628,7 +2632,7 @@ Deprecations Bug fixes ~~~~~~~~~ -- Fix ZeroDivisionError from saving dask array with empty dimension (:issue: `5741`). +- Fix ZeroDivisionError from saving dask array with empty dimension (:issue:`5741`). By `Joseph K Aicher `_. - Fixed performance bug where ``cftime`` import attempted within various core operations if ``cftime`` not installed (:pull:`5640`). @@ -2641,13 +2645,13 @@ Bug fixes By `Jimmy Westling `_. - Numbers are properly formatted in a plot's title (:issue:`5788`, :pull:`5789`). By `Maxime Liquet `_. -- Faceted plots will no longer raise a `pint.UnitStrippedWarning` when a `pint.Quantity` array is plotted, +- Faceted plots will no longer raise a ``pint.UnitStrippedWarning`` when a ``pint.Quantity`` array is plotted, and will correctly display the units of the data in the colorbar (if there is one) (:pull:`5886`). By `Tom Nicholas `_. - With backends, check for path-like objects rather than ``pathlib.Path`` type, use ``os.fspath`` (:pull:`5879`). By `Mike Taves `_. -- ``open_mfdataset()`` now accepts a single ``pathlib.Path`` object (:issue: `5881`). +- ``open_mfdataset()`` now accepts a single ``pathlib.Path`` object (:issue:`5881`). 
By `Panos Mavrogiorgos `_. - Improved performance of :py:meth:`Dataset.unstack` (:pull:`5906`). By `Tom Augspurger `_. @@ -2674,12 +2678,12 @@ Internal Changes By `Maximilian Roos `_. - Improve the performance of reprs for large datasets or dataarrays. (:pull:`5661`) By `Jimmy Westling `_. -- Use isort's `float_to_top` config. (:pull:`5695`). +- Use isort's ``float_to_top`` config. (:pull:`5695`). By `Maximilian Roos `_. - Remove use of the deprecated ``kind`` argument in :py:meth:`pandas.Index.get_slice_bound` inside :py:class:`xarray.CFTimeIndex` tests (:pull:`5723`). By `Spencer Clark `_. -- Refactor `xarray.core.duck_array_ops` to no longer special-case dispatching to +- Refactor ``xarray.core.duck_array_ops`` to no longer special-case dispatching to dask versions of functions when acting on dask arrays, instead relying on numpy and dask's adherence to NEP-18 to dispatch automatically. (:pull:`5571`) By `Tom Nicholas `_. @@ -2757,8 +2761,8 @@ Breaking changes pre-existing array values. This is a safer default than the prior ``mode="a"``, and allows for higher performance writes (:pull:`5252`). By `Stephan Hoyer `_. -- The main parameter to :py:func:`combine_by_coords` is renamed to `data_objects` instead - of `datasets` so anyone calling this method using a named parameter will need to update +- The main parameter to :py:func:`combine_by_coords` is renamed to ``data_objects`` instead + of ``datasets`` so anyone calling this method using a named parameter will need to update the name accordingly (:issue:`3248`, :pull:`4696`). By `Augustus Ijams `_. @@ -2905,7 +2909,7 @@ New Features By `Justus Magin `_. - Add :py:meth:`Dataset.to_pandas` (:pull:`5247`) By `Giacomo Caria `_. -- Add :py:meth:`DataArray.plot.surface` which wraps matplotlib's `plot_surface` to make +- Add :py:meth:`DataArray.plot.surface` which wraps matplotlib's ``plot_surface`` to make surface plots (:issue:`2235`, :issue:`5084`, :pull:`5101`). By `John Omotani `_. - Allow passing multiple arrays to :py:meth:`Dataset.__setitem__` (:pull:`5216`). @@ -2975,7 +2979,7 @@ New Features :py:meth:`DataArray.clip` & :py:meth:`Dataset.clip`; these methods now use :py:func:`xarray.apply_ufunc`; (:pull:`5184`). By `Maximilian Roos `_. -- Disable the `cfgrib` backend if the `eccodes` library is not installed (:pull:`5083`). +- Disable the ``cfgrib`` backend if the ``eccodes`` library is not installed (:pull:`5083`). By `Baudouin Raoult `_. - Added :py:meth:`DataArray.curvefit` and :py:meth:`Dataset.curvefit` for general curve fitting applications. (:issue:`4300`, :pull:`4849`) By `Sam Levang `_. @@ -2989,11 +2993,11 @@ New Features - Significant speedups in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp`. (:issue:`4739`, :pull:`4740`). By `Deepak Cherian `_. -- Prevent passing `concat_dim` to :py:func:`xarray.open_mfdataset` when - `combine='by_coords'` is specified, which should never have been possible (as - :py:func:`xarray.combine_by_coords` has no `concat_dim` argument to pass to). +- Prevent passing ``concat_dim`` to :py:func:`xarray.open_mfdataset` when + ``combine='by_coords'`` is specified, which should never have been possible (as + :py:func:`xarray.combine_by_coords` has no ``concat_dim`` argument to pass to). Also removes unneeded internal reordering of datasets in - :py:func:`xarray.open_mfdataset` when `combine='by_coords'` is specified. + :py:func:`xarray.open_mfdataset` when ``combine='by_coords'`` is specified. Fixes (:issue:`5230`). By `Tom Nicholas `_.
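A sketch of the ``DataArray.curvefit`` entry above, assuming ``scipy`` is installed; the model function and names are invented:

.. code-block:: python

    import numpy as np
    import xarray as xr


    def decay(x, amplitude, tau):
        # Hypothetical model: exponential decay.
        return amplitude * np.exp(-x / tau)


    x = np.linspace(0, 10, 50)
    da = xr.DataArray(3.0 * np.exp(-x / 2.0), coords={"x": x}, dims="x")

    # Returns a Dataset holding the fitted coefficients (and covariances).
    fit = da.curvefit(coords="x", func=decay)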
- Implement ``__setitem__`` for ``xarray.core.indexing.DaskIndexingAdapter`` if @@ -3037,14 +3041,14 @@ Breaking changes Deprecations ~~~~~~~~~~~~ -- Warn when passing `concat_dim` to :py:func:`xarray.open_mfdataset` when - `combine='by_coords'` is specified, which should never have been possible (as - :py:func:`xarray.combine_by_coords` has no `concat_dim` argument to pass to). +- Warn when passing ``concat_dim`` to :py:func:`xarray.open_mfdataset` when + ``combine='by_coords'`` is specified, which should never have been possible (as + :py:func:`xarray.combine_by_coords` has no ``concat_dim`` argument to pass to). Also removes unneeded internal reordering of datasets in - :py:func:`xarray.open_mfdataset` when `combine='by_coords'` is specified. + :py:func:`xarray.open_mfdataset` when ``combine='by_coords'`` is specified. Fixes (:issue:`5230`), via (:pull:`5231`, :pull:`5255`). By `Tom Nicholas `_. -- The `lock` keyword argument to :py:func:`open_dataset` and :py:func:`open_dataarray` is now +- The ``lock`` keyword argument to :py:func:`open_dataset` and :py:func:`open_dataarray` is now a backend specific option. It will give a warning if passed to a backend that doesn't support it instead of being silently ignored. From the next version it will raise an error. This is part of the refactor to support external backends (:issue:`5073`). @@ -3056,18 +3060,18 @@ Bug fixes - Properly support :py:meth:`DataArray.ffill`, :py:meth:`DataArray.bfill`, :py:meth:`Dataset.ffill`, :py:meth:`Dataset.bfill` along chunked dimensions. (:issue:`2699`). By `Deepak Cherian `_. -- Fix 2d plot failure for certain combinations of dimensions when `x` is 1d and `y` is +- Fix 2d plot failure for certain combinations of dimensions when ``x`` is 1d and ``y`` is 2d (:issue:`5097`, :pull:`5099`). By `John Omotani `_. - Ensure standard calendar times encoded with large values (i.e. greater than approximately 292 years), can be decoded correctly without silently overflowing (:pull:`5050`). This was a regression in xarray 0.17.0. By `Zeb Nicholls `_. -- Added support for `numpy.bool_` attributes in roundtrips using `h5netcdf` engine with `invalid_netcdf=True` [which casts `bool`s to `numpy.bool_`] (:issue:`4981`, :pull:`4986`). +- Added support for ``numpy.bool_`` attributes in roundtrips using ``h5netcdf`` engine with ``invalid_netcdf=True`` [which casts ``bool`` s to ``numpy.bool_``] (:issue:`4981`, :pull:`4986`). By `Victor Negîrneac `_. - Don't allow passing ``axis`` to :py:meth:`Dataset.reduce` methods (:issue:`3510`, :pull:`4940`). By `Justus Magin `_. -- Decode values as signed if attribute `_Unsigned = "false"` (:issue:`4954`) +- Decode values as signed if attribute ``_Unsigned = "false"`` (:issue:`4954`) By `Tobias Kölling `_. - Keep coords attributes when interpolating when the indexer is not a Variable. (:issue:`4239`, :issue:`4839` :pull:`5031`) By `Jimmy Westling `_. @@ -3262,7 +3266,7 @@ Bug fixes :py:meth:`Dataset.to_zarr` (:issue:`4783`, :pull:`4795`). By `Julien Seguinot `_. - Raise DeprecationWarning when trying to typecast a tuple containing a :py:class:`DataArray`. - User now prompted to first call `.data` on it (:issue:`4483`). + User now prompted to first call ``.data`` on it (:issue:`4483`). By `Chun Ho Chow `_. - Ensure that :py:meth:`Dataset.interp` raises ``ValueError`` when interpolating outside coordinate range and ``bounds_error=True`` (:issue:`4854`, @@ -3498,7 +3502,7 @@ Breaking changes ~~~~~~~~~~~~~~~~ - :py:meth:`DataArray.astype` and :py:meth:`Dataset.astype` now preserve attributes. 
Keep the - old behavior by passing `keep_attrs=False` (:issue:`2049`, :pull:`4314`). + old behavior by passing ``keep_attrs=False`` (:issue:`2049`, :pull:`4314`). By `Dan Nowacki `_ and `Gabriel Joel Mitchell `_. New Features @@ -3555,7 +3559,7 @@ Bug fixes - fix the signature of the plot methods. (:pull:`4359`) By `Justus Magin `_. - Fix :py:func:`xarray.apply_ufunc` with ``vectorize=True`` and ``exclude_dims`` (:issue:`3890`). By `Mathias Hauser `_. -- Fix `KeyError` when doing linear interpolation to an nd `DataArray` +- Fix ``KeyError`` when doing linear interpolation to an nd ``DataArray`` that contains NaNs (:pull:`4233`). By `Jens Svensmark `_ - Fix incorrect legend labels for :py:meth:`Dataset.plot.scatter` (:issue:`4126`). @@ -3624,9 +3628,9 @@ Internal Changes v0.16.0 (2020-07-11) --------------------- -This release adds `xarray.cov` & `xarray.corr` for covariance & correlation -respectively; the `idxmax` & `idxmin` methods, the `polyfit` method & -`xarray.polyval` for fitting polynomials, as well as a number of documentation +This release adds ``xarray.cov`` & ``xarray.corr`` for covariance & correlation +respectively; the ``idxmax`` & ``idxmin`` methods, the ``polyfit`` method & +``xarray.polyval`` for fitting polynomials, as well as a number of documentation improvements, other features, and bug fixes. Many thanks to all 44 contributors who contributed to this release: @@ -3693,7 +3697,7 @@ New Features :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) By `John Omotani `_ -- `missing_dims` argument to :py:meth:`Dataset.isel`, +- ``missing_dims`` argument to :py:meth:`Dataset.isel`, :py:meth:`DataArray.isel` and :py:meth:`Variable.isel` to allow replacing the exception when a dimension passed to ``isel`` is not present with a warning, or just ignore the dimension. (:issue:`3866`, :pull:`3923`) @@ -3792,10 +3796,10 @@ Bug fixes in a notebook context. (:issue:`3972`, :pull:`3973`) By `Ian Castleden `_. - Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes, - and added `keep_attrs` argument. (:issue:`3968`) + and added ``keep_attrs`` argument. (:issue:`3968`) By `Tom Nicholas `_. - Fix bug in time parsing failing to fall back to cftime. This was causing time - variables with a time unit of `'msecs'` to fail to parse. (:pull:`3998`) + variables with a time unit of ``'msecs'`` to fail to parse. (:pull:`3998`) By `Ryan May `_. - Fix weighted mean when passing boolean weights (:issue:`4074`). By `Mathias Hauser `_. @@ -3886,13 +3890,13 @@ New Features ``cftime.datetime`` objects directly via a :py:class:`CFTimeIndex` or via the :py:class:`~core.accessor_dt.DatetimeAccessor`. By `Spencer Clark `_ -- Support new h5netcdf backend keyword `phony_dims` (available from h5netcdf +- Support new h5netcdf backend keyword ``phony_dims`` (available from h5netcdf v0.8.0 for :py:class:`~xarray.backends.H5NetCDFStore`. By `Kai Mühlbauer `_. - Add partial support for unit aware arrays with pint. (:pull:`3706`, :pull:`3611`) By `Justus Magin `_. - :py:meth:`Dataset.groupby` and :py:meth:`DataArray.groupby` now raise a - `TypeError` on multiple string arguments. Receiving multiple string arguments + ``TypeError`` on multiple string arguments. Receiving multiple string arguments often means a user is attempting to pass multiple dimensions as separate arguments and should instead pass a single list of dimensions. 
(:pull:`3802`) @@ -3911,7 +3915,7 @@ New Features :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_. -- Add a diff summary for `testing.assert_allclose`. (:issue:`3617`, :pull:`3847`) +- Add a diff summary for ``testing.assert_allclose``. (:issue:`3617`, :pull:`3847`) By `Justus Magin `_. Bug fixes @@ -3956,7 +3960,7 @@ Documentation ~~~~~~~~~~~~~ - Fix documentation of :py:class:`DataArray` removing the deprecated mention - that when omitted, `dims` are inferred from a `coords`-dict. (:pull:`3821`) + that when omitted, ``dims`` are inferred from a ``coords``-dict. (:pull:`3821`) By `Sander van Rijn `_. - Improve the :py:func:`where` docstring. By `Maximilian Roos `_ @@ -4478,7 +4482,7 @@ New functions/methods numpy-like library (important: read notes about ``NUMPY_EXPERIMENTAL_ARRAY_FUNCTION`` in the above link). Added explicit test coverage for `sparse `_. (:issue:`3117`, :issue:`3202`). - This requires `sparse>=0.8.0`. By `Nezar Abdennur `_ + This requires ``sparse>=0.8.0``. By `Nezar Abdennur `_ and `Guido Imperiale `_. - :py:meth:`~Dataset.from_dataframe` and :py:meth:`~DataArray.from_series` now @@ -4575,7 +4579,7 @@ Bug fixes - Fix regression introduced in v0.12.2 where ``copy(deep=True)`` would convert unicode indices to dtype=object (:issue:`3094`). By `Guido Imperiale `_. -- Improved error handling and documentation for `.expand_dims()` +- Improved error handling and documentation for ``.expand_dims()`` read-only view. - Fix tests for big-endian systems (:issue:`3125`). By `Graham Inggs `_. @@ -4585,7 +4589,7 @@ Bug fixes - Fix KeyError that arises when using .sel method with float values different from coords float type (:issue:`3137`). By `Hasan Ahmad `_. -- Fixed bug in ``combine_by_coords()`` causing a `ValueError` if the input had +- Fixed bug in ``combine_by_coords()`` causing a ``ValueError`` if the input had an unused dimension with coordinates which were not monotonic (:issue:`3150`). By `Tom Nicholas `_. - Fixed crash when applying ``distributed.Client.compute()`` to a DataArray @@ -4600,7 +4604,7 @@ Bug fixes - Plots in 2 dimensions (pcolormesh, contour) now allow specifying levels as a numpy array (:issue:`3284`). By `Mathias Hauser `_. - Fixed bug in :meth:`DataArray.quantile` failing to keep attributes when - `keep_attrs` was True (:issue:`3304`). By `David Huard `_. + ``keep_attrs`` was True (:issue:`3304`). By `David Huard `_. Documentation ~~~~~~~~~~~~~ @@ -4791,14 +4795,14 @@ Bug fixes By `Ian Castleden `_ - A deep copy deep-copies the coords (:issue:`1463`) By `Martin Pletcher `_. -- Increased support for `missing_value` (:issue:`2871`) +- Increased support for ``missing_value`` (:issue:`2871`) By `Deepak Cherian `_. -- Removed usages of `pytest.config`, which is deprecated (:issue:`2988`) +- Removed usages of ``pytest.config``, which is deprecated (:issue:`2988`) By `Maximilian Roos `_. - Fixed performance issues with cftime installed (:issue:`3000`) By `0x0L `_. -- Replace incorrect usages of `message` in pytest assertions - with `match` (:issue:`3011`) +- Replace incorrect usages of ``message`` in pytest assertions + with ``match`` (:issue:`3011`) By `Maximilian Roos `_. - Add explicit pytest markers, now required by pytest (:issue:`3032`). @@ -5102,7 +5106,7 @@ Breaking changes without dimension argument will change in the next release. Now we warn a FutureWarning. By `Keisuke Fujii `_.
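The multiple-quantile groupby support referenced above can be sketched like so:

.. code-block:: python

    import numpy as np
    import xarray as xr

    da = xr.DataArray(
        np.arange(12.0),
        dims="x",
        coords={"group": ("x", list("aabbcc") * 2)},
    )

    # Several quantiles per group at once; the result gains a "quantile" dim.
    q = da.groupby("group").quantile([0.25, 0.5, 0.75])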
- - The ``inplace`` kwarg of a number of `DataArray` and `Dataset` methods is being + - The ``inplace`` kwarg of a number of ``DataArray`` and ``Dataset`` methods is being deprecated and will be removed in the next release. By `Deepak Cherian `_. @@ -5142,12 +5146,12 @@ Enhancements ~~~~~~~~~~~~ - :py:meth:`xarray.DataArray.plot.line` can now accept multidimensional - coordinate variables as input. `hue` must be a dimension name in this case. + coordinate variables as input. ``hue`` must be a dimension name in this case. (:issue:`2407`) By `Deepak Cherian `_. - Added support for Python 3.7. (:issue:`2271`). By `Joe Hamman `_. -- Added support for plotting data with `pandas.Interval` coordinates, such as those +- Added support for plotting data with ``pandas.Interval`` coordinates, such as those created by :py:meth:`~xarray.DataArray.groupby_bins` By `Maximilian Maahn `_. - Added :py:meth:`~xarray.CFTimeIndex.shift` for shifting the values of a @@ -5175,7 +5179,7 @@ Enhancements - The preferred way to access tutorial data is now to load it lazily with :py:meth:`xarray.tutorial.open_dataset`. - :py:meth:`xarray.tutorial.load_dataset` calls `Dataset.load()` prior + :py:meth:`xarray.tutorial.load_dataset` calls ``Dataset.load()`` prior to returning (and is now deprecated). This was changed in order to facilitate using tutorial datasets with dask. By `Joe Hamman `_. @@ -5226,7 +5230,7 @@ Bug fixes By `Spencer Clark `_. - Chunked datasets can now roundtrip to Zarr storage continually - with `to_zarr` and ``open_zarr`` (:issue:`2300`). + with ``to_zarr`` and ``open_zarr`` (:issue:`2300`). By `Lily Wang `_. .. _whats-new.0.10.9: @@ -5243,7 +5247,7 @@ Announcements of note: for more details. - We have a new :doc:`roadmap` that outlines our future development plans. -- ``Dataset.apply`` now properly documents the way `func` is called. +- ``Dataset.apply`` now properly documents the way ``func`` is called. By `Matti Eskelinen `_. Enhancements @@ -5270,7 +5274,7 @@ Enhancements By `Deepak Cherian `_. (:issue:`2224`) - DataArray coordinates and Dataset coordinates and data variables are - now displayed as `a b ... y z` rather than `a b c d ...`. + now displayed as ``a b ... y z`` rather than ``a b c d ...``. (:issue:`1186`) By `Seth P `_. - A new CFTimeIndex-enabled :py:func:`cftime_range` function for use in @@ -5394,7 +5398,7 @@ Bug fixes By `Fabien Maussion `_. - Fixed warning raised in :py:meth:`~Dataset.to_netcdf` due to deprecation of - `effective_get` in dask (:issue:`2238`). + ``effective_get`` in dask (:issue:`2238`). By `Joe Hamman `_. .. _whats-new.0.10.7: @@ -5451,7 +5455,7 @@ Enhancements - :py:meth:`~DataArray.sel`, :py:meth:`~DataArray.isel` & :py:meth:`~DataArray.reindex`, (and their :py:class:`Dataset` counterparts) now support supplying a ``dict`` as a first argument, as an alternative to the existing approach - of supplying `kwargs`. This allows for more robust behavior + of supplying ``kwargs``. This allows for more robust behavior of dimension names which conflict with other keyword names, or are not strings. By `Maximilian Roos `_. @@ -5591,7 +5595,7 @@ Bug fixes - Fixed a bug in :py:meth:`~xarray.DataArray.rolling` with bottleneck. Also, fixed a bug in rolling an integer dask array. (:issue:`2113`) By `Keisuke Fujii `_. -- Fixed a bug where `keep_attrs=True` flag was neglected if +- Fixed a bug where ``keep_attrs=True`` flag was neglected if :py:func:`apply_ufunc` was used with :py:class:`Variable`. (:issue:`2114`) By `Keisuke Fujii `_. 
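The lazy tutorial-data access mentioned above amounts to the following; network access is required on first use, after which the file is cached:

.. code-block:: python

    import xarray as xr

    # Downloads and caches the sample dataset, then opens it lazily.
    ds = xr.tutorial.open_dataset("air_temperature")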
- When assigning a :py:class:`DataArray` to :py:class:`Dataset`, any conflicted @@ -5647,7 +5651,7 @@ Bug fixes - Fixed a bug in decode_cf_datetime where ``int32`` arrays weren't parsed correctly (:issue:`2002`). By `Fabien Maussion `_. -- When calling `xr.auto_combine()` or `xr.open_mfdataset()` with a `concat_dim`, +- When calling ``xr.auto_combine()`` or ``xr.open_mfdataset()`` with a ``concat_dim``, the resulting dataset will have that one-element dimension (it was silently dropped, previously) (:issue:`1988`). By `Ben Root `_. @@ -5725,7 +5729,7 @@ Bug fixes By `Keisuke Fujii `_. - Silenced irrelevant warnings issued by ``open_rasterio`` (:issue:`1964`). By `Stephan Hoyer `_. -- Fix kwarg `colors` clashing with auto-inferred `cmap` (:issue:`1461`) +- Fix kwarg ``colors`` clashing with auto-inferred ``cmap`` (:issue:`1461`) By `Deepak Cherian `_. - Fix :py:func:`~xarray.plot.imshow` error when passed an RGB array with size one in a spatial dimension. @@ -5745,7 +5749,7 @@ Documentation By `Joe Hamman `_. - Added apply_ufunc example to :ref:`/examples/weather-data.ipynb#Toy-weather-data` (:issue:`1844`). By `Liam Brannigan `_. -- New entry `Why don’t aggregations return Python scalars?` in the +- New entry ``Why don’t aggregations return Python scalars?`` in the :doc:`getting-started-guide/faq` (:issue:`1726`). By `0x0L `_. @@ -5888,7 +5892,7 @@ Bug fixes ``parse_coordinates`` kwarg has been added to :py:func:`~open_rasterio` (set to ``True`` per default). By `Fabien Maussion `_. -- The colors of discrete colormaps are now the same regardless if `seaborn` +- The colors of discrete colormaps are now the same regardless if ``seaborn`` is installed or not (:issue:`1896`). By `Fabien Maussion `_. - Fixed dtype promotion rules in :py:func:`where` and :py:func:`concat` to @@ -6923,11 +6927,11 @@ Enhancements page on :ref:`internals`. By `Stephan Hoyer `_. - Round trip boolean datatypes. Previously, writing boolean datatypes to netCDF - formats would raise an error since netCDF does not have a `bool` datatype. - This feature reads/writes a `dtype` attribute to boolean variables in netCDF + formats would raise an error since netCDF does not have a ``bool`` datatype. + This feature reads/writes a ``dtype`` attribute to boolean variables in netCDF files. By `Joe Hamman `_. -- 2D plotting methods now have two new keywords (`cbar_ax` and `cbar_kwargs`), +- 2D plotting methods now have two new keywords (``cbar_ax`` and ``cbar_kwargs``), allowing more control on the colorbar (:issue:`872`). By `Fabien Maussion `_. @@ -6958,7 +6962,7 @@ Bug fixes This fixes issue :issue:`665`. `Filipe Fernandes `_. -- Fix a bug where `xarray.ufuncs` that take two arguments would incorrectly +- Fix a bug where ``xarray.ufuncs`` that take two arguments would incorrectly use numpy functions instead of dask.array functions (:issue:`876`). By `Stephan Hoyer `_. @@ -6977,7 +6981,7 @@ Bug fixes - Fixed incorrect test for dask version (:issue:`891`). By `Stephan Hoyer `_. -- Fixed `dim` argument for `isel_points`/`sel_points` when a `pandas.Index` is +- Fixed ``dim`` argument for ``isel_points``/``sel_points`` when a ``pandas.Index`` is passed. By `Stephan Hoyer `_. - :py:func:`~xarray.plot.contour` now plots the correct number of contours @@ -7516,7 +7520,7 @@ Enhancements global thread lock by default for reading from netCDF files with dask. This avoids possible segmentation faults for reading from netCDF4 files when HDF5 is not configured properly for concurrent access (:issue:`444`).
-- Added support for serializing arrays of complex numbers with `engine='h5netcdf'`. +- Added support for serializing arrays of complex numbers with ``engine='h5netcdf'``. - The new ``xray.save_mfdataset`` function allows for saving multiple datasets to disk simultaneously. This is useful when processing large datasets with dask.array. For example, to save a dataset too big to fit into memory @@ -7898,7 +7902,7 @@ Breaking changes Previously, you would need to use something like ``counts.sel(**{'time.month': 2})``, which is much more awkward. - The ``season`` datetime shortcut now returns an array of string labels - such `'DJF'`: + such as ``'DJF'``: .. code-block:: ipython @@ -7982,7 +7986,7 @@ Bug fixes multi-dimensional variables (:issue:`315`). - Slicing with negative step sizes (:issue:`312`). - Invalid conversion of string arrays to numeric dtype (:issue:`305`). -- Fixed``repr()`` on dataset objects with non-standard dates (:issue:`347`). +- Fixed ``repr()`` on dataset objects with non-standard dates (:issue:`347`). Deprecations ~~~~~~~~~~~~ diff --git a/properties/README.md b/properties/README.md index 86c1d41d81d..09279bc8c73 100644 --- a/properties/README.md +++ b/properties/README.md @@ -11,8 +11,8 @@ without needing to `pip install hypothesis`. ## Hang on, "property-based" tests? Instead of making assertions about operations on a particular piece of -data, you use Hypothesis to describe a *kind* of data, then make assertions -that should hold for *any* example of this kind. +data, you use Hypothesis to describe a _kind_ of data, then make assertions +that should hold for _any_ example of this kind. For example: "given a 2d ndarray of dtype uint8 `arr`, `xr.DataArray(arr).plot.imshow()` never raises an exception". diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 11e6e20a9dc..b860d5f3025 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -253,7 +253,7 @@ class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): def get_duck_array(self, dtype: np.typing.DTypeLike = None): key = indexing.BasicIndexer((slice(None),) * self.ndim) - return self[key] # type: ignore [index] + return self[key] # type: ignore[index] class AbstractDataStore: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 52ce2463d51..1448145183f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1107,7 +1107,7 @@ def reset_coords( return self._replace(coords=dataset._variables) if self.name is None: raise ValueError( - "cannot reset_coords with drop=False on an unnamed DataArrray" + "cannot reset_coords with drop=False on an unnamed DataArray" ) dataset[self.name] = self.variable return dataset diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index f760a306cc3..fbaef9729e3 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1628,7 +1628,7 @@ def group_by_index( return index_coords def to_pandas_indexes(self) -> Indexes[pd.Index]: - """Returns an immutable proxy for Dataset or DataArrary pandas indexes. + """Returns an immutable proxy for Dataset or DataArray pandas indexes. Raises an error if this proxy contains indexes that cannot be coerced to pandas.Index objects.
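The imshow property quoted from ``properties/README.md`` above could be written as the following sketch, assuming ``hypothesis`` and ``matplotlib`` are installed; the test name is invented:

.. code-block:: python

    import hypothesis.extra.numpy as npst
    import numpy as np
    from hypothesis import given

    import xarray as xr


    @given(npst.arrays(dtype=np.uint8, shape=npst.array_shapes(min_dims=2, max_dims=2)))
    def test_imshow_never_raises(arr):
        # Constructing a DataArray from any 2d uint8 array and plotting it
        # should not raise an exception.
        xr.DataArray(arr).plot.imshow()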
diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css index b1cefeb2af9..05312c52707 100644 --- a/xarray/static/css/style.css +++ b/xarray/static/css/style.css @@ -13,14 +13,14 @@ --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee); } -html[theme=dark], -html[data-theme=dark], -body[data-theme=dark], +html[theme="dark"], +html[data-theme="dark"], +body[data-theme="dark"], body.vscode-dark { --xr-font-color0: rgba(255, 255, 255, 1); --xr-font-color2: rgba(255, 255, 255, 0.54); --xr-font-color3: rgba(255, 255, 255, 0.38); - --xr-border-color: #1F1F1F; + --xr-border-color: #1f1f1f; --xr-disabled-color: #515151; --xr-background-color: #111111; --xr-background-color-row-even: #111111; @@ -112,7 +112,7 @@ body.vscode-dark { .xr-section-summary-in + label:before { display: inline-block; - content: '►'; + content: "►"; font-size: 11px; width: 15px; text-align: center; @@ -123,7 +123,7 @@ body.vscode-dark { } .xr-section-summary-in:checked + label:before { - content: '▼'; + content: "▼"; } .xr-section-summary-in:checked + label > span { @@ -195,15 +195,15 @@ body.vscode-dark { } .xr-dim-list:before { - content: '('; + content: "("; } .xr-dim-list:after { - content: ')'; + content: ")"; } .xr-dim-list li:not(:last-child):after { - content: ','; + content: ","; padding-right: 5px; } diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c543333c61e..fd866cae5ee 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3314,7 +3314,7 @@ def test_append(self) -> None: with self.create_zarr_target() as store: if has_zarr_v3: - # TOOD: verify these + # TODO: verify these expected = { "set": 17, "get": 12, diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b5ecc9517d9..c8b438948de 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3801,7 +3801,7 @@ def test_to_dataset_retains_keys(self) -> None: array = DataArray([1, 2, 3], coords=[("x", dates)], attrs={"a": 1}) - # convert to dateset and back again + # convert to dataset and back again result = array.to_dataset("x").to_dataarray(dim="x") assert_equal(array, result) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index be82655515d..67d38aac0fe 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3206,7 +3206,7 @@ def test_rename_multiindex(self) -> None: with pytest.raises(ValueError, match=r"'b' conflicts"): original.rename({"a": "b"}) - def test_rename_perserve_attrs_encoding(self) -> None: + def test_rename_preserve_attrs_encoding(self) -> None: # test propagate attrs/encoding to new variable(s) created from Index object original = Dataset(coords={"x": ("x", [0, 1, 2])}) expected = Dataset(coords={"y": ("y", [0, 1, 2])}) diff --git a/xarray/tutorial.py b/xarray/tutorial.py index 9a5d52ed285..cfc6a5147d3 100644 --- a/xarray/tutorial.py +++ b/xarray/tutorial.py @@ -39,7 +39,7 @@ def _construct_cache_dir(path): return path -external_urls = {} # type: dict +external_urls: dict = {} file_formats = { "air_temperature": 3, "air_temperature_gradient": 4,