diff --git a/.github/workflows/ci-pre-commit-autoupdate.yaml b/.github/workflows/ci-pre-commit-autoupdate.yaml
deleted file mode 100644
index b10a541197e..00000000000
--- a/.github/workflows/ci-pre-commit-autoupdate.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-name: "pre-commit autoupdate CI"
-
-on:
-  schedule:
-    - cron: "0 0 * * 0"  # every Sunday at 00:00 UTC
-  workflow_dispatch:
-
-
-jobs:
-  autoupdate:
-    name: 'pre-commit autoupdate'
-    runs-on: ubuntu-latest
-    if: github.repository == 'pydata/xarray'
-    steps:
-      - name: checkout
-        uses: actions/checkout@v2
-      - name: Cache pip and pre-commit
-        uses: actions/cache@v2
-        with:
-          path: |
-            ~/.cache/pre-commit
-            ~/.cache/pip
-          key: ${{ runner.os }}-pre-commit-autoupdate
-      - name: setup python
-        uses: actions/setup-python@v2
-      - name: upgrade pip
-        run: python -m pip install --upgrade pip
-      - name: install dependencies
-        run: python -m pip install --upgrade pre-commit pyyaml packaging
-      - name: version info
-        run: python -m pip list
-      - name: autoupdate
-        uses: technote-space/create-pr-action@837dbe469b39f08d416889369a52e2a993625c84
-        with:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          EXECUTE_COMMANDS: |
-            python -m pre_commit autoupdate
-            python -m pre_commit run --all-files
-          COMMIT_MESSAGE: 'pre-commit: autoupdate hook versions'
-          COMMIT_NAME: 'github-actions[bot]'
-          COMMIT_EMAIL: 'github-actions[bot]@users.noreply.github.com'
-          PR_TITLE: 'pre-commit: autoupdate hook versions'
-          PR_BRANCH_PREFIX: 'pre-commit/'
-          PR_BRANCH_NAME: 'autoupdate-${PR_ID}'
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index e774803dda7..82e21a4f46c 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -108,19 +108,12 @@ jobs:
           name: codecov-umbrella
           fail_ci_if_error: false
 
-  publish-test-results:
-    needs: test
+  event_file:
+    name: "Event File"
     runs-on: ubuntu-latest
-    # the build-and-test job might be skipped, we don't need to run this job then
-    if: success() || failure()
-
     steps:
-      - name: Download Artifacts
-        uses: actions/download-artifact@v2
-        with:
-          path: test-results
-
-      - name: Publish Unit Test Results
-        uses: EnricoMi/publish-unit-test-result-action@v1
+      - name: Upload
+        uses: actions/upload-artifact@v2
         with:
-          files: test-results/**/*.xml
+          name: Event File
+          path: ${{ github.event_path }}
diff --git a/.github/workflows/publish-test-results.yaml b/.github/workflows/publish-test-results.yaml
index 485383b31b4..a2e02c28f5a 100644
--- a/.github/workflows/publish-test-results.yaml
+++ b/.github/workflows/publish-test-results.yaml
@@ -1,4 +1,4 @@
-# Copied from https://github.com/EnricoMi/publish-unit-test-result-action/blob/v1.18/README.md#support-fork-repositories-and-dependabot-branches
+# Copied from https://github.com/EnricoMi/publish-unit-test-result-action/blob/v1.23/README.md#support-fork-repositories-and-dependabot-branches
 
 name: Publish test results
 
@@ -12,11 +12,7 @@ jobs:
   publish-test-results:
     name: Publish test results
    runs-on: ubuntu-latest
-    if: >
-      github.event.workflow_run.conclusion != 'skipped' && (
-        github.event.sender.login == 'dependabot[bot]' ||
-        github.event.workflow_run.head_repository.full_name != github.repository
-      )
+    if: github.event.workflow_run.conclusion != 'skipped'
 
     steps:
       - name: Download and extract artifacts
@@ -26,13 +22,10 @@ jobs:
           mkdir artifacts && cd artifacts
 
           artifacts_url=${{ github.event.workflow_run.artifacts_url }}
-          artifacts=$(gh api $artifacts_url -q '.artifacts[] | {name: .name, url: .archive_download_url}')
-
-          IFS=$'\n'
-          for artifact in $artifacts
+          gh api "$artifacts_url" -q '.artifacts[] | [.name, .archive_download_url] | @tsv' | while read artifact
           do
-            name=$(jq -r .name <<<$artifact)
-            url=$(jq -r .url <<<$artifact)
+            IFS=$'\t' read name url <<< "$artifact"
             gh api $url > "$name.zip"
             unzip -d "$name" "$name.zip"
           done
@@ -41,4 +34,7 @@ jobs:
         uses: EnricoMi/publish-unit-test-result-action@v1
         with:
           commit: ${{ github.event.workflow_run.head_sha }}
+          event_file: artifacts/Event File/event.json
+          event_name: ${{ github.event.workflow_run.event }}
           files: "artifacts/**/*.xml"
+          comment_mode: off
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8c9b61a7364..6db093b2dbb 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,12 +8,12 @@ repos:
       - id: check-yaml
   # isort should run before black as black sometimes tweaks the isort output
   - repo: https://github.com/PyCQA/isort
-    rev: 5.9.3
+    rev: 5.10.1
     hooks:
       - id: isort
   # https://github.com/python/black#version-control-integration
   - repo: https://github.com/psf/black
-    rev: 21.9b0
+    rev: 21.10b0
     hooks:
       - id: black
       - id: black-jupyter
@@ -22,8 +22,8 @@ repos:
     hooks:
       - id: blackdoc
        exclude: "generate_reductions.py"
-  - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.2
+  - repo: https://github.com/PyCQA/flake8
+    rev: 4.0.1
     hooks:
       - id: flake8
   # - repo: https://github.com/Carreau/velin
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 00000000000..fcc04a6f3e4
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,96 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+authors:
+- family-names: "Hoyer"
+  given-names: "Stephan"
+  orcid: "https://orcid.org/0000-0002-5207-0380"
+- family-names: "Roos"
+  given-names: "Maximilian"
+- family-names: "Joseph"
+  given-names: "Hamman"
+  orcid: "https://orcid.org/0000-0001-7479-8439"
+- family-names: "Magin"
+  given-names: "Justus"
+- family-names: "Cherian"
+  given-names: "Deepak"
+  orcid: "https://orcid.org/0000-0002-6861-8734"
+- family-names: "Fitzgerald"
+  given-names: "Clark"
+  orcid: "https://orcid.org/0000-0003-3446-6389"
+- family-names: "Hauser"
+  given-names: "Mathias"
+  orcid: "https://orcid.org/0000-0002-0057-4878"
+- family-names: "Fujii"
+  given-names: "Keisuke"
+  orcid: "https://orcid.org/0000-0003-0390-9984"
+- family-names: "Maussion"
+  given-names: "Fabien"
+  orcid: "https://orcid.org/0000-0002-3211-506X"
+- family-names: "Imperiale"
+  given-names: "Guido"
+- family-names: "Clark"
+  given-names: "Spencer"
+  orcid: "https://orcid.org/0000-0001-5595-7895"
+- family-names: "Kleeman"
+  given-names: "Alex"
+- family-names: "Nicholas"
+  given-names: "Thomas"
+  orcid: "https://orcid.org/0000-0002-2176-0530"
+- family-names: "Kluyver"
+  given-names: "Thomas"
+  orcid: "https://orcid.org/0000-0003-4020-6364"
+- family-names: "Westling"
+  given-names: "Jimmy"
+- family-names: "Munroe"
+  given-names: "James"
+  orcid: "https://orcid.org/0000-0001-9098-6309"
+- family-names: "Amici"
+  given-names: "Alessandro"
+  orcid: "https://orcid.org/0000-0002-1778-4505"
+- family-names: "Barghini"
+  given-names: "Aureliana"
+- family-names: "Banihirwe"
+  given-names: "Anderson"
+  orcid: "https://orcid.org/0000-0001-6583-571X"
+- family-names: "Bell"
+  given-names: "Ray"
+  orcid: "https://orcid.org/0000-0003-2623-0587"
+- family-names: "Hatfield-Dodds"
+  given-names: "Zac"
+  orcid: "https://orcid.org/0000-0002-8646-8362"
+- family-names: "Abernathey"
+  given-names: "Ryan"
+  orcid: "https://orcid.org/0000-0001-5999-4917"
+- family-names: "Bovy"
+  given-names: "Benoît"
+- family-names: "Omotani"
+  given-names: "John"
+  orcid: "https://orcid.org/0000-0002-3156-8227"
+- family-names: "Mühlbauer"
+  given-names: "Kai"
+  orcid: "https://orcid.org/0000-0001-6599-1034"
+- family-names: "Roszko"
+  given-names: "Maximilian K."
+  orcid: "https://orcid.org/0000-0001-9424-2526"
+- family-names: "Wolfram"
+  given-names: "Phillip J."
+  orcid: "https://orcid.org/0000-0001-5971-4241"
+title: "xarray"
+doi: 10.5281/zenodo.598201
+url: "https://github.com/pydata/xarray"
+preferred-citation:
+  type: article
+  authors:
+  - family-names: "Hoyer"
+    given-names: "Stephan"
+    orcid: "https://orcid.org/0000-0002-5207-0380"
+  - family-names: "Joseph"
+    given-names: "Hamman"
+    orcid: "https://orcid.org/0000-0001-7479-8439"
+  doi: "10.5334/jors.148"
+  journal: "Journal of Open Research Software"
+  month: 4
+  title: "xarray: N-D labeled Arrays and Datasets in Python"
+  volume: 5
+  issue: 1
+  year: 2017
diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json
index 83a2aa9f010..dafa0fc47e1 100644
--- a/asv_bench/asv.conf.json
+++ b/asv_bench/asv.conf.json
@@ -62,7 +62,7 @@
         "pandas": [""],
         "netcdf4": [""],
         "scipy": [""],
-        "bottleneck": ["", null],
+        "bottleneck": [""],
         "dask": [""],
         "distributed": [""],
     },
diff --git a/asv_bench/benchmarks/dataarray_missing.py b/asv_bench/benchmarks/dataarray_missing.py
index f89fe7f8eb9..d786c04e852 100644
--- a/asv_bench/benchmarks/dataarray_missing.py
+++ b/asv_bench/benchmarks/dataarray_missing.py
@@ -16,13 +16,6 @@ def make_bench_data(shape, frac_nan, chunks):
     return da
 
 
-def requires_bottleneck():
-    try:
-        import bottleneck  # noqa: F401
-    except ImportError:
-        raise NotImplementedError()
-
-
 class DataArrayMissingInterpolateNA:
     def setup(self, shape, chunks, limit):
         if chunks is not None:
@@ -46,7 +39,6 @@ def time_interpolate_na(self, shape, chunks, limit):
 
 class DataArrayMissingBottleneck:
     def setup(self, shape, chunks, limit):
-        requires_bottleneck()
         if chunks is not None:
             requires_dask()
         self.da = make_bench_data(shape, 0.1, chunks)
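For context on the two benchmark changes above: asv's convention is that a benchmark is skipped when its `setup` raises `NotImplementedError`, which is all the deleted `requires_bottleneck` helper did; with `bottleneck` now unconditionally listed in `asv.conf.json`, the guard became dead code. A minimal sketch of the skip pattern, mirroring the `requires_dask` helper these files still import:

```python
def requires_dask():
    """Skip a benchmark when dask is not installed.

    asv interprets NotImplementedError raised inside setup() as
    "skip this benchmark", so optional dependencies can gate
    individual benchmark classes.
    """
    try:
        import dask  # noqa: F401
    except ImportError:
        raise NotImplementedError()
```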
"$artifacts_url" -q '.artifacts[] | [.name, .archive_download_url] | @tsv' | while read artifact do - name=$(jq -r .name <<<$artifact) - url=$(jq -r .url <<<$artifact) + IFS=$'\t' read name url <<< "$artifact" gh api $url > "$name.zip" unzip -d "$name" "$name.zip" done @@ -41,4 +34,7 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v1 with: commit: ${{ github.event.workflow_run.head_sha }} + event_file: artifacts/Event File/event.json + event_name: ${{ github.event.workflow_run.event }} files: "artifacts/**/*.xml" + comment_mode: off diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8c9b61a7364..6db093b2dbb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,12 +8,12 @@ repos: - id: check-yaml # isort should run before black as black sometimes tweaks the isort output - repo: https://github.com/PyCQA/isort - rev: 5.9.3 + rev: 5.10.1 hooks: - id: isort # https://github.com/python/black#version-control-integration - repo: https://github.com/psf/black - rev: 21.9b0 + rev: 21.10b0 hooks: - id: black - id: black-jupyter @@ -22,8 +22,8 @@ repos: hooks: - id: blackdoc exclude: "generate_reductions.py" - - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.2 + - repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 hooks: - id: flake8 # - repo: https://github.com/Carreau/velin diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000000..fcc04a6f3e4 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,96 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- family-names: "Hoyer" + given-names: "Stephan" + orcid: "https://orcid.org/0000-0002-5207-0380" +- family-names: "Roos" + given-names: "Maximilian" +- family-names: "Joseph" + given-names: "Hamman" + orcid: "https://orcid.org/0000-0001-7479-8439" +- family-names: "Magin" + given-names: "Justus" +- family-names: "Cherian" + given-names: "Deepak" + orcid: "https://orcid.org/0000-0002-6861-8734" +- family-names: "Fitzgerald" + given-names: "Clark" + orcid: "https://orcid.org/0000-0003-3446-6389" +- family-names: "Hauser" + given-names: "Mathias" + orcid: "https://orcid.org/0000-0002-0057-4878" +- family-names: "Fujii" + given-names: "Keisuke" + orcid: "https://orcid.org/0000-0003-0390-9984" +- family-names: "Maussion" + given-names: "Fabien" + orcid: "https://orcid.org/0000-0002-3211-506X" +- family-names: "Imperiale" + given-names: "Guido" +- family-names: "Clark" + given-names: "Spencer" + orcid: "https://orcid.org/0000-0001-5595-7895" +- family-names: "Kleeman" + given-names: "Alex" +- family-names: "Nicholas" + given-names: "Thomas" + orcid: "https://orcid.org/0000-0002-2176-0530" +- family-names: "Kluyver" + given-names: "Thomas" + orcid: "https://orcid.org/0000-0003-4020-6364" +- family-names: "Westling" + given-names: "Jimmy" +- family-names: "Munroe" + given-names: "James" + orcid: "https://orcid.org/0000-0001-9098-6309" +- family-names: "Amici" + given-names: "Alessandro" + orcid: "https://orcid.org/0000-0002-1778-4505" +- family-names: "Barghini" + given-names: "Aureliana" +- family-names: "Banihirwe" + given-names: "Anderson" + orcid: "https://orcid.org/0000-0001-6583-571X" +- family-names: "Bell" + given-names: "Ray" + orcid: "https://orcid.org/0000-0003-2623-0587" +- family-names: "Hatfield-Dodds" + given-names: "Zac" + orcid: "https://orcid.org/0000-0002-8646-8362" +- family-names: "Abernathey" + given-names: "Ryan" + orcid: "https://orcid.org/0000-0001-5999-4917" +- family-names: "Bovy" + 
given-names: "Benoît" +- family-names: "Omotani" + given-names: "John" + orcid: "https://orcid.org/0000-0002-3156-8227" +- family-names: "Mühlbauer" + given-names: "Kai" + orcid: "https://orcid.org/0000-0001-6599-1034" +- family-names: "Roszko" + given-names: "Maximilian K." + orcid: "https://orcid.org/0000-0001-9424-2526" +- family-names: "Wolfram" + given-names: "Phillip J." + orcid: "https://orcid.org/0000-0001-5971-4241" +title: "xarray" +doi: 10.5281/zenodo.598201 +url: "https://github.com/pydata/xarray" +preferred-citation: + type: article + authors: + - family-names: "Hoyer" + given-names: "Stephan" + orcid: "https://orcid.org/0000-0002-5207-0380" + - family-names: "Joseph" + given-names: "Hamman" + orcid: "https://orcid.org/0000-0001-7479-8439" + doi: "10.5334/jors.148" + journal: "Journal of Open Research Software" + month: 4 + title: "xarray: N-D labeled Arrays and Datasets in Python" + volume: 5 + issue: 1 + year: 2017 diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 83a2aa9f010..dafa0fc47e1 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -62,7 +62,7 @@ "pandas": [""], "netcdf4": [""], "scipy": [""], - "bottleneck": ["", null], + "bottleneck": [""], "dask": [""], "distributed": [""], }, diff --git a/asv_bench/benchmarks/dataarray_missing.py b/asv_bench/benchmarks/dataarray_missing.py index f89fe7f8eb9..d786c04e852 100644 --- a/asv_bench/benchmarks/dataarray_missing.py +++ b/asv_bench/benchmarks/dataarray_missing.py @@ -16,13 +16,6 @@ def make_bench_data(shape, frac_nan, chunks): return da -def requires_bottleneck(): - try: - import bottleneck # noqa: F401 - except ImportError: - raise NotImplementedError() - - class DataArrayMissingInterpolateNA: def setup(self, shape, chunks, limit): if chunks is not None: @@ -46,7 +39,6 @@ def time_interpolate_na(self, shape, chunks, limit): class DataArrayMissingBottleneck: def setup(self, shape, chunks, limit): - requires_bottleneck() if chunks is not None: requires_dask() self.da = make_bench_data(shape, 0.1, chunks) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index fa8deaf572f..46d6293cc98 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -1,39 +1,98 @@ import numpy as np +import pandas as pd import xarray as xr -from . import parameterized, requires_dask +from . 
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 0128e70caed..b66c99d0bcb 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -34,7 +34,10 @@ Deprecations
 
 Bug fixes
 ~~~~~~~~~
-
+- Fix plot.line crash for data of shape ``(1, N)`` in _title_for_slice on format_item (:pull:`5948`).
+  By `Sebastian Weigand <https://github.com/s-weigand>`_.
+- Fix a regression in the removal of duplicate backend entrypoints (:issue:`5944`, :pull:`5959`)
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
 
 Documentation
 ~~~~~~~~~~~~~
@@ -48,6 +51,10 @@ Documentation
 Internal Changes
 ~~~~~~~~~~~~~~~~
 
+- Use ``importlib`` to replace functionality of ``pkg_resources`` in
+  backend plugins tests. (:pull:`5959`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
+
 
 .. _whats-new.0.20.1:
diff --git a/xarray/backends/plugins.py b/xarray/backends/plugins.py
index bcaee498b90..0a9ffcbda22 100644
--- a/xarray/backends/plugins.py
+++ b/xarray/backends/plugins.py
@@ -1,15 +1,16 @@
 import functools
 import inspect
 import itertools
+import sys
 import warnings
 
 from .common import BACKEND_ENTRYPOINTS, BackendEntrypoint
 
-try:
+if sys.version_info >= (3, 8):
     from importlib.metadata import entry_points
-except ImportError:
+else:
     # if the fallback library is missing, we are doomed.
-    from importlib_metadata import entry_points  # type: ignore[no-redef]
+    from importlib_metadata import entry_points
 
 STANDARD_BACKENDS_ORDER = ["netcdf4", "h5netcdf", "scipy"]
 
@@ -22,15 +23,17 @@ def remove_duplicates(entrypoints):
 
     # check if there are multiple entrypoints for the same name
     unique_entrypoints = []
     for name, matches in entrypoints_grouped:
-        matches = list(matches)
+        # remove equal entrypoints
+        matches = list(set(matches))
         unique_entrypoints.append(matches[0])
         matches_len = len(matches)
         if matches_len > 1:
-            selected_module_name = matches[0].module_name
-            all_module_names = [e.module_name for e in matches]
+            all_module_names = [e.value.split(":")[0] for e in matches]
+            selected_module_name = all_module_names[0]
             warnings.warn(
                 f"Found {matches_len} entrypoints for the engine name {name}:"
-                f"\n {all_module_names}.\n It will be used: {selected_module_name}.",
+                f"\n {all_module_names}.\n "
+                f"The entrypoint {selected_module_name} will be used.",
                 RuntimeWarning,
             )
     return unique_entrypoints
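A note on why `list(set(matches))` fixes the regression: under `importlib.metadata` (Python 3.8+), an `EntryPoint` is a `(name, value, group)` named tuple, so byte-for-byte identical entrypoints compare equal and collapse in a set, while genuinely conflicting ones survive and still trigger the warning; the module name is recovered from `value`, which has the form `"module:attribute"`. A small illustration — the `my_plugin` entrypoint is made up for the example:

```python
from importlib.metadata import EntryPoint

eps = [
    # the same backend registered twice, plus one real conflict
    EntryPoint("netcdf4", "xarray.backends.netCDF4_:NetCDF4BackendEntrypoint", "xarray.backends"),
    EntryPoint("netcdf4", "xarray.backends.netCDF4_:NetCDF4BackendEntrypoint", "xarray.backends"),
    EntryPoint("netcdf4", "my_plugin.backend:MyBackendEntrypoint", "xarray.backends"),
]

matches = list(set(eps))  # the two equal entrypoints collapse into one
all_module_names = [e.value.split(":")[0] for e in matches]
print(len(matches), all_module_names)  # 2 conflicting entrypoints remain (set order is arbitrary)
```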
"pandas"], (["mean", "count"], [True, False])) - def time_rolling_long(self, func, pandas): + @parameterized( + ["func", "center", "use_bottleneck"], + (["mean", "count"], [True, False], [True, False]), + ) + def time_rolling(self, func, center, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + getattr(self.ds.rolling(x=window, center=center), func)().load() + + @parameterized( + ["func", "pandas", "use_bottleneck"], + (["mean", "count"], [True, False], [True, False]), + ) + def time_rolling_long(self, func, pandas, use_bottleneck): if pandas: se = self.da_long.to_series() getattr(se.rolling(window=window, min_periods=window), func)() else: - getattr(self.da_long.rolling(x=window, min_periods=window), func)().load() - - @parameterized(["window_", "min_periods"], ([20, 40], [5, 5])) - def time_rolling_np(self, window_, min_periods): - self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce( - getattr(np, "nansum") - ).load() - - @parameterized(["center", "stride"], ([True, False], [1, 1])) - def time_rolling_construct(self, center, stride): - self.ds.rolling(x=window, center=center).construct( - "window_dim", stride=stride - ).sum(dim="window_dim").load() + with xr.set_options(use_bottleneck=use_bottleneck): + getattr( + self.da_long.rolling(x=window, min_periods=window), func + )().load() + + @parameterized( + ["window_", "min_periods", "use_bottleneck"], ([20, 40], [5, 5], [True, False]) + ) + def time_rolling_np(self, window_, min_periods, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + self.ds.rolling(x=window_, center=False, min_periods=min_periods).reduce( + getattr(np, "nansum") + ).load() + + @parameterized( + ["center", "stride", "use_bottleneck"], ([True, False], [1, 1], [True, False]) + ) + def time_rolling_construct(self, center, stride, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + self.ds.rolling(x=window, center=center).construct( + "window_dim", stride=stride + ).sum(dim="window_dim").load() class RollingDask(Rolling): @@ -87,24 +103,28 @@ def setup(self, *args, **kwargs): class DataArrayRollingMemory(RollingMemory): - @parameterized("func", ["sum", "max", "mean"]) - def peakmem_ndrolling_reduce(self, func): - roll = self.ds.var1.rolling(x=10, y=4) - getattr(roll, func)() + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_ndrolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.var1.rolling(x=10, y=4) + getattr(roll, func)() - @parameterized("func", ["sum", "max", "mean"]) - def peakmem_1drolling_reduce(self, func): - roll = self.ds.var3.rolling(t=100) - getattr(roll, func)() + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_1drolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = self.ds.var3.rolling(t=100) + getattr(roll, func)() class DatasetRollingMemory(RollingMemory): - @parameterized("func", ["sum", "max", "mean"]) - def peakmem_ndrolling_reduce(self, func): - roll = self.ds.rolling(x=10, y=4) - getattr(roll, func)() - - @parameterized("func", ["sum", "max", "mean"]) - def peakmem_1drolling_reduce(self, func): - roll = self.ds.rolling(t=100) - getattr(roll, func)() + @parameterized(["func", "use_bottleneck"], (["sum", "max", "mean"], [True, False])) + def peakmem_ndrolling_reduce(self, func, use_bottleneck): + with xr.set_options(use_bottleneck=use_bottleneck): + roll = 
diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
index ab1cde860f9..3f65cce4f68 100644
--- a/xarray/core/formatting.py
+++ b/xarray/core/formatting.py
@@ -143,7 +143,7 @@ def format_item(x, timedelta_format=None, quote_strings=True):
     elif isinstance(x, (str, bytes)):
         return repr(x) if quote_strings else x
     elif hasattr(x, "dtype") and np.issubdtype(x.dtype, np.floating):
-        return f"{x:.4}"
+        return f"{x.item():.4}"
     else:
         return str(x)
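The small change above matters because a 0-d NumPy array rejects float format specs, whereas the Python scalar returned by `.item()` accepts them — this is what made `plot.line` crash on shape `(1, N)` data (see the whats-new entry). Roughly:

```python
import numpy as np

x = np.array(3.14159265)  # 0-d array; np.issubdtype(x.dtype, np.floating) is True

# f"{x:.4}" raises TypeError:
# unsupported format string passed to numpy.ndarray.__format__
print(f"{x.item():.4}")  # 3.142 -- .item() yields a plain Python float
```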
diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py
index faad06d8093..072a932b943 100644
--- a/xarray/core/formatting_html.py
+++ b/xarray/core/formatting_html.py
@@ -255,7 +255,7 @@ def _obj_repr(obj, header_components, sections):
         "<div>"
         f"{icons_svg}<style>{css_style}</style>"
         f"<pre class='xr-text-repr-fallback'>{escape(repr(obj))}</pre>"
-        "