Merge branch 'main' into pr/5950
Illviljan committed Nov 20, 2021
2 parents 411d75d + 7a201de commit f03b675
Showing 20 changed files with 332 additions and 163 deletions.
44 changes: 0 additions & 44 deletions .github/workflows/ci-pre-commit-autoupdate.yaml

This file was deleted.

19 changes: 6 additions & 13 deletions .github/workflows/ci.yaml
@@ -108,19 +108,12 @@ jobs:
           name: codecov-umbrella
           fail_ci_if_error: false

-  publish-test-results:
-    needs: test
+  event_file:
+    name: "Event File"
     runs-on: ubuntu-latest
-    # the build-and-test job might be skipped, we don't need to run this job then
-    if: success() || failure()
-
     steps:
-      - name: Download Artifacts
-        uses: actions/download-artifact@v2
-        with:
-          path: test-results
-
-      - name: Publish Unit Test Results
-        uses: EnricoMi/publish-unit-test-result-action@v1
+      - name: Upload
+        uses: actions/upload-artifact@v2
         with:
-          files: test-results/**/*.xml
+          name: Event File
+          path: ${{ github.event_path }}
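For context, ${{ github.event_path }} points at the JSON webhook payload that triggered the run; uploading it as an artifact lets the separate, privileged publish workflow (next file) recover the original event. A minimal Python sketch of reading such a payload once extracted — the path mirrors the event_file input configured later in this commit, and which keys are present depends on the triggering event:

import json

# The publish workflow extracts the artifact to artifacts/Event File/event.json,
# per the event_file input set in publish-test-results.yaml below.
with open("artifacts/Event File/event.json") as f:
    event = json.load(f)

# For pull_request events, the payload carries the PR number among other fields.
print(event.get("pull_request", {}).get("number"))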
18 changes: 7 additions & 11 deletions .github/workflows/publish-test-results.yaml
@@ -1,4 +1,4 @@
-# Copied from https://github.com/EnricoMi/publish-unit-test-result-action/blob/v1.18/README.md#support-fork-repositories-and-dependabot-branches
+# Copied from https://github.com/EnricoMi/publish-unit-test-result-action/blob/v1.23/README.md#support-fork-repositories-and-dependabot-branches

 name: Publish test results

@@ -12,11 +12,7 @@ jobs:
   publish-test-results:
     name: Publish test results
     runs-on: ubuntu-latest
-    if: >
-      github.event.workflow_run.conclusion != 'skipped' && (
-        github.event.sender.login == 'dependabot[bot]' ||
-        github.event.workflow_run.head_repository.full_name != github.repository
-      )
+    if: github.event.workflow_run.conclusion != 'skipped'

     steps:
       - name: Download and extract artifacts
@@ -26,13 +22,10 @@ jobs:
           mkdir artifacts && cd artifacts

           artifacts_url=${{ github.event.workflow_run.artifacts_url }}

-          artifacts=$(gh api $artifacts_url -q '.artifacts[] | {name: .name, url: .archive_download_url}')
-          IFS=$'\n'
-          for artifact in $artifacts
+          gh api "$artifacts_url" -q '.artifacts[] | [.name, .archive_download_url] | @tsv' | while read artifact
           do
-            name=$(jq -r .name <<<$artifact)
-            url=$(jq -r .url <<<$artifact)
+            IFS=$'\t' read name url <<< "$artifact"
             gh api $url > "$name.zip"
             unzip -d "$name" "$name.zip"
           done
@@ -41,4 +34,7 @@ jobs:
         uses: EnricoMi/publish-unit-test-result-action@v1
         with:
           commit: ${{ github.event.workflow_run.head_sha }}
+          event_file: artifacts/Event File/event.json
+          event_name: ${{ github.event.workflow_run.event }}
           files: "artifacts/**/*.xml"
+          comment_mode: off
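The reworked loop above asks the GitHub API for each artifact's name and archive_download_url as tab-separated values, then downloads and unzips each archive. A rough Python equivalent of the same logic, assuming a GITHUB_TOKEN and a hypothetical ARTIFACTS_URL environment variable plus the requests package (all assumptions for illustration; the workflow itself uses the gh CLI, which handles auth implicitly):

import io
import os
import zipfile

import requests  # assumed third-party dependency for this sketch

headers = {"Authorization": f"token {os.environ['GITHUB_TOKEN']}"}  # token is an assumption

# The workflow takes this URL from the workflow_run event payload;
# here it is a hypothetical environment variable for illustration.
artifacts_url = os.environ["ARTIFACTS_URL"]

for artifact in requests.get(artifacts_url, headers=headers).json()["artifacts"]:
    name, url = artifact["name"], artifact["archive_download_url"]
    archive = requests.get(url, headers=headers)
    # Each artifact downloads as a zip; extract it into a directory named
    # after it, matching the unzip -d "$name" "$name.zip" step above.
    zipfile.ZipFile(io.BytesIO(archive.content)).extractall(name)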
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -8,12 +8,12 @@ repos:
       - id: check-yaml
   # isort should run before black as black sometimes tweaks the isort output
   - repo: https://github.com/PyCQA/isort
-    rev: 5.9.3
+    rev: 5.10.1
     hooks:
       - id: isort
   # https://github.com/python/black#version-control-integration
   - repo: https://github.com/psf/black
-    rev: 21.9b0
+    rev: 21.10b0
     hooks:
       - id: black
       - id: black-jupyter
@@ -22,8 +22,8 @@ repos:
     hooks:
       - id: blackdoc
         exclude: "generate_reductions.py"
-  - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.2
+  - repo: https://github.com/PyCQA/flake8
+    rev: 4.0.1
     hooks:
       - id: flake8
   # - repo: https://github.com/Carreau/velin
96 changes: 96 additions & 0 deletions CITATION.cff
@@ -0,0 +1,96 @@
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- family-names: "Hoyer"
given-names: "Stephan"
orcid: "https://orcid.org/0000-0002-5207-0380"
- family-names: "Roos"
given-names: "Maximilian"
- family-names: "Joseph"
given-names: "Hamman"
orcid: "https://orcid.org/0000-0001-7479-8439"
- family-names: "Magin"
given-names: "Justus"
- family-names: "Cherian"
given-names: "Deepak"
orcid: "https://orcid.org/0000-0002-6861-8734"
- family-names: "Fitzgerald"
given-names: "Clark"
orcid: "https://orcid.org/0000-0003-3446-6389"
- family-names: "Hauser"
given-names: "Mathias"
orcid: "https://orcid.org/0000-0002-0057-4878"
- family-names: "Fujii"
given-names: "Keisuke"
orcid: "https://orcid.org/0000-0003-0390-9984"
- family-names: "Maussion"
given-names: "Fabien"
orcid: "https://orcid.org/0000-0002-3211-506X"
- family-names: "Imperiale"
given-names: "Guido"
- family-names: "Clark"
given-names: "Spencer"
orcid: "https://orcid.org/0000-0001-5595-7895"
- family-names: "Kleeman"
given-names: "Alex"
- family-names: "Nicholas"
given-names: "Thomas"
orcid: "https://orcid.org/0000-0002-2176-0530"
- family-names: "Kluyver"
given-names: "Thomas"
orcid: "https://orcid.org/0000-0003-4020-6364"
- family-names: "Westling"
given-names: "Jimmy"
- family-names: "Munroe"
given-names: "James"
orcid: "https://orcid.org/0000-0001-9098-6309"
- family-names: "Amici"
given-names: "Alessandro"
orcid: "https://orcid.org/0000-0002-1778-4505"
- family-names: "Barghini"
given-names: "Aureliana"
- family-names: "Banihirwe"
given-names: "Anderson"
orcid: "https://orcid.org/0000-0001-6583-571X"
- family-names: "Bell"
given-names: "Ray"
orcid: "https://orcid.org/0000-0003-2623-0587"
- family-names: "Hatfield-Dodds"
given-names: "Zac"
orcid: "https://orcid.org/0000-0002-8646-8362"
- family-names: "Abernathey"
given-names: "Ryan"
orcid: "https://orcid.org/0000-0001-5999-4917"
- family-names: "Bovy"
given-names: "Benoît"
- family-names: "Omotani"
given-names: "John"
orcid: "https://orcid.org/0000-0002-3156-8227"
- family-names: "Mühlbauer"
given-names: "Kai"
orcid: "https://orcid.org/0000-0001-6599-1034"
- family-names: "Roszko"
given-names: "Maximilian K."
orcid: "https://orcid.org/0000-0001-9424-2526"
- family-names: "Wolfram"
given-names: "Phillip J."
orcid: "https://orcid.org/0000-0001-5971-4241"
title: "xarray"
doi: 10.5281/zenodo.598201
url: "https://github.com/pydata/xarray"
preferred-citation:
type: article
authors:
- family-names: "Hoyer"
given-names: "Stephan"
orcid: "https://orcid.org/0000-0002-5207-0380"
- family-names: "Joseph"
given-names: "Hamman"
orcid: "https://orcid.org/0000-0001-7479-8439"
doi: "10.5334/jors.148"
journal: "Journal of Open Research Software"
month: 4
title: "xarray: N-D labeled Arrays and Datasets in Python"
volume: 5
issue: 1
year: 2017
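Since CITATION.cff is plain YAML, the new metadata is straightforward to consume programmatically. A small sketch using PyYAML (an assumed dependency, not something this commit adds) to print the preferred citation:

import yaml  # PyYAML, assumed installed for this sketch

with open("CITATION.cff") as f:
    cff = yaml.safe_load(f)

pref = cff["preferred-citation"]
first = pref["authors"][0]
print(
    f'{first["family-names"]}, {first["given-names"]} et al. '
    f'({pref["year"]}). {pref["title"]}. doi:{pref["doi"]}'
)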
2 changes: 1 addition & 1 deletion asv_bench/asv.conf.json
@@ -62,7 +62,7 @@
         "pandas": [""],
         "netcdf4": [""],
         "scipy": [""],
-        "bottleneck": ["", null],
+        "bottleneck": [""],
         "dask": [""],
         "distributed": [""],
     },
8 changes: 0 additions & 8 deletions asv_bench/benchmarks/dataarray_missing.py
@@ -16,13 +16,6 @@ def make_bench_data(shape, frac_nan, chunks):
     return da


-def requires_bottleneck():
-    try:
-        import bottleneck  # noqa: F401
-    except ImportError:
-        raise NotImplementedError()
-
-
 class DataArrayMissingInterpolateNA:
     def setup(self, shape, chunks, limit):
         if chunks is not None:
@@ -46,7 +39,6 @@ def time_interpolate_na(self, shape, chunks, limit):

 class DataArrayMissingBottleneck:
     def setup(self, shape, chunks, limit):
-        requires_bottleneck()
         if chunks is not None:
             requires_dask()
         self.da = make_bench_data(shape, 0.1, chunks)
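Aside: the deleted requires_bottleneck helper illustrates the asv convention that raising NotImplementedError from setup() skips a benchmark. The requires_dask helper still used above presumably follows the same pattern; a sketch of what it plausibly looks like:

def requires_dask():
    # asv treats NotImplementedError raised during setup() as "skip this
    # benchmark", the same convention the deleted requires_bottleneck
    # helper relied on. Sketch only; the real helper is imported from
    # asv_bench/benchmarks/__init__.py.
    try:
        import dask  # noqa: F401
    except ImportError:
        raise NotImplementedError()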
81 changes: 70 additions & 11 deletions asv_bench/benchmarks/groupby.py
@@ -1,39 +1,98 @@
 import numpy as np
+import pandas as pd

 import xarray as xr

-from . import parameterized, requires_dask
+from . import _skip_slow, parameterized, requires_dask


 class GroupBy:
     def setup(self, *args, **kwargs):
-        self.ds = xr.Dataset(
+        self.n = 100
+        self.ds1d = xr.Dataset(
             {
-                "a": xr.DataArray(np.r_[np.arange(500.0), np.arange(500.0)]),
-                "b": xr.DataArray(np.arange(1000.0)),
+                "a": xr.DataArray(np.r_[np.repeat(1, self.n), np.repeat(2, self.n)]),
+                "b": xr.DataArray(np.arange(2 * self.n)),
             }
         )
+        self.ds2d = self.ds1d.expand_dims(z=10)

-    @parameterized(["method"], [("sum", "mean")])
-    def time_agg(self, method):
-        return getattr(self.ds.groupby("a"), method)()
+    @parameterized(["ndim"], [(1, 2)])
+    def time_init(self, ndim):
+        getattr(self, f"ds{ndim}d").groupby("b")
+
+    @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
+    def time_agg_small_num_groups(self, method, ndim):
+        ds = getattr(self, f"ds{ndim}d")
+        getattr(ds.groupby("a"), method)()
+
+    @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
+    def time_agg_large_num_groups(self, method, ndim):
+        ds = getattr(self, f"ds{ndim}d")
+        getattr(ds.groupby("b"), method)()


 class GroupByDask(GroupBy):
     def setup(self, *args, **kwargs):
         requires_dask()
         super().setup(**kwargs)
-        self.ds = self.ds.chunk({"dim_0": 50})
+        self.ds1d = self.ds1d.sel(dim_0=slice(None, None, 2)).chunk({"dim_0": 50})
+        self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2)).chunk(
+            {"dim_0": 50, "z": 5}
+        )


-class GroupByDataFrame(GroupBy):
+class GroupByPandasDataFrame(GroupBy):
+    """Run groupby tests using pandas DataFrame."""
+
     def setup(self, *args, **kwargs):
+        # Skip testing in CI as it won't ever change in a commit:
+        _skip_slow()
+
         super().setup(**kwargs)
-        self.ds = self.ds.to_dataframe()
+        self.ds1d = self.ds1d.to_dataframe()


 class GroupByDaskDataFrame(GroupBy):
+    """Run groupby tests using dask DataFrame."""
+
     def setup(self, *args, **kwargs):
+        # Skip testing in CI as it won't ever change in a commit:
+        _skip_slow()
+
         requires_dask()
         super().setup(**kwargs)
+        self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe()
+
+
+class Resample:
+    def setup(self, *args, **kwargs):
+        self.ds1d = xr.Dataset(
+            {
+                "b": ("time", np.arange(365.0 * 24)),
+            },
+            coords={"time": pd.date_range("2001-01-01", freq="H", periods=365 * 24)},
+        )
+        self.ds2d = self.ds1d.expand_dims(z=10)
+
+    @parameterized(["ndim"], [(1, 2)])
+    def time_init(self, ndim):
+        getattr(self, f"ds{ndim}d").resample(time="D")
+
+    @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
+    def time_agg_small_num_groups(self, method, ndim):
+        ds = getattr(self, f"ds{ndim}d")
+        getattr(ds.resample(time="3M"), method)()
+
+    @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
+    def time_agg_large_num_groups(self, method, ndim):
+        ds = getattr(self, f"ds{ndim}d")
+        getattr(ds.resample(time="48H"), method)()
+
+
+class ResampleDask(Resample):
+    def setup(self, *args, **kwargs):
+        requires_dask()
+        super().setup(**kwargs)
-        self.ds = self.ds.chunk({"dim_0": 50}).to_dataframe()
+        self.ds1d = self.ds1d.chunk({"time": 50})
+        self.ds2d = self.ds2d.chunk({"time": 50, "z": 4})
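To see what the new small-versus-large-group split measures, the 1D benchmark dataset can be rebuilt outside asv directly from the setup code above: grouping by "a" yields two groups of n elements each, while grouping by "b" yields 2n single-element groups.

import numpy as np
import xarray as xr

# Rebuild the 1D benchmark dataset from the new GroupBy.setup (n = 100):
n = 100
ds1d = xr.Dataset(
    {
        "a": xr.DataArray(np.r_[np.repeat(1, n), np.repeat(2, n)]),
        "b": xr.DataArray(np.arange(2 * n)),
    }
)

ds1d.groupby("a").mean()  # "small_num_groups": 2 groups of n elements each
ds1d.groupby("b").mean()  # "large_num_groups": 2n single-element groups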