Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'upstream/main' into groupby-aggs-using-…
Browse files Browse the repository at this point in the history
…numpy-groupies

* upstream/main:
  Add groupby & resample benchmarks (pydata#5922)
  Fix plot.line crash for data of shape (1, N) in _title_for_slice on format_item (pydata#5948)
  Disable unit test comments (pydata#5946)
  Publish test results from workflow_run only (pydata#5947)
dcherian committed Nov 8, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents a2168df + 20fddb7 commit 08911b9
Showing 6 changed files with 93 additions and 37 deletions.
19 changes: 6 additions & 13 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -108,19 +108,12 @@ jobs:
name: codecov-umbrella
fail_ci_if_error: false

publish-test-results:
needs: test
event_file:
name: "Event File"
runs-on: ubuntu-latest
# the build-and-test job might be skipped, we don't need to run this job then
if: success() || failure()

steps:
- name: Download Artifacts
uses: actions/download-artifact@v2
with:
path: test-results

- name: Publish Unit Test Results
uses: EnricoMi/publish-unit-test-result-action@v1
- name: Upload
uses: actions/upload-artifact@v2
with:
files: test-results/**/*.xml
name: Event File
path: ${{ github.event_path }}
18 changes: 7 additions & 11 deletions .github/workflows/publish-test-results.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copied from https://github.com/EnricoMi/publish-unit-test-result-action/blob/v1.18/README.md#support-fork-repositories-and-dependabot-branches
# Copied from https://github.com/EnricoMi/publish-unit-test-result-action/blob/v1.23/README.md#support-fork-repositories-and-dependabot-branches

name: Publish test results

@@ -12,11 +12,7 @@ jobs:
publish-test-results:
name: Publish test results
runs-on: ubuntu-latest
if: >
github.event.workflow_run.conclusion != 'skipped' && (
github.event.sender.login == 'dependabot[bot]' ||
github.event.workflow_run.head_repository.full_name != github.repository
)
if: github.event.workflow_run.conclusion != 'skipped'

steps:
- name: Download and extract artifacts
@@ -26,13 +22,10 @@ jobs:
mkdir artifacts && cd artifacts
artifacts_url=${{ github.event.workflow_run.artifacts_url }}
artifacts=$(gh api $artifacts_url -q '.artifacts[] | {name: .name, url: .archive_download_url}')
IFS=$'\n'
for artifact in $artifacts
gh api "$artifacts_url" -q '.artifacts[] | [.name, .archive_download_url] | @tsv' | while read artifact
do
name=$(jq -r .name <<<$artifact)
url=$(jq -r .url <<<$artifact)
IFS=$'\t' read name url <<< "$artifact"
gh api $url > "$name.zip"
unzip -d "$name" "$name.zip"
done
@@ -41,4 +34,7 @@ jobs:
uses: EnricoMi/publish-unit-test-result-action@v1
with:
commit: ${{ github.event.workflow_run.head_sha }}
event_file: artifacts/Event File/event.json
event_name: ${{ github.event.workflow_run.event }}
files: "artifacts/**/*.xml"
comment_mode: off
81 changes: 70 additions & 11 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,98 @@
import numpy as np
import pandas as pd

import xarray as xr

from . import parameterized, requires_dask
from . import _skip_slow, parameterized, requires_dask


class GroupBy:
def setup(self, *args, **kwargs):
self.ds = xr.Dataset(
self.n = 100
self.ds1d = xr.Dataset(
{
"a": xr.DataArray(np.r_[np.arange(500.0), np.arange(500.0)]),
"b": xr.DataArray(np.arange(1000.0)),
"a": xr.DataArray(np.r_[np.repeat(1, self.n), np.repeat(2, self.n)]),
"b": xr.DataArray(np.arange(2 * self.n)),
}
)
self.ds2d = self.ds1d.expand_dims(z=10)

@parameterized(["method"], [("sum", "mean")])
def time_agg(self, method):
return getattr(self.ds.groupby("a"), method)()
@parameterized(["ndim"], [(1, 2)])
def time_init(self, ndim):
getattr(self, f"ds{ndim}d").groupby("b")

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.groupby("a"), method)()

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.groupby("b"), method)()


class GroupByDask(GroupBy):
def setup(self, *args, **kwargs):
requires_dask()
super().setup(**kwargs)
self.ds = self.ds.chunk({"dim_0": 50})
self.ds1d = self.ds1d.sel(dim_0=slice(None, None, 2)).chunk({"dim_0": 50})
self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2)).chunk(
{"dim_0": 50, "z": 5}
)


class GroupByDataFrame(GroupBy):
class GroupByPandasDataFrame(GroupBy):
"""Run groupby tests using pandas DataFrame."""

def setup(self, *args, **kwargs):
# Skip testing in CI as it won't ever change in a commit:
_skip_slow()

super().setup(**kwargs)
self.ds = self.ds.to_dataframe()
self.ds1d = self.ds1d.to_dataframe()


class GroupByDaskDataFrame(GroupBy):
"""Run groupby tests using dask DataFrame."""

def setup(self, *args, **kwargs):
# Skip testing in CI as it won't ever change in a commit:
_skip_slow()

requires_dask()
super().setup(**kwargs)
self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe()


class Resample:
def setup(self, *args, **kwargs):
self.ds1d = xr.Dataset(
{
"b": ("time", np.arange(365.0 * 24)),
},
coords={"time": pd.date_range("2001-01-01", freq="H", periods=365 * 24)},
)
self.ds2d = self.ds1d.expand_dims(z=10)

@parameterized(["ndim"], [(1, 2)])
def time_init(self, ndim):
getattr(self, f"ds{ndim}d").resample(time="D")

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.resample(time="3M"), method)()

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.resample(time="48H"), method)()


class ResampleDask(Resample):
def setup(self, *args, **kwargs):
requires_dask()
super().setup(**kwargs)
self.ds = self.ds.chunk({"dim_0": 50}).to_dataframe()
self.ds1d = self.ds1d.chunk({"time": 50})
self.ds2d = self.ds2d.chunk({"time": 50, "z": 4})
3 changes: 2 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
@@ -34,7 +34,8 @@ Deprecations

Bug fixes
~~~~~~~~~

- Fix plot.line crash for data of shape ``(1, N)`` in _title_for_slice on format_item (:pull:`5948`).
By `Sebastian Weigand <https://github.com/s-weigand>`_.

Documentation
~~~~~~~~~~~~~
2 changes: 1 addition & 1 deletion xarray/core/formatting.py
Original file line number Diff line number Diff line change
@@ -143,7 +143,7 @@ def format_item(x, timedelta_format=None, quote_strings=True):
elif isinstance(x, (str, bytes)):
return repr(x) if quote_strings else x
elif hasattr(x, "dtype") and np.issubdtype(x.dtype, np.floating):
return f"{x:.4}"
return f"{x.item():.4}"
else:
return str(x)

7 changes: 7 additions & 0 deletions xarray/tests/test_plot.py
Original file line number Diff line number Diff line change
@@ -754,6 +754,13 @@ def test_slice_in_title(self):
title = plt.gca().get_title()
assert "d = 10.01" == title

def test_slice_in_title_single_item_array(self):
"""Edge case for data of shape (1, N) or (N, 1)."""
darray = self.darray.expand_dims({"d": np.array([10.009])})
darray.plot.line(x="period")
title = plt.gca().get_title()
assert "d = 10.01" == title


class TestPlotStep(PlotTestCase):
@pytest.fixture(autouse=True)

0 comments on commit 08911b9

Please sign in to comment.