Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'upstream/main' into groupby-aggs-using-…
Browse files Browse the repository at this point in the history
…numpy-groupies

* upstream/main:
  Add groupby & resample benchmarks (pydata#5922)
  Fix plot.line crash for data of shape (1, N) in _title_for_slice on format_item (pydata#5948)
  Disable unit test comments (pydata#5946)
  Publish test results from workflow_run only (pydata#5947)
dcherian committed Nov 8, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents a2168df + 20fddb7 commit 08911b9
Showing 6 changed files with 93 additions and 37 deletions.
19 changes: 6 additions & 13 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -108,19 +108,12 @@ jobs:
name: codecov-umbrella
fail_ci_if_error: false

publish-test-results:
needs: test
event_file:
name: "Event File"
runs-on: ubuntu-latest
# the build-and-test job might be skipped, we don't need to run this job then
if: success() || failure()

steps:
- name: Download Artifacts
uses: actions/download-artifact@v2
with:
path: test-results

- name: Publish Unit Test Results
uses: EnricoMi/publish-unit-test-result-action@v1
- name: Upload
uses: actions/upload-artifact@v2
with:
files: test-results/**/*.xml
name: Event File
path: ${{ github.event_path }}
18 changes: 7 additions & 11 deletions .github/workflows/publish-test-results.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copied from https://github.com/EnricoMi/publish-unit-test-result-action/blob/v1.18/README.md#support-fork-repositories-and-dependabot-branches
# Copied from https://github.com/EnricoMi/publish-unit-test-result-action/blob/v1.23/README.md#support-fork-repositories-and-dependabot-branches

name: Publish test results

@@ -12,11 +12,7 @@ jobs:
publish-test-results:
name: Publish test results
runs-on: ubuntu-latest
if: >
github.event.workflow_run.conclusion != 'skipped' && (
github.event.sender.login == 'dependabot[bot]' ||
github.event.workflow_run.head_repository.full_name != github.repository
)
if: github.event.workflow_run.conclusion != 'skipped'

steps:
- name: Download and extract artifacts
@@ -26,13 +22,10 @@ jobs:
mkdir artifacts && cd artifacts
artifacts_url=${{ github.event.workflow_run.artifacts_url }}
artifacts=$(gh api $artifacts_url -q '.artifacts[] | {name: .name, url: .archive_download_url}')
IFS=$'\n'
for artifact in $artifacts
gh api "$artifacts_url" -q '.artifacts[] | [.name, .archive_download_url] | @tsv' | while read artifact
do
name=$(jq -r .name <<<$artifact)
url=$(jq -r .url <<<$artifact)
IFS=$'\t' read name url <<< "$artifact"
gh api $url > "$name.zip"
unzip -d "$name" "$name.zip"
done
@@ -41,4 +34,7 @@ jobs:
uses: EnricoMi/publish-unit-test-result-action@v1
with:
commit: ${{ github.event.workflow_run.head_sha }}
event_file: artifacts/Event File/event.json
event_name: ${{ github.event.workflow_run.event }}
files: "artifacts/**/*.xml"
comment_mode: off
81 changes: 70 additions & 11 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,98 @@
import numpy as np
import pandas as pd

import xarray as xr

from . import parameterized, requires_dask
from . import _skip_slow, parameterized, requires_dask


class GroupBy:
def setup(self, *args, **kwargs):
self.ds = xr.Dataset(
self.n = 100
self.ds1d = xr.Dataset(
{
"a": xr.DataArray(np.r_[np.arange(500.0), np.arange(500.0)]),
"b": xr.DataArray(np.arange(1000.0)),
"a": xr.DataArray(np.r_[np.repeat(1, self.n), np.repeat(2, self.n)]),
"b": xr.DataArray(np.arange(2 * self.n)),
}
)
self.ds2d = self.ds1d.expand_dims(z=10)

@parameterized(["method"], [("sum", "mean")])
def time_agg(self, method):
return getattr(self.ds.groupby("a"), method)()
@parameterized(["ndim"], [(1, 2)])
def time_init(self, ndim):
getattr(self, f"ds{ndim}d").groupby("b")

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.groupby("a"), method)()

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.groupby("b"), method)()


class GroupByDask(GroupBy):
def setup(self, *args, **kwargs):
requires_dask()
super().setup(**kwargs)
self.ds = self.ds.chunk({"dim_0": 50})
self.ds1d = self.ds1d.sel(dim_0=slice(None, None, 2)).chunk({"dim_0": 50})
self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2)).chunk(
{"dim_0": 50, "z": 5}
)


class GroupByDataFrame(GroupBy):
class GroupByPandasDataFrame(GroupBy):
"""Run groupby tests using pandas DataFrame."""

def setup(self, *args, **kwargs):
# Skip testing in CI as it won't ever change in a commit:
_skip_slow()

super().setup(**kwargs)
self.ds = self.ds.to_dataframe()
self.ds1d = self.ds1d.to_dataframe()


class GroupByDaskDataFrame(GroupBy):
"""Run groupby tests using dask DataFrame."""

def setup(self, *args, **kwargs):
# Skip testing in CI as it won't ever change in a commit:
_skip_slow()

requires_dask()
super().setup(**kwargs)
self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe()


class Resample:
def setup(self, *args, **kwargs):
self.ds1d = xr.Dataset(
{
"b": ("time", np.arange(365.0 * 24)),
},
coords={"time": pd.date_range("2001-01-01", freq="H", periods=365 * 24)},
)
self.ds2d = self.ds1d.expand_dims(z=10)

@parameterized(["ndim"], [(1, 2)])
def time_init(self, ndim):
getattr(self, f"ds{ndim}d").resample(time="D")

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.resample(time="3M"), method)()

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.resample(time="48H"), method)()


class ResampleDask(Resample):
def setup(self, *args, **kwargs):
requires_dask()
super().setup(**kwargs)
self.ds = self.ds.chunk({"dim_0": 50}).to_dataframe()
self.ds1d = self.ds1d.chunk({"time": 50})
self.ds2d = self.ds2d.chunk({"time": 50, "z": 4})
3 changes: 2 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
@@ -34,7 +34,8 @@ Deprecations

Bug fixes
~~~~~~~~~

- Fix plot.line crash for data of shape ``(1, N)`` in _title_for_slice on format_item (:pull:`5948`).
By `Sebastian Weigand <https://github.com/s-weigand>`_.

Documentation
~~~~~~~~~~~~~
2 changes: 1 addition & 1 deletion xarray/core/formatting.py
Original file line number Diff line number Diff line change
@@ -143,7 +143,7 @@ def format_item(x, timedelta_format=None, quote_strings=True):
elif isinstance(x, (str, bytes)):
return repr(x) if quote_strings else x
elif hasattr(x, "dtype") and np.issubdtype(x.dtype, np.floating):
return f"{x:.4}"
return f"{x.item():.4}"
else:
return str(x)

7 changes: 7 additions & 0 deletions xarray/tests/test_plot.py
Original file line number Diff line number Diff line change
@@ -754,6 +754,13 @@ def test_slice_in_title(self):
title = plt.gca().get_title()
assert "d = 10.01" == title

def test_slice_in_title_single_item_array(self):
"""Edge case for data of shape (1, N) or (N, 1)."""
darray = self.darray.expand_dims({"d": np.array([10.009])})
darray.plot.line(x="period")
title = plt.gca().get_title()
assert "d = 10.01" == title


class TestPlotStep(PlotTestCase):
@pytest.fixture(autouse=True)

0 comments on commit 08911b9

Please sign in to comment.