-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BEAM-14474] Suppress 'Mean of empty slice' Runtime Warning in dataframe unit test #17682
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
|
||
import re | ||
import unittest | ||
import warnings | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
@@ -1601,6 +1602,11 @@ def test_pivot_no_index_provided_on_multiindex(self): | |
|
||
class GroupByTest(_AbstractFrameTest): | ||
"""Tests for DataFrame/Series GroupBy operations.""" | ||
@staticmethod | ||
def median_sum_fn(x): | ||
warnings.filterwarnings("ignore", message="Mean of empty slice") | ||
return (x.foo + x.bar).median() | ||
|
||
@parameterized.expand(ALL_GROUPING_AGGREGATIONS) | ||
def test_groupby_agg(self, agg_type): | ||
if agg_type == 'describe' and PD_VERSION < (1, 2): | ||
|
@@ -1723,28 +1729,24 @@ def test_groupby_callable(self): | |
|
||
def test_groupby_apply(self): | ||
df = GROUPBY_DF | ||
|
||
def median_sum_fn(x): | ||
return (x.foo + x.bar).median() | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: I would actually keep these where they are and just duplicate the filterwarnings calls, so that the tests are more self-contained and easy to inspect (but I'm also fine with the move if you prefer it that way). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I agree with the idea. Given the amount of code added, I considered pulling them out into a separate helper function. Nevertheless, it is still in the GroupByTest test class. |
||
# Note this is the same as DataFrameGroupBy.describe. Using it here is | ||
# just a convenient way to test apply() with a user fn that returns a Series | ||
describe = lambda df: df.describe() | ||
|
||
self._run_test(lambda df: df.groupby('group').foo.apply(describe), df) | ||
self._run_test( | ||
lambda df: df.groupby('group')[['foo', 'bar']].apply(describe), df) | ||
self._run_test(lambda df: df.groupby('group').apply(median_sum_fn), df) | ||
self._run_test(lambda df: df.groupby('group').apply(self.median_sum_fn), df) | ||
self._run_test( | ||
lambda df: df.set_index('group').foo.groupby(level=0).apply(describe), | ||
df) | ||
self._run_test(lambda df: df.groupby(level=0).apply(median_sum_fn), df) | ||
self._run_test(lambda df: df.groupby(level=0).apply(self.median_sum_fn), df) | ||
self._run_test(lambda df: df.groupby(lambda x: x % 3).apply(describe), df) | ||
self._run_test( | ||
lambda df: df.bar.groupby(lambda x: x % 3).apply(describe), df) | ||
self._run_test( | ||
lambda df: df.set_index(['str', 'group', 'bool']).groupby( | ||
level='group').apply(median_sum_fn), | ||
level='group').apply(self.median_sum_fn), | ||
df) | ||
|
||
def test_groupby_apply_preserves_column_order(self): | ||
|
@@ -1830,9 +1832,7 @@ def test_groupby_level_agg(self, level): | |
self._run_test( | ||
lambda df: df.groupby(level=level).sum(numeric_only=True), df) | ||
self._run_test( | ||
lambda df: df.groupby(level=level).apply( | ||
lambda x: (x.foo + x.bar).median()), | ||
df) | ||
lambda df: df.groupby(level=level).apply(self.median_sum_fn), df) | ||
|
||
@unittest.skipIf(PD_VERSION < (1, 1), "drop_na added in pandas 1.1.0") | ||
def test_groupby_count_na(self): | ||
|
@@ -1892,17 +1892,15 @@ def test_series_groupby_series(self, agg_type): | |
def test_groupby_series_apply(self): | ||
df = GROUPBY_DF | ||
|
||
def median_sum_fn(x): | ||
return (x.foo + x.bar).median() | ||
|
||
# Note this is the same as DataFrameGroupBy.describe. Using it here is | ||
# just a convenient way to test apply() with a user fn that returns a Series | ||
describe = lambda df: df.describe() | ||
|
||
self._run_test(lambda df: df.groupby(df.group).foo.apply(describe), df) | ||
self._run_test( | ||
lambda df: df.groupby(df.group)[['foo', 'bar']].apply(describe), df) | ||
self._run_test(lambda df: df.groupby(df.group).apply(median_sum_fn), df) | ||
self._run_test( | ||
lambda df: df.groupby(df.group).apply(self.median_sum_fn), df) | ||
|
||
def test_groupby_multiindex_keep_nans(self): | ||
# Due to https://github.com/pandas-dev/pandas/issues/36470 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we use a context manager here (e.g. `with warnings.catch_warnings():`) to make sure the warning filters get reset afterwards?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the suggestion. Applied.