Skip to content

Commit

Permalink
[BEAM-14474] Suppress 'Mean of empty slice' Runtime Warning in dataframe unit test (#17682)
Browse files Browse the repository at this point in the history

* [BEAM-14474] Suppress 'Mean of empty slice' Runtime Warning in dataframe unit test

* use catch_warnings context manager
  • Loading branch information
Abacn authored May 18, 2022
1 parent 6774b74 commit 857f8d3
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 15 deletions.
27 changes: 13 additions & 14 deletions sdks/python/apache_beam/dataframe/frames_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import re
import unittest
import warnings

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -1601,6 +1602,12 @@ def test_pivot_no_index_provided_on_multiindex(self):

class GroupByTest(_AbstractFrameTest):
"""Tests for DataFrame/Series GroupBy operations."""
@staticmethod
def median_sum_fn(x):
  """Return the median of ``x.foo + x.bar`` with the empty-slice warning muted.

  Shared by the groupby ``apply`` tests as the user-supplied function.

  Args:
    x: an object exposing ``foo`` and ``bar`` attributes whose sum has a
      ``median()`` method (presumably a pandas DataFrame group -- confirm
      against the callers).

  Returns:
    Whatever ``(x.foo + x.bar).median()`` returns.
  """
  # catch_warnings() restores the global filter list on exit, so the
  # suppression is scoped to this single call.
  with warnings.catch_warnings():
    # Only silence the specific RuntimeWarning this helper is known to
    # trigger; any other category with the same text still surfaces.
    warnings.filterwarnings(
        "ignore", message="Mean of empty slice", category=RuntimeWarning)
    return (x.foo + x.bar).median()

@parameterized.expand(ALL_GROUPING_AGGREGATIONS)
def test_groupby_agg(self, agg_type):
if agg_type == 'describe' and PD_VERSION < (1, 2):
Expand Down Expand Up @@ -1723,28 +1730,24 @@ def test_groupby_callable(self):

def test_groupby_apply(self):
df = GROUPBY_DF

def median_sum_fn(x):
return (x.foo + x.bar).median()

# Note this is the same as DataFrameGroupBy.describe. Using it here is
# just a convenient way to test apply() with a user fn that returns a Series
describe = lambda df: df.describe()

self._run_test(lambda df: df.groupby('group').foo.apply(describe), df)
self._run_test(
lambda df: df.groupby('group')[['foo', 'bar']].apply(describe), df)
self._run_test(lambda df: df.groupby('group').apply(median_sum_fn), df)
self._run_test(lambda df: df.groupby('group').apply(self.median_sum_fn), df)
self._run_test(
lambda df: df.set_index('group').foo.groupby(level=0).apply(describe),
df)
self._run_test(lambda df: df.groupby(level=0).apply(median_sum_fn), df)
self._run_test(lambda df: df.groupby(level=0).apply(self.median_sum_fn), df)
self._run_test(lambda df: df.groupby(lambda x: x % 3).apply(describe), df)
self._run_test(
lambda df: df.bar.groupby(lambda x: x % 3).apply(describe), df)
self._run_test(
lambda df: df.set_index(['str', 'group', 'bool']).groupby(
level='group').apply(median_sum_fn),
level='group').apply(self.median_sum_fn),
df)

def test_groupby_apply_preserves_column_order(self):
Expand Down Expand Up @@ -1830,9 +1833,7 @@ def test_groupby_level_agg(self, level):
self._run_test(
lambda df: df.groupby(level=level).sum(numeric_only=True), df)
self._run_test(
lambda df: df.groupby(level=level).apply(
lambda x: (x.foo + x.bar).median()),
df)
lambda df: df.groupby(level=level).apply(self.median_sum_fn), df)

@unittest.skipIf(PD_VERSION < (1, 1), "drop_na added in pandas 1.1.0")
def test_groupby_count_na(self):
Expand Down Expand Up @@ -1892,17 +1893,15 @@ def test_series_groupby_series(self, agg_type):
def test_groupby_series_apply(self):
df = GROUPBY_DF

def median_sum_fn(x):
return (x.foo + x.bar).median()

# Note this is the same as DataFrameGroupBy.describe. Using it here is
# just a convenient way to test apply() with a user fn that returns a Series
describe = lambda df: df.describe()

self._run_test(lambda df: df.groupby(df.group).foo.apply(describe), df)
self._run_test(
lambda df: df.groupby(df.group)[['foo', 'bar']].apply(describe), df)
self._run_test(lambda df: df.groupby(df.group).apply(median_sum_fn), df)
self._run_test(
lambda df: df.groupby(df.group).apply(self.median_sum_fn), df)

def test_groupby_multiindex_keep_nans(self):
# Due to https://github.com/pandas-dev/pandas/issues/36470
Expand Down
5 changes: 4 additions & 1 deletion sdks/python/apache_beam/dataframe/transforms_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import typing
import unittest
import warnings

import pandas as pd

Expand Down Expand Up @@ -132,7 +133,9 @@ def test_groupby_apply(self):
})

def median_sum_fn(x):
return (x.foo + x.bar).median()
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="Mean of empty slice")
return (x.foo + x.bar).median()

describe = lambda df: df.describe()

Expand Down

0 comments on commit 857f8d3

Please sign in to comment.