Skip to content

Commit

Permalink
FIX-modin-project#6585: avoid 'FutureWarning's in 'rolling' unless necessary
Browse files Browse the repository at this point in the history

Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev committed Sep 26, 2023
1 parent be0eab9 commit e38e846
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 82 deletions.
6 changes: 6 additions & 0 deletions modin/core/dataframe/algebra/fold.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ def register(cls, fold_function):
Function that takes query compiler and executes Fold function.
"""

# @functools.wraps(fold_function)
# def fold_function_catch_warnings(*args, **kwargs):
# with warnings.catch_warnings():
# warnings.filterwarnings("ignore", category=FutureWarning)
# return fold_function(*args, **kwargs)

def caller(query_compiler, fold_axis=None, *args, **kwargs):
"""
Execute Fold function against passed query compiler.
Expand Down
4 changes: 2 additions & 2 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1519,9 +1519,9 @@ def expanding_corr(
)
)
rolling_quantile = Fold.register(
lambda df, rolling_kwargs, quantile, interpolation, **kwargs: pandas.DataFrame(
lambda df, rolling_kwargs, q, interpolation, **kwargs: pandas.DataFrame(
df.rolling(**rolling_kwargs).quantile(
quantile=quantile, interpolation=interpolation, **kwargs
q=q, interpolation=interpolation, **kwargs
)
)
)
Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def pytest_collection_modifyitems(items):
):
for item in items:
if item.name in (
"test_dataframe_dt_index[3s-both-DateCol-0]",
"test_dataframe_dt_index[3s-right-DateCol-0]",
"test_dataframe_dt_index[3s-both-DateCol-_NoDefault.no_default]",
"test_dataframe_dt_index[3s-right-DateCol-_NoDefault.no_default]",
):
item.add_marker(
pytest.mark.xfail(
Expand Down
39 changes: 21 additions & 18 deletions modin/pandas/test/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,29 +628,32 @@ def test_unique():
def test_value_counts(normalize, bins, dropna):
# We sort indices for Modin and pandas result because of issue #1650
values = np.array([3, 1, 2, 3, 4, np.nan])
modin_result = sort_index_for_equal_values(
pd.value_counts(values, normalize=normalize, ascending=False), False
)
pandas_result = sort_index_for_equal_values(
pandas.value_counts(values, normalize=normalize, ascending=False), False
)
with pytest.warns(FutureWarning, match=".*pandas.value_counts is deprecated.*"):
modin_result = sort_index_for_equal_values(
pd.value_counts(values, normalize=normalize, ascending=False), False
)
pandas_result = sort_index_for_equal_values(
pandas.value_counts(values, normalize=normalize, ascending=False), False
)
df_equals(modin_result, pandas_result)

with warns_that_defaulting_to_pandas():
modin_result = sort_index_for_equal_values(
pd.value_counts(values, bins=bins, ascending=False), False
with pytest.warns(FutureWarning, match=".*pandas.value_counts is deprecated.*"):
with warns_that_defaulting_to_pandas():
modin_result = sort_index_for_equal_values(
pd.value_counts(values, bins=bins, ascending=False), False
)
pandas_result = sort_index_for_equal_values(
pandas.value_counts(values, bins=bins, ascending=False), False
)
pandas_result = sort_index_for_equal_values(
pandas.value_counts(values, bins=bins, ascending=False), False
)
df_equals(modin_result, pandas_result)

modin_result = sort_index_for_equal_values(
pd.value_counts(values, dropna=dropna, ascending=True), True
)
pandas_result = sort_index_for_equal_values(
pandas.value_counts(values, dropna=dropna, ascending=True), True
)
with pytest.warns(FutureWarning, match=".*pandas.value_counts is deprecated.*"):
modin_result = sort_index_for_equal_values(
pd.value_counts(values, dropna=dropna, ascending=True), True
)
pandas_result = sort_index_for_equal_values(
pandas.value_counts(values, dropna=dropna, ascending=True), True
)
df_equals(modin_result, pandas_result)


Expand Down
198 changes: 139 additions & 59 deletions modin/pandas/test/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,12 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import contextlib
import warnings

import numpy as np
import pandas
import pandas._libs.lib as lib
import pytest

import modin.pandas as pd
Expand All @@ -34,7 +38,10 @@
# have too many such instances.
# TODO(https://github.com/modin-project/modin/issues/3655): catch all instances
# of defaulting to pandas.
pytestmark = pytest.mark.filterwarnings(default_to_pandas_ignore_string)
pytestmark = [
pytest.mark.filterwarnings(default_to_pandas_ignore_string),
pytest.mark.filterwarnings("error::FutureWarning"),
]


def create_test_series(vals):
Expand All @@ -47,10 +54,29 @@ def create_test_series(vals):
return modin_series, pandas_series


def catch_rolling_axis_1_future_depr(axis):
    """
    Build a context manager for the ``axis=1`` rolling deprecation warning.

    Parameters
    ----------
    axis : object
        The ``axis`` value that is going to be passed to ``rolling``.

    Returns
    -------
    context manager
        ``pytest.warns`` expecting a ``FutureWarning`` that matches the
        deprecation message when `axis` is ``1`` or ``"columns"``;
        ``contextlib.nullcontext()`` for any other value.
    """
    if axis not in (1, "columns"):
        # No warning is expected here, so hand back a no-op context.
        return contextlib.nullcontext()

    expected_message = (
        ".*Support for axis=1 in DataFrame.rolling is deprecated.*"
        + ".*Use obj.T.rolling.*"
    )
    return pytest.warns(FutureWarning, match=expected_message)


def catch_rolling_numpy_callable_future_depr():
    """
    Build a context manager for the "provided callable" deprecation warning.

    Returns
    -------
    context manager
        ``pytest.warns`` expecting a ``FutureWarning`` whose message matches
        the "The provided callable ... pass ... instead." pattern.
    """
    expected_message = ".*The provided callable.*pass .* instead."
    return pytest.warns(FutureWarning, match=expected_message)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("window", [5, 100])
@pytest.mark.parametrize("min_periods", [None, 5])
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("axis", [lib.no_default, 1])
@pytest.mark.parametrize(
"method, kwargs",
[
Expand All @@ -75,51 +101,73 @@ def test_dataframe_rolling(data, window, min_periods, axis, method, kwargs):
modin_df, pandas_df = create_test_dfs(data)
if window > len(pandas_df):
window = len(pandas_df)

def _callable(df):
with catch_rolling_axis_1_future_depr(axis):
return getattr(
df.rolling(
window=window,
min_periods=min_periods,
win_type=None,
center=True,
axis=axis,
),
method,
)(**kwargs)

eval_general(
modin_df,
pandas_df,
lambda df: getattr(
df.rolling(
window=window,
min_periods=min_periods,
win_type=None,
center=True,
axis=axis,
),
method,
)(**kwargs),
_callable,
)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("window", [5, 100])
@pytest.mark.parametrize("min_periods", [None, 5])
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("axis", [lib.no_default, 1])
def test_dataframe_agg(data, window, min_periods, axis):
modin_df, pandas_df = create_test_dfs(data)
if window > len(pandas_df):
window = len(pandas_df)
modin_rolled = modin_df.rolling(
window=window, min_periods=min_periods, win_type=None, center=True, axis=axis
)
pandas_rolled = pandas_df.rolling(
window=window, min_periods=min_periods, win_type=None, center=True, axis=axis
)
df_equals(pandas_rolled.aggregate(np.sum), modin_rolled.aggregate(np.sum))
with catch_rolling_axis_1_future_depr(axis):
modin_rolled = modin_df.rolling(
window=window,
min_periods=min_periods,
win_type=None,
center=True,
axis=axis,
)
with catch_rolling_axis_1_future_depr(axis):
pandas_rolled = pandas_df.rolling(
window=window,
min_periods=min_periods,
win_type=None,
center=True,
axis=axis,
)
with catch_rolling_numpy_callable_future_depr():
pandas_res = pandas_rolled.aggregate(np.sum)
# FIXME: modin does not emit this warning in this case, so we can't use
# `catch_rolling_numpy_callable_future_depr`.
modin_res = modin_rolled.aggregate(np.sum)
df_equals(pandas_res, modin_res)
# TODO(https://github.com/modin-project/modin/issues/4260): Once pandas
# allows us to rolling aggregate a list of functions over axis 1, test
# that, too.
if axis != 1:
df_equals(
pandas_rolled.aggregate([np.sum, np.mean]),
modin_rolled.aggregate([np.sum, np.mean]),
)
with catch_rolling_numpy_callable_future_depr():
pandas_res = pandas_rolled.aggregate([np.sum, np.mean])
# FIXME: modin does not emit this warning in this case, so we can't use
# `catch_rolling_numpy_callable_future_depr`.
modin_res = modin_rolled.aggregate([np.sum, np.mean])
df_equals(pandas_res, modin_res)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
@pytest.mark.parametrize("window", [5, 100])
@pytest.mark.parametrize("min_periods", [None, 5])
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("axis", [lib.no_default, 1])
@pytest.mark.parametrize(
"method, kwargs",
[
Expand All @@ -134,23 +182,25 @@ def test_dataframe_window(data, window, min_periods, axis, method, kwargs):
modin_df, pandas_df = create_test_dfs(data)
if window > len(pandas_df):
window = len(pandas_df)
eval_general(
modin_df,
pandas_df,
lambda df: getattr(
df.rolling(
window=window,
min_periods=min_periods,
win_type="triang",
center=True,
axis=axis,
),
method,
)(**kwargs),
)

with catch_rolling_axis_1_future_depr(axis):
eval_general(
modin_df,
pandas_df,
lambda df: getattr(
df.rolling(
window=window,
min_periods=min_periods,
win_type="triang",
center=True,
axis=axis,
),
method,
)(**kwargs),
)


@pytest.mark.parametrize("axis", [0, "columns"])
@pytest.mark.parametrize("axis", [lib.no_default, "columns"])
@pytest.mark.parametrize("on", [None, "DateCol"])
@pytest.mark.parametrize("closed", ["both", "right"])
@pytest.mark.parametrize("window", [3, "3s"])
Expand All @@ -159,16 +209,22 @@ def test_dataframe_dt_index(axis, on, closed, window):
data = {"A": range(12), "B": range(12)}
pandas_df = pandas.DataFrame(data, index=index)
modin_df = pd.DataFrame(data, index=index)
if on is not None and axis == 0 and isinstance(window, str):
if on is not None and axis == lib.no_default and isinstance(window, str):
pandas_df[on] = pandas.date_range("22/06/1941", periods=12, freq="T")
modin_df[on] = pd.date_range("22/06/1941", periods=12, freq="T")
else:
on = None
if axis == "columns":
pandas_df = pandas_df.T
modin_df = modin_df.T
pandas_rolled = pandas_df.rolling(window=window, on=on, axis=axis, closed=closed)
modin_rolled = modin_df.rolling(window=window, on=on, axis=axis, closed=closed)

with catch_rolling_axis_1_future_depr(axis):
pandas_rolled = pandas_df.rolling(
window=window, on=on, axis=axis, closed=closed
)
with catch_rolling_axis_1_future_depr(axis):
modin_rolled = modin_df.rolling(window=window, on=on, axis=axis, closed=closed)

if isinstance(window, int):
# These functions are very slow for data from test_rolling
df_equals(
Expand All @@ -181,7 +237,7 @@ def test_dataframe_dt_index(axis, on, closed, window):
df_equals(
modin_rolled.cov(modin_df, False), pandas_rolled.cov(pandas_df, False)
)
if axis == 0:
if axis == lib.no_default:
df_equals(
modin_rolled.cov(modin_df[modin_df.columns[0]], True),
pandas_rolled.cov(pandas_df[pandas_df.columns[0]], True),
Expand All @@ -197,7 +253,14 @@ def test_dataframe_dt_index(axis, on, closed, window):
modin_rolled.apply(np.sum, raw=True),
pandas_rolled.apply(np.sum, raw=True),
)
df_equals(modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum))

# FIXME: modin does not emit this warning in this case, so we can't use
# `catch_rolling_numpy_callable_future_depr`.
modin_res = modin_rolled.aggregate(np.sum)
with catch_rolling_numpy_callable_future_depr():
pandas_res = pandas_rolled.aggregate(np.sum)
df_equals(modin_res, pandas_res)

df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1))


Expand Down Expand Up @@ -230,19 +293,28 @@ def test_series_rolling(data, window, min_periods, method, kwargs):
modin_series, pandas_series = create_test_series(data)
if window > len(pandas_series):
window = len(pandas_series)
eval_general(
modin_series,
pandas_series,
lambda series: getattr(
series.rolling(
window=window,
min_periods=min_periods,
win_type=None,
center=True,
),
method,
)(**kwargs),
)

# FIXME: modin does not emit this warning in this case, so we can't use
# `catch_rolling_numpy_callable_future_depr`.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=".*The provided callable.*pass .* instead.",
category=FutureWarning,
)
eval_general(
modin_series,
pandas_series,
lambda series: getattr(
series.rolling(
window=window,
min_periods=min_periods,
win_type=None,
center=True,
),
method,
)(**kwargs),
)


@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys)
Expand Down Expand Up @@ -312,7 +384,15 @@ def test_series_dt_index(closed):
df_equals(
modin_rolled.apply(np.sum, raw=True), pandas_rolled.apply(np.sum, raw=True)
)
df_equals(modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum))
# FIXME: modin does not emit this warning in this case, so we can't use
# `catch_rolling_numpy_callable_future_depr`.
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message=".*The provided callable.*pass .* instead.",
category=FutureWarning,
)
df_equals(modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum))
df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1))


Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ tag_prefix =
parentdir_prefix = modin-

[tool:pytest]
addopts = --disable-pytest-warnings --cov-config=setup.cfg --cov=modin --cov-append --cov-report=
addopts = --cov-config=setup.cfg --cov=modin --cov-append --cov-report=
xfail_strict=true
markers =
xfail_executions
Expand Down

0 comments on commit e38e846

Please sign in to comment.