From 828d7e554eb40f8d751c27cf46d1d568431c7398 Mon Sep 17 00:00:00 2001 From: Alexey Prutskov Date: Mon, 20 Jul 2020 14:19:32 +0300 Subject: [PATCH] FEAT-#1194 #1283 #1138: Add `Series.rolling`, `DataFrame.rolling` functionality to enable rolling window operations Signed-off-by: Alexey Prutskov --- .github/workflows/ci.yml | 4 + .github/workflows/push.yml | 4 + docs/supported_apis/dataframe_supported.rst | 2 +- docs/supported_apis/series_supported.rst | 2 +- modin/backends/pandas/query_compiler.py | 130 +++++++++ modin/pandas/base.py | 277 +++++++++++++++++++- modin/pandas/test/test_dataframe.py | 5 - modin/pandas/test/test_rolling.py | 187 +++++++++++++ modin/pandas/test/test_series.py | 7 - 9 files changed, 601 insertions(+), 17 deletions(-) create mode 100644 modin/pandas/test/test_rolling.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9da547594df..cdc38768e72 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -112,6 +112,8 @@ jobs: if: matrix.part != 3 - run: python -m pytest modin/pandas/test/test_series.py if: matrix.part == 3 + - run: python -m pytest modin/pandas/test/test_rolling.py + if: matrix.part == 3 - run: python -m pytest modin/pandas/test/test_concat.py if: matrix.part == 3 - run: python -m pytest modin/pandas/test/test_groupby.py @@ -149,6 +151,8 @@ jobs: if: matrix.part != 3 - run: python -m pytest modin/pandas/test/test_series.py if: matrix.part == 3 + - run: python -m pytest modin/pandas/test/test_rolling.py + if: matrix.part == 3 - run: python -m pytest modin/pandas/test/test_concat.py if: matrix.part == 3 - run: python -m pytest modin/pandas/test/test_groupby.py diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 93f199cd914..8c8f38e2f57 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -27,6 +27,8 @@ jobs: if: matrix.part != 3 - run: python -m pytest modin/pandas/test/test_series.py if: matrix.part == 3 + - run: python -m pytest 
modin/pandas/test/test_rolling.py + if: matrix.part == 3 - run: python -m pytest modin/pandas/test/test_concat.py if: matrix.part == 3 - run: python -m pytest modin/pandas/test/test_groupby.py @@ -63,6 +65,8 @@ jobs: if: matrix.part != 3 - run: python -m pytest modin/pandas/test/test_series.py if: matrix.part == 3 + - run: python -m pytest modin/pandas/test/test_rolling.py + if: matrix.part == 3 - run: python -m pytest modin/pandas/test/test_concat.py if: matrix.part == 3 - run: python -m pytest modin/pandas/test/test_groupby.py diff --git a/docs/supported_apis/dataframe_supported.rst b/docs/supported_apis/dataframe_supported.rst index 88edff08374..3c8897857a5 100644 --- a/docs/supported_apis/dataframe_supported.rst +++ b/docs/supported_apis/dataframe_supported.rst @@ -313,7 +313,7 @@ default to pandas. +----------------------------+---------------------------+------------------------+----------------------------------------------------+ | ``rmul`` | `rmul`_ | Y | See ``add`` | +----------------------------+---------------------------+------------------------+----------------------------------------------------+ -| ``rolling`` | `rolling`_ | D | | +| ``rolling`` | `rolling`_ | Y | | +----------------------------+---------------------------+------------------------+----------------------------------------------------+ | ``round`` | `round`_ | Y | | +----------------------------+---------------------------+------------------------+----------------------------------------------------+ diff --git a/docs/supported_apis/series_supported.rst b/docs/supported_apis/series_supported.rst index 83a6fd3924d..962aef16f87 100644 --- a/docs/supported_apis/series_supported.rst +++ b/docs/supported_apis/series_supported.rst @@ -348,7 +348,7 @@ the related section on `Defaulting to pandas`_. 
+-----------------------------+---------------------------------+ | ``rmul`` | Y | +-----------------------------+---------------------------------+ -| ``rolling`` | D | +| ``rolling`` | Y | +-----------------------------+---------------------------------+ | ``round`` | Y | +-----------------------------+---------------------------------+ diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py index 14f12ff1a2c..a93ffceaa18 100644 --- a/modin/backends/pandas/query_compiler.py +++ b/modin/backends/pandas/query_compiler.py @@ -835,6 +835,136 @@ def resample_var(self, resample_args, ddof, *args, **kwargs): def resample_quantile(self, resample_args, q, **kwargs): return self._resample_func(resample_args, "quantile", q=q, **kwargs) + window_mean = FoldFunction.register( + lambda df, rolling_args, *args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).mean(*args, **kwargs) + ) + ) + window_sum = FoldFunction.register( + lambda df, rolling_args, *args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).sum(*args, **kwargs) + ) + ) + window_var = FoldFunction.register( + lambda df, rolling_args, ddof, *args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).var(ddof=ddof, *args, **kwargs) + ) + ) + window_std = FoldFunction.register( + lambda df, rolling_args, ddof, *args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).std(ddof=ddof, *args, **kwargs) + ) + ) + rolling_count = FoldFunction.register( + lambda df, rolling_args: pandas.DataFrame(df.rolling(*rolling_args).count()) + ) + rolling_sum = FoldFunction.register( + lambda df, rolling_args, *args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).sum(*args, **kwargs) + ) + ) + rolling_mean = FoldFunction.register( + lambda df, rolling_args, *args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).mean(*args, **kwargs) + ) + ) + rolling_median = FoldFunction.register( + lambda df, rolling_args, **kwargs: pandas.DataFrame( + 
df.rolling(*rolling_args).median(**kwargs) + ) + ) + rolling_var = FoldFunction.register( + lambda df, rolling_args, ddof, *args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).var(ddof=ddof, *args, **kwargs) + ) + ) + rolling_std = FoldFunction.register( + lambda df, rolling_args, ddof, *args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).std(ddof=ddof, *args, **kwargs) + ) + ) + rolling_min = FoldFunction.register( + lambda df, rolling_args, *args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).min(*args, **kwargs) + ) + ) + rolling_max = FoldFunction.register( + lambda df, rolling_args, *args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).max(*args, **kwargs) + ) + ) + rolling_skew = FoldFunction.register( + lambda df, rolling_args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).skew(**kwargs) + ) + ) + rolling_kurt = FoldFunction.register( + lambda df, rolling_args, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).kurt(**kwargs) + ) + ) + rolling_apply = FoldFunction.register( + lambda df, rolling_args, func, raw, engine, engine_kwargs, args, kwargs: pandas.DataFrame( + df.rolling(*rolling_args).apply( + func=func, + raw=raw, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + kwargs=kwargs, + ) + ) + ) + rolling_quantile = FoldFunction.register( + lambda df, rolling_args, quantile, interpolation, **kwargs: pandas.DataFrame( + df.rolling(*rolling_args).quantile( + quantile=quantile, interpolation=interpolation, **kwargs + ) + ) + ) + + def rolling_corr(self, rolling_args, other, pairwise, *args, **kwargs): + if len(self.columns) > 1: + return self.default_to_pandas( + lambda df: pandas.DataFrame.rolling(df, *rolling_args).corr( + other=other, pairwise=pairwise, *args, **kwargs + ) + ) + else: + return FoldFunction.register( + lambda df: pandas.DataFrame( + df.rolling(*rolling_args).corr( + other=other, pairwise=pairwise, *args, **kwargs + ) + ) + )(self) + + def rolling_cov(self, rolling_args, 
other, pairwise, ddof, **kwargs): + if len(self.columns) > 1: + return self.default_to_pandas( + lambda df: pandas.DataFrame.rolling(df, *rolling_args).cov( + other=other, pairwise=pairwise, ddof=ddof, **kwargs + ) + ) + else: + return FoldFunction.register( + lambda df: pandas.DataFrame( + df.rolling(*rolling_args).cov( + other=other, pairwise=pairwise, ddof=ddof, **kwargs + ) + ) + )(self) + + def rolling_aggregate(self, rolling_args, func, *args, **kwargs): + new_modin_frame = self._modin_frame._apply_full_axis( + 0, + lambda df: pandas.DataFrame( + df.rolling(*rolling_args).aggregate(func=func, *args, **kwargs) + ), + new_index=self.index, + ) + return self.__constructor__(new_modin_frame) + # Map partitions operations # These operations are operations that apply a function to every partition. abs = MapFunction.register(pandas.DataFrame.abs, dtypes="copy") diff --git a/modin/pandas/base.py b/modin/pandas/base.py index cd2cbed248a..bcb6e5987de 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -2336,9 +2336,62 @@ def rolling( axis=0, closed=None, ): - return self._default_to_pandas( - "rolling", - window, + """ + Provide rolling window calculations. + + Parameters + ---------- + window : int, offset, or BaseIndexer subclass + Size of the moving window. This is the number of observations used for + calculating the statistic. Each window will be a fixed size. + If it's an offset then this will be the time period of each window. Each + window will be variable sized based on the observations included in + the time-period. This is only valid for datetimelike indexes. + If a BaseIndexer subclass is passed, calculates the window boundaries + based on the defined ``get_window_bounds`` method. Additional rolling + keyword arguments, namely `min_periods`, `center`, and + `closed` will be passed to `get_window_bounds`. + min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). 
For a window that is specified by an offset, + `min_periods` will default to 1. Otherwise, `min_periods` will default + to the size of the window. + center : bool, default False + Set the labels at the center of the window. + win_type : str, default None + Provide a window type. If ``None``, all points are evenly weighted. + See the notes below for further information. + on : str, optional + For a DataFrame, a datetime-like column or MultiIndex level on which + to calculate the rolling window, rather than the DataFrame's index. + Provided integer column is ignored and excluded from result since + an integer index is not used to calculate the rolling window. + axis : int or str, default 0 + closed : str, default None + Make the interval closed on the 'right', 'left', 'both' or + 'neither' endpoints. + For offset-based windows, it defaults to 'right'. + For fixed windows, defaults to 'both'. Remaining cases not implemented + for fixed windows. + Returns + ------- + a Window or Rolling sub-classed for the particular operation + """ + if win_type is not None: + return Window( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + ) + + return Rolling( + self, + window=window, min_periods=min_periods, center=center, win_type=win_type, @@ -3858,3 +3911,221 @@ def quantile(self, q=0.5, **kwargs): self.resample_args, q, **kwargs ) ) + + +class Window(object): + def __init__( + self, + dataframe, + window, + min_periods=None, + center=False, + win_type=None, + on=None, + axis=0, + closed=None, + ): + self._dataframe = dataframe + self._query_compiler = dataframe._query_compiler + self.window_args = [ + window, + min_periods, + center, + win_type, + on, + axis, + closed, + ] + + def mean(self, *args, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.window_mean( + self.window_args, *args, **kwargs + ) + ) + + def sum(self, *args, **kwargs): + return 
self._dataframe.__constructor__( + query_compiler=self._query_compiler.window_sum( + self.window_args, *args, **kwargs + ) + ) + + def var(self, ddof=1, *args, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.window_var( + self.window_args, ddof, *args, **kwargs + ) + ) + + def std(self, ddof=1, *args, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.window_std( + self.window_args, ddof, *args, **kwargs + ) + ) + + +class Rolling(object): + def __init__( + self, + dataframe, + window, + min_periods=None, + center=False, + win_type=None, + on=None, + axis=0, + closed=None, + ): + self._dataframe = dataframe + self._query_compiler = dataframe._query_compiler + self.rolling_args = [ + window, + min_periods, + center, + win_type, + on, + axis, + closed, + ] + + def count(self): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_count(self.rolling_args) + ) + + def sum(self, *args, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_sum( + self.rolling_args, *args, **kwargs + ) + ) + + def mean(self, *args, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_mean( + self.rolling_args, *args, **kwargs + ) + ) + + def median(self, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_median( + self.rolling_args, **kwargs + ) + ) + + def var(self, ddof=1, *args, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_var( + self.rolling_args, ddof, *args, **kwargs + ) + ) + + def std(self, ddof=1, *args, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_std( + self.rolling_args, ddof, *args, **kwargs + ) + ) + + def min(self, *args, **kwargs): + return self._dataframe.__constructor__( + 
query_compiler=self._query_compiler.rolling_min( + self.rolling_args, *args, **kwargs + ) + ) + + def max(self, *args, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_max( + self.rolling_args, *args, **kwargs + ) + ) + + def corr(self, other=None, pairwise=None, *args, **kwargs): + from .dataframe import DataFrame + from .series import Series + + if isinstance(other, DataFrame): + other = other._query_compiler.to_pandas() + elif isinstance(other, Series): + other = other._query_compiler.to_pandas().squeeze() + + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_corr( + self.rolling_args, other, pairwise, *args, **kwargs + ) + ) + + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + from .dataframe import DataFrame + from .series import Series + + if isinstance(other, DataFrame): + other = other._query_compiler.to_pandas() + elif isinstance(other, Series): + other = other._query_compiler.to_pandas().squeeze() + + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_cov( + self.rolling_args, other, pairwise, ddof, **kwargs + ) + ) + + def skew(self, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_skew( + self.rolling_args, **kwargs + ) + ) + + def kurt(self, **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_kurt( + self.rolling_args, **kwargs + ) + ) + + def apply( + self, + func, + raw=False, + engine="cython", + engine_kwargs=None, + args=None, + kwargs=None, + ): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_apply( + self.rolling_args, func, raw, engine, engine_kwargs, args, kwargs, + ) + ) + + def aggregate( + self, func, *args, **kwargs, + ): + from .dataframe import DataFrame + + dataframe = DataFrame( + query_compiler=self._query_compiler.rolling_aggregate( + self.rolling_args, func, *args, 
**kwargs, + ) + ) + if isinstance(self._dataframe, DataFrame): + return dataframe + elif is_list_like(func): + dataframe.columns = dataframe.columns.droplevel() + return dataframe + else: + return dataframe.squeeze() + + agg = aggregate + + def quantile(self, quantile, interpolation="linear", **kwargs): + return self._dataframe.__constructor__( + query_compiler=self._query_compiler.rolling_quantile( + self.rolling_args, quantile, interpolation, **kwargs + ) + ) diff --git a/modin/pandas/test/test_dataframe.py b/modin/pandas/test/test_dataframe.py index 19dab8c791f..887fa40a5bb 100644 --- a/modin/pandas/test/test_dataframe.py +++ b/modin/pandas/test/test_dataframe.py @@ -2549,11 +2549,6 @@ def test_resample(self, rule, axis, closed, label, on, level): pandas_resampler.aggregate(["sum", "mean", "max"]), ) - def test_rolling(self): - df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]}) - with pytest.warns(UserWarning): - df.rolling(2, win_type="triang") - def test_sem(self): data = test_data_values[0] with pytest.warns(UserWarning): diff --git a/modin/pandas/test/test_rolling.py b/modin/pandas/test/test_rolling.py new file mode 100644 index 00000000000..66193903faf --- /dev/null +++ b/modin/pandas/test/test_rolling.py @@ -0,0 +1,187 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. 
See the License for the specific language +# governing permissions and limitations under the License. + +import pytest +import numpy as np +import pandas +import modin.pandas as pd + +from .utils import df_equals, test_data_values, test_data_keys + +pd.DEFAULT_NPARTITIONS = 4 + + +def create_test_series(vals): + if isinstance(vals, dict): + modin_series = pd.Series(vals[next(iter(vals.keys()))]) + pandas_series = pandas.Series(vals[next(iter(vals.keys()))]) + else: + modin_series = pd.Series(vals) + pandas_series = pandas.Series(vals) + return modin_series, pandas_series + + +@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) +@pytest.mark.parametrize("window", [5, 100]) +@pytest.mark.parametrize("min_periods", [None, 5]) +@pytest.mark.parametrize("win_type", [None, "triang"]) +def test_dataframe(data, window, min_periods, win_type): + modin_df = pd.DataFrame(data) + pandas_df = pandas.DataFrame(data) + pandas_rolled = pandas_df.rolling( + window=window, min_periods=min_periods, win_type=win_type, center=True, + ) + modin_rolled = modin_df.rolling( + window=window, min_periods=min_periods, win_type=win_type, center=True, + ) + # Testing of Window class + if win_type is not None: + df_equals(modin_rolled.mean(), pandas_rolled.mean()) + df_equals(modin_rolled.sum(), pandas_rolled.sum()) + df_equals(modin_rolled.var(ddof=0), pandas_rolled.var(ddof=0)) + df_equals(modin_rolled.std(ddof=0), pandas_rolled.std(ddof=0)) + # Testing of Rolling class + else: + df_equals(modin_rolled.count(), pandas_rolled.count()) + df_equals(modin_rolled.sum(), pandas_rolled.sum()) + df_equals(modin_rolled.mean(), pandas_rolled.mean()) + df_equals(modin_rolled.median(), pandas_rolled.median()) + df_equals(modin_rolled.var(ddof=0), pandas_rolled.var(ddof=0)) + df_equals(modin_rolled.std(ddof=0), pandas_rolled.std(ddof=0)) + df_equals(modin_rolled.min(), pandas_rolled.min()) + df_equals(modin_rolled.max(), pandas_rolled.max()) + df_equals(modin_rolled.skew(), 
pandas_rolled.skew()) + df_equals(modin_rolled.kurt(), pandas_rolled.kurt()) + df_equals(modin_rolled.apply(np.sum), pandas_rolled.apply(np.sum)) + df_equals(modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum)) + df_equals( + modin_rolled.aggregate([np.sum, np.mean]), + pandas_rolled.aggregate([np.sum, np.mean]), + ) + df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1)) + + +@pytest.mark.parametrize("axis", [0, "columns"]) +@pytest.mark.parametrize("on", [None, "DateCol"]) +@pytest.mark.parametrize("closed", ["both", "right"]) +@pytest.mark.parametrize("window", [3, "3s"]) +def test_dataframe_dt_index(axis, on, closed, window): + index = pandas.date_range("31/12/2000", periods=12, freq="T") + data = {"A": range(12), "B": range(12)} + pandas_df = pandas.DataFrame(data, index=index) + modin_df = pd.DataFrame(data, index=index) + if on is not None and axis == 0 and isinstance(window, str): + pandas_df[on] = pandas.date_range("22/06/1941", periods=12, freq="T") + modin_df[on] = pd.date_range("22/06/1941", periods=12, freq="T") + else: + on = None + if axis == "columns": + pandas_df = pandas_df.T + modin_df = modin_df.T + pandas_rolled = pandas_df.rolling(window=window, on=on, axis=axis, closed=closed) + modin_rolled = modin_df.rolling(window=window, on=on, axis=axis, closed=closed) + if isinstance(window, int): + # These functions are very slow for data from test_rolling + df_equals( + modin_rolled.corr(modin_df, True), pandas_rolled.corr(pandas_df, True) + ) + df_equals( + modin_rolled.corr(modin_df, False), pandas_rolled.corr(pandas_df, False) + ) + df_equals(modin_rolled.cov(modin_df, True), pandas_rolled.cov(pandas_df, True)) + df_equals( + modin_rolled.cov(modin_df, False), pandas_rolled.cov(pandas_df, False) + ) + if axis == 0: + df_equals( + modin_rolled.cov(modin_df[modin_df.columns[0]], True), + pandas_rolled.cov(pandas_df[pandas_df.columns[0]], True), + ) + df_equals( + modin_rolled.corr(modin_df[modin_df.columns[0]], True), + 
pandas_rolled.corr(pandas_df[pandas_df.columns[0]], True), + ) + else: + df_equals(modin_rolled.count(), pandas_rolled.count()) + df_equals(modin_rolled.skew(), pandas_rolled.skew()) + df_equals( + modin_rolled.apply(np.sum, raw=True), pandas_rolled.apply(np.sum, raw=True), + ) + df_equals(modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum)) + df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1)) + + +@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) +@pytest.mark.parametrize("window", [5, 100]) +@pytest.mark.parametrize("min_periods", [None, 5]) +@pytest.mark.parametrize("win_type", [None, "triang"]) +def test_series(data, window, min_periods, win_type): + modin_series, pandas_series = create_test_series(data) + + pandas_rolled = pandas_series.rolling( + window=window, min_periods=min_periods, win_type=win_type, center=True, + ) + modin_rolled = modin_series.rolling( + window=window, min_periods=min_periods, win_type=win_type, center=True, + ) + # Testing of Window class + if win_type is not None: + df_equals(modin_rolled.mean(), pandas_rolled.mean()) + df_equals(modin_rolled.sum(), pandas_rolled.sum()) + df_equals(modin_rolled.var(ddof=0), pandas_rolled.var(ddof=0)) + df_equals(modin_rolled.std(ddof=0), pandas_rolled.std(ddof=0)) + # Testing of Rolling class + else: + df_equals(modin_rolled.count(), pandas_rolled.count()) + df_equals(modin_rolled.sum(), pandas_rolled.sum()) + df_equals(modin_rolled.mean(), pandas_rolled.mean()) + df_equals(modin_rolled.median(), pandas_rolled.median()) + df_equals(modin_rolled.var(ddof=0), pandas_rolled.var(ddof=0)) + df_equals(modin_rolled.std(ddof=0), pandas_rolled.std(ddof=0)) + df_equals(modin_rolled.min(), pandas_rolled.min()) + df_equals(modin_rolled.max(), pandas_rolled.max()) + df_equals( + modin_rolled.corr(modin_series), pandas_rolled.corr(pandas_series), + ) + df_equals( + modin_rolled.cov(modin_series, True), pandas_rolled.cov(pandas_series, True) + ) + df_equals( + 
modin_rolled.cov(modin_series, False), + pandas_rolled.cov(pandas_series, False), + ) + df_equals(modin_rolled.skew(), pandas_rolled.skew()) + df_equals(modin_rolled.kurt(), pandas_rolled.kurt()) + df_equals(modin_rolled.apply(np.sum), pandas_rolled.apply(np.sum)) + df_equals(modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum)) + df_equals( + modin_rolled.agg([np.sum, np.mean]), pandas_rolled.agg([np.sum, np.mean]), + ) + df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1)) + + +@pytest.mark.parametrize("closed", ["both", "right"]) +def test_series_dt_index(closed): + index = pandas.date_range("1/1/2000", periods=12, freq="T") + pandas_series = pandas.Series(range(12), index=index) + modin_series = pd.Series(range(12), index=index) + + pandas_rolled = pandas_series.rolling("3s", closed=closed) + modin_rolled = modin_series.rolling("3s", closed=closed) + df_equals(modin_rolled.count(), pandas_rolled.count()) + df_equals(modin_rolled.skew(), pandas_rolled.skew()) + df_equals( + modin_rolled.apply(np.sum, raw=True), pandas_rolled.apply(np.sum, raw=True) + ) + df_equals(modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum)) + df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1)) diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py index 6eb3169f559..87e4ba4d108 100644 --- a/modin/pandas/test/test_series.py +++ b/modin/pandas/test/test_series.py @@ -2448,13 +2448,6 @@ def test_rmul(data): inter_df_math_helper(modin_series, pandas_series, "rmul") -@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) -def test_rolling(data): - modin_series, _ = create_test_series(data) # noqa: F841 - with pytest.warns(UserWarning): - modin_series.rolling(10) - - @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) def test_round(data): modin_series, pandas_series = create_test_series(data)