From 611df5f49f6e3a74563afc498a9479d777782654 Mon Sep 17 00:00:00 2001 From: "Igoshev, Yaroslav" Date: Thu, 5 Nov 2020 12:23:27 +0300 Subject: [PATCH] FIX-#2369: Update pandas version to 1.1.4 Signed-off-by: Igoshev, Yaroslav --- environment.yml | 2 +- modin/engines/base/io/file_reader.py | 4 +- modin/engines/dask/task_wrapper.py | 4 +- .../engines/pandas_on_ray/io_exp.py | 2 +- .../pyarrow_on_ray/frame/axis_partition.py | 6 +- modin/pandas/__init__.py | 2 +- modin/pandas/base.py | 6 -- modin/pandas/test/dataframe/test_binary.py | 16 +--- modin/pandas/test/dataframe/test_udf.py | 28 ++----- modin/pandas/test/test_series.py | 84 ++++++------------- requirements.txt | 2 +- requirements/env_omnisci.yml | 2 +- setup.py | 2 +- 13 files changed, 46 insertions(+), 114 deletions(-) diff --git a/environment.yml b/environment.yml index 77688620c0e..559f74ccf12 100644 --- a/environment.yml +++ b/environment.yml @@ -2,7 +2,7 @@ name: modin channels: - conda-forge dependencies: - - pandas==1.1.3 + - pandas==1.1.4 - numpy - pyarrow==1.0 - dask[complete]>=2.12.0,<=2.19.0 diff --git a/modin/engines/base/io/file_reader.py b/modin/engines/base/io/file_reader.py index 8a8ea6bd1ef..879444881cf 100644 --- a/modin/engines/base/io/file_reader.py +++ b/modin/engines/base/io/file_reader.py @@ -137,10 +137,10 @@ def file_exists(cls, file_path): return os.path.exists(file_path) @classmethod - def deploy(cls, func, args, num_return_vals): + def deploy(cls, func, args, num_returns): raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE) - def parse(self, func, args, num_return_vals): + def parse(self, func, args, num_returns): raise NotImplementedError(NOT_IMPLEMENTED_MESSAGE) @classmethod diff --git a/modin/engines/dask/task_wrapper.py b/modin/engines/dask/task_wrapper.py index 04e5ed2a3b9..af717625afe 100644 --- a/modin/engines/dask/task_wrapper.py +++ b/modin/engines/dask/task_wrapper.py @@ -16,12 +16,12 @@ class DaskTask: @classmethod - def deploy(cls, func, num_return_vals, kwargs): + def deploy(cls, func, num_returns, kwargs): client = _get_global_client() remote_task_future = client.submit(func, **kwargs) return [ client.submit(lambda l, i: l[i], remote_task_future, i) - for i in range(num_return_vals) + for i in range(num_returns) ] @classmethod diff --git a/modin/experimental/engines/pandas_on_ray/io_exp.py b/modin/experimental/engines/pandas_on_ray/io_exp.py index c093e93708c..38b8170445f 100644 --- a/modin/experimental/engines/pandas_on_ray/io_exp.py +++ b/modin/experimental/engines/pandas_on_ray/io_exp.py @@ -148,7 +148,7 @@ def read_sql( columns, chunksize, ), - num_return_vals=num_splits + 1, + num_returns=num_splits + 1, ) partition_ids.append( [PandasOnRayFramePartition(obj) for obj in partition_id[:-1]] diff --git a/modin/experimental/engines/pyarrow_on_ray/frame/axis_partition.py b/modin/experimental/engines/pyarrow_on_ray/frame/axis_partition.py index be82e790e7b..b7cdb2eaa94 100644 --- a/modin/experimental/engines/pyarrow_on_ray/frame/axis_partition.py +++ b/modin/experimental/engines/pyarrow_on_ray/frame/axis_partition.py @@ -46,7 +46,7 @@ def apply(self, func, num_splits=None, other_axis_partition=None, **kwargs): for obj in deploy_ray_func_between_two_axis_partitions._remote( args=(self.axis, func, num_splits, len(self.list_of_blocks), kwargs) + tuple(self.list_of_blocks + other_axis_partition.list_of_blocks), - num_return_vals=num_splits, + num_returns=num_splits, ) ] @@ -54,7 +54,7 @@ def apply(self, func, num_splits=None, other_axis_partition=None, **kwargs): args.extend(self.list_of_blocks) return [ PyarrowOnRayFramePartition(obj) - for obj in deploy_ray_axis_func._remote(args, num_return_vals=num_splits) + for obj in deploy_ray_axis_func._remote(args, num_returns=num_splits) ] def shuffle(self, func, num_splits=None, **kwargs): @@ -74,7 +74,7 @@ def shuffle(self, func, num_splits=None, **kwargs): args.extend(self.list_of_blocks) return [ PyarrowOnRayFramePartition(obj) - for obj in deploy_ray_axis_func._remote(args, num_return_vals=num_splits) + for obj in deploy_ray_axis_func._remote(args, num_returns=num_splits) ] diff --git a/modin/pandas/__init__.py b/modin/pandas/__init__.py index 5ddc9c33c07..7979784d023 100644 --- a/modin/pandas/__init__.py +++ b/modin/pandas/__init__.py @@ -13,7 +13,7 @@ import pandas -__pandas_version__ = "1.1.3" +__pandas_version__ = "1.1.4" if pandas.__version__ != __pandas_version__: import warnings diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 73d3043d38c..e300ea15f96 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -488,9 +488,6 @@ def add(self, other, axis="columns", level=None, fill_value=None): ) def aggregate(self, func=None, axis=0, *args, **kwargs): - warnings.warn( - "Modin index may not match pandas index due to pandas issue pandas-dev/pandas#36189." - ) axis = self._get_axis_number(axis) result = None @@ -686,9 +683,6 @@ def apply( args=(), **kwds, ): - warnings.warn( - "Modin index may not match pandas index due to pandas issue pandas-dev/pandas#36189." - ) axis = self._get_axis_number(axis) ErrorMessage.non_verified_udf() if isinstance(func, str): diff --git a/modin/pandas/test/dataframe/test_binary.py b/modin/pandas/test/dataframe/test_binary.py index 5346c0e8932..a4449798c12 100644 --- a/modin/pandas/test/dataframe/test_binary.py +++ b/modin/pandas/test/dataframe/test_binary.py @@ -135,7 +135,7 @@ def test_math_alias(math_op, alias): assert getattr(pd.DataFrame, math_op) == getattr(pd.DataFrame, alias) -@pytest.mark.parametrize("other", ["as_left", 4, 4.0]) +@pytest.mark.parametrize("other", ["as_left", 4, 4.0, "a"]) @pytest.mark.parametrize("op", ["eq", "ge", "gt", "le", "lt", "ne"]) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) def test_comparison(data, op, other): @@ -145,20 +145,6 @@ def test_comparison(data, op, other): ) -@pytest.mark.xfail_backends( - ["BaseOnPython"], - reason="Test is failing because of mismathing of thrown exceptions. See pandas issue #36377", -) -@pytest.mark.parametrize("other", ["a"]) -@pytest.mark.parametrize("op", ["ge", "gt", "le", "lt", "eq", "ne"]) -@pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) -def test_comparison_except(data, op, other): - eval_general( - *create_test_dfs(data), - lambda df: getattr(df, op)(other), - ) - - @pytest.mark.parametrize("op", ["eq", "ge", "gt", "le", "lt", "ne"]) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) def test_multi_level_comparison(data, op): diff --git a/modin/pandas/test/dataframe/test_udf.py b/modin/pandas/test/dataframe/test_udf.py index 4b39cf7cd22..651feab1e40 100644 --- a/modin/pandas/test/dataframe/test_udf.py +++ b/modin/pandas/test/dataframe/test_udf.py @@ -49,16 +49,10 @@ ) @pytest.mark.parametrize("op", ["agg", "apply"]) def test_agg_apply(axis, func, op): - # AssertionError may be arisen in case of - # mismathing of index/columns in Modin and pandas. - # See details in pandas issue 36189. - try: - eval_general( - *create_test_dfs(test_data["float_nan_data"]), - lambda df: getattr(df, op)(func, axis), - ) - except AssertionError: - pass + eval_general( + *create_test_dfs(test_data["float_nan_data"]), + lambda df: getattr(df, op)(func, axis), + ) @pytest.mark.parametrize("axis", ["rows", "columns"]) @@ -69,16 +63,10 @@ def test_agg_apply(axis, func, op): ) @pytest.mark.parametrize("op", ["agg", "apply"]) def test_agg_apply_axis_names(axis, func, op): - # AssertionError may be arisen in case of - # mismathing of index/columns in Modin and pandas. - # See details in pandas issue 36189. - try: - eval_general( - *create_test_dfs(test_data["int_data"]), - lambda df: getattr(df, op)(func, axis), - ) - except AssertionError: - pass + eval_general( + *create_test_dfs(test_data["int_data"]), + lambda df: getattr(df, op)(func, axis), + ) def test_aggregate_alias(): diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py index d524537148d..029b6e2676e 100644 --- a/modin/pandas/test/test_series.py +++ b/modin/pandas/test/test_series.py @@ -593,16 +593,10 @@ def test_add_suffix(data): @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @pytest.mark.parametrize("func", agg_func_values, ids=agg_func_keys) def test_agg(data, func): - # AssertionError may be arisen in case of - # mismathing of index/columns in Modin and pandas. - # See details in pandas issue 36189. - try: - eval_general( - *create_test_series(data), - lambda df: df.agg(func), - ) - except AssertionError: - pass + eval_general( + *create_test_series(data), + lambda df: df.agg(func), + ) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @@ -624,16 +618,10 @@ def test_agg_numeric(request, data, func): request.node.name, numeric_dfs ): axis = 0 - # AssertionError may be arisen in case of - # mismathing of index/columns in Modin and pandas. - # See details in pandas issue 36189. - try: - eval_general( - *create_test_series(data), - lambda df: df.agg(func, axis), - ) - except AssertionError: - pass + eval_general( + *create_test_series(data), + lambda df: df.agg(func, axis), + ) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @@ -656,16 +644,10 @@ def test_agg_numeric_except(request, data, func): @pytest.mark.parametrize("func", agg_func_values, ids=agg_func_keys) def test_aggregate(data, func): axis = 0 - # AssertionError may be arisen in case of - # mismathing of index/columns in Modin and pandas. - # See details in pandas issue 36189. - try: - eval_general( - *create_test_series(data), - lambda df: df.aggregate(func, axis), - ) - except AssertionError: - pass + eval_general( + *create_test_series(data), + lambda df: df.aggregate(func, axis), + ) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @@ -688,16 +670,10 @@ def test_aggregate_numeric(request, data, func): request.node.name, numeric_dfs ): axis = 0 - # AssertionError may be arisen in case of - # mismathing of index/columns in Modin and pandas. - # See details in pandas issue 36189. - try: - eval_general( - *create_test_series(data), - lambda df: df.agg(func, axis), - ) - except AssertionError: - pass + eval_general( + *create_test_series(data), + lambda df: df.agg(func, axis), + ) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @@ -823,16 +799,10 @@ def test_append(data): @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @pytest.mark.parametrize("func", agg_func_values, ids=agg_func_keys) def test_apply(data, func): - # AssertionError may be arisen in case of - # mismathing of index/columns in Modin and pandas. - # See details in pandas issue 36189. - try: - eval_general( - *create_test_series(data), - lambda df: df.apply(func), - ) - except AssertionError: - pass + eval_general( + *create_test_series(data), + lambda df: df.apply(func), + ) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) @@ -871,16 +841,10 @@ def test_apply_external_lib(): @pytest.mark.parametrize("func", agg_func_values, ids=agg_func_keys) def test_apply_numeric(request, data, func): if name_contains(request.node.name, numeric_dfs): - # AssertionError may be arisen in case of - # mismathing of index/columns in Modin and pandas. - # See details in pandas issue 36189. - try: - eval_general( - *create_test_series(data), - lambda df: df.apply(func), - ) - except AssertionError: - pass + eval_general( + *create_test_series(data), + lambda df: df.apply(func), + ) @pytest.mark.parametrize("data", test_data_values, ids=test_data_keys) diff --git a/requirements.txt b/requirements.txt index c60bf62d8f0..a3183876ef4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -pandas==1.1.3 +pandas==1.1.4 numpy pyarrow==1.0 dask[complete]>=2.12.0,<=2.19.0 diff --git a/requirements/env_omnisci.yml b/requirements/env_omnisci.yml index eabd27089de..e8432f00898 100644 --- a/requirements/env_omnisci.yml +++ b/requirements/env_omnisci.yml @@ -3,7 +3,7 @@ channels: - intel/label/modin - conda-forge dependencies: - - pandas==1.1.3 + - pandas==1.1.4 - pyarrow==1.0 - numpy - pip diff --git a/setup.py b/setup.py index ed1dc9e43f6..b5bbd4a5cc8 100644 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ def is_pure(self): url="https://github.com/modin-project/modin", long_description=long_description, long_description_content_type="text/markdown", - install_requires=["pandas==1.1.3", "packaging"], + install_requires=["pandas==1.1.4", "packaging"], extras_require={ # can be installed by pip install modin[dask] "dask": dask_deps,