diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py index b6af408951a..c948401c1bb 100644 --- a/modin/backends/pandas/query_compiler.py +++ b/modin/backends/pandas/query_compiler.py @@ -23,7 +23,7 @@ from modin.backends.base.query_compiler import BaseQueryCompiler from modin.error_message import ErrorMessage -from modin.pandas.utils import try_cast_to_pandas, wrap_udf_function +from modin.utils import try_cast_to_pandas, wrap_udf_function from modin.data_management.functions import ( FoldFunction, MapFunction, diff --git a/modin/data_management/functions/groupby_function.py b/modin/data_management/functions/groupby_function.py index 6312e8f56c3..c645c000c27 100644 --- a/modin/data_management/functions/groupby_function.py +++ b/modin/data_management/functions/groupby_function.py @@ -14,7 +14,7 @@ import pandas from .mapreducefunction import MapReduceFunction -from modin.pandas.utils import try_cast_to_pandas +from modin.utils import try_cast_to_pandas class GroupbyReduceFunction(MapReduceFunction): diff --git a/modin/pandas/base.py b/modin/pandas/base.py index 542545fad5d..225636552fd 100644 --- a/modin/pandas/base.py +++ b/modin/pandas/base.py @@ -31,8 +31,9 @@ import warnings import pickle as pkl +from modin.utils import try_cast_to_pandas from modin.error_message import ErrorMessage -from modin.pandas.utils import try_cast_to_pandas, is_scalar +from modin.pandas.utils import is_scalar # Similar to pandas, sentinel value to use as kwarg in place of None when None has # special meaning and needs to be distinguished from a user explicitly passing None. diff --git a/modin/pandas/dataframe.py b/modin/pandas/dataframe.py index 1b4655ad285..cc87678e467 100644 --- a/modin/pandas/dataframe.py +++ b/modin/pandas/dataframe.py @@ -33,12 +33,10 @@ import warnings from modin.error_message import ErrorMessage +from modin.utils import _inherit_docstrings, to_pandas, hashable from .utils import ( from_pandas, from_non_pandas, - to_pandas, - _inherit_docstrings, - hashable, ) from .iterator import PartitionIterator from .series import Series diff --git a/modin/pandas/general.py b/modin/pandas/general.py index c8b635ef4f2..18675307201 100644 --- a/modin/pandas/general.py +++ b/modin/pandas/general.py @@ -17,7 +17,7 @@ from .base import BasePandasDataset from .dataframe import DataFrame from .series import Series -from .utils import to_pandas +from modin.utils import to_pandas def isna(obj): diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index 565fd2b3929..e5fca25ec71 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -18,8 +18,7 @@ import pandas.core.common as com from modin.error_message import ErrorMessage - -from .utils import _inherit_docstrings, wrap_udf_function, try_cast_to_pandas +from modin.utils import _inherit_docstrings, wrap_udf_function, try_cast_to_pandas from .series import Series diff --git a/modin/pandas/io.py b/modin/pandas/io.py index f8dceb5db8c..85b644e8119 100644 --- a/modin/pandas/io.py +++ b/modin/pandas/io.py @@ -485,7 +485,7 @@ def return_handler(*args, **kwargs): A Modin DataFrame in place of a pandas DataFrame, or the same return type as pandas.ExcelFile. """ - from .utils import to_pandas + from modin.utils import to_pandas # We don't want to constantly be giving this error message for # internal methods. @@ -554,7 +554,7 @@ def return_handler(*args, **kwargs): A Modin DataFrame in place of a pandas DataFrame, or the same return type as pandas.HDFStore. """ - from .utils import to_pandas + from modin.utils import to_pandas # We don't want to constantly be giving this error message for # internal methods. diff --git a/modin/pandas/plotting.py b/modin/pandas/plotting.py index 3ba9af5d7d4..36102b13c67 100644 --- a/modin/pandas/plotting.py +++ b/modin/pandas/plotting.py @@ -13,7 +13,7 @@ from pandas import plotting as pdplot -from .utils import to_pandas +from modin.utils import to_pandas from .dataframe import DataFrame diff --git a/modin/pandas/reshape.py b/modin/pandas/reshape.py index 5423f4dc9b7..99b9005df4e 100644 --- a/modin/pandas/reshape.py +++ b/modin/pandas/reshape.py @@ -15,7 +15,7 @@ from .dataframe import DataFrame from .series import Series -from .utils import to_pandas +from modin.utils import to_pandas from modin.error_message import ErrorMessage diff --git a/modin/pandas/series.py b/modin/pandas/series.py index 556d26cab53..2901b8af1c7 100644 --- a/modin/pandas/series.py +++ b/modin/pandas/series.py @@ -24,10 +24,10 @@ import sys import warnings +from modin.utils import _inherit_docstrings, to_pandas from .base import BasePandasDataset from .iterator import PartitionIterator -from .utils import _inherit_docstrings -from .utils import from_pandas, to_pandas, is_scalar +from .utils import from_pandas, is_scalar if sys.version_info[0] == 3 and sys.version_info[1] >= 7: # Python >= 3.7 diff --git a/modin/pandas/test/dataframe/test_default.py b/modin/pandas/test/dataframe/test_default.py index ddb6bc080d1..ca715883b76 100644 --- a/modin/pandas/test/dataframe/test_default.py +++ b/modin/pandas/test/dataframe/test_default.py @@ -17,7 +17,7 @@ import os import matplotlib import modin.pandas as pd -from modin.pandas.utils import to_pandas +from modin.utils import to_pandas from numpy.testing import assert_array_equal import io diff --git a/modin/pandas/test/dataframe/test_join_sort.py b/modin/pandas/test/dataframe/test_join_sort.py index 7e1edf022dc..c281f2b9db9 100644 --- a/modin/pandas/test/dataframe/test_join_sort.py +++ b/modin/pandas/test/dataframe/test_join_sort.py @@ -16,7 +16,7 @@ import pandas import matplotlib import modin.pandas as pd -from modin.pandas.utils import to_pandas +from modin.utils import to_pandas from modin.pandas.test.utils import ( random_state, diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py index 329e1258e2b..c179a49c456 100644 --- a/modin/pandas/test/test_groupby.py +++ b/modin/pandas/test/test_groupby.py @@ -15,7 +15,8 @@ import pandas import numpy as np import modin.pandas as pd -from modin.pandas.utils import from_pandas, to_pandas, try_cast_to_pandas +from modin.utils import try_cast_to_pandas, to_pandas +from modin.pandas.utils import from_pandas from .utils import ( df_equals, check_df_columns_have_nans, diff --git a/modin/pandas/test/test_io.py b/modin/pandas/test/test_io.py index 5bc1207b733..ca66d8a6ba5 100644 --- a/modin/pandas/test/test_io.py +++ b/modin/pandas/test/test_io.py @@ -16,7 +16,8 @@ import pandas from pandas.errors import ParserWarning from collections import OrderedDict -from modin.pandas.utils import to_pandas, from_arrow +from modin.utils import to_pandas +from modin.pandas.utils import from_arrow from pathlib import Path import pyarrow as pa import pyarrow.parquet as pq diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py index 5546367b56f..8c1b4d4517d 100644 --- a/modin/pandas/test/test_series.py +++ b/modin/pandas/test/test_series.py @@ -20,7 +20,7 @@ from numpy.testing import assert_array_equal import sys -from modin.pandas.utils import to_pandas +from modin.utils import to_pandas from .utils import ( random_state, RAND_LOW, diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py index 65224e51661..80f03da9c02 100644 --- a/modin/pandas/test/utils.py +++ b/modin/pandas/test/utils.py @@ -20,7 +20,7 @@ assert_categorical_equal, ) import modin.pandas as pd -from modin.pandas.utils import to_pandas +from modin.utils import to_pandas from io import BytesIO random_state = np.random.RandomState(seed=42) diff --git a/modin/pandas/utils.py b/modin/pandas/utils.py index e21c7617dd5..25461db481e 100644 --- a/modin/pandas/utils.py +++ b/modin/pandas/utils.py @@ -11,8 +11,6 @@ # ANY KIND, either express or implied. See the License for the specific language # governing permissions and limitations under the License. -import pandas - def from_non_pandas(df, index, columns, dtype): from modin.data_management.dispatcher import EngineDispatcher @@ -58,105 +56,6 @@ def from_arrow(at): return DataFrame(query_compiler=EngineDispatcher.from_arrow(at)) -def to_pandas(modin_obj): - """Converts a Modin DataFrame/Series to a pandas DataFrame/Series. - - Args: - obj {modin.DataFrame, modin.Series}: The Modin DataFrame/Series to convert. - - Returns: - A new pandas DataFrame or Series. - """ - return modin_obj._to_pandas() - - -def _inherit_docstrings(parent, excluded=[]): - """Creates a decorator which overwrites a decorated class' __doc__ - attribute with parent's __doc__ attribute. Also overwrites __doc__ of - methods and properties defined in the class with the __doc__ of matching - methods and properties in parent. - - Args: - parent (object): Class from which the decorated class inherits __doc__. - excluded (list): List of parent objects from which the class does not - inherit docstrings. - - Returns: - function: decorator which replaces the decorated class' documentation - parent's documentation. - """ - - def decorator(cls): - if parent not in excluded: - cls.__doc__ = parent.__doc__ - for attr, obj in cls.__dict__.items(): - parent_obj = getattr(parent, attr, None) - if parent_obj in excluded or ( - not callable(parent_obj) and not isinstance(parent_obj, property) - ): - continue - if callable(obj): - obj.__doc__ = parent_obj.__doc__ - elif isinstance(obj, property) and obj.fget is not None: - p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__) - setattr(cls, attr, p) - return cls - - return decorator - - -def try_cast_to_pandas(obj): - """ - Converts obj and all nested objects from modin to pandas if it is possible, - otherwise returns obj - - Parameters - ---------- - obj : object, - object to convert from modin to pandas - - Returns - ------- - Converted object - """ - if hasattr(obj, "_to_pandas"): - return obj._to_pandas() - if isinstance(obj, (list, tuple)): - return type(obj)([try_cast_to_pandas(o) for o in obj]) - if isinstance(obj, dict): - return {k: try_cast_to_pandas(v) for k, v in obj.items()} - if callable(obj): - module_hierarchy = getattr(obj, "__module__", "").split(".") - fn_name = getattr(obj, "__name__", None) - if fn_name and module_hierarchy[0] == "modin": - return ( - getattr(pandas.DataFrame, fn_name, obj) - if module_hierarchy[-1] == "dataframe" - else getattr(pandas.Series, fn_name, obj) - ) - return obj - - -def wrap_udf_function(func): - def wrapper(*args, **kwargs): - result = func(*args, **kwargs) - # if user accidently returns modin DataFrame or Series - # casting it back to pandas to properly process - return try_cast_to_pandas(result) - - wrapper.__name__ = func.__name__ - return wrapper - - -def hashable(obj): - """Return whether the object is hashable.""" - try: - hash(obj) - except TypeError: - return False - return True - - def is_scalar(obj): """ Return True if given object is scalar. diff --git a/modin/utils.py b/modin/utils.py new file mode 100644 index 00000000000..ae87964ec37 --- /dev/null +++ b/modin/utils.py @@ -0,0 +1,113 @@ +# Licensed to Modin Development Team under one or more contributor license agreements. +# See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The Modin Development Team licenses this file to you under the +# Apache License, Version 2.0 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under +# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific language +# governing permissions and limitations under the License. + +import pandas + + +def _inherit_docstrings(parent, excluded=[]): + """Creates a decorator which overwrites a decorated class' __doc__ + attribute with parent's __doc__ attribute. Also overwrites __doc__ of + methods and properties defined in the class with the __doc__ of matching + methods and properties in parent. + + Args: + parent (object): Class from which the decorated class inherits __doc__. + excluded (list): List of parent objects from which the class does not + inherit docstrings. + + Returns: + function: decorator which replaces the decorated class' documentation + parent's documentation. + """ + + def decorator(cls): + if parent not in excluded: + cls.__doc__ = parent.__doc__ + for attr, obj in cls.__dict__.items(): + parent_obj = getattr(parent, attr, None) + if parent_obj in excluded or ( + not callable(parent_obj) and not isinstance(parent_obj, property) + ): + continue + if callable(obj): + obj.__doc__ = parent_obj.__doc__ + elif isinstance(obj, property) and obj.fget is not None: + p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__) + setattr(cls, attr, p) + return cls + + return decorator + + +def to_pandas(modin_obj): + """Converts a Modin DataFrame/Series to a pandas DataFrame/Series. + + Args: + obj {modin.DataFrame, modin.Series}: The Modin DataFrame/Series to convert. + + Returns: + A new pandas DataFrame or Series. + """ + return modin_obj._to_pandas() + + +def hashable(obj): + """Return whether the object is hashable.""" + try: + hash(obj) + except TypeError: + return False + return True + + +def try_cast_to_pandas(obj): + """ + Converts obj and all nested objects from modin to pandas if it is possible, + otherwise returns obj + + Parameters + ---------- + obj : object, + object to convert from modin to pandas + + Returns + ------- + Converted object + """ + if hasattr(obj, "_to_pandas"): + return obj._to_pandas() + if isinstance(obj, (list, tuple)): + return type(obj)([try_cast_to_pandas(o) for o in obj]) + if isinstance(obj, dict): + return {k: try_cast_to_pandas(v) for k, v in obj.items()} + if callable(obj): + module_hierarchy = getattr(obj, "__module__", "").split(".") + fn_name = getattr(obj, "__name__", None) + if fn_name and module_hierarchy[0] == "modin": + return ( + getattr(pandas.DataFrame, fn_name, obj) + if module_hierarchy[-1] == "dataframe" + else getattr(pandas.Series, fn_name, obj) + ) + return obj + + +def wrap_udf_function(func): + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + # if user accidently returns modin DataFrame or Series + # casting it back to pandas to properly process + return try_cast_to_pandas(result) + + wrapper.__name__ = func.__name__ + return wrapper