Skip to content

Commit

Permalink
FIX-modin-project#2054: Moved non-dependent on modin.DataFrame utils …
Browse files Browse the repository at this point in the history
…to modin/utils.py (modin-project#2055)

Signed-off-by: Dmitry Chigarev <[email protected]>
  • Loading branch information
dchigarev authored and aregm committed Sep 16, 2020
1 parent 1e6bbde commit 90e403e
Show file tree
Hide file tree
Showing 18 changed files with 134 additions and 122 deletions.
2 changes: 1 addition & 1 deletion modin/backends/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from modin.backends.base.query_compiler import BaseQueryCompiler
from modin.error_message import ErrorMessage
from modin.pandas.utils import try_cast_to_pandas, wrap_udf_function
from modin.utils import try_cast_to_pandas, wrap_udf_function
from modin.data_management.functions import (
FoldFunction,
MapFunction,
Expand Down
2 changes: 1 addition & 1 deletion modin/data_management/functions/groupby_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import pandas

from .mapreducefunction import MapReduceFunction
from modin.pandas.utils import try_cast_to_pandas
from modin.utils import try_cast_to_pandas


class GroupbyReduceFunction(MapReduceFunction):
Expand Down
3 changes: 2 additions & 1 deletion modin/pandas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@
import warnings
import pickle as pkl

from modin.utils import try_cast_to_pandas
from modin.error_message import ErrorMessage
from modin.pandas.utils import try_cast_to_pandas, is_scalar
from modin.pandas.utils import is_scalar

# Similar to pandas, sentinel value to use as kwarg in place of None when None has
# special meaning and needs to be distinguished from a user explicitly passing None.
Expand Down
4 changes: 1 addition & 3 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,10 @@
import warnings

from modin.error_message import ErrorMessage
from modin.utils import _inherit_docstrings, to_pandas, hashable
from .utils import (
from_pandas,
from_non_pandas,
to_pandas,
_inherit_docstrings,
hashable,
)
from .iterator import PartitionIterator
from .series import Series
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from .base import BasePandasDataset
from .dataframe import DataFrame
from .series import Series
from .utils import to_pandas
from modin.utils import to_pandas


def isna(obj):
Expand Down
3 changes: 1 addition & 2 deletions modin/pandas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
import pandas.core.common as com

from modin.error_message import ErrorMessage

from .utils import _inherit_docstrings, wrap_udf_function, try_cast_to_pandas
from modin.utils import _inherit_docstrings, wrap_udf_function, try_cast_to_pandas
from .series import Series


Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ def return_handler(*args, **kwargs):
A Modin DataFrame in place of a pandas DataFrame, or the same
return type as pandas.ExcelFile.
"""
from .utils import to_pandas
from modin.utils import to_pandas

# We don't want to constantly be giving this error message for
# internal methods.
Expand Down Expand Up @@ -554,7 +554,7 @@ def return_handler(*args, **kwargs):
A Modin DataFrame in place of a pandas DataFrame, or the same
return type as pandas.HDFStore.
"""
from .utils import to_pandas
from modin.utils import to_pandas

# We don't want to constantly be giving this error message for
# internal methods.
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from pandas import plotting as pdplot

from .utils import to_pandas
from modin.utils import to_pandas
from .dataframe import DataFrame


Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from .dataframe import DataFrame
from .series import Series
from .utils import to_pandas
from modin.utils import to_pandas
from modin.error_message import ErrorMessage


Expand Down
4 changes: 2 additions & 2 deletions modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
import sys
import warnings

from modin.utils import _inherit_docstrings, to_pandas
from .base import BasePandasDataset
from .iterator import PartitionIterator
from .utils import _inherit_docstrings
from .utils import from_pandas, to_pandas, is_scalar
from .utils import from_pandas, is_scalar

if sys.version_info[0] == 3 and sys.version_info[1] >= 7:
# Python >= 3.7
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/test/dataframe/test_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import os
import matplotlib
import modin.pandas as pd
from modin.pandas.utils import to_pandas
from modin.utils import to_pandas
from numpy.testing import assert_array_equal
import io

Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/test/dataframe/test_join_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import pandas
import matplotlib
import modin.pandas as pd
from modin.pandas.utils import to_pandas
from modin.utils import to_pandas

from modin.pandas.test.utils import (
random_state,
Expand Down
3 changes: 2 additions & 1 deletion modin/pandas/test/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
import pandas
import numpy as np
import modin.pandas as pd
from modin.pandas.utils import from_pandas, to_pandas, try_cast_to_pandas
from modin.utils import try_cast_to_pandas, to_pandas
from modin.pandas.utils import from_pandas
from .utils import (
df_equals,
check_df_columns_have_nans,
Expand Down
3 changes: 2 additions & 1 deletion modin/pandas/test/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
import pandas
from pandas.errors import ParserWarning
from collections import OrderedDict
from modin.pandas.utils import to_pandas, from_arrow
from modin.utils import to_pandas
from modin.pandas.utils import from_arrow
from pathlib import Path
import pyarrow as pa
import pyarrow.parquet as pq
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/test/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from numpy.testing import assert_array_equal
import sys

from modin.pandas.utils import to_pandas
from modin.utils import to_pandas
from .utils import (
random_state,
RAND_LOW,
Expand Down
2 changes: 1 addition & 1 deletion modin/pandas/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
assert_categorical_equal,
)
import modin.pandas as pd
from modin.pandas.utils import to_pandas
from modin.utils import to_pandas
from io import BytesIO

random_state = np.random.RandomState(seed=42)
Expand Down
101 changes: 0 additions & 101 deletions modin/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pandas


def from_non_pandas(df, index, columns, dtype):
from modin.data_management.dispatcher import EngineDispatcher
Expand Down Expand Up @@ -58,105 +56,6 @@ def from_arrow(at):
return DataFrame(query_compiler=EngineDispatcher.from_arrow(at))


def to_pandas(modin_obj):
    """Convert a Modin DataFrame/Series to a pandas DataFrame/Series.

    Args:
        modin_obj {modin.DataFrame, modin.Series}: The Modin DataFrame/Series
            to convert. It must expose a ``_to_pandas()`` method.

    Returns:
        Whatever ``modin_obj._to_pandas()`` returns (a pandas DataFrame or
        Series for Modin objects).
    """
    pandas_obj = modin_obj._to_pandas()
    return pandas_obj


def _inherit_docstrings(parent, excluded=[]):
"""Creates a decorator which overwrites a decorated class' __doc__
attribute with parent's __doc__ attribute. Also overwrites __doc__ of
methods and properties defined in the class with the __doc__ of matching
methods and properties in parent.
Args:
parent (object): Class from which the decorated class inherits __doc__.
excluded (list): List of parent objects from which the class does not
inherit docstrings.
Returns:
function: decorator which replaces the decorated class' documentation
parent's documentation.
"""

def decorator(cls):
if parent not in excluded:
cls.__doc__ = parent.__doc__
for attr, obj in cls.__dict__.items():
parent_obj = getattr(parent, attr, None)
if parent_obj in excluded or (
not callable(parent_obj) and not isinstance(parent_obj, property)
):
continue
if callable(obj):
obj.__doc__ = parent_obj.__doc__
elif isinstance(obj, property) and obj.fget is not None:
p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__)
setattr(cls, attr, p)
return cls

return decorator


def try_cast_to_pandas(obj):
    """
    Convert obj and all nested objects from modin to pandas if it is possible,
    otherwise return obj.

    Lists, tuples (including namedtuples) and dict values are converted
    recursively. A callable whose ``__module__`` starts with ``modin`` is
    replaced by the same-named attribute of ``pandas.DataFrame`` (when the
    module path ends with ``dataframe``) or of ``pandas.Series`` (otherwise),
    if such an attribute exists.

    Parameters
    ----------
    obj : object,
        object to convert from modin to pandas

    Returns
    -------
    Converted object
    """
    if hasattr(obj, "_to_pandas"):
        return obj._to_pandas()
    if isinstance(obj, (list, tuple)):
        converted = [try_cast_to_pandas(o) for o in obj]
        # namedtuple classes take their fields as separate positional
        # arguments, so they cannot be rebuilt from a single list.
        if isinstance(obj, tuple) and hasattr(obj, "_fields"):
            return type(obj)(*converted)
        return type(obj)(converted)
    if isinstance(obj, dict):
        return {k: try_cast_to_pandas(v) for k, v in obj.items()}
    if callable(obj):
        # ``__module__`` may be absent or explicitly None (e.g. bound methods
        # of builtin types), so normalize it to a string before splitting.
        module_hierarchy = (getattr(obj, "__module__", None) or "").split(".")
        fn_name = getattr(obj, "__name__", None)
        if fn_name and module_hierarchy[0] == "modin":
            return (
                getattr(pandas.DataFrame, fn_name, obj)
                if module_hierarchy[-1] == "dataframe"
                else getattr(pandas.Series, fn_name, obj)
            )
    return obj


def wrap_udf_function(func):
    """
    Wrap a user-defined function so its result is cast from modin to pandas.

    Parameters
    ----------
    func : callable,
        user-defined function to wrap

    Returns
    -------
    callable
        Wrapper that calls ``func`` with the given arguments and passes the
        result through ``try_cast_to_pandas``. It carries ``func.__name__``
        when ``func`` has one.
    """

    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        # if user accidentally returns modin DataFrame or Series
        # casting it back to pandas to properly process
        return try_cast_to_pandas(result)

    # Not every callable has ``__name__`` (e.g. functools.partial objects or
    # callable instances); keep the wrapper's own name in that case.
    wrapper.__name__ = getattr(func, "__name__", wrapper.__name__)
    return wrapper


def hashable(obj):
    """Return whether the object is hashable.

    The check calls ``hash(obj)`` and treats a ``TypeError`` as "not hashable".
    """
    is_hashable = True
    try:
        hash(obj)
    except TypeError:
        is_hashable = False
    return is_hashable


def is_scalar(obj):
"""
Return True if given object is scalar.
Expand Down
113 changes: 113 additions & 0 deletions modin/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership. The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pandas


def _inherit_docstrings(parent, excluded=[]):
"""Creates a decorator which overwrites a decorated class' __doc__
attribute with parent's __doc__ attribute. Also overwrites __doc__ of
methods and properties defined in the class with the __doc__ of matching
methods and properties in parent.
Args:
parent (object): Class from which the decorated class inherits __doc__.
excluded (list): List of parent objects from which the class does not
inherit docstrings.
Returns:
function: decorator which replaces the decorated class' documentation
parent's documentation.
"""

def decorator(cls):
if parent not in excluded:
cls.__doc__ = parent.__doc__
for attr, obj in cls.__dict__.items():
parent_obj = getattr(parent, attr, None)
if parent_obj in excluded or (
not callable(parent_obj) and not isinstance(parent_obj, property)
):
continue
if callable(obj):
obj.__doc__ = parent_obj.__doc__
elif isinstance(obj, property) and obj.fget is not None:
p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__)
setattr(cls, attr, p)
return cls

return decorator


def to_pandas(modin_obj):
    """Convert a Modin DataFrame/Series to a pandas DataFrame/Series.

    Args:
        modin_obj {modin.DataFrame, modin.Series}: The Modin DataFrame/Series
            to convert. It must expose a ``_to_pandas()`` method.

    Returns:
        Whatever ``modin_obj._to_pandas()`` returns (a pandas DataFrame or
        Series for Modin objects).
    """
    pandas_obj = modin_obj._to_pandas()
    return pandas_obj


def hashable(obj):
    """Return whether the object is hashable.

    The check calls ``hash(obj)`` and treats a ``TypeError`` as "not hashable".
    """
    is_hashable = True
    try:
        hash(obj)
    except TypeError:
        is_hashable = False
    return is_hashable


def try_cast_to_pandas(obj):
    """
    Convert obj and all nested objects from modin to pandas if it is possible,
    otherwise return obj.

    Lists, tuples (including namedtuples) and dict values are converted
    recursively. A callable whose ``__module__`` starts with ``modin`` is
    replaced by the same-named attribute of ``pandas.DataFrame`` (when the
    module path ends with ``dataframe``) or of ``pandas.Series`` (otherwise),
    if such an attribute exists.

    Parameters
    ----------
    obj : object,
        object to convert from modin to pandas

    Returns
    -------
    Converted object
    """
    if hasattr(obj, "_to_pandas"):
        return obj._to_pandas()
    if isinstance(obj, (list, tuple)):
        converted = [try_cast_to_pandas(o) for o in obj]
        # namedtuple classes take their fields as separate positional
        # arguments, so they cannot be rebuilt from a single list.
        if isinstance(obj, tuple) and hasattr(obj, "_fields"):
            return type(obj)(*converted)
        return type(obj)(converted)
    if isinstance(obj, dict):
        return {k: try_cast_to_pandas(v) for k, v in obj.items()}
    if callable(obj):
        # ``__module__`` may be absent or explicitly None (e.g. bound methods
        # of builtin types), so normalize it to a string before splitting.
        module_hierarchy = (getattr(obj, "__module__", None) or "").split(".")
        fn_name = getattr(obj, "__name__", None)
        if fn_name and module_hierarchy[0] == "modin":
            return (
                getattr(pandas.DataFrame, fn_name, obj)
                if module_hierarchy[-1] == "dataframe"
                else getattr(pandas.Series, fn_name, obj)
            )
    return obj


def wrap_udf_function(func):
    """
    Wrap a user-defined function so its result is cast from modin to pandas.

    Parameters
    ----------
    func : callable,
        user-defined function to wrap

    Returns
    -------
    callable
        Wrapper that calls ``func`` with the given arguments and passes the
        result through ``try_cast_to_pandas``. It carries ``func.__name__``
        when ``func`` has one.
    """

    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        # if user accidentally returns modin DataFrame or Series
        # casting it back to pandas to properly process
        return try_cast_to_pandas(result)

    # Not every callable has ``__name__`` (e.g. functools.partial objects or
    # callable instances); keep the wrapper's own name in that case.
    wrapper.__name__ = getattr(func, "__name__", wrapper.__name__)
    return wrapper

0 comments on commit 90e403e

Please sign in to comment.