Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX-#7138: Stop reloading modules for custom docstrings. #7307

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 0 additions & 31 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

"""Module houses Modin configs originated from environment variables."""

import importlib
import os
import secrets
import sys
Expand Down Expand Up @@ -771,36 +770,6 @@ class DocModule(EnvironmentVariable, type=ExactStr):
varname = "MODIN_DOC_MODULE"
default = "pandas"

@classmethod
def put(cls, value: str) -> None:
"""
Assign a value to the DocModule config.

Parameters
----------
value : str
Config value to set.
"""
super().put(value)
# Reload everything to apply the documentation. This is required since the
# docs might already have been created and the implementation will assume
# that the new docs are applied when the config is set. This set of operations
# does this.
import modin.pandas as pd

importlib.reload(pd.accessor)
importlib.reload(pd.base)
importlib.reload(pd.dataframe)
importlib.reload(pd.general)
importlib.reload(pd.groupby)
importlib.reload(pd.io)
importlib.reload(pd.iterator)
importlib.reload(pd.series)
importlib.reload(pd.series_utils)
importlib.reload(pd.utils)
importlib.reload(pd.window)
importlib.reload(pd)


class DaskThreadsPerWorker(EnvironmentVariable, type=int):
"""Number of threads per Dask worker."""
Expand Down
70 changes: 42 additions & 28 deletions modin/tests/config/test_envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import os

import pandas
import pytest

import modin.config as cfg
Expand Down Expand Up @@ -79,34 +80,47 @@ def test_custom_help(make_custom_envvar):
assert "custom var" in make_custom_envvar.get_help()


def test_doc_module():
import pandas

import modin.pandas as pd
from modin.config import DocModule

DocModule.put("modin.tests.config.docs_module")

# Test for override
assert (
pd.DataFrame.apply.__doc__
== "This is a test of the documentation module for DataFrame."
)
# Test for pandas doc when method is not defined on the plugin module
assert pandas.DataFrame.isna.__doc__ in pd.DataFrame.isna.__doc__
assert pandas.DataFrame.isnull.__doc__ in pd.DataFrame.isnull.__doc__
# Test for override
assert (
pd.Series.isna.__doc__
== "This is a test of the documentation module for Series."
)
# Test for pandas doc when method is not defined on the plugin module
assert pandas.Series.isnull.__doc__ in pd.Series.isnull.__doc__
assert pandas.Series.apply.__doc__ in pd.Series.apply.__doc__
# Test for override
assert pd.read_csv.__doc__ == "Test override for functions on the module."
# Test for pandas doc when function is not defined on module.
assert pandas.read_table.__doc__ in pd.read_table.__doc__
class TestDocModule:
"""
Test using a module to replace default docstrings.
"""

def test_overrides(self):
cfg.DocModule.put("modin.tests.config.docs_module")

# Test for override
assert (
pd.DataFrame.apply.__doc__
== "This is a test of the documentation module for DataFrame."
)
# Test for pandas doc when method is not defined on the plugin module
assert pandas.DataFrame.isna.__doc__ in pd.DataFrame.isna.__doc__
assert pandas.DataFrame.isnull.__doc__ in pd.DataFrame.isnull.__doc__
# Test for override
assert (
pd.Series.isna.__doc__
== "This is a test of the documentation module for Series."
)
# Test for pandas doc when method is not defined on the plugin module
assert pandas.Series.isnull.__doc__ in pd.Series.isnull.__doc__
assert pandas.Series.apply.__doc__ in pd.Series.apply.__doc__
# Test for override
assert pd.read_csv.__doc__ == "Test override for functions on the module."
# Test for pandas doc when function is not defined on module.
assert pandas.read_table.__doc__ in pd.read_table.__doc__

def test_not_redefining_classes_modin_issue_7138(self):
original_dataframe_class = pd.DataFrame

cfg.DocModule.put("modin.tests.config.docs_module")

# Test for override
assert (
pd.DataFrame.apply.__doc__
== "This is a test of the documentation module for DataFrame."
)

assert pd.DataFrame is original_dataframe_class


@pytest.mark.skipif(cfg.Engine.get() != "Ray", reason="Ray specific test")
Expand Down
196 changes: 136 additions & 60 deletions modin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,129 @@ def _replace_doc(
target_obj.__doc__ = doc


# This is a map from objects whose docstrings we are overriding to functions that
# take a DocModule string and override the docstring according to the
# DocModule. When we update DocModule, we can use this map to update all
# inherited docstrings.
_docstring_inheritance_calls: list[Callable[[str], None]] = []


def _documentable_obj(obj: object) -> bool:
"""
Check whether we can replace the docstring of `obj`.

Parameters
----------
obj : object
Object whose docstring we want to replace.

Returns
-------
bool
Whether we can replace the docstring.
"""
return bool(
callable(obj)
and not inspect.isclass(obj)
or (isinstance(obj, property) and obj.fget)
or (isinstance(obj, functools.cached_property))
or (isinstance(obj, (staticmethod, classmethod)) and obj.__func__)
)


def _update_inherited_docstrings(doc_module: DocModule) -> None:
"""
Update all inherited docstrings.

Parameters
----------
doc_module : DocModule
The current DocModule.
"""
_doc_module = doc_module.get()
for doc_inheritance_call in _docstring_inheritance_calls:
doc_inheritance_call(doc_module=_doc_module) # type: ignore[call-arg]


def _inherit_docstrings_in_place(
cls_or_func: Fn,
doc_module: str,
parent: object,
excluded: List[object],
overwrite_existing: bool = False,
apilink: Optional[Union[str, List[str]]] = None,
) -> None:
"""
Replace `cls_or_func` docstrings with `parent` docstrings in place.

Parameters
----------
cls_or_func : Fn
The class or function whose docstrings we need to update.
doc_module : str
The docs module.
parent : object
Parent object from which the decorated object inherits __doc__.
excluded : list, default: []
List of parent objects from which the class does not
inherit docstrings.
overwrite_existing : bool, default: False
Allow overwriting docstrings that already exist in
the decorated class.
apilink : str | List[str], optional
If non-empty, insert the link(s) to pandas API documentation.
Should be the prefix part in the URL template, e.g. "pandas.DataFrame".
"""
# Import the docs module and get the class (e.g. `DataFrame`).
imported_doc_module = importlib.import_module(doc_module)
# Set the default parent so we can use it in case some docs are missing from
# parent module.
default_parent = parent
# Try to get the parent object from the doc module, and if it isn't there,
# get it from parent instead. We only do this if we are overriding pandas
# documentation. We don't touch other docs.
if doc_module != DocModule.default and "pandas" in str(
getattr(parent, "__module__", "")
):
parent = getattr(imported_doc_module, getattr(parent, "__name__", ""), parent)
if parent != default_parent:
# Reset API link in case the docs are overridden.
apilink = None
overwrite_existing = True

if parent not in excluded:
_replace_doc(parent, cls_or_func, overwrite_existing, apilink)

if not isinstance(cls_or_func, types.FunctionType):
seen = set()
for base in cls_or_func.__mro__: # type: ignore[attr-defined]
if base is object:
continue
for attr, obj in base.__dict__.items():
if attr in seen:
continue
seen.add(attr)
# Try to get the attribute from the docs class first, then
# from the default parent (pandas), and if it's not in either,
# set `parent_obj` to `None`.
parent_obj = getattr(parent, attr, getattr(default_parent, attr, None))
if (
parent_obj in excluded
or not _documentable_obj(parent_obj)
or not _documentable_obj(obj)
):
continue

_replace_doc(
parent_obj,
obj,
overwrite_existing,
apilink,
parent_cls=cls_or_func,
attr_name=attr,
)


def _inherit_docstrings(
parent: object,
excluded: List[object] = [],
Expand Down Expand Up @@ -416,73 +539,26 @@ def _inherit_docstrings(
are not defined in target class (but are defined in the ancestor class),
which means that ancestor class attribute docstrings could also change.
"""
# Import the docs module and get the class (e.g. `DataFrame`).
imported_doc_module = importlib.import_module(DocModule.get())
# Set the default parent so we can use it in case some docs are missing from
# parent module.
default_parent = parent
# Try to get the parent object from the doc module, and if it isn't there,
# get it from parent instead. We only do this if we are overriding pandas
# documentation. We don't touch other docs.
if DocModule.get() != DocModule.default and "pandas" in str(
getattr(parent, "__module__", "")
):
parent = getattr(imported_doc_module, getattr(parent, "__name__", ""), parent)
if parent != default_parent:
# Reset API link in case the docs are overridden.
apilink = None
overwrite_existing = True

def _documentable_obj(obj: object) -> bool:
"""Check if `obj` docstring could be patched."""
return bool(
callable(obj)
and not inspect.isclass(obj)
or (isinstance(obj, property) and obj.fget)
or (isinstance(obj, functools.cached_property))
or (isinstance(obj, (staticmethod, classmethod)) and obj.__func__)
)

def decorator(cls_or_func: Fn) -> Fn:
if parent not in excluded:
_replace_doc(parent, cls_or_func, overwrite_existing, apilink)

if not isinstance(cls_or_func, types.FunctionType):
seen = set()
for base in cls_or_func.__mro__: # type: ignore[attr-defined]
if base is object:
continue
for attr, obj in base.__dict__.items():
if attr in seen:
continue
seen.add(attr)
# Try to get the attribute from the docs class first, then
# from the default parent (pandas), and if it's not in either,
# set `parent_obj` to `None`.
parent_obj = getattr(
parent, attr, getattr(default_parent, attr, None)
)
if (
parent_obj in excluded
or not _documentable_obj(parent_obj)
or not _documentable_obj(obj)
):
continue

_replace_doc(
parent_obj,
obj,
overwrite_existing,
apilink,
parent_cls=cls_or_func,
attr_name=attr,
)

inherit_docstring_in_place = functools.partial(
_inherit_docstrings_in_place,
cls_or_func=cls_or_func,
parent=parent,
excluded=excluded,
overwrite_existing=overwrite_existing,
apilink=apilink,
)
inherit_docstring_in_place(doc_module=DocModule.get())
_docstring_inheritance_calls.append(inherit_docstring_in_place)
return cls_or_func

return decorator


DocModule.subscribe(_update_inherited_docstrings)


def expanduser_path_arg(argname: str) -> Callable[[Fn], Fn]:
"""
Decorate a function replacing its path argument with "user-expanded" value.
Expand Down
Loading