From 0a064552f3913fc4dba0087729563b82dce72ec4 Mon Sep 17 00:00:00 2001 From: Mahesh Vashishtha Date: Tue, 11 Jun 2024 11:15:39 -0700 Subject: [PATCH] FIX-#7138: Stop reloading modules for custom docstrings. (#7307) Signed-off-by: sfc-gh-mvashishtha Co-authored-by: Anatoly Myachev --- modin/config/envvars.py | 31 ----- modin/tests/config/test_envvars.py | 70 ++++++----- modin/utils.py | 196 ++++++++++++++++++++--------- 3 files changed, 178 insertions(+), 119 deletions(-) diff --git a/modin/config/envvars.py b/modin/config/envvars.py index 3f83ec1febf..d05a7117cf2 100644 --- a/modin/config/envvars.py +++ b/modin/config/envvars.py @@ -13,7 +13,6 @@ """Module houses Modin configs originated from environment variables.""" -import importlib import os import secrets import sys @@ -771,36 +770,6 @@ class DocModule(EnvironmentVariable, type=ExactStr): varname = "MODIN_DOC_MODULE" default = "pandas" - @classmethod - def put(cls, value: str) -> None: - """ - Assign a value to the DocModule config. - - Parameters - ---------- - value : str - Config value to set. - """ - super().put(value) - # Reload everything to apply the documentation. This is required since the - # docs might already have been created and the implementation will assume - # that the new docs are applied when the config is set. This set of operations - # does this. - import modin.pandas as pd - - importlib.reload(pd.accessor) - importlib.reload(pd.base) - importlib.reload(pd.dataframe) - importlib.reload(pd.general) - importlib.reload(pd.groupby) - importlib.reload(pd.io) - importlib.reload(pd.iterator) - importlib.reload(pd.series) - importlib.reload(pd.series_utils) - importlib.reload(pd.utils) - importlib.reload(pd.window) - importlib.reload(pd) - class DaskThreadsPerWorker(EnvironmentVariable, type=int): """Number of threads per Dask worker.""" diff --git a/modin/tests/config/test_envvars.py b/modin/tests/config/test_envvars.py index 4fc1b65140b..c1fa009dd21 100644 --- a/modin/tests/config/test_envvars.py +++ b/modin/tests/config/test_envvars.py @@ -13,6 +13,7 @@ import os +import pandas import pytest import modin.config as cfg @@ -79,34 +80,47 @@ def test_custom_help(make_custom_envvar): assert "custom var" in make_custom_envvar.get_help() -def test_doc_module(): - import pandas - - import modin.pandas as pd - from modin.config import DocModule - - DocModule.put("modin.tests.config.docs_module") - - # Test for override - assert ( - pd.DataFrame.apply.__doc__ - == "This is a test of the documentation module for DataFrame." - ) - # Test for pandas doc when method is not defined on the plugin module - assert pandas.DataFrame.isna.__doc__ in pd.DataFrame.isna.__doc__ - assert pandas.DataFrame.isnull.__doc__ in pd.DataFrame.isnull.__doc__ - # Test for override - assert ( - pd.Series.isna.__doc__ - == "This is a test of the documentation module for Series." - ) - # Test for pandas doc when method is not defined on the plugin module - assert pandas.Series.isnull.__doc__ in pd.Series.isnull.__doc__ - assert pandas.Series.apply.__doc__ in pd.Series.apply.__doc__ - # Test for override - assert pd.read_csv.__doc__ == "Test override for functions on the module." - # Test for pandas doc when function is not defined on module. - assert pandas.read_table.__doc__ in pd.read_table.__doc__ +class TestDocModule: + """ + Test using a module to replace default docstrings. + """ + + def test_overrides(self): + cfg.DocModule.put("modin.tests.config.docs_module") + + # Test for override + assert ( + pd.DataFrame.apply.__doc__ + == "This is a test of the documentation module for DataFrame." + ) + # Test for pandas doc when method is not defined on the plugin module + assert pandas.DataFrame.isna.__doc__ in pd.DataFrame.isna.__doc__ + assert pandas.DataFrame.isnull.__doc__ in pd.DataFrame.isnull.__doc__ + # Test for override + assert ( + pd.Series.isna.__doc__ + == "This is a test of the documentation module for Series." + ) + # Test for pandas doc when method is not defined on the plugin module + assert pandas.Series.isnull.__doc__ in pd.Series.isnull.__doc__ + assert pandas.Series.apply.__doc__ in pd.Series.apply.__doc__ + # Test for override + assert pd.read_csv.__doc__ == "Test override for functions on the module." + # Test for pandas doc when function is not defined on module. + assert pandas.read_table.__doc__ in pd.read_table.__doc__ + + def test_not_redefining_classes_modin_issue_7138(self): + original_dataframe_class = pd.DataFrame + + cfg.DocModule.put("modin.tests.config.docs_module") + + # Test for override + assert ( + pd.DataFrame.apply.__doc__ + == "This is a test of the documentation module for DataFrame." + ) + + assert pd.DataFrame is original_dataframe_class @pytest.mark.skipif(cfg.Engine.get() != "Ray", reason="Ray specific test") diff --git a/modin/utils.py b/modin/utils.py index fdc5260b215..2a7df885172 100644 --- a/modin/utils.py +++ b/modin/utils.py @@ -378,6 +378,129 @@ def _replace_doc( target_obj.__doc__ = doc +# This is a map from objects whose docstrings we are overriding to functions that +# take a DocModule string and override the docstring according to the +# DocModule. When we update DocModule, we can use this map to update all +# inherited docstrings. +_docstring_inheritance_calls: list[Callable[[str], None]] = [] + + +def _documentable_obj(obj: object) -> bool: + """ + Check whether we can replace the docstring of `obj`. + + Parameters + ---------- + obj : object + Object whose docstring we want to replace. + + Returns + ------- + bool + Whether we can replace the docstring. + """ + return bool( + callable(obj) + and not inspect.isclass(obj) + or (isinstance(obj, property) and obj.fget) + or (isinstance(obj, functools.cached_property)) + or (isinstance(obj, (staticmethod, classmethod)) and obj.__func__) + ) + + +def _update_inherited_docstrings(doc_module: DocModule) -> None: + """ + Update all inherited docstrings. + + Parameters + ---------- + doc_module : DocModule + The current DocModule. + """ + _doc_module = doc_module.get() + for doc_inheritance_call in _docstring_inheritance_calls: + doc_inheritance_call(doc_module=_doc_module) # type: ignore[call-arg] + + +def _inherit_docstrings_in_place( + cls_or_func: Fn, + doc_module: str, + parent: object, + excluded: List[object], + overwrite_existing: bool = False, + apilink: Optional[Union[str, List[str]]] = None, +) -> None: + """ + Replace `cls_or_func` docstrings with `parent` docstrings in place. + + Parameters + ---------- + cls_or_func : Fn + The class or function whose docstrings we need to update. + doc_module : str + The docs module. + parent : object + Parent object from which the decorated object inherits __doc__. + excluded : list, default: [] + List of parent objects from which the class does not + inherit docstrings. + overwrite_existing : bool, default: False + Allow overwriting docstrings that already exist in + the decorated class. + apilink : str | List[str], optional + If non-empty, insert the link(s) to pandas API documentation. + Should be the prefix part in the URL template, e.g. "pandas.DataFrame". + """ + # Import the docs module and get the class (e.g. `DataFrame`). + imported_doc_module = importlib.import_module(doc_module) + # Set the default parent so we can use it in case some docs are missing from + # parent module. + default_parent = parent + # Try to get the parent object from the doc module, and if it isn't there, + # get it from parent instead. We only do this if we are overriding pandas + # documentation. We don't touch other docs. + if doc_module != DocModule.default and "pandas" in str( + getattr(parent, "__module__", "") + ): + parent = getattr(imported_doc_module, getattr(parent, "__name__", ""), parent) + if parent != default_parent: + # Reset API link in case the docs are overridden. + apilink = None + overwrite_existing = True + + if parent not in excluded: + _replace_doc(parent, cls_or_func, overwrite_existing, apilink) + + if not isinstance(cls_or_func, types.FunctionType): + seen = set() + for base in cls_or_func.__mro__: # type: ignore[attr-defined] + if base is object: + continue + for attr, obj in base.__dict__.items(): + if attr in seen: + continue + seen.add(attr) + # Try to get the attribute from the docs class first, then + # from the default parent (pandas), and if it's not in either, + # set `parent_obj` to `None`. + parent_obj = getattr(parent, attr, getattr(default_parent, attr, None)) + if ( + parent_obj in excluded + or not _documentable_obj(parent_obj) + or not _documentable_obj(obj) + ): + continue + + _replace_doc( + parent_obj, + obj, + overwrite_existing, + apilink, + parent_cls=cls_or_func, + attr_name=attr, + ) + + def _inherit_docstrings( parent: object, excluded: List[object] = [], @@ -416,73 +539,26 @@ def _inherit_docstrings( are not defined in target class (but are defined in the ancestor class), which means that ancestor class attribute docstrings could also change. """ - # Import the docs module and get the class (e.g. `DataFrame`). - imported_doc_module = importlib.import_module(DocModule.get()) - # Set the default parent so we can use it in case some docs are missing from - # parent module. - default_parent = parent - # Try to get the parent object from the doc module, and if it isn't there, - # get it from parent instead. We only do this if we are overriding pandas - # documentation. We don't touch other docs. - if DocModule.get() != DocModule.default and "pandas" in str( - getattr(parent, "__module__", "") - ): - parent = getattr(imported_doc_module, getattr(parent, "__name__", ""), parent) - if parent != default_parent: - # Reset API link in case the docs are overridden. - apilink = None - overwrite_existing = True - - def _documentable_obj(obj: object) -> bool: - """Check if `obj` docstring could be patched.""" - return bool( - callable(obj) - and not inspect.isclass(obj) - or (isinstance(obj, property) and obj.fget) - or (isinstance(obj, functools.cached_property)) - or (isinstance(obj, (staticmethod, classmethod)) and obj.__func__) - ) def decorator(cls_or_func: Fn) -> Fn: - if parent not in excluded: - _replace_doc(parent, cls_or_func, overwrite_existing, apilink) - - if not isinstance(cls_or_func, types.FunctionType): - seen = set() - for base in cls_or_func.__mro__: # type: ignore[attr-defined] - if base is object: - continue - for attr, obj in base.__dict__.items(): - if attr in seen: - continue - seen.add(attr) - # Try to get the attribute from the docs class first, then - # from the default parent (pandas), and if it's not in either, - # set `parent_obj` to `None`. - parent_obj = getattr( - parent, attr, getattr(default_parent, attr, None) - ) - if ( - parent_obj in excluded - or not _documentable_obj(parent_obj) - or not _documentable_obj(obj) - ): - continue - - _replace_doc( - parent_obj, - obj, - overwrite_existing, - apilink, - parent_cls=cls_or_func, - attr_name=attr, - ) - + inherit_docstring_in_place = functools.partial( + _inherit_docstrings_in_place, + cls_or_func=cls_or_func, + parent=parent, + excluded=excluded, + overwrite_existing=overwrite_existing, + apilink=apilink, + ) + inherit_docstring_in_place(doc_module=DocModule.get()) + _docstring_inheritance_calls.append(inherit_docstring_in_place) return cls_or_func return decorator +DocModule.subscribe(_update_inherited_docstrings) + + def expanduser_path_arg(argname: str) -> Callable[[Fn], Fn]: """ Decorate a function replacing its path argument with "user-expanded" value.