class Section(object):
    """
    A section represents a part of a #Document. It contains Markdown-formatted
    content that will be rendered into a file at some point.

    # Attributes
    doc (Document): The document that the section belongs to.
    identifier (str, None): The globally unique identifier of the section. This
      identifier usually matches the name of the element that the section
      describes (eg. a class or function) and will be used for cross-referencing.
    title (str, None): The title of the section. If specified, it will be
      rendered before `section.content` and the header-size will depend on
      the `section.depth`.
    depth (int): The depth of the section, defaults to 1. Currently only affects
      the header-size that is rendered for the `section.title`.
    content (str): The Markdown-formatted content of the section. A placeholder
      text is stored when `None` is passed to the constructor.
    header_type (str): Either `'html'` or `'markdown'`; selects how #render()
      writes the title.
    """

    def __init__(self, doc, identifier=None, title=None, depth=1, content=None, header_type='html'):
        self.doc = doc
        self.identifier = identifier
        self.title = title
        self.depth = depth
        self.content = content if content is not None else '*Nothing to see here.*'
        self.header_type = header_type

    def render(self, stream):
        """
        Render the section into *stream*.

        # Arguments
        stream (file-like): Text stream that the title header and the content
          are printed to.

        # Raises
        ValueError: If `section.header_type` is neither `'html'` nor `'markdown'`.
        """

        if self.header_type == 'html':
            # NOTE(review): the header markup was reconstructed from the
            # `depth` and `id` arguments, which the template previously
            # ignored -- confirm against the expected rendered output.
            print('<h{depth} id="{id}">{title}</h{depth}>\n'
                  .format(depth=self.depth, id=self.identifier, title=self.title),
                  file=stream)
        elif self.header_type == 'markdown':
            print('\n' + ('#' * self.depth), self.title, file=stream)
        else:
            raise ValueError('Invalid header type: %s' % self.header_type)
        print(self.content, file=stream)

    @property
    def index(self):
        """
        Returns the #Index that this section is associated with, accessed via
        `section.doc`.
        """

        # The constructor stores the owning document as `doc`; there is no
        # `document` attribute on a section.
        return self.doc.index


class Document(object):
    """
    Represents a single document that may contain several #Section#s. Every
    document *must* have a relative URL associated with it.

    # Attributes
    index (Index): The index that the document belongs to.
    url (str): The relative URL of the document.
    sections (list): The sections of the document, in the order in which they
      were added with #Index.new_section().
    """

    def __init__(self, index, url):
        self.index = index
        self.url = url
        self.sections = []


class Index(object):
    """
    The index manages all documents and sections globally. It keeps track of
    the symbolic names allocated for the sections to be able to link to them
    from other sections.

    # Attributes
    documents (dict): Maps the filename of every document created with
      #new_document() to its #Document.
    sections (dict): Maps the identifier of every section that has one to its
      #Section.
    """

    def __init__(self):
        self.documents = {}
        self.sections = {}

    def new_document(self, filename, url=None):
        """
        Create a new document.

        # Arguments
        filename (str): The filename of the document. Must end with `.md`.
        url (str): The relative URL of the document. If omitted, will be
          automatically deduced from *filename* (same without the `.md` suffix).

        # Returns
        Document: The newly created document, already registered in
          `index.documents`.

        # Raises
        ValueError: If *filename* does not end with `.md`.
        ValueError: If *filename* is not a relative path.
        ValueError: If a document with the specified *filename* already exists.
        """

        if not filename.endswith('.md'):
            raise ValueError('filename must end with `.md`')
        if os.path.isabs(filename):
            raise ValueError('filename must be relative')
        if filename in self.documents:
            raise ValueError(
                'document filename {!r} already used'.format(filename))
        if not url:
            # Drop the three characters of the `.md` suffix.
            url = filename[:-3]

        doc = Document(self, url)
        self.documents[filename] = doc
        return doc

    def new_section(self, doc, *args, **kwargs):
        """
        Create a new section in the specified document. The arguments for this
        method match the parameters for the #Section constructor.

        # Returns
        Section: The new section, appended to `doc.sections`.

        # Raises
        ValueError: If the section identifier is already used.
        """

        section = Section(doc, *args, **kwargs)
        # Sections without an identifier are not registered and therefore
        # never clash with each other.
        if section.identifier:
            if section.identifier in self.sections:
                raise ValueError('section identifier {!r} already used'
                                 .format(section.identifier))
            self.sections[section.identifier] = section
        doc.sections.append(section)
        return section
def import_module(name):
    """
    Imports a Python module assuming that the whole *name* identifies only a
    Python module and no symbol inside a Python module.
    """

    # fromlist must not be empty so we get the bottom-level module rather than
    # the top-level module.
    return __import__(name, fromlist=[''])


def import_object(name):
    """
    Like #import_object_with_scope() but returns only the object.
    """

    return import_object_with_scope(name)[0]


def import_object_with_scope(name):
    """
    Imports a Python object by an absolute identifier.

    # Arguments
    name (str): The name of the Python object to import.

    # Returns
    (any, any): The object and the scope that contains it. The scope is `None`
      when *name* has a single component. For plain modules loaded with this
      function, both elements of the tuple may be the same object.

    # Raises
    ImportError: If a component is neither an attribute of the object found
      so far nor an importable module.
    """

    # Import modules until we can no longer import them. Prefer existing
    # attributes over importing modules at each step.
    parts = name.split('.')
    current_name = parts[0]
    obj = import_module(current_name)
    scope = None
    for part in parts[1:]:
        current_name += '.' + part
        try:
            if hasattr(obj, '__dict__'):
                # Using directly __dict__ for descriptors, where we want to get
                # the descriptor's instance and not calling the descriptor's
                # __get__ method.
                sub_obj = obj.__dict__[part]
            else:
                sub_obj = getattr(obj, part)

            scope, obj = obj, sub_obj
        except (AttributeError, KeyError):
            # Not an attribute: try to import the dotted name as a module.
            try:
                obj = scope = import_module(current_name)
            except ImportError as exc:
                if 'named {}'.format(part) in str(exc):
                    raise ImportError(current_name)
                raise
    return obj, scope


def force_lazy_import(name):
    """
    Import any modules off of "name" by iterating a new list rather than a
    generator so that this library works with lazy imports.
    """
    obj = import_object(name)
    module_items = list(getattr(obj, '__dict__', {}).items())
    for key, value in module_items:
        if getattr(value, '__module__', None):
            import_object(name + '.' + key)


def dir_object(name, sort_order, need_docstrings=True):
    """
    Lists the names of the public members of the object identified by *name*.

    A member is skipped when its name starts with an underscore, when it has
    no `__doc__` attribute, or -- if the object is a module -- when its
    `__module__` differs from the module's name. `staticmethod` and
    `classmethod` members are inspected through their wrapped function.
    Classes that have at least one documented member bypass both the
    docstring and the `__all__` filter.

    # Arguments
    name (str): The absolute identifier of the object to inspect.
    sort_order (str): `'line'` sorts members by their position in the source;
      members without a retrievable line number come first, sorted by name.
      Any other value sorts all members by name, ignoring case.
    need_docstrings (bool): If true, members with an empty docstring are
      skipped.

    # Returns
    list of str: The member names in the requested order.
    """
    prefix = None
    obj = import_object(name)
    if isinstance(obj, types.ModuleType):
        prefix = obj.__name__
    # Named `exported` rather than `all` so the builtin is not shadowed.
    exported = getattr(obj, '__all__', None)

    # Import any modules attached to this object so that this will work with
    # lazy imports. Otherwise the block below will fail because the object
    # will change while it's being iterated.
    force_lazy_import(name)

    by_name = []
    by_lineno = []
    # Iterate over a snapshot: the recursive call below imports further
    # objects, which may still add entries to the namespace.
    for key, value in list(getattr(obj, '__dict__', {}).items()):
        if isinstance(value, (staticmethod, classmethod)):
            value = value.__func__
        if key.startswith('_'):
            continue
        if not hasattr(value, '__doc__'):
            continue

        # If we have a type, we only want to skip it if it doesn't have
        # any documented members.
        if not (isinstance(value, type) and dir_object(name + '.' + key, sort_order, True)):
            if need_docstrings and not value.__doc__:
                continue
            if exported is not None and key not in exported:
                continue

        if prefix is not None and getattr(value, '__module__', None) != prefix:
            continue
        if sort_order == 'line':
            try:
                by_lineno.append((key, inspect.getsourcelines(value)[1]))
            except Exception:
                # some members don't have (retrievable) line numbers (e.g.,
                # properties) so fall back to sorting those first, and by name
                by_name.append(key)
        else:
            by_name.append(key)
    by_name = sorted(by_name, key=lambda s: s.lower())
    by_lineno = [key for key, _ in sorted(by_lineno, key=lambda r: r[1])]

    return by_name + by_lineno
class Preprocessor:
    """
    This class implements the preprocessor for Google and PEP 257 docstrings.

    https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
    https://www.python.org/dev/peps/pep-0257/
    """

    # Patterns for a single parameter line, tried in order. The group names
    # are the placeholders that preprocess_section() fills into its output
    # templates: `param`, `desc` and, optionally, `type`.
    _param_res = [
        # name: description
        re.compile(r'^(?P<param>\S+):\s+(?P<desc>.+)$'),
        # name (type): description
        re.compile(r'^(?P<param>\S+)\s+\((?P<type>[^)]+)\):\s+(?P<desc>.+)$'),
        # name -- description
        re.compile(r'^(?P<param>\S+)\s+--\s+(?P<desc>.+)$'),
        # name {[type]} -- description
        re.compile(
            r'^(?P<param>\S+)\s+\{\[(?P<type>\S+)\]\}\s+--\s+(?P<desc>.+)$'),
        # name {type} -- description
        re.compile(
            r'^(?P<param>\S+)\s+\{(?P<type>\S+)\}\s+--\s+(?P<desc>.+)$'),
    ]

    # Maps every recognised section header line to the title under which the
    # section is rendered. Several headers may share one title.
    _keywords_map = {
        'Args:': 'Arguments',
        'Arguments:': 'Arguments',
        'Attributes:': 'Attributes',
        'Example:': 'Examples',
        'Examples:': 'Examples',
        'Keyword Args:': 'Arguments',
        'Keyword Arguments:': 'Arguments',
        'Methods:': 'Methods',
        'Note:': 'Notes',
        'Notes:': 'Notes',
        'Other Parameters:': 'Arguments',
        'Parameters:': 'Arguments',
        'Return:': 'Returns',
        'Returns:': 'Returns',
        'Raises:': 'Raises',
        'References:': 'References',
        'See Also:': 'See Also',
        'Todo:': 'Todo',
        'Warning:': 'Warnings',
        'Warnings:': 'Warnings',
        'Warns:': 'Warns',
        'Yield:': 'Yields',
        'Yields:': 'Yields',
    }

    def __init__(self, config=None):
        self.config = config

    def get_section_names(self):
        """
        Return the list of header lines (e.g. `'Args:'`) that start a section.
        """
        return list(self._keywords_map.keys())

    def preprocess_section(self, section):
        """
        Preprocess the given section into its components.

        The text before the first section header is kept as it is, apart from
        stripping every line. Each header starts a component whose lines are
        rendered as a Markdown list where they match a parameter pattern.
        Components with the same title are merged, and all components are
        appended after the free text. Fenced code blocks stay in the part of
        the docstring in which they were written and keep their indentation
        relative to the opening fence.

        # Arguments
        section: An object with a `content` string attribute, which is
          replaced with the preprocessed text.
        """
        lines = []
        components = {}
        keyword = None
        in_codeblock = False
        fence_indent = 0

        for raw_line in section.content.split('\n'):
            line = raw_line.strip()

            is_fence = line.startswith('```')
            if is_fence:
                in_codeblock = not in_codeblock
                if in_codeblock:
                    fence_indent = len(raw_line) - len(raw_line.lstrip())

            if is_fence or in_codeblock:
                # Both fences and everything between them go to the same
                # place, so that a block is never split between the free
                # text and a component.
                if keyword is None:
                    target = lines
                else:
                    target = components.setdefault(keyword, [])
                if is_fence:
                    target.append(line)
                else:
                    target.append(self._strip_indent(raw_line, fence_indent))
                continue

            if line in self._keywords_map:
                keyword = self._keywords_map[line]
                continue

            if keyword is None:
                lines.append(line)
                continue

            entries = components.setdefault(keyword, [])
            for param_re in self._param_res:
                param_match = param_re.match(line)
                if param_match:
                    groups = param_match.groupdict()
                    if 'type' in groups:
                        template = '- `{param}` _{type}_ - {desc}'
                    else:
                        template = '- `{param}` - {desc}'
                    entries.append(template.format(**groups))
                    break
            else:
                # Not a parameter line: keep it as continuation text.
                entries.append(' ' + line)

        for key in components:
            self._append_section(lines, key, components)

        section.content = '\n'.join(lines)

    @staticmethod
    def _strip_indent(raw_line, width):
        """
        Remove at most *width* leading whitespace characters, and all trailing
        whitespace, from *raw_line*.
        """
        leading = len(raw_line) - len(raw_line.lstrip())
        return raw_line[min(width, leading):].rstrip()

    @staticmethod
    def _append_section(lines, key, sections):
        """
        Append the component *key* of *sections* to *lines* under a bold
        title. Does nothing if the component is missing or empty.
        """
        section = sections.get(key)
        if not section:
            return

        if lines and lines[-1]:
            lines.append('')

        # add an extra line because of markdown syntax
        lines.extend(['**{}**:'.format(key), ''])
        lines.extend(section)
class Preprocessor(object):
    """
    This class implements a preprocessor that handles both reStructuredText
    and Google style docstrings. Every section is passed on to the
    preprocessor that matches the format of its content.
    """
    def __init__(self, config=None):
        self.config = config
        self._google_preprocessor = GooglePreprocessor(config)
        self._rst_preprocessor = RSTPreprocessor(config)

    def is_google_format(self, docstring):
        """
        Check if `docstring` is written in Google docstring format, i.e. if
        one of its lines consists of nothing but a Google section name.

        https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
        """
        # A set makes the check a single pass over the lines instead of one
        # scan of all lines per section name.
        section_names = set(self._google_preprocessor.get_section_names())
        return any(line.strip() in section_names
                   for line in docstring.split('\n'))

    def preprocess_section(self, section):
        """
        Preprocess the given section into its components, using the Google
        preprocessor if the content is in Google format and the
        reStructuredText preprocessor otherwise.
        """

        if self.is_google_format(section.content):
            return self._google_preprocessor.preprocess_section(section)

        return self._rst_preprocessor.preprocess_section(section)

    @staticmethod
    def _append_section(lines, key, sections):
        """
        Append the component *key* of *sections* to *lines* under a bold
        title. Does nothing if the component is missing or empty.
        """
        # NOTE(review): nothing in this class calls this helper -- confirm
        # whether it can be dropped.
        section = sections.get(key)
        if not section:
            return

        if lines and lines[-1]:
            lines.append('')

        # add an extra line because of markdown syntax
        lines.extend(['**{}**:'.format(key), ''])
        lines.extend(section)