Skip to content

Commit

Permalink
Faster docs extraction (#197)
Browse files Browse the repository at this point in the history
* Add new doc parsing backend script which more efficiently dumps all necessary data about collections and plugin docs.
* Use new script instead of ansible-doc.
* Make sure that proper JSON serialization is used.
* Make doc parsing backend configurable.
* Improve filtering.
* Equivalent to ansible/ansible#72359.
* Change default backend to ansible-internal.
  • Loading branch information
felixfontein authored Nov 3, 2020
1 parent 2105076 commit edee741
Show file tree
Hide file tree
Showing 18 changed files with 389 additions and 59 deletions.
1 change: 1 addition & 0 deletions antsibull.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ process_max = none
pypi_url = https://pypi.org/
thread_max = 80
max_retries = 10
doc_parsing_backend = ansible-doc
logging_cfg = {
version = 1.0
outputs = {
Expand Down
4 changes: 3 additions & 1 deletion antsibull/app_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def run(args):
_FIELDS_IN_APP_CTX = frozenset(('galaxy_url', 'logging_cfg', 'pypi_url'))

#: Field names in the args and config which whose value will be added to the lib_ctx
_FIELDS_IN_LIB_CTX = frozenset(('chunksize', 'process_max', 'thread_max', 'max_retries'))
_FIELDS_IN_LIB_CTX = frozenset(
('chunksize', 'process_max', 'thread_max', 'max_retries', 'doc_parsing_backend'))

#: lib_ctx should be restricted to things which do not belong in the API but an application or
#: user might want to tweak. Global, internal, incidental values are good to store here. Things
Expand Down Expand Up @@ -242,6 +243,7 @@ class LibContext(BaseModel):
process_max: t.Optional[int] = None
thread_max: int = 64
max_retries: int = 10
doc_parsing_backend: str = 'ansible-internal'

@p.validator('process_max', pre=True)
def convert_to_none(cls, value):
Expand Down
2 changes: 1 addition & 1 deletion antsibull/cli/doc_commands/stable.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from ...collections import install_together
from ...compat import asyncio_run, best_get_loop
from ...dependency_files import DepsFile
from ...docs_parsing.ansible_doc import get_ansible_plugin_info
from ...docs_parsing.parsing import get_ansible_plugin_info
from ...docs_parsing.fqcn import get_fqcn_parts
from ...galaxy import CollectionDownloader
from ...logging import log
Expand Down
5 changes: 5 additions & 0 deletions antsibull/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
#: Valid choices for a logging level field
LEVEL_CHOICES_F = p.Field(..., regex='^(CRITICAL|ERROR|WARNING|NOTICE|INFO|DEBUG|DISABLED)$')

#: Valid choices for the doc parsing backend field
DOC_PARSING_BACKEND_CHOICES_F = p.Field(
'ansible-internal', regex='^(ansible-doc|ansible-internal)$')

#: Valid choice of the logging version field
VERSION_CHOICES_F = p.Field(..., regex=r'1\.0')

Expand Down Expand Up @@ -131,6 +135,7 @@ class ConfigModel(BaseModel):
pypi_url: p.HttpUrl = 'https://pypi.org/'
thread_max: int = 80
max_retries: int = 10
doc_parsing_backend: str = DOC_PARSING_BACKEND_CHOICES_F

@p.validator('process_max', pre=True)
def convert_to_none(cls, value):
Expand Down
173 changes: 173 additions & 0 deletions antsibull/data/collection-enum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# Copyright: (c) 2014, James Tanner <[email protected]>
# Copyright: (c) 2018, Ansible Project
# Copyright: (c) 2020, Felix Fontein
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)

# Parts taken from Ansible's ansible-doc sources

import argparse
import json
import sys

import ansible.plugins.loader as plugin_loader

from ansible import constants as C
from ansible import release as ansible_release
from ansible.cli import doc
from ansible.cli.arguments import option_helpers as opt_help
from ansible.collections.list import list_collection_dirs
from ansible.galaxy.collection import CollectionRequirement
from ansible.module_utils._text import to_native
from ansible.module_utils.common.json import AnsibleJSONEncoder
from ansible.plugins.loader import action_loader, fragment_loader
from ansible.utils.collection_loader import AnsibleCollectionConfig
from ansible.utils.plugin_docs import get_docstring


def load_plugin(loader, plugin_type, plugin):
    """Extract documentation for a single plugin.

    :arg loader: The plugin loader for ``plugin_type``.
    :arg plugin_type: Type of the plugin (e.g. ``'module'``).
    :arg plugin: Name of the plugin to resolve and document.
    :returns: A dict with ``plugin_name``, ``filename``, ``collection_name``
        and the serialized docs under ``'ansible-doc'`` on success, or an
        ``'error'`` message on failure.
    """
    result = {}
    try:
        context = loader.find_plugin_with_context(
            plugin, mod_type='.py', ignore_deprecated=True, check_aliases=True)
        if not context.resolved:
            result['error'] = 'Cannot find plugin'
            return result

        resolved_name = context.plugin_resolved_name
        path = context.plugin_resolved_path
        collection = context.plugin_resolved_collection

        result['plugin_name'] = resolved_name
        result['filename'] = path
        result['collection_name'] = collection

        documentation, plainexamples, returndocs, metadata = get_docstring(
            path, fragment_loader, verbose=False,
            collection_name=collection, is_module=(plugin_type == 'module'))

        if documentation is None:
            result['error'] = 'No valid documentation found'
            return result

        documentation['filename'] = path
        documentation['collection'] = collection

        if plugin_type == 'module':
            # Record whether a corresponding action plugin exists.
            documentation['has_action'] = plugin in action_loader

        ansible_doc = {
            'doc': documentation,
            'examples': plainexamples,
            'return': returndocs,
            'metadata': metadata,
        }

        try:
            # Probe JSON serialization first so that only data which is
            # guaranteed to be serializable is stored in the result.
            json.dumps(ansible_doc, cls=AnsibleJSONEncoder)
            result['ansible-doc'] = ansible_doc
        except Exception as exc:
            result['error'] = (
                'Cannot serialize documentation as JSON: %s' % to_native(exc)
            )
    except Exception as exc:
        result['error'] = (
            'Missing documentation or could not parse documentation: %s' % to_native(exc)
        )

    return result


def ansible_doc_coll_filter(coll_filter):
    """Return the single collection name if the filter has exactly one entry, else None."""
    if coll_filter is not None and len(coll_filter) == 1:
        return coll_filter[0]
    return None


def match_filter(name, coll_filter):
    """Check whether ``name`` is selected by the collection filter.

    :arg name: A collection name (``'namespace.name'``) or a plugin FQCN.
    :arg coll_filter: List of collection names to restrict to, or ``None``
        to match everything.
    :returns: ``True`` if ``name`` matches the filter.
    """
    if coll_filter is None or name in coll_filter:
        return True
    # A plugin FQCN matches when it lives inside one of the filtered
    # collections.  (Renamed loop variable: 'filter' shadowed the builtin.)
    return any(name.startswith(flt + '.') for flt in coll_filter)


def load_all_plugins(plugin_type, basedir, coll_filter):
    """Collect documentation for all plugins of one type that match the filter.

    :arg plugin_type: Plugin type whose loader is looked up in plugin_loader.
    :arg basedir: Optional extra directory to search for plugins.
    :arg coll_filter: Collection filter as understood by ``match_filter``.
    :returns: Dict mapping plugin name to the ``load_plugin`` result.
    """
    loader = getattr(plugin_loader, '%s_loader' % plugin_type)

    if basedir:
        loader.add_directory(basedir, with_subdir=True)

    # Reset the loader's cached paths so subdirectories added above are seen.
    loader._paths = None

    plugin_names = set()

    if match_filter('ansible.builtin', coll_filter):
        for path_context in loader._get_paths_with_context():
            plugin_names.update(
                doc.DocCLI.find_plugins(path_context.path, path_context.internal, plugin_type))

    doc.add_collection_plugins(
        plugin_names, plugin_type, coll_filter=ansible_doc_coll_filter(coll_filter))

    return {
        name: load_plugin(loader, plugin_type, name)
        for name in plugin_names
        if match_filter(name, coll_filter)
    }


def main(args):
    """Entry point: dump docs for all plugins and collections as JSON to stdout.

    :arg args: Command line arguments, with the program name at index 0.
        Positional arguments are collection filters; --pretty enables
        pretty-printed JSON output.
    """
    parser = argparse.ArgumentParser(
        prog=args[0], description='Bulk extraction of Ansible plugin docs.')
    parser.add_argument('args', nargs='*', help='Collection filter', metavar='collection_filter')
    parser.add_argument('--pretty', action='store_true', help='Pretty-print JSON')
    opt_help.add_basedir_options(parser)

    arguments = parser.parse_args(args[1:])

    basedir = arguments.basedir
    # An empty filter list means 'no restriction at all'.
    coll_filter = arguments.args or None

    if basedir:
        AnsibleCollectionConfig.playbook_paths = basedir

    result = {
        'plugins': {},
        'collections': {},
    }

    # Export plugin docs
    for plugin_type in C.DOCUMENTABLE_PLUGINS:
        result['plugins'][plugin_type] = load_all_plugins(plugin_type, basedir, coll_filter)

    # Export collection data
    b_colldirs = list_collection_dirs(coll_filter=ansible_doc_coll_filter(coll_filter))
    for b_path in b_colldirs:
        collection = CollectionRequirement.from_path(b_path, False, fallback_metadata=True)

        collection_name = '{0}.{1}'.format(collection.namespace, collection.name)
        if match_filter(collection_name, coll_filter):
            version = collection.metadata.version
            result['collections'][collection_name] = {
                'path': to_native(b_path),
                # A version of '*' means the version is unknown; report None.
                'version': version if version != '*' else None,
            }
    if match_filter('ansible.builtin', coll_filter):
        # ansible-core itself is reported as the 'ansible.builtin' collection.
        result['collections']['ansible.builtin'] = {
            'version': ansible_release.__version__,
        }

    print(json.dumps(
        result, cls=AnsibleJSONEncoder, sort_keys=True, indent=4 if arguments.pretty else None))


if __name__ == '__main__':
    main(sys.argv)
64 changes: 64 additions & 0 deletions antsibull/docs_parsing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Author: Toshio Kuratomi <[email protected]>
# License: GPLv3+
# Copyright: Ansible Project, 2020
"""Parse documentation from ansible plugins using anible-doc."""

import os
import typing as t


#: Copy of the current environment in which every Ansible plugin/search path
#: variable points at /dev/null, so no stray local plugins are picked up.
ANSIBLE_PATH_ENVIRON: t.Dict[str, str] = os.environ.copy()
ANSIBLE_PATH_ENVIRON.update({
    env_var: '/dev/null' for env_var in (
        'ANSIBLE_COLLECTIONS_PATH',
        'ANSIBLE_ACTION_PLUGINS',
        'ANSIBLE_CACHE_PLUGINS',
        'ANSIBLE_CALLBACK_PLUGINS',
        'ANSIBLE_CLICONF_PLUGINS',
        'ANSIBLE_CONNECTION_PLUGINS',
        'ANSIBLE_FILTER_PLUGINS',
        'ANSIBLE_HTTPAPI_PLUGINS',
        'ANSIBLE_INVENTORY_PLUGINS',
        'ANSIBLE_LOOKUP_PLUGINS',
        'ANSIBLE_LIBRARY',
        'ANSIBLE_MODULE_UTILS',
        'ANSIBLE_NETCONF_PLUGINS',
        'ANSIBLE_ROLES_PATH',
        'ANSIBLE_STRATEGY_PLUGINS',
        'ANSIBLE_TERMINAL_PLUGINS',
        'ANSIBLE_TEST_PLUGINS',
        'ANSIBLE_VARS_PLUGINS',
        'ANSIBLE_DOC_FRAGMENT_PLUGINS',
    )
})
# All python libs must come from the venv, so make sure no PYTHONPATH leaks in.
ANSIBLE_PATH_ENVIRON.pop('PYTHONPATH', None)
# ANSIBLE_COLLECTIONS_PATHS is the deprecated name replaced by
# ANSIBLE_COLLECTIONS_PATH.
ANSIBLE_PATH_ENVIRON.pop('ANSIBLE_COLLECTIONS_PATHS', None)


class ParsingError(Exception):
    """Error raised while parsing plugins for documentation.

    Raised by the docs parsing backends when plugin documentation cannot be
    extracted or deserialized.
    """


def _get_environment(collection_dir: t.Optional[str]) -> t.Dict[str, str]:
    """Build the subprocess environment used for docs extraction.

    :arg collection_dir: If given, it becomes ANSIBLE_COLLECTIONS_PATH;
        otherwise the collection path variables of the original environment
        are carried over.
    :returns: A fresh copy of ANSIBLE_PATH_ENVIRON with collection paths set.
    """
    env = ANSIBLE_PATH_ENVIRON.copy()
    if collection_dir is not None:
        env['ANSIBLE_COLLECTIONS_PATH'] = collection_dir
        return env
    # No explicit collection dir: restore ANSIBLE_COLLECTIONS_PATH and its
    # deprecated alias from the original environment (ANSIBLE_PATH_ENVIRON
    # points them at /dev/null).
    for env_var in ('ANSIBLE_COLLECTIONS_PATH', 'ANSIBLE_COLLECTIONS_PATHS'):
        env.pop(env_var, None)
        if env_var in os.environ:
            env[env_var] = os.environ[env_var]
    return env
62 changes: 5 additions & 57 deletions antsibull/docs_parsing/ansible_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import asyncio
import json
import os
import sys
import traceback
import typing as t
Expand All @@ -19,52 +18,14 @@
from ..logging import log
from ..vendored.json_utils import _filter_non_json_lines
from .fqcn import get_fqcn_parts
from . import _get_environment, ParsingError

if t.TYPE_CHECKING:
from ..venv import VenvRunner, FakeVenvRunner


mlog = log.fields(mod=__name__)

#: Clear Ansible environment variables that set paths where plugins could be found.
ANSIBLE_PATH_ENVIRON: t.Dict[str, str] = os.environ.copy()
ANSIBLE_PATH_ENVIRON.update({'ANSIBLE_COLLECTIONS_PATH': '/dev/null',
'ANSIBLE_ACTION_PLUGINS': '/dev/null',
'ANSIBLE_CACHE_PLUGINS': '/dev/null',
'ANSIBLE_CALLBACK_PLUGINS': '/dev/null',
'ANSIBLE_CLICONF_PLUGINS': '/dev/null',
'ANSIBLE_CONNECTION_PLUGINS': '/dev/null',
'ANSIBLE_FILTER_PLUGINS': '/dev/null',
'ANSIBLE_HTTPAPI_PLUGINS': '/dev/null',
'ANSIBLE_INVENTORY_PLUGINS': '/dev/null',
'ANSIBLE_LOOKUP_PLUGINS': '/dev/null',
'ANSIBLE_LIBRARY': '/dev/null',
'ANSIBLE_MODULE_UTILS': '/dev/null',
'ANSIBLE_NETCONF_PLUGINS': '/dev/null',
'ANSIBLE_ROLES_PATH': '/dev/null',
'ANSIBLE_STRATEGY_PLUGINS': '/dev/null',
'ANSIBLE_TERMINAL_PLUGINS': '/dev/null',
'ANSIBLE_TEST_PLUGINS': '/dev/null',
'ANSIBLE_VARS_PLUGINS': '/dev/null',
'ANSIBLE_DOC_FRAGMENT_PLUGINS': '/dev/null',
})
try:
del ANSIBLE_PATH_ENVIRON['PYTHONPATH']
except KeyError:
# We just wanted to make sure there was no PYTHONPATH set...
# all python libs will come from the venv
pass
try:
del ANSIBLE_PATH_ENVIRON['ANSIBLE_COLLECTIONS_PATHS']
except KeyError:
# ANSIBLE_COLLECTIONS_PATHS is the deprecated name replaced by
# ANSIBLE_COLLECTIONS_PATH
pass


class ParsingError(Exception):
"""Error raised while parsing plugins for documentation."""


def _process_plugin_results(plugin_type: str,
plugin_names: t.Iterable[str],
Expand Down Expand Up @@ -197,23 +158,6 @@ async def _get_plugin_info(plugin_type: str, ansible_doc: 'sh.Command',
return results


def _get_environment(collection_dir: t.Optional[str]) -> t.Dict[str, str]:
env = ANSIBLE_PATH_ENVIRON.copy()
if collection_dir is not None:
env['ANSIBLE_COLLECTIONS_PATH'] = collection_dir
else:
# Copy ANSIBLE_COLLECTIONS_PATH and ANSIBLE_COLLECTIONS_PATHS from the
# original environment.
for env_var in ('ANSIBLE_COLLECTIONS_PATH', 'ANSIBLE_COLLECTIONS_PATHS'):
try:
del env[env_var]
except KeyError:
pass
if env_var in os.environ:
env[env_var] = os.environ[env_var]
return env


async def get_ansible_plugin_info(venv: t.Union['VenvRunner', 'FakeVenvRunner'],
collection_dir: t.Optional[str],
collection_names: t.Optional[t.List[str]] = None
Expand All @@ -234,6 +178,9 @@ async def get_ansible_plugin_info(venv: t.Union['VenvRunner', 'FakeVenvRunner'],
{information from ansible-doc --json. See the ansible-doc documentation for more
info.}
"""
flog = mlog.fields(func='get_ansible_plugin_info')
flog.debug('Enter')

env = _get_environment(collection_dir)

# Setup an sh.Command to run ansible-doc from the venv with only the collections we
Expand Down Expand Up @@ -299,4 +246,5 @@ async def get_ansible_plugin_info(venv: t.Union['VenvRunner', 'FakeVenvRunner'],
# done so, we want to then fail by raising one of the exceptions.
raise ParsingError('Parsing of plugins failed')

flog.debug('Leave')
return plugin_map
Loading

0 comments on commit edee741

Please sign in to comment.