diff --git a/.azure-pipelines/warden.yml b/.azure-pipelines/warden.yml new file mode 100644 index 00000000000..530d769be75 --- /dev/null +++ b/.azure-pipelines/warden.yml @@ -0,0 +1,39 @@ +trigger: +- master + +jobs: +- job: 'Build' + + pool: + vmImage: 'ubuntu-16.04' + + steps: + - task: UsePythonVersion@0 + displayName: 'Use Python 3.6' + inputs: + versionSpec: 3.6 + + - script: | + pip install setuptools + pip install wheel twine readme-renderer[md] + displayName: 'Prep Environment' + + - script: | + cd $(Build.SourcesDirectory)/packages/python-packages/doc-warden/ + python setup.py bdist_wheel -d $(Build.ArtifactStagingDirectory) + displayName: 'Build Package' + + - script: | + twine check $(Build.ArtifactStagingDirectory)/* + displayName: 'Verify Readme' + + - task: PublishBuildArtifacts@1 + displayName: 'Publish Artifacts' + condition: succeededOrFailed() + + - task: ms.vss-governance-buildtask.governance-build-task-component-detection.ComponentGovernanceComponentDetection@0 + # ComponentGovernance is currently unable to run on pull requests of public projects. Running on non-PR + # builds should be sufficient. + condition: and(succeededOrFailed(), ne(variables['Build.Reason'], 'PullRequest')) + displayName: 'Component Detection' + diff --git a/README.md b/README.md index 72f1506a93b..006f3aae1fe 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,12 @@ +# Azure SDK Tools + +This repository contains useful tools that the Azure SDK team utilizes across their infrastructure. + +# Index + +| Package or Intent | Path | Description | +|-------------------|-----------------------------------------|-----------------------------------------------------------------| +| doc-warden | [Readme](packages/python-packages/doc-warden/README.md) | A tool used to enforce readme standards across Azure SDK Repos. 
| # Contributing diff --git a/packages/python-packages/doc-warden/MANIFEST.in b/packages/python-packages/doc-warden/MANIFEST.in new file mode 100644 index 00000000000..009c440d166 --- /dev/null +++ b/packages/python-packages/doc-warden/MANIFEST.in @@ -0,0 +1,2 @@ +include *.md +include warden/__init__.py diff --git a/packages/python-packages/doc-warden/README.md b/packages/python-packages/doc-warden/README.md new file mode 100644 index 00000000000..f4e2bcbb290 --- /dev/null +++ b/packages/python-packages/doc-warden/README.md @@ -0,0 +1,141 @@ +# Doc Warden + +Every CI build owned by the Azure-SDK team also needs to verify that the documentation within the target repo meets a set of standards. `Doc-warden` is intended to ease the _implementation_ of these checks in CI builds. + +Features: + +* Enforces Readme Standards + - [x] Readmes present + - [ ] Readmes have appropriate contents + - [ ] Files issues for failed standards checks + - [ ] Exit code > 0 for issues discovered +* Generates report for included observed packages + +This package is under development, and as such Python version compatibility has not been finalized at this time. + +## Prerequisites +This package is intended to be run as part of a pipeline within Azure DevOps. As such, [Python](https://www.python.org/downloads/) must be installed prior to attempting to install or use `Doc-Warden`. While `pip` comes pre-installed on most modern Python installs, if `pip` is an unrecognized command when attempting to install `warden`, run the following commands **after** your Python installation is complete. + +In addition, `warden` is distributed using `setuptools` and `wheel`, so those packages should also be present prior to install. + +``` +/:> python -m ensurepip +/:> pip install setuptools wheel +``` + +## Usage + +Right now, `warden` has a single command, `scan`, which by default looks for a target `.docsettings.yml` file within the target repo. 
However, any setting that can be read from the `.docsettings` file can also be passed as a command-line parameter, and a value passed as a parameter will **override** whatever is placed within the `.docsettings` file. + +Example usage: + +``` + + +... +/:> pip install setuptools wheel +/:> sudo pip install doc-warden +/:> ward scan -d $(Build.SourcesDirectory) + +``` +**Notes for example above** + +* Devops is a bit finicky with registering a console entry point, hence the `sudo` just on the installation. `sudo` is only required on devops machines. +* Assumption is that the `.docsettings` file is placed at the root of the repository. + * To provide a different path (like `azure-sdk-for-java` does...), use: + * `ward scan -d $(Build.SourcesDirectory) -c $(Build.SourcesDirectory)/eng/.docsettings.yml` + +##### Parameter Options + +`command` +Currently supports the `scan` command. Additional commands may be supported in the future. **Required.** + +`--scan-directory` +The target directory `warden` should be scanning. **Required.** + +`--scan-language` +`warden` checks for packages by _convention_, so it needs to understand what language it is looking at. This must be populated either in the `.docsettings` file or by parameter. **Required.** + +`--config-location` +By default, `warden` looks for the `.docsettings` file in the root of the repository. However, populating this location will override this behavior and instead pull the file from the location in this parameter. **Optional.** + +`--verbose-output` +Enable or disable output of an html report. Defaults to false. **Optional.** + +##### Notes for Devops Usage + +The `-d` argument should be `$(Build.SourcesDirectory)`. This will point `warden` at the repo that has been associated with CI. + +## Methodology + +### Enforcing Readme Presence + +When should we expect a readme to be present? 
+ +**Always:** + +* At the root of the repo +* Associated with a `package` directory + +#### .Net + +A package is indicated by: +* a `*.csproj` file + * Project file does not end with `tests.csproj` + +#### Python + +A package is indicated by: + +* the presence of a `setup.py` file + +#### Java + +A package is indicated by: + +* the presence of a `pom.xml` file + * The POM `<packaging>` value within is set to `jar` + +#### Node & JS + +A package is indicated by: + +* The presence of a `package.json` file + +#### Control, the `.docsettings.yml` File, and You + +Special cases often need to be configured. It seems logical that there needs to be a central location (per repo) to override conventional settings. To that end, a new `.docsettings.yml` file will be added to each repo. + +``` + +│ README.md +│ .docsettings.yml +│ +└───.azure-pipelines +│ │ +│ +└─── +``` + +The presence of this file allows each repository to customize how enforcement takes place within their repo. + +**Example DocSettings File for Java Repo** + +``` +omitted_paths: + - archive/* +language: java +root_check_enabled: True +``` + +The above configuration tells `warden`... + +- The language within the repo is `java` +- To ensure that a `README.md` is present at the root of the repository. +- To omit any paths under `archive/` from the readme checks. + +Possible values for `language` right now are `['net', 'java', 'js', 'python']`. More than one target language is not currently supported. + +## Provide Feedback + +If you encounter any bugs or have suggestions, please file an issue [here](https://github.com/Azure/azure-sdk-tools/issues) and assign to `scbedd`. 
"""Packaging script for the doc-warden command line tool."""
from setuptools import setup, find_packages
import setuptools

import os
from io import open
import re

PACKAGE_NAME = 'doc-warden'

DESCRIPTION = 'Doc-Warden is an internal project created by the Azure SDK Team. It is intended to be used by CI Builds to ensure that documentation standards are met. See readme for more details.'

# The version is read textually from warden/version.py so that building the
# package does not require importing it (and therefore its dependencies).
with open(os.path.join('warden', 'version.py'), 'r') as fd:
    version_match = re.search(r'^VERSION\s*=\s*[\'"]([^\'"]*)[\'"]',
                              fd.read(), re.MULTILINE)

# re.search returns None when nothing matches, so the match object has to be
# tested *before* .group() is called; otherwise a missing VERSION line surfaces
# as an AttributeError instead of the intended RuntimeError.
version = version_match.group(1) if version_match else None

if not version:
    raise RuntimeError('Cannot find version information')

with open('README.md', encoding='utf-8') as f:
    long_description = f.read()

setup(
    name=PACKAGE_NAME,
    version=version,
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/Azure/azure-sdk-tools/packages/python-packages/',
    author='Microsoft Corporation',
    author_email='azuresdkengsysadmins@microsoft.com',

    license='MIT License',

    classifiers=[
        'Development Status :: 3 - Alpha',

        'Programming Language :: Python',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'License :: OSI Approved :: MIT License',
    ],
    packages=find_packages(),
    install_requires=[
        'pyyaml',
        'pathlib',
    ],
    entry_points={
        # Installs the `ward` executable, which dispatches to
        # warden.console_entry_point.
        'console_scripts': [
            'ward=warden:console_entry_point',
        ]
    }
)
class WardenConfiguration(object):
    """Settings for one `ward` run, merged from the command line and `.docsettings.yml`.

    Values given on the command line take precedence over values found in the
    settings file.

    Attributes:
        command: The positional warden command (for example ``scan``).
        target_directory: Value of ``--scan-directory``.
        yml_location: Path of the settings file; ``--config-location`` when
            given, otherwise ``<target_directory>/.docsettings.yml``.
        omitted_paths: ``omitted_paths`` list from the settings file, or ``[]``.
        scan_language: ``--scan-language`` or the settings file's ``language``.
        root_check_enabled: bool; ``--root-check-enabled``, else the settings
            file's ``root_check_enabled``, else ``True``.
        verbose_output: bool; ``--verbose-output``, else ``False``.
    """

    # argparse hands option values over as strings, so "false" would be truthy
    # if tested directly. These spellings (case-insensitive) mean "off"; any
    # other non-empty value keeps its previous meaning of "on".
    _FALSE_STRINGS = ('false', '0', 'no', 'n', 'off', '')

    def __init__(self, argv=None):
        """Parse arguments, read the settings file and resolve every option.

        Args:
            argv: Optional list of argument strings. ``None`` (the default)
                makes argparse read ``sys.argv[1:]``, as before.

        Raises:
            SystemExit: With code 1 when no language is available from either
                source; argparse also exits on invalid arguments.
        """
        args = self._build_parser().parse_args(argv)

        self.command = args.command
        self.target_directory = args.scan_directory
        self.yml_location = args.config_location or os.path.join(self.target_directory, '.docsettings.yml')

        doc = self._load_settings(self.yml_location)

        self.omitted_paths = doc.get('omitted_paths') or []

        self.scan_language = args.scan_language or doc.get('language')
        if not self.scan_language:
            print('.docsettings has no selected language, neither has the --scan-language parameter been populated. Exiting.')
            raise SystemExit(1)

        # Explicit precedence: command line, then settings file, then the
        # documented default of True. (Chaining the three with `or` can never
        # yield False.)
        if args.root_check_enabled is not None:
            self.root_check_enabled = self._as_bool(args.root_check_enabled)
        else:
            self.root_check_enabled = self._as_bool(doc.get('root_check_enabled'), default=True)

        self.verbose_output = self._as_bool(args.output_report, default=False)

    @staticmethod
    def _build_parser():
        """Return the argparse parser describing warden's command line."""
        parser = argparse.ArgumentParser(description='''\
        Scan an azure-sdk repo and ensure that readmes are present and have appropriate content.
        This check is done by convention, which is why there is the --scan-language option exists.
        For normal/CI usage, a .docsettings file should be present in the repository root, to allow
        for visible configuration of all the options.
        ''')

        parser.add_argument(
            '-d',
            '--scan-directory',
            dest='scan_directory',
            help='The repo directory that this tool should be scanning.',
            required=True)
        parser.add_argument(
            '-c',
            '--config-location',
            dest='config_location',
            required=False,
            help='''
            If provided, will replace the repo native .docsettings file
            with a .docsettings file found at the location provided by this input
            ''')
        parser.add_argument(
            '-l',
            '--scan-language',
            dest='scan_language',
            required=False,
            help='The language contained within the target directory. Overrides .docsettings contents.')
        parser.add_argument(
            '-r',
            '--root-check-enabled',
            dest='root_check_enabled',
            required=False,
            help='Enable or disable checking for a readme at the root of the repository. Defaults true. Overrides .docsettings contents.')
        parser.add_argument(
            '-o',
            '--verbose-output',
            dest='output_report',
            required=False,
            help='Enable or disable html generation.')
        parser.add_argument(
            'command',
            help=('The warden command to run.'))
        return parser

    @staticmethod
    def _load_settings(yml_location):
        """Return the settings file as a dict, or ``{}`` when it is unusable.

        A missing, unreadable, malformed, empty or non-mapping file is reported
        (where applicable) and treated as "no settings" so that command-line
        values alone can still drive the run.
        """
        try:
            with open(yml_location, 'r') as f:
                # safe_load builds only plain YAML types; nothing in the
                # settings file needs arbitrary Python object construction.
                doc = yaml.safe_load(f)
        except (IOError, OSError):
            print('Unable to read .docsettings. Check the location of the file.')
            return {}
        except yaml.YAMLError:
            print('Unable to parse .docsettings. Check the location of the file.')
            return {}
        return doc if isinstance(doc, dict) else {}

    @classmethod
    def _as_bool(cls, value, default=False):
        """Coerce a CLI string or YAML value to bool; ``None`` yields *default*."""
        if value is None:
            return default
        if isinstance(value, bool):
            return value
        return str(value).strip().lower() not in cls._FALSE_STRINGS

    def dump(self):
        """Return every resolved setting as a plain dict (used for logging)."""
        return {
            'command': self.command,
            'target_directory': self.target_directory,
            'yml_location': self.yml_location,
            'omitted_paths': self.omitted_paths,
            'scan_language': self.scan_language,
            'root_check_enabled': self.root_check_enabled,
            'verbose_output': self.verbose_output
        }
def return_true(param):
    """Predicate that accepts every input; usable wherever a filter callable is required."""
    return True


def unrecognized_option(configuration):
    """Report an unsupported scan language and terminate.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    print('Argument {} provided is not a supported option'.format(configuration.scan_language))
    # Raise SystemExit directly: the bare `exit` name is injected by the `site`
    # module for interactive use and is not guaranteed to exist in programs.
    raise SystemExit(1)


def console_entry_point():
    """Entry point of the `ward` console script.

    Builds the configuration, echoes it, and dispatches to the handler
    registered for the requested command.

    Raises:
        SystemExit: With exit code 1 when the command is not recognized.
    """
    cfg = WardenConfiguration()
    print(cfg.dump())

    command_selector = {
        'scan': scan_repo,
    }

    handler = command_selector.get(cfg.command)
    if handler is None:
        print('Unrecognized command invocation {}.'.format(cfg.command))
        raise SystemExit(1)
    handler(cfg)


def scan_repo(config):
    """Run the `scan` command: find package folders lacking a readme and report them."""
    missing_readme_paths = check_package_readmes(config)
    results(missing_readme_paths, config)


def results(missing_readme_paths, config):
    """Print the folders that are missing a readme and fail the process if any exist.

    Args:
        missing_readme_paths: Folder paths that lack a readme.
        config: Configuration object; only ``target_directory`` is read.

    Raises:
        SystemExit: With exit code 1 when ``missing_readme_paths`` is non-empty.
    """
    if not missing_readme_paths:
        return

    print('{} missing readmes detected at:'.format(len(missing_readme_paths)))
    # Strip the scanned root so the report shows repo-relative locations.
    root = os.path.normpath(config.target_directory)
    for path in missing_readme_paths:
        print(path.replace(root, ''))
    raise SystemExit(1)


def check_package_readmes(configuration):
    """Dispatch to the readme check for the configured language.

    The language is matched case-insensitively; an unknown language is handed
    to ``unrecognized_option``, which terminates the process.

    Returns:
        Whatever the selected language check returns (the folders that are
        missing a readme).
    """
    language_selector = {
        'python': check_python_readmes,
        'js': check_js_readmes,
        'java': check_java_readmes,
        'net': check_net_readmes
    }

    checker = language_selector.get(configuration.scan_language.lower(), unrecognized_option)
    return checker(configuration)
def _missing_readme_dirs(configuration, target_pattern, readme_names, lambda_check=None):
    """Return the folder of every matched package file that has no readme beside it.

    Args:
        configuration: Passed through to ``get_file_sets``.
        target_pattern: fnmatch pattern identifying a package marker file.
        readme_names: Acceptable readme file names (compared case-insensitively
            by ``find_alongside_file``); any one of them satisfies the check.
        lambda_check: Optional extra predicate applied to each matched path.
    """
    expected_readmes, _omitted_readmes = get_file_sets(configuration, target_pattern, lambda_check)
    return [
        os.path.dirname(expected_location)
        for expected_location in expected_readmes
        if not any(find_alongside_file(expected_location, name) for name in readme_names)
    ]


def check_python_readmes(configuration):
    """Return folders containing a `setup.py` but neither `readme.md` nor `readme.rst`."""
    return _missing_readme_dirs(configuration, '*/setup.py', ('readme.md', 'readme.rst'))


def check_js_readmes(configuration):
    """Return folders containing a `package.json` but no `readme.md`."""
    return _missing_readme_dirs(configuration, '*/package.json', ('readme.md',))


def check_net_readmes(configuration):
    """Return folders containing a non-test `*.csproj` but no `readme.md`.

    Test projects are excluded through ``is_net_csproj_package``.
    """
    # The original loop appended to a differently named, undefined list and so
    # raised NameError on the first missing readme; the shared helper removes
    # that duplicated bookkeeping.
    return _missing_readme_dirs(configuration, '*.csproj', ('readme.md',), is_net_csproj_package)
def is_net_csproj_package(file_path):
    """Return True unless the project file name ends with `tests.csproj` (case-insensitive)."""
    return not file_path.lower().endswith('tests.csproj')


def check_java_readmes(configuration):
    """Return folders containing a package `pom.xml` but no `readme.md`."""
    expected_readmes, _omitted_readmes = get_file_sets(configuration, "*/pom.xml", is_java_pom_package_pom)
    return [
        os.path.dirname(expected_location)
        for expected_location in expected_readmes
        if not find_alongside_file(expected_location, 'readme.md')
    ]


def is_java_pom_package_pom(file_path):
    """Return True when the POM declares `<packaging>jar</packaging>`.

    POMs with any other packaging value, or with no packaging element at the
    top level, are not treated as packages.
    """
    packaging = parse_pom(file_path).find('packaging')
    return packaging is not None and packaging.text == 'jar'


def check_repo_root(configuration):
    """Return True when the repository root holds a readme, or the check is disabled.

    Only regular files directly inside ``configuration.target_directory`` are
    considered; `readme.md` and `readme.rst` are matched case-insensitively.
    """
    if not configuration.root_check_enabled:
        return True

    root = configuration.target_directory
    # Build the lower-cased listing once rather than once per candidate name.
    present_files = {
        name.lower()
        for name in os.listdir(root)
        if os.path.isfile(os.path.join(root, name))
    }
    return not present_files.isdisjoint(('readme.md', 'readme.rst'))


def find_alongside_file(file_location, target_file_name):
    """Look for ``target_file_name`` inside or next to ``file_location``.

    Args:
        file_location: An existing folder (searched directly) or an existing
            file (its containing folder is searched).
        target_file_name: File name to look for, compared case-insensitively.

    Returns:
        The normalized path of the first matching entry, or ``False`` when
        ``file_location`` does not exist, the name is empty, or nothing matches.
    """
    if not os.path.exists(file_location) or not target_file_name:
        return False

    if os.path.isdir(file_location):
        containing_folder = file_location
    else:
        # A bare relative file name has an empty dirname; fall back to the
        # current directory so os.listdir receives a valid path.
        containing_folder = os.path.dirname(file_location) or os.curdir

    wanted = target_file_name.lower()
    for entry in os.listdir(containing_folder):
        if entry.lower() == wanted:
            return os.path.normpath(os.path.join(containing_folder, entry))
    return False


def get_file_sets(configuration, target_pattern, lambda_check = None):
    """Split the files matching ``target_pattern`` into checked and omitted.

    Returns:
        A 2-tuple ``(expected, omitted)``: ``expected`` is a list of matching
        files that are not covered by the configured omissions, kept in walk
        order so reports are stable between runs; ``omitted`` is the set of
        matching files that were explicitly omitted.
    """
    expected_locations = walk_directory_for_pattern(configuration.target_directory, [target_pattern], lambda_check)
    omitted_files = set(get_omitted_files(configuration))

    kept = [location for location in expected_locations if location not in omitted_files]
    return kept, omitted_files.intersection(expected_locations)


def get_omitted_files(configuration):
    """Return the files under the target directory matched by ``omitted_paths``.

    Patterns beginning with ``*`` are used as written; every other pattern is
    anchored at the target directory.
    """
    target_directory = configuration.target_directory
    patterns = [
        pattern if pattern.startswith("*") else os.path.join(target_directory, pattern)
        for pattern in (configuration.omitted_paths or [])
    ]
    if not patterns:
        # Nothing can match, so skip walking the whole tree.
        return []
    return walk_directory_for_pattern(target_directory, patterns, None)
def walk_directory_for_pattern(target_directory, target_patterns, lambda_check = None):
    """Return the files under ``target_directory`` that match any pattern and pass the check.

    Args:
        target_directory: Root of the walk; normalized before walking.
        target_patterns: fnmatch patterns, each normalized with
            ``os.path.normpath`` and matched against the full file path.
        lambda_check: Optional predicate taking the file path; when omitted
            (or falsy) every pattern match is accepted.

    Returns:
        A list of file paths in ``os.walk`` order.
    """
    target_directory = os.path.normpath(target_directory)
    normalized_target_patterns = [os.path.normpath(pattern) for pattern in target_patterns]

    expected_locations = []
    for folder, _subfolders, files in os.walk(target_directory):
        for file_name in files:
            file_path = os.path.join(folder, file_name)
            if not check_match(file_path, normalized_target_patterns):
                continue
            if not lambda_check or lambda_check(file_path):
                expected_locations.append(file_path)

    return expected_locations


def check_match(file_path, normalized_target_patterns):
    """Return True when ``file_path`` matches at least one of the fnmatch patterns."""
    # A generator lets any() stop at the first hit instead of evaluating
    # every pattern up front.
    return any(fnmatch.fnmatch(file_path, normalized_target_pattern)
               for normalized_target_pattern in normalized_target_patterns)


def parse_pom(file_path):
    """Parse a POM file and return its root element with XML namespaces stripped.

    ElementTree keeps the namespace as a ``{uri}`` prefix on every tag, which
    makes plain ``find('packaging')`` lookups fail
    (https://bugs.python.org/issue18304); the prefix is removed from each
    element as it is parsed.
    """
    # Hand the parser raw bytes so it honours the encoding declared in the XML
    # itself rather than whatever the platform's default text encoding is.
    with open(file_path, 'rb') as f:
        it = ET.iterparse(f)
        for _, el in it:
            if '}' in el.tag:
                el.tag = el.tag.split('}', 1)[1]
    return it.root