diff --git a/.github/workflows/devcontainer.yml b/.github/workflows/devcontainer.yml index c0d41a3d72c..ae009107508 100644 --- a/.github/workflows/devcontainer.yml +++ b/.github/workflows/devcontainer.yml @@ -198,7 +198,6 @@ jobs: rm -f doc/build/html/htmlcov/.gitignore - uses: actions/upload-artifact@v4 - if: github.ref == 'refs/heads/main' with: name: docs path: doc/build/html diff --git a/.vscode/settings.json b/.vscode/settings.json index 982eb42c8dd..759b34a412a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,18 @@ // vim: set ft=jsonc: { "makefile.extensionOutputFolder": "./.vscode", + "files.exclude": { + ".git/": true, + ".mypy_cache/": true, + ".pytest_cache/": true, + "**/__pycache__/": true, + "**/node_modules/": true, + "**/*.egg-info": true, + "doc/source/autoapi/": true, + "doc/build/doctrees/": true, + "doc/build/html/": true, + "htmlcov/": true, + }, // Note: this only works in WSL/Linux currently. "python.defaultInterpreterPath": "${env:HOME}/.conda/envs/mlos/bin/python", // For Windows it should be this instead: @@ -26,6 +38,8 @@ "mlos_bench/mlos_bench/tests/config/environments/**/*.json", "mlos_bench/mlos_bench/config/environments/**/*.jsonc", "mlos_bench/mlos_bench/config/environments/**/*.json", + "!mlos_bench/mlos_bench/tests/config/environments/**/*-tunables.jsonc", + "!mlos_bench/mlos_bench/tests/config/environments/**/*-tunables.json", "!mlos_bench/mlos_bench/config/environments/**/*-tunables.jsonc", "!mlos_bench/mlos_bench/config/environments/**/*-tunables.json" ], diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c32eaa836ec..163f48cb6c5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -86,6 +86,8 @@ We expect development to follow a typical "forking" style workflow: make doc-test ``` + > See the [documentation README](./doc/README.md) for more information on documentation and its testing. + 1. Submit changes for inclusion as a [Pull Request on Github](https://github.com/microsoft/MLOS/pulls). 
Some notes on organizing changes to help reviewers: diff --git a/MAINTAINING.md b/MAINTAINING.md index 54f60c016ab..9a7f6060365 100644 --- a/MAINTAINING.md +++ b/MAINTAINING.md @@ -2,6 +2,10 @@ Some notes for maintainers. +## Documentation + +See the [documentation README](./doc/README.md) for more information on writing (and testing) documentation. + ## Releasing 1. Bump the version using the [`update-version.sh`](./scripts/update-version.sh) script: diff --git a/Makefile b/Makefile index 93bd4fd0be1..d465b2b70ea 100644 --- a/Makefile +++ b/Makefile @@ -659,49 +659,32 @@ clean-doc-env: COMMON_DOC_FILES := build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp doc/source/*.rst doc/source/_templates/*.rst doc/source/conf.py -doc/source/api/mlos_core/modules.rst: $(FORMAT_PREREQS) $(COMMON_DOC_FILES) -doc/source/api/mlos_core/modules.rst: $(MLOS_CORE_PYTHON_FILES) - rm -rf doc/source/api/mlos_core - cd doc/ && conda run -n ${CONDA_ENV_NAME} sphinx-apidoc -f -e -M \ - -o source/api/mlos_core/ \ - ../mlos_core/ \ - ../mlos_core/setup.py ../mlos_core/mlos_core/tests/ - -doc/source/api/mlos_bench/modules.rst: $(FORMAT_PREREQS) $(COMMON_DOC_FILES) -doc/source/api/mlos_bench/modules.rst: $(MLOS_BENCH_PYTHON_FILES) - rm -rf doc/source/api/mlos_bench - cd doc/ && conda run -n ${CONDA_ENV_NAME} sphinx-apidoc -f -e -M \ - -o source/api/mlos_bench/ \ - ../mlos_bench/ \ - ../mlos_bench/setup.py ../mlos_bench/mlos_bench/tests/ - # Save the help output of the mlos_bench scripts to include in the documentation. - # First make sure that the latest version of mlos_bench is installed (since it uses git based tagging). - conda run -n ${CONDA_ENV_NAME} pip install -e mlos_core -e mlos_bench -e mlos_viz - conda run -n ${CONDA_ENV_NAME} mlos_bench --help > doc/source/api/mlos_bench/mlos_bench.run.usage.txt - echo ".. 
literalinclude:: mlos_bench.run.usage.txt" >> doc/source/api/mlos_bench/mlos_bench.run.rst - echo " :language: none" >> doc/source/api/mlos_bench/mlos_bench.run.rst - -doc/source/api/mlos_viz/modules.rst: $(FORMAT_PREREQS) $(COMMON_DOC_FILES) -doc/source/api/mlos_viz/modules.rst: $(MLOS_VIZ_PYTHON_FILES) - rm -rf doc/source/api/mlos_viz - cd doc/ && conda run -n ${CONDA_ENV_NAME} sphinx-apidoc -f -e -M \ - -o source/api/mlos_viz/ \ - ../mlos_viz/ \ - ../mlos_viz/setup.py ../mlos_viz/mlos_viz/tests/ - -SPHINX_API_RST_FILES := doc/source/api/mlos_core/modules.rst -SPHINX_API_RST_FILES += doc/source/api/mlos_bench/modules.rst -SPHINX_API_RST_FILES += doc/source/api/mlos_viz/modules.rst - -.PHONY: sphinx-apidoc -sphinx-apidoc: $(SPHINX_API_RST_FILES) +SPHINX_API_RST_FILES := doc/source/index.rst doc/source/mlos_bench.run.usage.rst ifeq ($(SKIP_COVERAGE),) doc/build/html/index.html: build/pytest.${CONDA_ENV_NAME}.build-stamp doc/build/html/htmlcov/index.html: build/pytest.${CONDA_ENV_NAME}.build-stamp endif -doc/build/html/index.html: $(SPHINX_API_RST_FILES) doc/Makefile doc/copy-source-tree-docs.sh $(MD_FILES) +# Treat warnings as failures. +SPHINXOPTS ?= # -v # be verbose +SPHINXOPTS += -n -W -w $(CURDIR)/doc/build/sphinx-build.warn.log -j auto + +sphinx-apidoc: doc/build/html/index.html + +doc/source/generated/mlos_bench.run.usage.txt: build/conda-env.${CONDA_ENV_NAME}.build-stamp +doc/source/generated/mlos_bench.run.usage.txt: $(MLOS_BENCH_PYTHON_FILES) + # Generate the help output from mlos_bench CLI for the docs. 
+ mkdir -p doc/source/generated/ + conda run -n ${CONDA_ENV_NAME} mlos_bench --help > doc/source/generated/mlos_bench.run.usage.txt + +doc/build/html/index.html: build/doc-prereqs.${CONDA_ENV_NAME}.build-stamp +doc/build/html/index.html: doc/source/generated/mlos_bench.run.usage.txt +doc/build/html/index.html: $(MLOS_CORE_PYTHON_FILES) +doc/build/html/index.html: $(MLOS_BENCH_PYTHON_FILES) +doc/build/html/index.html: $(MLOS_VIZ_PYTHON_FILES) +doc/build/html/index.html: $(SPHINX_API_RST_FILES) doc/Makefile doc/source/conf.py +doc/build/html/index.html: doc/copy-source-tree-docs.sh $(MD_FILES) @rm -rf doc/build @mkdir -p doc/build @rm -f doc/build/log.txt @@ -715,7 +698,7 @@ doc/build/html/index.html: $(SPHINX_API_RST_FILES) doc/Makefile doc/copy-source- ./doc/copy-source-tree-docs.sh # Build the rst files into html. - conda run -n ${CONDA_ENV_NAME} $(MAKE) -C doc/ $(MAKEFLAGS) html \ + conda run -n ${CONDA_ENV_NAME} $(MAKE) SPHINXOPTS="$(SPHINXOPTS)" -C doc/ $(MAKEFLAGS) html \ >> doc/build/log.txt 2>&1 \ || { cat doc/build/log.txt; exit 1; } # DONE: Add some output filtering for this so we can more easily see what went wrong. @@ -744,27 +727,21 @@ check-doc: build/check-doc.build-stamp build/check-doc.build-stamp: doc/build/html/index.html doc/build/html/htmlcov/index.html # Check for a few files to make sure the docs got generated in a way we want. 
test -s doc/build/html/index.html - test -s doc/build/html/generated/mlos_core.optimizers.optimizer.BaseOptimizer.html - test -s doc/build/html/generated/mlos_bench.environments.Environment.html - test -s doc/build/html/generated/mlos_viz.plot.html - test -s doc/build/html/api/mlos_core/mlos_core.html - test -s doc/build/html/api/mlos_bench/mlos_bench.html - test -s doc/build/html/api/mlos_viz/mlos_viz.html - test -s doc/build/html/api/mlos_viz/mlos_viz.dabl.html - grep -q -e '--config CONFIG' doc/build/html/api/mlos_bench/mlos_bench.run.html + grep -q BaseOptimizer doc/build/html/autoapi/mlos_core/optimizers/optimizer/index.html + grep -q Environment doc/build/html/autoapi/mlos_bench/environments/base_environment/index.html + grep -q plot doc/build/html/autoapi/mlos_viz/index.html + test -s doc/build/html/autoapi/mlos_core/index.html + test -s doc/build/html/autoapi/mlos_bench/index.html + test -s doc/build/html/autoapi/mlos_viz/index.html + test -s doc/build/html/autoapi/mlos_viz/dabl/index.html + grep -q -e '--config CONFIG' doc/build/html//mlos_bench.run.usage.html # Check doc logs for errors (but skip over some known ones) ... 
@cat doc/build/log.txt \ | egrep -C1 -e WARNING -e CRITICAL -e ERROR \ | egrep -v \ -e "warnings.warn\(f'\"{wd.path}\" is shallow and may cause errors'\)" \ -e "No such file or directory: '.*.examples'.( \[docutils\]\s*)?$$" \ - -e 'Problems with "include" directive path:' \ - -e 'duplicate object description' \ - -e "document isn't included in any toctree" \ - -e "more than one target found for cross-reference" \ -e "toctree contains reference to nonexisting document 'auto_examples/index'" \ - -e "failed to import function 'create' from module '(SpaceAdapter|Optimizer)Factory'" \ - -e "No module named '(SpaceAdapter|Optimizer)Factory'" \ -e '^make.*resetting jobserver mode' \ -e 'from cryptography.hazmat.primitives.ciphers.algorithms import' \ | grep -v '^\s*$$' \ @@ -798,7 +775,7 @@ build/linklint-doc.build-stamp: doc/build/html/index.html doc/build/html/htmlcov .PHONY: clean-doc clean-doc: - rm -rf doc/build/ doc/global/ doc/source/api/ doc/source/generated + rm -rf doc/build/ doc/global/ doc/source/api/ doc/source/generated doc/source/autoapi rm -rf doc/source/source_tree_docs/* .PHONY: clean-format diff --git a/README.md b/README.md index 2c1160dc938..dbc48ffb4fc 100644 --- a/README.md +++ b/README.md @@ -188,7 +188,10 @@ Details on using a local version from git are available in [CONTRIBUTING.md](./C Working example of tuning `sqlite` with MLOS. -These can be used as starting points for new autotuning projects. +These can be used as starting points for new autotuning projects outside of the main MLOS repository if you want to keep your tuning experiment configs separate from the MLOS codebase. + +Alternatively, we accept PRs to add new examples to the main MLOS repository! +See [mlos_bench/config](./mlos_bench/mlos_bench/config/) and [CONTRIBUTING.md](./CONTRIBUTING.md) for more details. 
### Publications diff --git a/doc/README.md b/doc/README.md index eea9dd955c4..4ed884f8cc7 100644 --- a/doc/README.md +++ b/doc/README.md @@ -2,14 +2,99 @@ Documentation is generated using [`sphinx`](https://www.sphinx-doc.org/). +The configuration for this is in [`doc/source/conf.py`](./source/conf.py). + +We use the [`autoapi`](https://sphinx-autoapi.readthedocs.io/en/latest/) extension to generate documentation automatically from the docstrings in our python code. + +Additionally, we also use the [`copy-source-tree-docs.sh`](./copy-source-tree-docs.sh) script to copy a few Markdown files from the root of the repository to the `doc/source` build directory automatically to include them in the documentation. + +Those are included in the [`index.rst`](./source/index.rst) file which is the main entry point for the documentation and about the only manually maintained rst file. + +## Writing Documentation + +When writing docstrings, use the [`numpydoc`](https://numpydoc.readthedocs.io/en/latest/format.html) style. + +Where necessary, embedded [reStructuredText (rst)](https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html) markup can be used to help format the documentation. + +Each top level module should include a docstring that describes the module and its purpose and usage. + +These docstrings should be written for consumption by both users and developers. + +Other function and method docstrings that aren't typically intended for users can be written for developers. + +### Cross Referencing + +You can include links between the documentation using [cross-referencing](https://www.sphinx-doc.org/en/master/usage/domains/python.html#python-xref-roles) links in the docstring. + +For instance: + +```python +""" +My docstring that references another module :py:mod:`fully.qualified.module.name`. + +Or else, a class :py:class:`fully.qualified.module.name.ClassName`. + +Or else, a class name :py:class:`.ClassName` that is in the same module. 
+ +Or else, a class method :py:meth:`~.ClassName.method` but without the leading class name. +""" +``` + +These links will be automatically resolved by `sphinx` and checked using the `nitpick` option to ensure we have well-formed links in the documentation. + +### Example Code + +Ideally, each main class should also include example code that demonstrates how to use the class. + +This code should be included in the docstring and should be runnable via [`doctest`](https://docs.python.org/3/library/doctest.html). + +For instance: + +```python +class MyClass: + """ + My class that does something. + + Examples + -------- + >>> from my_module import MyClass + >>> my_class = MyClass() + >>> my_class.do_something() + Expected output + + """ + ... +``` + +This code will be automatically checked with `pytest` using the `--doctest-modules` option specified in [`setup.cfg`](../setup.cfg). + +## Building the documentation + ```sh -make -C .. doc +# From the root of the repository +make SKIP_COVERAGE=true doc ``` -## Testing with Docker +This will also run some checks on the documentation. + +> When running this command in a tight loop, it may be useful to run with `SKIP_COVERAGE=true` to avoid re-running the test and coverage checks each time a python file changes. + +## Testing + +### Manually with Docker ```sh ./nginx-docker.sh restart ``` > Now browse to `http://localhost:8080` + +## Troubleshooting + +We use the [`intersphinx`](https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html) extension to link between external modules and the [`nitpick`](https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-nitpicky) option to ensure that all references resolve correctly. + +Unfortunately, this process is not perfect and sometimes we need to provide [`nitpick_ignore`](https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-nitpick_ignore)s in the [`doc/source/conf.py`](./source/conf.py) file. 
+ +In particular, currently external `TypeVar` and `TypeAliases` are not resolved correctly and we need to ignore those. + +In other cases, specifying the full path to the module in the cross-reference or the `import` can help. diff --git a/doc/copy-source-tree-docs.sh b/doc/copy-source-tree-docs.sh index 0eeac02fdc3..027a7448e70 100755 --- a/doc/copy-source-tree-docs.sh +++ b/doc/copy-source-tree-docs.sh @@ -24,6 +24,7 @@ for readme_file_path in README.md mlos_core/README.md mlos_bench/README.md mlos_ cp "$readme_file_path" "doc/source/source_tree_docs/$file_dir/index.md" # Tweak source source code links. + # FIXME: This sed expression doesn't work in MacOS. sed -i -r -e "s|\]\(([^:#)]+)(#[a-zA-Z0-9_-]+)?\)|\]\(https://github.com/microsoft/MLOS/tree/main/$file_dir/\1\2\)|g" \ "doc/source/source_tree_docs/$file_dir/index.md" # Tweak the lexers for local expansion by pygments instead of github's. diff --git a/doc/requirements.txt b/doc/requirements.txt index 9825b5b442a..fac2db1fa4c 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,10 +1,11 @@ setuptools-scm>=8.1.0 sphinx +sphinx-autoapi +intersphinx_registry>=0.2410.14 # https://github.com/Quansight-Labs/intersphinx_registry/pull/41 nbsphinx jupyter_core>=4.11.2 # nbsphix dependency - addresses CVE-2022-39286 nbconvert mistune>=2.0.3 # Address CVE-2022-34749 -numpydoc sphinx-rtd-theme myst-parser diff --git a/doc/source/.gitignore b/doc/source/.gitignore index bf83a1d89cf..8173208e1d5 100644 --- a/doc/source/.gitignore +++ b/doc/source/.gitignore @@ -1,4 +1,5 @@ api/ +autoapi/ generated/ badges/ source_tree_docs/ diff --git a/doc/source/_templates/class.rst b/doc/source/_templates/class.rst deleted file mode 100644 index 3eef9746722..00000000000 --- a/doc/source/_templates/class.rst +++ /dev/null @@ -1,16 +0,0 @@ -:mod:`{{module}}`.{{objname}} -{{ underline }}============== - -.. currentmodule:: {{ module }} - -.. autoclass:: {{ objname }} - - {% block methods %} - .. 
automethod:: __init__ - {% endblock %} - -.. include:: {{module}}.{{objname}}.examples - -.. raw:: html - -
diff --git a/doc/source/_templates/function.rst b/doc/source/_templates/function.rst deleted file mode 100644 index 4ba355d57c8..00000000000 --- a/doc/source/_templates/function.rst +++ /dev/null @@ -1,12 +0,0 @@ -:mod:`{{module}}`.{{objname}} -{{ underline }}==================== - -.. currentmodule:: {{ module }} - -.. autofunction:: {{ objname }} - -.. include:: {{module}}.{{objname}}.examples - -.. raw:: html - -
diff --git a/doc/source/_templates/numpydoc_docstring.py b/doc/source/_templates/numpydoc_docstring.py deleted file mode 100644 index 71acc5df9f0..00000000000 --- a/doc/source/_templates/numpydoc_docstring.py +++ /dev/null @@ -1,20 +0,0 @@ -# -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# -{{index}} -{{summary}} -{{extended_summary}} -{{parameters}} -{{returns}} -{{yields}} -{{other_parameters}} -{{attributes}} -{{raises}} -{{warns}} -{{warnings}} -{{see_also}} -{{notes}} -{{references}} -{{examples}} -{{methods}} diff --git a/doc/source/conf.py b/doc/source/conf.py index 42459f4de92..5a3771714f9 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -18,13 +18,18 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. # +import json import os import sys +from typing import Dict, Optional, Tuple, Union from logging import warning -import sphinx_rtd_theme # pylint: disable=unused-import - +from docutils.nodes import Element +from intersphinx_registry import get_intersphinx_mapping +from sphinx.application import Sphinx as SphinxApp +from sphinx.environment import BuildEnvironment +from sphinx.addnodes import pending_xref sys.path.insert(0, os.path.abspath("../../mlos_core/mlos_core")) sys.path.insert(1, os.path.abspath("../../mlos_bench/mlos_bench")) @@ -63,17 +68,163 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ - "nbsphinx", "sphinx.ext.autodoc", - "sphinx.ext.autosummary", - "sphinx.ext.doctest", - # 'sphinx.ext.intersphinx', - # 'sphinx.ext.linkcode', - "numpydoc", + "autoapi.extension", + "nbsphinx", + "sphinx.ext.intersphinx", + "sphinx.ext.linkcode", + "sphinx.ext.napoleon", "matplotlib.sphinxext.plot_directive", "myst_parser", ] +autodoc_typehints = "both" # signature and description + +napoleon_numpy_docstring = True +napoleon_include_init_with_doc = False +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = False +napoleon_use_admonition_for_notes = False +napoleon_use_admonition_for_references = False +napoleon_use_ivar = False +napoleon_use_param = True +napoleon_use_rtype = True +napoleon_use_keyword = True +napoleon_custom_sections = None + +_base_path = os.path.abspath(os.path.join(__file__, "../../..")) +_path_cache: Dict[str, bool] = {} + + +def _check_path(path: str) -> bool: + """Check if a path exists and cache the result.""" + path = os.path.join(_base_path, path) + result = _path_cache.get(path) + if result is None: + result = os.path.exists(path) + _path_cache[path] = result + return result + + +def linkcode_resolve(domain: str, info: Dict[str, str]): + """linkcode extension override to link to the source code on GitHub.""" + if domain != "py": + return None + if not info["module"]: + return None + if not info["module"].startswith("mlos_"): + return None + package = info["module"].split(".")[0] + filename = info["module"].replace(".", "/") + path = f"{package}/{filename}.py" + if not _check_path(path): + path = f"{package}/{filename}/__init__.py" + if not _check_path(path): + warning(f"linkcode_resolve failed to find {path}") + warning(f"linkcode_resolve info: {json.dumps(info, indent=2)}") + return f"https://github.com/microsoft/MLOS/tree/main/{path}" + + +def is_on_github_actions(): + """Check if the documentation is being built on GitHub Actions.""" + return 
os.environ.get("CI") and os.environ.get("GITHUB_RUN_ID") + + +# Add mappings to link to external documentation. +intersphinx_mapping = get_intersphinx_mapping( + packages={ + "asyncssh", + "azure-core", + "azure-identity", + "configspace", + "matplotlib", + "numpy", + "pandas", + "python", + "referencing", + "smac", + "typing_extensions", + } +) +intersphinx_mapping.update( + { + "dabl": ("https://dabl.github.io/stable/", None), + } +) + +# Hack to resolve type aliases as attributes instead of classes. +# See Also: https://github.com/sphinx-doc/sphinx/issues/10785 + +# Type alias resolution map +# (original, refname) -> new +CUSTOM_REF_TYPE_MAP: Dict[Tuple[str, str], str] = { + # Internal typevars and aliases: + ("BaseTypeVar", "class"): "data", + ("ConcreteOptimizer", "class"): "data", + ("ConcreteSpaceAdapter", "class"): "data", + ("DistributionName", "class"): "data", + ("FlamlDomain", "class"): "data", + ("mlos_core.spaces.converters.flaml.FlamlDomain", "class"): "data", + ("TunableValue", "class"): "data", + ("mlos_bench.tunables.tunable.TunableValue", "class"): "data", + ("TunableValueType", "class"): "data", + ("TunableValueTypeName", "class"): "data", + ("T_co", "class"): "data", + ("CoroReturnType", "class"): "data", + ("FutureReturnType", "class"): "data", +} + + +def resolve_type_aliases( + app: SphinxApp, + env: BuildEnvironment, + node: pending_xref, + contnode: Element, +) -> Optional[Element]: + """Resolve :class: references to our type aliases as :attr: instead.""" + if node["refdomain"] != "py": + return None + (orig_type, reftarget) = (node["reftype"], node["reftarget"]) + new_type = CUSTOM_REF_TYPE_MAP.get((reftarget, orig_type)) + if new_type: + # warning(f"Resolved {orig_type} {reftarget} to {new_type}") + return app.env.get_domain("py").resolve_xref( + env, + node["refdoc"], + app.builder, + new_type, + reftarget, + node, + contnode, + ) + return None + + +def setup(app: SphinxApp) -> None: + """Connect the missing-reference event to resolve 
type aliases.""" + app.connect("missing-reference", resolve_type_aliases) + + +# Ignore some cross references to external things we can't intersphinx with. +# sphinx has a hard time finding typealiases and typevars instead of classes. +# See Also: https://github.com/sphinx-doc/sphinx/issues/10974 +nitpick_ignore = [ + # Internal typevars and aliases: + ("py:class", "EnvironType"), + # External typevars and aliases: + ("py:class", "numpy.typing.NDArray"), + # External classes that refuse to resolve: + ("py:class", "contextlib.nullcontext"), + ("py:class", "sqlalchemy.engine.Engine"), + ("py:exc", "jsonschema.exceptions.SchemaError"), + ("py:exc", "jsonschema.exceptions.ValidationError"), +] +nitpick_ignore_regex = [ + # Ignore some external references that don't use sphinx for their docs. + (r"py:.*", r"flaml\..*"), +] +# Which documents to include in the build. source_suffix = { ".rst": "restructuredtext", # '.txt': 'markdown', @@ -81,21 +232,7 @@ } # Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# generate autosummary even if no references -autosummary_generate = True -# but don't complain about missing stub files -# See Also: -numpydoc_class_members_toctree = False - -autodoc_default_options = { - "members": True, - "undoc-members": True, - # Don't generate documentation for some (non-private) functions that are more - # for internal implementation use. - "exclude-members": "mlos_bench.util.check_required_params", -} +# templates_path = ["_templates"] # Generate the plots for the gallery # plot_gallery = True @@ -105,6 +242,40 @@ # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ["_build", "_templates"] +autoapi_dirs = [ + # Don't index setup.py or other utility scripts. 
+ "../../mlos_core/mlos_core/", + "../../mlos_bench/mlos_bench/", + "../../mlos_viz/mlos_viz/", +] +autoapi_ignore = [ + "*/tests/*", + # Don't document internal environment scripts that aren't part of a module. + "*/mlos_bench/config/environments/*/*.py", + "*/mlos_bench/config/services/*/*.py", +] +autoapi_options = [ + "members", + # Can't document externally inherited members due to broken references. + # "inherited-members", + "undoc-members", + # Don't document private members. + # "private-members", + "show-inheritance", + # Causes issues when base class is a typing protocol. + # "show-inheritance-diagram", + "show-module-summary", + "special-members", + # Causes duplicate reference issues. For instance: + # - mlos_bench.environments.LocalEnv + # - mlos_bench.environments.local.LocalEnv + # - mlos_bench.environments.local.local_env.LocalEnv + # "imported-members", +] +autoapi_python_class_content = "both" +autoapi_member_order = "groupwise" +autoapi_add_toctree_entry = False # handled manually +autoapi_keep_files = not is_on_github_actions() # for local testing # -- Options for HTML output ------------------------------------------------- @@ -112,7 +283,6 @@ # a list of builtin themes. # html_theme = "sphinx_rtd_theme" -# html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, diff --git a/doc/source/index.rst b/doc/source/index.rst index e0302b9f78a..68bd53dbef9 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -1,43 +1,42 @@ -Welcome to the MLOS documentation! -================================== +MLOS Documentation +================== .. image:: badges/tests.svg .. image:: badges/coverage.svg :target: htmlcov/index.html -`MLOS `_ is a project to enable autotuning for systems via automated benchmarking including managing the storage and visualization of the results. 
+`MLOS `_ is a project to enable autotuning for systems via `automated benchmarking `_ including managing the storage and `visualization `_ of the results. See below for additional documentation sections. +Here is some documentation pulled from the markdown files in the `MLOS source tree `_: + .. toctree:: - :maxdepth: 2 :caption: Source Tree Documentation + :maxdepth: 5 source_tree_docs/index source_tree_docs/mlos_core/index source_tree_docs/mlos_bench/index source_tree_docs/mlos_viz/index -.. toctree:: - :maxdepth: 2 - :caption: API Overview - - overview +Here is some documentation pulled from the Python docstrings in the `MLOS source tree `_: .. toctree:: - :maxdepth: 3 :caption: API Reference + :titlesonly: + :maxdepth: 5 - api/mlos_core/modules - api/mlos_bench/modules - api/mlos_viz/modules + autoapi/mlos_core/index + autoapi/mlos_bench/index + autoapi/mlos_viz/index .. toctree:: - :maxdepth: 2 - :caption: Examples + :caption: mlos_bench CLI usage + :maxdepth: 1 - auto_examples/index + mlos_bench.run.usage .. toctree:: :maxdepth: 1 diff --git a/doc/source/mlos_bench.run.usage.rst b/doc/source/mlos_bench.run.usage.rst new file mode 100644 index 00000000000..513d2274db5 --- /dev/null +++ b/doc/source/mlos_bench.run.usage.rst @@ -0,0 +1,10 @@ +mlos_bench CLI usage +==================== + +Here is the current ``--help`` output for the ``mlos_bench`` :py:mod:`CLI script `: + +See the :py:mod:`mlos_bench.config` module documentation for more information on +configuration files. + +.. literalinclude:: ./generated/mlos_bench.run.usage.txt + :language: none diff --git a/doc/source/overview.rst b/doc/source/overview.rst deleted file mode 100644 index b3ca2a3fad3..00000000000 --- a/doc/source/overview.rst +++ /dev/null @@ -1,298 +0,0 @@ -########################## -MLOS Package APIs Overview -########################## - -This is a list of major functions and classes provided by the MLOS packages. 
- -############################# -mlos-core API -############################# - -This is a list of major functions and classes provided by `mlos_core`. - -.. currentmodule:: mlos_core - -Optimizers -========== -.. currentmodule:: mlos_core.optimizers -.. autosummary:: - :toctree: generated/ - - :template: class.rst - - OptimizerType - OptimizerFactory - - :template: function.rst - - OptimizerFactory.create - -.. currentmodule:: mlos_core.optimizers.optimizer -.. autosummary:: - :toctree: generated/ - :template: class.rst - - BaseOptimizer - -.. currentmodule:: mlos_core.optimizers.random_optimizer -.. autosummary:: - :toctree: generated/ - :template: class.rst - - RandomOptimizer - -.. currentmodule:: mlos_core.optimizers.flaml_optimizer -.. autosummary:: - :toctree: generated/ - :template: class.rst - - FlamlOptimizer - -.. currentmodule:: mlos_core.optimizers.bayesian_optimizers -.. autosummary:: - :toctree: generated/ - :template: class.rst - - BaseBayesianOptimizer - SmacOptimizer - -Spaces -====== - -Converters ----------- -.. currentmodule:: mlos_core.spaces.converters.flaml -.. autosummary:: - :toctree: generated/ - :template: function.rst - - configspace_to_flaml_space - -Space Adapters --------------- -.. currentmodule:: mlos_core.spaces.adapters -.. autosummary:: - :toctree: generated/ - - :template: class.rst - - SpaceAdapterType - SpaceAdapterFactory - - :template: function.rst - - SpaceAdapterFactory.create - -.. currentmodule:: mlos_core.spaces.adapters.adapter -.. autosummary:: - :toctree: generated/ - :template: class.rst - - BaseSpaceAdapter - -.. currentmodule:: mlos_core.spaces.adapters.identity_adapter -.. autosummary:: - :toctree: generated/ - :template: class.rst - - IdentityAdapter - -.. currentmodule:: mlos_core.spaces.adapters.llamatune -.. 
autosummary:: - :toctree: generated/ - :template: class.rst - - LlamaTuneAdapter - -############################# -mlos-bench API -############################# - -This is a list of major functions and classes provided by `mlos_bench`. - -.. currentmodule:: mlos_bench - -Main -==== - -:doc:`run.py ` - - The script to run the benchmarks or the optimization loop. - - Also available as `mlos_bench` command line tool. - -.. note:: - The are `json config examples `_ and `json schemas `_ on the main `source code `_ repository site. - -Benchmark Environments -====================== -.. currentmodule:: mlos_bench.environments -.. autosummary:: - :toctree: generated/ - :template: class.rst - - Status - Environment - CompositeEnv - MockEnv - -Local Environments -------------------- - -.. currentmodule:: mlos_bench.environments.local -.. autosummary:: - :toctree: generated/ - :template: class.rst - - LocalEnv - LocalFileShareEnv - -Remote Environments -------------------- - -.. currentmodule:: mlos_bench.environments.remote -.. autosummary:: - :toctree: generated/ - :template: class.rst - - RemoteEnv - OSEnv - VMEnv - HostEnv - -Tunable Parameters -================== -.. currentmodule:: mlos_bench.tunables -.. autosummary:: - :toctree: generated/ - :template: class.rst - - Tunable - TunableGroups - -Service Mix-ins -=============== -.. currentmodule:: mlos_bench.services -.. autosummary:: - :toctree: generated/ - :template: class.rst - - Service - FileShareService - -.. currentmodule:: mlos_bench.services.config_persistence -.. autosummary:: - :toctree: generated/ - :template: class.rst - - ConfigPersistenceService - -Local Services ---------------- -.. currentmodule:: mlos_bench.services.local -.. autosummary:: - :toctree: generated/ - :template: class.rst - - LocalExecService - -Remote Azure Services ---------------------- - -.. currentmodule:: mlos_bench.services.remote.azure -.. 
autosummary:: - :toctree: generated/ - :template: class.rst - - AzureVMService - AzureFileShareService - -Optimizer Adapters -================== -.. currentmodule:: mlos_bench.optimizers -.. autosummary:: - :toctree: generated/ - :template: class.rst - - Optimizer - MockOptimizer - MlosCoreOptimizer - -Storage -======= -Base Runtime Backends ---------------------- -.. currentmodule:: mlos_bench.storage -.. autosummary:: - :toctree: generated/ - :template: class.rst - - Storage - -.. currentmodule:: mlos_bench.storage.storage_factory -.. autosummary:: - :toctree: generated/ - :template: function.rst - - from_config - -SQL DB Storage Backend ----------------------- -.. currentmodule:: mlos_bench.storage.sql.storage -.. autosummary:: - :toctree: generated/ - :template: class.rst - - SqlStorage - -Analysis Client Access APIs ---------------------------- -.. currentmodule:: mlos_bench.storage.base_experiment_data -.. autosummary:: - :toctree: generated/ - :template: class.rst - - ExperimentData - -.. currentmodule:: mlos_bench.storage.base_trial_data -.. autosummary:: - :toctree: generated/ - :template: class.rst - - TrialData - -.. currentmodule:: mlos_bench.storage.base_tunable_config_data -.. autosummary:: - :toctree: generated/ - :template: class.rst - - TunableConfigData - -.. currentmodule:: mlos_bench.storage.base_tunable_config_trial_group_data -.. autosummary:: - :toctree: generated/ - :template: class.rst - - TunableConfigTrialGroupData - -############################# -mlos-viz API -############################# - -This is a list of major functions and classes provided by `mlos_viz`. - -.. currentmodule:: mlos_viz - -.. currentmodule:: mlos_viz -.. autosummary:: - :toctree: generated/ - :template: class.rst - - MlosVizMethod - -.. currentmodule:: mlos_viz -.. 
autosummary:: - :toctree: generated/ - :template: function.rst - - plot diff --git a/mlos_bench/mlos_bench/__init__.py b/mlos_bench/mlos_bench/__init__.py index d0214754411..1f54f78c440 100644 --- a/mlos_bench/mlos_bench/__init__.py +++ b/mlos_bench/mlos_bench/__init__.py @@ -2,9 +2,131 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""mlos_bench is a framework to help automate benchmarking and and OS/application -parameter autotuning. -""" +r""" +mlos_bench is a framework to help automate benchmarking and OS/application parameter +autotuning and the data management of the results. + +It can be installed from `pypi `_ via ``pip +install mlos-bench`` and executed using the ``mlos_bench`` `command +<../../mlos_bench.run.usage.html>`_ using a collection of `json` `configs +`_. + +It is intended to be used with :py:mod:`mlos_core` via +:py:class:`~mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer` to help +navigate complex parameter spaces more efficiently, though other +:py:mod:`~mlos_bench.optimizers` are also available to help customize the search +process easily by simply swapping out the +:py:class:`~mlos_bench.optimizers.base_optimizer.Optimizer` class in the associated +json configs. For instance, +:py:class:`~mlos_bench.optimizers.grid_search_optimizer.GridSearchOptimizer` can be +used to perform a grid search over the parameter space instead. + +The other core classes in this package are: + +- :py:mod:`~mlos_bench.environments` which provide abstractions for representing an + execution environment. + + These are generally the target of the optimization process and are used to + evaluate the performance of a given configuration, though can also be used to + simply run a single benchmark. They can be used, for instance, to provision a + :py:mod:`VM `, run benchmarks or execute + any other arbitrary code on a :py:mod:`remote machine + `, and many other things.
+ +- Environments are often associated with :py:mod:`~mlos_bench.tunables` which + provide a language for specifying the set of configuration parameters that can be + optimized or searched over with the :py:mod:`~mlos_bench.optimizers`. + +- :py:mod:`~mlos_bench.services` provide the necessary abstractions to interact + with the :py:mod:`~mlos_bench.environments` in different settings. + + For instance, the + :py:class:`~mlos_bench.services.remote.azure.azure_vm_services.AzureVMService` can + be used to run commands on Azure VMs for a remote + :py:mod:`~mlos_bench.environments.remote.vm_env.VMEnv`. + + Alternatively, one could swap out that service for + :py:class:`~mlos_bench.services.remote.ssh.ssh_host_service.SshHostService` in + order to target a different VM without having to change the + :py:class:`~mlos_bench.environments.base_environment.Environment` configuration at + all since they both implement the same + :py:class:`~mlos_bench.services.types.remote_exec_type.SupportsRemoteExec` + :py:mod:`Services type` interfaces. + + This is particularly useful when running the same benchmark on different + ecosystems and makes the configs more modular and composable. + +- :py:mod:`~mlos_bench.storage` which provides abstractions for storing and + retrieving data from the experiments. + + For instance, nearly any :py:mod:`SQL ` backend that + `sqlalchemy `_ supports can be used. + +The data management and automation of experiment data is a key component of +MLOS as it provides a unified way to manage experiment data across different +Environments, enabling more reusable visualization and analysis by mapping benchmark +metrics into common semantic types (e.g., via `OpenTelemetry +`_). + +Without this, most experiments are effectively siloed and require custom, and more +critically, non-reusable scripts to set up and later parse results and are hence +harder to scale to many users.
+ +With these features as a part of the MLOS ecosystem, benchmarking can become a +*service* that any developer, admin, researcher, etc. can use and adapt. + +See below for more information on the classes in this package. + +Notes +----- +Note that while the docstrings in this package are generated from the source code +and hence sometimes more focused on the implementation details, most user +interactions with the package will be through the `json configs +`_. Even +so it may be useful to look at the source code to understand how those are +interpreted. + +Examples +-------- +Here is an example that shows how to run a simple benchmark using the command line. + +The entry point for these configs can be found `here +`_. + +>>> from subprocess import run +>>> # Note: we show the command wrapped in python here for testing purposes. +>>> # Alternatively replace test-cli-local-env-bench.jsonc with +>>> # test-cli-local-env-opt.jsonc for one that does an optimization loop. +>>> cmd = "mlos_bench \ +... --config mlos_bench/mlos_bench/tests/config/cli/test-cli-local-env-bench.jsonc \ +... --globals experiment_test_local.jsonc \ +... --tunable_values tunable-values/tunable-values-local.jsonc" +>>> print(f"Here's the shell command you'd actually run:\n# {cmd}") +Here's the shell command you'd actually run: +# mlos_bench --config mlos_bench/mlos_bench/tests/config/cli/test-cli-local-env-bench.jsonc --globals experiment_test_local.jsonc --tunable_values tunable-values/tunable-values-local.jsonc +>>> # Now we run the command and check the output.
+>>> result = run(cmd, shell=True, capture_output=True, text=True, check=True) +>>> assert result.returncode == 0 +>>> lines = result.stderr.splitlines() +>>> first_line = lines[0] +>>> last_line = lines[-1] +>>> expected = "INFO Launch: mlos_bench" +>>> assert first_line.endswith(expected) +>>> expected = "INFO Final score: {'score': 123.4, 'total_time': 123.4, 'throughput': 1234567.0}" +>>> assert last_line.endswith(expected) + +Notes +----- +- `mlos_bench/README.md `_ + for additional documentation and examples in the source tree. + +- `mlos_bench/DEVNOTES.md `_ + for additional developer notes in the source tree. + +- There is also a working example of using ``mlos_bench`` in a *separate config + repo* (the more expected case for most users) in the `sqlite-autotuning + `_ repo. +""" # pylint: disable=line-too-long # noqa: E501 from mlos_bench.version import VERSION __version__ = VERSION diff --git a/mlos_bench/mlos_bench/config/__init__.py b/mlos_bench/mlos_bench/config/__init__.py index b78386118c6..f19cf95b32c 100644 --- a/mlos_bench/mlos_bench/config/__init__.py +++ b/mlos_bench/mlos_bench/config/__init__.py @@ -2,4 +2,262 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""mlos_bench.config.""" +""" +A module for and documentation about the structure and management of json configs, their +schemas and validation for various components of MLOS. + +.. contents:: Table of Contents + :depth: 3 + +Overview +++++++++ + +MLOS is a framework for doing benchmarking and autotuning for systems. +The bulk of the code to do that is written in python. As such, all of the code +classes documented here take python objects in their construction. + +However, most users of MLOS will interact with the system via the ``mlos_bench`` CLI +and its json config files and their own scripts for MLOS to invoke. This module +attempts to document some of those high level interactions.
+ +General JSON Config Structure ++++++++++++++++++++++++++++++ + +We use `json5 `_ to parse the json files, since it +allows for inline C style comments (e.g., ``//``, ``/* */``), trailing commas, etc., +so it is slightly more user friendly than strict json. + +By convention files use the ``*.mlos.json`` or ``*.mlos.jsonc`` extension to +indicate that they are an ``mlos_bench`` config file. + +This allows tools that support `JSON Schema Store +`_ (e.g., `VSCode +`_ with an `extension +`_) to +provide helpful autocomplete and validation of the json configs while editing. + +CLI Configs +^^^^^^^^^^^ + +:py:attr:`~.mlos_bench.config.schemas.config_schemas.ConfigSchema.CLI` style configs +are typically used to start the ``mlos_bench`` CLI using the ``--config`` argument +and a restricted key-value dict form where each key corresponds to a CLI argument. + +For instance: + +.. code-block:: json + + // cli-config.mlos.json + { + "experiment": "path/to/base/experiment-config.mlos.json", + "services": [ + "path/to/some/service-config.mlos.json", + ], + "globals": "path/to/basic-globals-config.mlos.json", + } + +.. code-block:: json + + // basic-globals-config.mlos.json + { + "location": "westus", + "vm_size": "Standard_D2s_v5", + } + +Typically CLI configs will reference some other configs, especially the base +Environment and Services configs, but some ``globals`` may be left to be specified +on the command line. + +For instance: + +.. code-block:: shell + + mlos_bench --config path/to/cli-config.mlos.json --globals experiment-config.mlos.json + +where ``experiment-config.mlos.json`` might look something like this: + +.. code-block:: json + + // experiment-config.mlos.json (also a set of globals) + { + "experiment_id": "my_experiment", + "some_var": "some_value", + } + +This allows some of the ``globals`` to be specified on the CLI to alter the behavior +of a set of Experiments without having to adjust many of the other config files +themselves. + +See below for examples. 
+ +Notes +----- +- See `mlos_bench CLI usage `_ for more details on the + CLI arguments. +- See `mlos_bench/config/cli + `_ + and `mlos_bench/tests/config/cli + `_ + for some examples of CLI configs. + +Globals and Variable Substitution ++++++++++++++++++++++++++++++++++ + +:py:attr:`Globals ` +are basically just key-value variables that can be used in other configs using +``$variable`` substitution via the +:py:meth:`~mlos_bench.dict_templater.DictTemplater.expand_vars` method. + +For instance: + +.. code-block:: json + + // globals-config.mlos.json + { + "experiment_id": "my_experiment", + "some_var": "some_value", + // environment variable expansion also works here + "current_dir": "$PWD", + "some_expanded_var": "$some_var: $experiment_id", + "location": "eastus", + } + +There are additional details about variable propagation in the +:py:mod:`mlos_bench.environments` module. + +Well Known Variables +^^^^^^^^^^^^^^^^^^^^ + +Here is a list of some well known variables that are provided or required by the +system and may be used in the config files: + +- ``$experiment_id``: A unique identifier for the experiment. + Typically provided in globals. +- ``$trial_id``: A unique identifier for the trial currently being executed. + This can be useful in the configs for :py:mod:`mlos_bench.environments` for + instance (e.g., when writing scripts). +- TODO: Document more variables here. + +Tunable Configs +^^^^^^^^^^^^^^^ + +There are two forms of tunable configs: + +- "TunableParams" style configs + + Which are used to define the set of + :py:mod:`~mlos_bench.tunables.tunable_groups.TunableGroups` (i.e., tunable + parameters). + + ..
code-block:: json + + // some-env-tunables.json + { + // a group of tunables that are tuned together + "covariant_group_name": [ + { + "name": "tunable_name", + "type": "int", + "range": [0, 100], + "default": 50, + }, + // more tunables + ], + // another group of tunables + // both can be enabled at the same time + "another_group_name": [ + { + "name": "another_tunable_name", + "type": "categorical", + "values": ["red", "yellow", "green"], + "default": "green" + }, + // more tunables + ], + } + + Since TunableParams are associated with Environments, they are typically kept + in the same directory as that environment and named something like + ``env-tunables.json``. + +- "TunableValues" style configs which are used to specify the values for an + instantiation of a set of tunable params. + + These are essentially just a dict of the tunable names and their values. + For instance: + + .. code-block:: json + + // tunable-values.mlos.json + { + "tunable_name": 25, + "another_tunable_name": "red", + } + + These can be used with the + :py:class:`~mlos_bench.optimizers.one_shot_optimizer.OneShotOptimizer` or + :py:class:`~mlos_bench.optimizers.manual_optimizer.ManualOptimizer` to run a + benchmark with a particular config or set of configs. + +Class Configs +^^^^^^^^^^^^^ + +Class style configs include most anything else and roughly take this form: + +.. code-block:: json + + // class configs (environments, services, etc.)
+ { + // some mlos class name to load + "class": "mlos_bench.type.ClassName", + "config": { + // class specific config + "key": "value", + "key2": "$some_var", // variable substitution is allowed here too + } + } + +Where ``type`` is one of the core classes in the system: + +- :py:mod:`~mlos_bench.environments` +- :py:mod:`~mlos_bench.optimizers` +- :py:mod:`~mlos_bench.services` +- :py:mod:`~mlos_bench.schedulers` +- :py:mod:`~mlos_bench.storage` + +Each of these has its own submodules and classes that dictate the allowed and +expected structure of the ``config`` section. + +In certain cases (e.g., script command execution) the variable substitution rules +take on slightly different behavior. +See various documentation in :py:mod:`mlos_bench.environments` for more details. + +Config Processing ++++++++++++++++++ + +Config files are processed by the :py:class:`~mlos_bench.launcher.Launcher` and +:py:class:`~mlos_bench.services.config_persistence.ConfigPersistenceService` classes +at startup time by the ``mlos_bench`` CLI. + +The typical entrypoint is a CLI config which references other configs, especially +the base Environment config, Services, Optimizer, and Storage. + +See `mlos_bench CLI usage `_ for more details on those +arguments. + +Schema Definitions +++++++++++++++++++ + +For further details on the schema definitions and validation, see the +:py:class:`~mlos_bench.config.schemas.config_schemas.ConfigSchema` class +documentation, which also contains links to the actual schema definitions in the +source tree (see below). + +Notes +----- +See `mlos_bench/config/README.md +`_ and +`mlos_bench/tests/config/README.md +`_ +for additional documentation and examples in the source tree.
+""" diff --git a/mlos_bench/mlos_bench/config/environments/os/linux/boot/scripts/local/create_new_grub_cfg.py b/mlos_bench/mlos_bench/config/environments/os/linux/boot/scripts/local/create_new_grub_cfg.py index 9b75f040080..c0cc5b3ea84 100755 --- a/mlos_bench/mlos_bench/config/environments/os/linux/boot/scripts/local/create_new_grub_cfg.py +++ b/mlos_bench/mlos_bench/config/environments/os/linux/boot/scripts/local/create_new_grub_cfg.py @@ -14,10 +14,16 @@ JSON_CONFIG_FILE = "config-boot-time.json" NEW_CFG = "zz-mlos-boot-params.cfg" -with open(JSON_CONFIG_FILE, "r", encoding="UTF-8") as fh_json, open( - NEW_CFG, "w", encoding="UTF-8" -) as fh_config: - for key, val in json.load(fh_json).items(): - fh_config.write( - 'GRUB_CMDLINE_LINUX_DEFAULT="$' f'{{GRUB_CMDLINE_LINUX_DEFAULT}} {key}={val}"\n' - ) + +def _write_config() -> None: + with open(JSON_CONFIG_FILE, "r", encoding="UTF-8") as fh_json, open( + NEW_CFG, "w", encoding="UTF-8" + ) as fh_config: + for key, val in json.load(fh_json).items(): + fh_config.write( + 'GRUB_CMDLINE_LINUX_DEFAULT="$' f'{{GRUB_CMDLINE_LINUX_DEFAULT}} {key}={val}"\n' + ) + + +if __name__ == "__main__": + _write_config() diff --git a/mlos_bench/mlos_bench/config/schemas/__init__.py b/mlos_bench/mlos_bench/config/schemas/__init__.py index d4987add630..9f34906a76d 100644 --- a/mlos_bench/mlos_bench/config/schemas/__init__.py +++ b/mlos_bench/mlos_bench/config/schemas/__init__.py @@ -2,7 +2,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""A module for managing config schemas and their validation.""" +""" +A module for managing config schemas and their validation. + +See Also +-------- +mlos_bench.config.schemas.config_schemas : The module handling the actual schema + definitions and validation. 
+""" from mlos_bench.config.schemas.config_schemas import CONFIG_SCHEMA_DIR, ConfigSchema diff --git a/mlos_bench/mlos_bench/config/schemas/config_schemas.py b/mlos_bench/mlos_bench/config/schemas/config_schemas.py index b7ce402b5d5..402f96a8b9f 100644 --- a/mlos_bench/mlos_bench/config/schemas/config_schemas.py +++ b/mlos_bench/mlos_bench/config/schemas/config_schemas.py @@ -2,8 +2,23 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""A simple class for describing where to find different config schemas and validating -configs against them. +""" +A simple class for describing where to find different `json config schemas +`_ and validating configs against them. + +Used by the :py:class:`~mlos_bench.launcher.Launcher` and +:py:class:`~mlos_bench.services.config_persistence.ConfigPersistenceService` to +validate configs on load. + +Notes +----- +- See `mlos_bench/config/schemas/README.md + `_ + for additional documentation in the source tree. + +- See `mlos_bench/config/README.md + `_ + for additional config examples in the source tree. """ import json # schema files are pure json - no comments @@ -22,13 +37,23 @@ # The path to find all config schemas. CONFIG_SCHEMA_DIR = path_join(path.dirname(__file__), abs_path=True) +"""The local directory where all config schemas shipped as a part of the +:py:mod:`mlos_bench` module are stored. +""" # Allow skipping schema validation for tight dev cycle changes. # It is used in `ConfigSchema.validate()` method below. # NOTE: this may cause pytest to fail if it's expecting exceptions # to be raised for invalid configs. -_VALIDATION_ENV_FLAG = "MLOS_BENCH_SKIP_SCHEMA_VALIDATION" -_SKIP_VALIDATION = environ.get(_VALIDATION_ENV_FLAG, "false").lower() in { +VALIDATION_ENV_FLAG = "MLOS_BENCH_SKIP_SCHEMA_VALIDATION" +""" +The special environment flag to set to skip schema validation when "true". 
+ +Useful for local development when you're making a lot of changes to the config or adding +new classes that aren't in the main repo yet. +""" + +_SKIP_VALIDATION = environ.get(VALIDATION_ENV_FLAG, "false").lower() in { "true", "y", "yes", @@ -105,22 +130,96 @@ def registry(self) -> Registry: SCHEMA_STORE = SchemaStore() +"""Static :py:class:`.SchemaStore` instance used for storing and retrieving schemas for +config validation. +""" class ConfigSchema(Enum): """An enum to help describe schema types and help validate configs against them.""" CLI = path_join(CONFIG_SCHEMA_DIR, "cli/cli-schema.json") + """ + Json config `schema + `__ + for :py:mod:`mlos_bench ` CLI configuration. + + See Also + -------- + mlos_bench.config : documentation on the configuration system. + mlos_bench.launcher.Launcher : class is responsible for processing the CLI args. + """ + GLOBALS = path_join(CONFIG_SCHEMA_DIR, "cli/globals-schema.json") + """ + Json config `schema + `__ + for :py:mod:`global variables `. + """ + ENVIRONMENT = path_join(CONFIG_SCHEMA_DIR, "environments/environment-schema.json") + """ + Json config `schema + `__ + for :py:mod:`~mlos_bench.environments`. + """ + OPTIMIZER = path_join(CONFIG_SCHEMA_DIR, "optimizers/optimizer-schema.json") + """ + Json config `schema + `__ + for :py:mod:`~mlos_bench.optimizers`. + """ + SCHEDULER = path_join(CONFIG_SCHEMA_DIR, "schedulers/scheduler-schema.json") + """ + Json config `schema + `__ + for :py:mod:`~mlos_bench.schedulers`. + """ + SERVICE = path_join(CONFIG_SCHEMA_DIR, "services/service-schema.json") + """ + Json config `schema + `__ + for :py:mod:`~mlos_bench.services`. + """ + STORAGE = path_join(CONFIG_SCHEMA_DIR, "storage/storage-schema.json") + """ + Json config `schema + `__ + for :py:mod:`~mlos_bench.storage` instances. + """ + TUNABLE_PARAMS = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-params-schema.json") + """ + Json config `schema + `__ + for :py:mod:`~mlos_bench.tunables` instances. 
+ """ + TUNABLE_VALUES = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-values-schema.json") + """ + Json config `schema + `__ + for values of :py:mod:`~mlos_bench.tunables.tunable_groups.TunableGroups` instances. + + These can be used to specify the values of the tunables for a given experiment + using the :py:class:`~mlos_bench.optimizers.one_shot_optimizer.OneShotOptimizer` + for instance. + """ UNIFIED = path_join(CONFIG_SCHEMA_DIR, "mlos-bench-config-schema.json") + """ + Combined global json `schema + `__ + use to validate any ``mlos_bench`` config file (e.g., ``*.mlos.jsonc`` files). + + See Also + -------- + + """ @property def schema(self) -> dict: @@ -141,10 +240,12 @@ def validate(self, config: dict) -> None: Raises ------ jsonschema.exceptions.ValidationError + On validation failure. jsonschema.exceptions.SchemaError + On schema loading error. """ if _SKIP_VALIDATION: - _LOG.warning("%s is set - skip schema validation", _VALIDATION_ENV_FLAG) + _LOG.warning("%s is set - skip schema validation", VALIDATION_ENV_FLAG) else: jsonschema.Draft202012Validator( schema=self.schema, diff --git a/mlos_bench/mlos_bench/dict_templater.py b/mlos_bench/mlos_bench/dict_templater.py index fcd75cf1b92..dd1d1f78afd 100644 --- a/mlos_bench/mlos_bench/dict_templater.py +++ b/mlos_bench/mlos_bench/dict_templater.py @@ -2,7 +2,9 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Simple class to help with nested dictionary $var templating.""" +"""Simple class to help with nested dictionary ``$var`` templating in configuration file +expansions. 
+""" from copy import deepcopy from string import Template @@ -12,7 +14,7 @@ class DictTemplater: # pylint: disable=too-few-public-methods - """Simple class to help with nested dictionary $var templating.""" + """Simple class to help with nested dictionary ``$var`` templating.""" def __init__(self, source_dict: Dict[str, Any]): """ @@ -51,7 +53,8 @@ def expand_vars( Raises ------ - ValueError on unsupported nested types. + ValueError + On unsupported nested types. """ self._dict = deepcopy(self._template_dict) self._dict = self._expand_vars(self._dict, extra_source_dict, use_os_env) @@ -64,7 +67,7 @@ def _expand_vars( extra_source_dict: Optional[Dict[str, Any]], use_os_env: bool, ) -> Any: - """Recursively expand $var strings in the currently operating dictionary.""" + """Recursively expand ``$var`` strings in the currently operating dictionary.""" if isinstance(value, str): # First try to expand all $vars internally. value = Template(value).safe_substitute(self._dict) diff --git a/mlos_bench/mlos_bench/environments/__init__.py b/mlos_bench/mlos_bench/environments/__init__.py index ff649af50ef..2ddb7c60b7c 100644 --- a/mlos_bench/mlos_bench/environments/__init__.py +++ b/mlos_bench/mlos_bench/environments/__init__.py @@ -2,7 +2,118 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Tunable Environments for mlos_bench.""" +""" +Tunable Environments for mlos_bench. + +.. contents:: Table of Contents + :depth: 3 + +Overview +++++++++ + +Environments are classes that represent an execution setting (i.e., environment) for +running a benchmark or tuning process. + +For instance, a :py:class:`~.LocalEnv` represents a local execution environment, a +:py:class:`~.RemoteEnv` represents a remote execution environment, a +:py:class:`~mlos_bench.environments.remote.vm_env.VMEnv` represents a virtual +machine, etc. 
+ +An Environment goes through a series of *phases* (e.g., +:py:meth:`~.Environment.setup`, :py:meth:`~.Environment.run`, +:py:meth:`~.Environment.teardown`, etc.) that can be used to prepare a VM, workload, +etc.; run a benchmark, script, etc.; and clean up afterwards. +Often, what these phases do (e.g., what commands to execute) will depend on the +specific Environment and the configs that Environment was loaded with. +This lets Environments be very flexible in what they can accomplish. + +Environments can be stacked together with the :py:class:`.CompositeEnv` class to +represent complex setups (e.g., an application running on a remote VM with a +benchmark running from a local machine). + +See below for the set of Environments currently available in this package. + +Note that additional ones can also be created by extending the base +:py:class:`.Environment` class and referencing them in the :py:mod:`json configs +` using the ``class`` key. + +Environment Tunables +++++++++++++++++++++ + +Each environment can use +:py:class:`~mlos_bench.tunables.tunable_groups.TunableGroups` to specify the set of +configuration parameters that can be optimized or searched. +At each iteration of the optimization process, the optimizer will generate a set of +values for the :py:class:`Tunables ` that the +environment can use to configure itself. + +At a python level, this happens by passing a +:py:meth:`~mlos_bench.tunables.tunable_groups.TunableGroups` object to the +``tunable_groups`` parameter of the :py:class:`~.Environment` constructor, but that +is typically handled by the +:py:meth:`~mlos_bench.services.config_persistence.ConfigPersistenceService.load_environment` +method of the +:py:meth:`~mlos_bench.services.config_persistence.ConfigPersistenceService` invoked +by the ``mlos_bench`` command line tool's :py:class:`mlos_bench.launcher.Launcher` +class.
+ +In the typical json user level configs, this is specified in the +``include_tunables`` section of the Environment config to include the +:py:class:`~mlos_bench.tunables.tunable_groups.TunableGroups` definitions from other +json files when the :py:class:`~mlos_bench.launcher.Launcher` processes the initial +set of config files. + +The ``tunable_params`` setting in the ``config`` section of the Environment config +can also be used to limit *which* of the ``TunableGroups`` should be used for the +Environment. + +Since :py:mod:`json configs ` also support ``$variable`` +substitution in the values using the `globals` mechanism, this setting can be used to +dynamically change the set of active TunableGroups for a given Experiment using only +`globals`, allowing for configs to be more modular and composable. + +Environment Services +++++++++++++++++++++ + +Environments can also reference :py:mod:`~mlos_bench.services` that provide the +necessary support to perform the actions that environment needs for each of its +phases depending upon where it's being deployed (e.g., local machine, remote machine, +cloud provider VM, etc.). + +Although this can be done in the Environment config directly with the +``include_services`` key, it is often more useful to do it in the global or +:py:mod:`cli config ` to allow for the same Environment to be +used in different settings (e.g., local machine, SSH accessible machine, Azure VM, +etc.) without having to change the Environment config. + +Variable Propagation +++++++++++++++++++++ +TODO: Document how variable propagation works in the script environments using +required_args, const_args, etc. + +Examples +-------- +While this documentation is generated from the source code and is intended to be a +useful reference on the internal details, most users will be more interested in +generating json configs to be used with the ``mlos_bench`` command line tool.
+ +For a simple working user oriented example please see the `test_local_env_bench.jsonc +`_ +file or other examples in the source tree linked below. + +For more developer oriented examples please see the `mlos_bench/tests/environments +`_ +directory in the source tree. + +Notes +----- +- See `mlos_bench/environments/README.md + `_ + for additional documentation in the source tree. +- See `mlos_bench/config/environments/README.md + `_ + for additional config examples in the source tree. +""" from mlos_bench.environments.base_environment import Environment from mlos_bench.environments.composite_env import CompositeEnv diff --git a/mlos_bench/mlos_bench/environments/base_environment.py b/mlos_bench/mlos_bench/environments/base_environment.py index ba346b67654..8c3ab6c3fa9 100644 --- a/mlos_bench/mlos_bench/environments/base_environment.py +++ b/mlos_bench/mlos_bench/environments/base_environment.py @@ -15,6 +15,7 @@ Dict, Iterable, List, + Literal, Optional, Sequence, Tuple, @@ -23,7 +24,6 @@ ) from pytz import UTC -from typing_extensions import Literal from mlos_bench.config.schemas import ConfigSchema from mlos_bench.dict_templater import DictTemplater @@ -422,7 +422,7 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: Returns ------- - (status, timestamp, output) : (Status, datetime, dict) + (status, timestamp, output) : (Status, datetime.datetime, dict) 3-tuple of (Status, timestamp, output) values, where `output` is a dict with the results or None if the status is not COMPLETED. If run script is a benchmark, then the score is usually expected to @@ -439,7 +439,7 @@ def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: Returns ------- - (benchmark_status, timestamp, telemetry) : (Status, datetime, list) + (benchmark_status, timestamp, telemetry) : (Status, datetime.datetime, list) 3-tuple of (benchmark status, timestamp, telemetry) values. 
`timestamp` is UTC time stamp of the status; it's current time by default. `telemetry` is a list (maybe empty) of (timestamp, metric, value) triplets. diff --git a/mlos_bench/mlos_bench/environments/composite_env.py b/mlos_bench/mlos_bench/environments/composite_env.py index a7edb7bb280..d81bd2f729c 100644 --- a/mlos_bench/mlos_bench/environments/composite_env.py +++ b/mlos_bench/mlos_bench/environments/composite_env.py @@ -7,9 +7,7 @@ import logging from datetime import datetime from types import TracebackType -from typing import Any, Dict, List, Optional, Tuple, Type - -from typing_extensions import Literal +from typing import Any, Dict, List, Literal, Optional, Tuple, Type from mlos_bench.environments.base_environment import Environment from mlos_bench.environments.status import Status @@ -207,7 +205,7 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: Returns ------- - (status, timestamp, output) : (Status, datetime, dict) + (status, timestamp, output) : (Status, datetime.datetime, dict) 3-tuple of (Status, timestamp, output) values, where `output` is a dict with the results or None if the status is not COMPLETED. If run script is a benchmark, then the score is usually expected to @@ -238,7 +236,7 @@ def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: Returns ------- - (benchmark_status, timestamp, telemetry) : (Status, datetime, list) + (benchmark_status, timestamp, telemetry) : (Status, datetime.datetime, list) 3-tuple of (benchmark status, timestamp, telemetry) values. `timestamp` is UTC time stamp of the status; it's current time by default. `telemetry` is a list (maybe empty) of (timestamp, metric, value) triplets. 
diff --git a/mlos_bench/mlos_bench/environments/local/local_env.py b/mlos_bench/mlos_bench/environments/local/local_env.py index 754cdd34065..344dd593b34 100644 --- a/mlos_bench/mlos_bench/environments/local/local_env.py +++ b/mlos_bench/mlos_bench/environments/local/local_env.py @@ -2,7 +2,11 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Scheduler-side benchmark environment to run scripts locally.""" +""" +Scheduler-side benchmark environment to run scripts locally. + +TODO: Reference the script_env.py file for the base class. +""" import json import logging @@ -11,10 +15,20 @@ from datetime import datetime from tempfile import TemporaryDirectory from types import TracebackType -from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Type, Union +from typing import ( + Any, + Dict, + Iterable, + List, + Literal, + Mapping, + Optional, + Tuple, + Type, + Union, +) import pandas -from typing_extensions import Literal from mlos_bench.environments.base_environment import Environment from mlos_bench.environments.script_env import ScriptEnv @@ -167,7 +181,7 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: Returns ------- - (status, timestamp, output) : (Status, datetime, dict) + (status, timestamp, output) : (Status, datetime.datetime, dict) 3-tuple of (Status, timestamp, output) values, where `output` is a dict with the results or None if the status is not COMPLETED. 
If run script is a benchmark, then the score is usually expected to diff --git a/mlos_bench/mlos_bench/environments/local/local_fileshare_env.py b/mlos_bench/mlos_bench/environments/local/local_fileshare_env.py index 351ed9c480a..70281b5a036 100644 --- a/mlos_bench/mlos_bench/environments/local/local_fileshare_env.py +++ b/mlos_bench/mlos_bench/environments/local/local_fileshare_env.py @@ -175,7 +175,7 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: Returns ------- - (status, timestamp, output) : (Status, datetime, dict) + (status, timestamp, output) : (Status, datetime.datetime, dict) 3-tuple of (Status, timestamp, output) values, where `output` is a dict with the results or None if the status is not COMPLETED. If run script is a benchmark, then the score is usually expected to diff --git a/mlos_bench/mlos_bench/environments/mock_env.py b/mlos_bench/mlos_bench/environments/mock_env.py index 6d3309f35b5..a4d61fa9e37 100644 --- a/mlos_bench/mlos_bench/environments/mock_env.py +++ b/mlos_bench/mlos_bench/environments/mock_env.py @@ -14,7 +14,8 @@ from mlos_bench.environments.base_environment import Environment from mlos_bench.environments.status import Status from mlos_bench.services.base_service import Service -from mlos_bench.tunables import Tunable, TunableGroups, TunableValue +from mlos_bench.tunables.tunable import Tunable, TunableValue +from mlos_bench.tunables.tunable_groups import TunableGroups _LOG = logging.getLogger(__name__) @@ -87,7 +88,7 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: Returns ------- - (status, timestamp, output) : (Status, datetime, dict) + (status, timestamp, output) : (Status, datetime.datetime, dict) 3-tuple of (Status, timestamp, output) values, where `output` is a dict with the results or None if the status is not COMPLETED. 
The keys of the `output` dict are the names of the metrics @@ -106,7 +107,7 @@ def status(self) -> Tuple[Status, datetime, List[Tuple[datetime, str, Any]]]: Returns ------- - (benchmark_status, timestamp, telemetry) : (Status, datetime, list) + (benchmark_status, timestamp, telemetry) : (Status, datetime.datetime, list) 3-tuple of (benchmark status, timestamp, telemetry) values. `timestamp` is UTC time stamp of the status; it's current time by default. `telemetry` is a list (maybe empty) of (timestamp, metric, value) triplets. diff --git a/mlos_bench/mlos_bench/environments/remote/remote_env.py b/mlos_bench/mlos_bench/environments/remote/remote_env.py index c3535d1a6a0..0b1ed314663 100644 --- a/mlos_bench/mlos_bench/environments/remote/remote_env.py +++ b/mlos_bench/mlos_bench/environments/remote/remote_env.py @@ -6,6 +6,8 @@ Remotely executed benchmark/script environment. e.g. Application Environment + +TODO: Document how variable propagation works in the remote environments. """ import logging @@ -138,7 +140,7 @@ def run(self) -> Tuple[Status, datetime, Optional[Dict[str, TunableValue]]]: Returns ------- - (status, timestamp, output) : (Status, datetime, dict) + (status, timestamp, output) : (Status, datetime.datetime, dict) 3-tuple of (Status, timestamp, output) values, where `output` is a dict with the results or None if the status is not COMPLETED. If run script is a benchmark, then the score is usually expected to @@ -180,7 +182,7 @@ def _remote_exec( Returns ------- - result : (Status, datetime, dict) + result : (Status, datetime.datetime, dict) 3-tuple of Status, timestamp, and dict with the benchmark/script results.
Status is one of {PENDING, SUCCEEDED, FAILED, TIMED_OUT} """ diff --git a/mlos_bench/mlos_bench/environments/script_env.py b/mlos_bench/mlos_bench/environments/script_env.py index 7938ab65f80..9dc6d661433 100644 --- a/mlos_bench/mlos_bench/environments/script_env.py +++ b/mlos_bench/mlos_bench/environments/script_env.py @@ -2,7 +2,12 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Base scriptable benchmark environment.""" +""" +Base scriptable benchmark environment. + +TODO: Document how variable propagation works in the script environments using +shell_env_params, required_args, const_args, etc. +""" import abc import logging @@ -19,7 +24,10 @@ class ScriptEnv(Environment, metaclass=abc.ABCMeta): - """Base Environment that runs scripts for setup/run/teardown.""" + """Base Environment that runs scripts for the different phases (e.g., + :py:meth:`.Environment.setup`, :py:meth:`.Environment.run`, + :py:meth:`.Environment.teardown`, etc.) + """ _RE_INVALID = re.compile(r"[^a-zA-Z0-9_]") @@ -37,7 +45,7 @@ def __init__( # pylint: disable=too-many-arguments Parameters ---------- - name: str + name : str Human-readable name of the environment. config : dict Free-format dictionary that contains the benchmark environment @@ -45,11 +53,13 @@ def __init__( # pylint: disable=too-many-arguments and the `const_args` sections. It must also have at least one of the following parameters: {`setup`, `run`, `teardown`}. Additional parameters: - * `shell_env_params` - an array of parameters to pass to the script - as shell environment variables, and - * `shell_env_params_rename` - a dictionary of {to: from} mappings - of the script parameters. If not specified, replace all - non-alphanumeric characters with underscores. + + - `shell_env_params` - an array of parameters to pass to the script + as shell environment variables, and + - `shell_env_params_rename` - a dictionary of {to: from} mappings + of the script parameters.
If not specified, replace all + non-alphanumeric characters with underscores. + If neither `shell_env_params` nor `shell_env_params_rename` are specified, *no* additional shell parameters will be passed to the script. global_config : dict @@ -57,7 +67,7 @@ def __init__( # pylint: disable=too-many-arguments to be mixed in into the "const_args" section of the local config. tunables : TunableGroups A collection of tunable parameters for *all* environments. - service: Service + service : Service An optional service object (e.g., providing methods to deploy or reboot a VM, etc.). """ diff --git a/mlos_bench/mlos_bench/event_loop_context.py b/mlos_bench/mlos_bench/event_loop_context.py index 65285e5d66a..39896030872 100644 --- a/mlos_bench/mlos_bench/event_loop_context.py +++ b/mlos_bench/mlos_bench/event_loop_context.py @@ -19,8 +19,11 @@ from typing_extensions import TypeAlias CoroReturnType = TypeVar("CoroReturnType") # pylint: disable=invalid-name +"""Type variable for the return type of an :external:py:mod:`asyncio` coroutine.""" + if sys.version_info >= (3, 9): FutureReturnType: TypeAlias = Future[CoroReturnType] + """Type variable for the return type of a :py:class:`~concurrent.futures.Future`.""" else: FutureReturnType: TypeAlias = Future @@ -29,15 +32,15 @@ class EventLoopContext: """ - EventLoopContext encapsulates a background thread for asyncio event loop processing - as an aid for context managers. + EventLoopContext encapsulates a background thread for :external:py:mod:`asyncio` + event loop processing as an aid for context managers. There is generally only expected to be one of these, either as a base class instance if it's specific to that functionality or for the full mlos_bench process to support parallel trial runners, for instance. - It's enter() and exit() routines are expected to be called from the caller's context - manager routines (e.g., __enter__ and __exit__). 
+ Its :py:meth:`.enter` and :py:meth:`.exit` routines are expected to be called + from the caller's context manager routines (e.g., __enter__ and __exit__). """ def __init__(self) -> None: @@ -94,7 +97,7 @@ def run_coroutine(self, coro: Coroutine[Any, Any, CoroReturnType]) -> FutureRetu Returns ------- - Future[CoroReturnType] + concurrent.futures.Future[CoroReturnType] A future that will be completed when the coroutine completes. """ assert self._event_loop_thread_refcnt > 0 diff --git a/mlos_bench/mlos_bench/launcher.py b/mlos_bench/mlos_bench/launcher.py index 339a11963de..b970b98456e 100644 --- a/mlos_bench/mlos_bench/launcher.py +++ b/mlos_bench/mlos_bench/launcher.py @@ -6,8 +6,8 @@ A helper class to load the configuration files, parse the command line parameters, and instantiate the main components of mlos_bench system. -It is used in `mlos_bench.run` module to run the benchmark/optimizer from the -command line. +It is used in the :py:mod:`mlos_bench.run` module to run the benchmark/optimizer +from the command line. """ import argparse diff --git a/mlos_bench/mlos_bench/optimizers/__init__.py b/mlos_bench/mlos_bench/optimizers/__init__.py index 106b0fc496b..7f2d6310c9a 100644 --- a/mlos_bench/mlos_bench/optimizers/__init__.py +++ b/mlos_bench/mlos_bench/optimizers/__init__.py @@ -2,7 +2,12 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Interfaces and wrapper classes for optimizers to be used in Autotune.""" +""" +Interfaces and wrapper classes for optimizers to be used in mlos_bench for autotuning or +benchmarking. + +TODO: Improve documentation here.
+""" from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.optimizers.manual_optimizer import ManualOptimizer diff --git a/mlos_bench/mlos_bench/optimizers/base_optimizer.py b/mlos_bench/mlos_bench/optimizers/base_optimizer.py index d9a854b476e..14eb3eba6ce 100644 --- a/mlos_bench/mlos_bench/optimizers/base_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/base_optimizer.py @@ -9,10 +9,9 @@ import logging from abc import ABCMeta, abstractmethod from types import TracebackType -from typing import Dict, Optional, Sequence, Tuple, Type, Union +from typing import Dict, Literal, Optional, Sequence, Tuple, Type, Union from ConfigSpace import ConfigurationSpace -from typing_extensions import Literal from mlos_bench.config.schemas import ConfigSchema from mlos_bench.environments.status import Status @@ -186,7 +185,7 @@ def config_space(self) -> ConfigurationSpace: Returns ------- - ConfigurationSpace + ConfigSpace.ConfigurationSpace The ConfigSpace representation of the tunable parameters. """ if self._config_space is None: @@ -206,7 +205,7 @@ def name(self) -> str: @property def targets(self) -> Dict[str, Literal["min", "max"]]: - """A dictionary of {target: direction} of optimization targets.""" + """Returns a dictionary of optimization targets and their direction.""" return { opt_target: "min" if opt_dir == 1 else "max" for (opt_target, opt_dir) in self._opt_targets.items() diff --git a/mlos_bench/mlos_bench/optimizers/convert_configspace.py b/mlos_bench/mlos_bench/optimizers/convert_configspace.py index 755918fa99d..3545936623c 100644 --- a/mlos_bench/mlos_bench/optimizers/convert_configspace.py +++ b/mlos_bench/mlos_bench/optimizers/convert_configspace.py @@ -73,7 +73,7 @@ def _tunable_to_configspace( Returns ------- - cs : ConfigurationSpace + cs : ConfigSpace.ConfigurationSpace A ConfigurationSpace object that corresponds to the Tunable. 
""" # pylint: disable=too-complex @@ -206,7 +206,7 @@ def tunable_groups_to_configspace( Returns ------- - configspace : ConfigurationSpace + configspace : ConfigSpace.ConfigurationSpace A new ConfigurationSpace instance that corresponds to the input TunableGroups. """ space = ConfigurationSpace(seed=seed) @@ -234,7 +234,7 @@ def tunable_values_to_configuration(tunables: TunableGroups) -> Configuration: Returns ------- - Configuration + ConfigSpace.Configuration A ConfigSpace Configuration. """ values: Dict[str, TunableValue] = {} diff --git a/mlos_bench/mlos_bench/optimizers/manual_optimizer.py b/mlos_bench/mlos_bench/optimizers/manual_optimizer.py index d9f48a4e193..e9c3ecde192 100644 --- a/mlos_bench/mlos_bench/optimizers/manual_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/manual_optimizer.py @@ -2,7 +2,15 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Optimizer for mlos_bench that proposes an explicit sequence of configurations.""" +""" +Manual config suggestor (Optimizer) for mlos_bench that proposes an explicit sequence of +configurations. + +This is useful for testing and validation, as it allows you to run a sequence of +configurations in a cyclic fashion. + +TODO: Add an example configuration. 
+""" import logging from typing import Dict, List, Optional diff --git a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py index 649b070123b..f9d5685ae8f 100644 --- a/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py +++ b/mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py @@ -7,10 +7,9 @@ import logging import os from types import TracebackType -from typing import Dict, Optional, Sequence, Tuple, Type, Union +from typing import Dict, Literal, Optional, Sequence, Tuple, Type, Union import pandas as pd -from typing_extensions import Literal from mlos_bench.environments.status import Status from mlos_bench.optimizers.base_optimizer import Optimizer diff --git a/mlos_bench/mlos_bench/os_environ.py b/mlos_bench/mlos_bench/os_environ.py index f750f120389..c83ce05b343 100644 --- a/mlos_bench/mlos_bench/os_environ.py +++ b/mlos_bench/mlos_bench/os_environ.py @@ -4,13 +4,16 @@ # """ Simple platform agnostic abstraction for the OS environment variables. Meant as a -replacement for os.environ vs nt.environ. +replacement for :external:py:data:`os.environ` vs ``nt.environ``. Example ------- -from mlos_bench.os_env import environ -environ['FOO'] = 'bar' -environ.get('PWD') +>>> # Import the environ object. +>>> from mlos_bench.os_environ import environ +>>> # Set an environment variable. +>>> environ["FOO"] = "bar" +>>> # Get an environment variable. 
+>>> pwd = environ.get("PWD") """ import os @@ -33,7 +36,9 @@ import nt # type: ignore[import-not-found] # pylint: disable=import-error # (3.8) environ: EnvironType = nt.environ + """A platform agnostic abstraction for the OS environment variables.""" else: environ: EnvironType = os.environ + """A platform agnostic abstraction for the OS environment variables.""" __all__ = ["environ"] diff --git a/mlos_bench/mlos_bench/run.py b/mlos_bench/mlos_bench/run.py index a554f1a803d..cc3cf60b8f6 100755 --- a/mlos_bench/mlos_bench/run.py +++ b/mlos_bench/mlos_bench/run.py @@ -4,11 +4,16 @@ # Licensed under the MIT License. # """ -OS Autotune main optimization loop. +mlos_bench main optimization loop and benchmark runner CLI. -Note: this script is also available as a CLI tool via pip under the name "mlos_bench". +Note: this script is also available as a CLI tool via ``pip`` under the name ``mlos_bench``. -See `--help` output for details. +See the current ``--help`` `output for details `_. + +See Also +-------- +mlos_bench.config : documentation on the configuration system. +mlos_bench.launcher.Launcher : class is responsible for processing the CLI args. 
""" import logging diff --git a/mlos_bench/mlos_bench/schedulers/base_scheduler.py b/mlos_bench/mlos_bench/schedulers/base_scheduler.py index f38e51e7133..5d51650a59f 100644 --- a/mlos_bench/mlos_bench/schedulers/base_scheduler.py +++ b/mlos_bench/mlos_bench/schedulers/base_scheduler.py @@ -9,10 +9,9 @@ from abc import ABCMeta, abstractmethod from datetime import datetime from types import TracebackType -from typing import Any, Dict, List, Optional, Tuple, Type +from typing import Any, Dict, List, Literal, Optional, Tuple, Type from pytz import UTC -from typing_extensions import Literal from mlos_bench.config.schemas import ConfigSchema from mlos_bench.environments.base_environment import Environment diff --git a/mlos_bench/mlos_bench/services/__init__.py b/mlos_bench/mlos_bench/services/__init__.py index b768afb09c8..65ffc8e8d80 100644 --- a/mlos_bench/mlos_bench/services/__init__.py +++ b/mlos_bench/mlos_bench/services/__init__.py @@ -2,7 +2,11 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Services for implementing Environments for mlos_bench.""" +""" +Services for implementing Environments for mlos_bench. + +TODO: Improve documentation here. 
+""" from mlos_bench.services.base_fileshare import FileShareService from mlos_bench.services.base_service import Service diff --git a/mlos_bench/mlos_bench/services/base_service.py b/mlos_bench/mlos_bench/services/base_service.py index c5d9b78c873..8afb0b55f71 100644 --- a/mlos_bench/mlos_bench/services/base_service.py +++ b/mlos_bench/mlos_bench/services/base_service.py @@ -7,9 +7,7 @@ import json import logging from types import TracebackType -from typing import Any, Callable, Dict, List, Optional, Set, Type, Union - -from typing_extensions import Literal +from typing import Any, Callable, Dict, List, Literal, Optional, Set, Type, Union from mlos_bench.config.schemas import ConfigSchema from mlos_bench.services.types.config_loader_type import SupportsConfigLoading diff --git a/mlos_bench/mlos_bench/services/config_persistence.py b/mlos_bench/mlos_bench/services/config_persistence.py index 72bfad007de..cd0f42bac40 100644 --- a/mlos_bench/mlos_bench/services/config_persistence.py +++ b/mlos_bench/mlos_bench/services/config_persistence.py @@ -25,7 +25,7 @@ import json5 # To read configs with comments and other JSON5 syntax features from jsonschema import SchemaError, ValidationError -from mlos_bench.config.schemas import ConfigSchema +from mlos_bench.config.schemas.config_schemas import ConfigSchema from mlos_bench.environments.base_environment import Environment from mlos_bench.optimizers.base_optimizer import Optimizer from mlos_bench.services.base_service import Service diff --git a/mlos_bench/mlos_bench/services/local/temp_dir_context.py b/mlos_bench/mlos_bench/services/local/temp_dir_context.py index e65a45934b7..bf730ae3452 100644 --- a/mlos_bench/mlos_bench/services/local/temp_dir_context.py +++ b/mlos_bench/mlos_bench/services/local/temp_dir_context.py @@ -77,7 +77,7 @@ def temp_dir_context( Returns ------- - temp_dir_context : TemporaryDirectory + temp_dir_context : tempfile.TemporaryDirectory Temporary directory context to use in the `with` clause. 
""" temp_dir = path or self._temp_dir diff --git a/mlos_bench/mlos_bench/services/remote/azure/azure_network_services.py b/mlos_bench/mlos_bench/services/remote/azure/azure_network_services.py index 29552de4f04..4f11e89aa2f 100644 --- a/mlos_bench/mlos_bench/services/remote/azure/azure_network_services.py +++ b/mlos_bench/mlos_bench/services/remote/azure/azure_network_services.py @@ -125,7 +125,7 @@ def provision_network(self, params: dict) -> Tuple[Status, dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is the input `params` plus the parameters extracted from the response JSON, or {} if the status is FAILED. Status is one of {PENDING, SUCCEEDED, FAILED} @@ -140,13 +140,13 @@ def deprovision_network(self, params: dict, ignore_errors: bool = True) -> Tuple ---------- params : dict Flat dictionary of (key, value) pairs of tunable parameters. - ignore_errors : boolean + ignore_errors : bool Whether to ignore errors (default) encountered during the operation (e.g., due to dependent resources still in use). Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ diff --git a/mlos_bench/mlos_bench/services/remote/azure/azure_saas.py b/mlos_bench/mlos_bench/services/remote/azure/azure_saas.py index 042e599f0be..fcd99991f87 100644 --- a/mlos_bench/mlos_bench/services/remote/azure/azure_saas.py +++ b/mlos_bench/mlos_bench/services/remote/azure/azure_saas.py @@ -130,7 +130,7 @@ def configure(self, config: Dict[str, Any], params: Dict[str, Any]) -> Tuple[Sta Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -202,7 +202,7 @@ def _config_one( Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. 
Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -236,7 +236,7 @@ def _config_many(self, config: Dict[str, Any], params: Dict[str, Any]) -> Tuple[ Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -259,7 +259,7 @@ def _config_batch(self, config: Dict[str, Any], params: Dict[str, Any]) -> Tuple Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ diff --git a/mlos_bench/mlos_bench/services/remote/azure/azure_vm_services.py b/mlos_bench/mlos_bench/services/remote/azure/azure_vm_services.py index b62ede5fab5..856f5e99126 100644 --- a/mlos_bench/mlos_bench/services/remote/azure/azure_vm_services.py +++ b/mlos_bench/mlos_bench/services/remote/azure/azure_vm_services.py @@ -280,7 +280,7 @@ def provision_host(self, params: dict) -> Tuple[Status, dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is the input `params` plus the parameters extracted from the response JSON, or {} if the status is FAILED. Status is one of {PENDING, SUCCEEDED, FAILED} @@ -298,7 +298,7 @@ def deprovision_host(self, params: dict) -> Tuple[Status, dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -340,7 +340,7 @@ def deallocate_host(self, params: dict) -> Tuple[Status, dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -375,7 +375,7 @@ def start_host(self, params: dict) -> Tuple[Status, dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. 
Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -412,7 +412,7 @@ def stop_host(self, params: dict, force: bool = False) -> Tuple[Status, dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -452,7 +452,7 @@ def restart_host(self, params: dict, force: bool = False) -> Tuple[Status, dict] Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ diff --git a/mlos_bench/mlos_bench/services/remote/ssh/ssh_fileshare.py b/mlos_bench/mlos_bench/services/remote/ssh/ssh_fileshare.py index 137ab024d1c..657da670d97 100644 --- a/mlos_bench/mlos_bench/services/remote/ssh/ssh_fileshare.py +++ b/mlos_bench/mlos_bench/services/remote/ssh/ssh_fileshare.py @@ -50,8 +50,8 @@ async def _start_file_copy( Local path to the file/dir. remote_path : str Remote path to the file/dir. - recursive : bool, optional - _description_, by default True + recursive : bool + Whether to copy recursively. By default True. Raises ------ diff --git a/mlos_bench/mlos_bench/services/remote/ssh/ssh_host_service.py b/mlos_bench/mlos_bench/services/remote/ssh/ssh_host_service.py index 36f1f7866ba..83fc9374989 100644 --- a/mlos_bench/mlos_bench/services/remote/ssh/ssh_host_service.py +++ b/mlos_bench/mlos_bench/services/remote/ssh/ssh_host_service.py @@ -208,7 +208,7 @@ def _exec_os_op(self, cmd_opts_list: List[str], params: dict) -> Tuple[Status, d Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -249,7 +249,7 @@ def shutdown(self, params: dict, force: bool = False) -> Tuple[Status, dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. 
Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -274,7 +274,7 @@ def reboot(self, params: dict, force: bool = False) -> Tuple[Status, dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. Status is one of {PENDING, SUCCEEDED, FAILED} """ diff --git a/mlos_bench/mlos_bench/services/remote/ssh/ssh_service.py b/mlos_bench/mlos_bench/services/remote/ssh/ssh_service.py index 706764a1f1a..7e33d715ec3 100644 --- a/mlos_bench/mlos_bench/services/remote/ssh/ssh_service.py +++ b/mlos_bench/mlos_bench/services/remote/ssh/ssh_service.py @@ -115,7 +115,9 @@ def connection_lost(self, exc: Optional[Exception]) -> None: return super().connection_lost(exc) async def connection(self) -> Optional[SSHClientConnection]: - """Waits for and returns the SSHClientConnection to be established or lost.""" + """Waits for and returns the asyncssh.connection.SSHClientConnection to be + established or lost. + """ _LOG.debug("%s: Waiting for connection to be available.", current_thread().name) await self._conn_event.wait() _LOG.debug("%s: Connection available for %s", current_thread().name, self._connection_id) @@ -176,7 +178,7 @@ async def get_client_connection( Returns ------- - Tuple[SSHClientConnection, SshClient] + Tuple[asyncssh.connection.SSHClientConnection, SshClient] A tuple of (SSHClientConnection, SshClient). """ _LOG.debug("%s: get_client_connection: %s", current_thread().name, connect_params) diff --git a/mlos_bench/mlos_bench/services/types/authenticator_type.py b/mlos_bench/mlos_bench/services/types/authenticator_type.py index b01c30d42de..4d06a7867ff 100644 --- a/mlos_bench/mlos_bench/services/types/authenticator_type.py +++ b/mlos_bench/mlos_bench/services/types/authenticator_type.py @@ -39,6 +39,6 @@ def get_credential(self) -> T_co: Returns ------- - credential : T + credential : T_co Cloud-specific credential object. 
""" diff --git a/mlos_bench/mlos_bench/services/types/config_loader_type.py b/mlos_bench/mlos_bench/services/types/config_loader_type.py index 33adac67eb4..f0362e8d88d 100644 --- a/mlos_bench/mlos_bench/services/types/config_loader_type.py +++ b/mlos_bench/mlos_bench/services/types/config_loader_type.py @@ -16,7 +16,7 @@ runtime_checkable, ) -from mlos_bench.config.schemas import ConfigSchema +from mlos_bench.config.schemas.config_schemas import ConfigSchema from mlos_bench.tunables.tunable import TunableValue # Avoid's circular import issues. diff --git a/mlos_bench/mlos_bench/services/types/host_ops_type.py b/mlos_bench/mlos_bench/services/types/host_ops_type.py index 166406714da..1ac485530a1 100644 --- a/mlos_bench/mlos_bench/services/types/host_ops_type.py +++ b/mlos_bench/mlos_bench/services/types/host_ops_type.py @@ -25,7 +25,7 @@ def start_host(self, params: dict) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -43,7 +43,7 @@ def stop_host(self, params: dict, force: bool = False) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -61,7 +61,7 @@ def restart_host(self, params: dict, force: bool = False) -> Tuple["Status", dic Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. 
Status is one of {PENDING, SUCCEEDED, FAILED} """ diff --git a/mlos_bench/mlos_bench/services/types/host_provisioner_type.py b/mlos_bench/mlos_bench/services/types/host_provisioner_type.py index 1df0716fa13..af5b776a6c5 100644 --- a/mlos_bench/mlos_bench/services/types/host_provisioner_type.py +++ b/mlos_bench/mlos_bench/services/types/host_provisioner_type.py @@ -27,7 +27,7 @@ def provision_host(self, params: dict) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -64,7 +64,7 @@ def deprovision_host(self, params: dict) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -84,7 +84,7 @@ def deallocate_host(self, params: dict) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ diff --git a/mlos_bench/mlos_bench/services/types/local_exec_type.py b/mlos_bench/mlos_bench/services/types/local_exec_type.py index d0c8c357f0d..96d5042d3e9 100644 --- a/mlos_bench/mlos_bench/services/types/local_exec_type.py +++ b/mlos_bench/mlos_bench/services/types/local_exec_type.py @@ -71,6 +71,6 @@ def temp_dir_context( Returns ------- - temp_dir_context : TemporaryDirectory + temp_dir_context : tempfile.TemporaryDirectory Temporary directory context to use in the `with` clause. 
""" diff --git a/mlos_bench/mlos_bench/services/types/network_provisioner_type.py b/mlos_bench/mlos_bench/services/types/network_provisioner_type.py index 3525fbdee13..542028b7eaf 100644 --- a/mlos_bench/mlos_bench/services/types/network_provisioner_type.py +++ b/mlos_bench/mlos_bench/services/types/network_provisioner_type.py @@ -27,7 +27,7 @@ def provision_network(self, params: dict) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -65,13 +65,13 @@ def deprovision_network( ---------- params : dict Flat dictionary of (key, value) pairs of tunable parameters. - ignore_errors : boolean + ignore_errors : bool Whether to ignore errors (default) encountered during the operation (e.g., due to dependent resources still in use). Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ diff --git a/mlos_bench/mlos_bench/services/types/os_ops_type.py b/mlos_bench/mlos_bench/services/types/os_ops_type.py index 8b727f87a6a..dfb5133e035 100644 --- a/mlos_bench/mlos_bench/services/types/os_ops_type.py +++ b/mlos_bench/mlos_bench/services/types/os_ops_type.py @@ -27,7 +27,7 @@ def shutdown(self, params: dict, force: bool = False) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -45,7 +45,7 @@ def reboot(self, params: dict, force: bool = False) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. 
Status is one of {PENDING, SUCCEEDED, FAILED} """ diff --git a/mlos_bench/mlos_bench/services/types/remote_config_type.py b/mlos_bench/mlos_bench/services/types/remote_config_type.py index 7e8d0a6e772..5a75dac382d 100644 --- a/mlos_bench/mlos_bench/services/types/remote_config_type.py +++ b/mlos_bench/mlos_bench/services/types/remote_config_type.py @@ -27,7 +27,7 @@ def configure(self, config: Dict[str, Any], params: Dict[str, Any]) -> Tuple["St Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ diff --git a/mlos_bench/mlos_bench/services/types/vm_provisioner_type.py b/mlos_bench/mlos_bench/services/types/vm_provisioner_type.py index 69d24f3fd38..d6c4f1f1c60 100644 --- a/mlos_bench/mlos_bench/services/types/vm_provisioner_type.py +++ b/mlos_bench/mlos_bench/services/types/vm_provisioner_type.py @@ -27,7 +27,7 @@ def vm_provision(self, params: dict) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -63,7 +63,7 @@ def vm_start(self, params: dict) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -79,7 +79,7 @@ def vm_stop(self, params: dict) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -95,7 +95,7 @@ def vm_restart(self, params: dict) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. 
Status is one of {PENDING, SUCCEEDED, FAILED} """ @@ -111,7 +111,7 @@ def vm_deprovision(self, params: dict) -> Tuple["Status", dict]: Returns ------- - result : (Status, dict={}) + result : (Status, dict) A pair of Status and result. The result is always {}. Status is one of {PENDING, SUCCEEDED, FAILED} """ diff --git a/mlos_bench/mlos_bench/storage/__init__.py b/mlos_bench/mlos_bench/storage/__init__.py index 64e70c20f76..840a6d87fce 100644 --- a/mlos_bench/mlos_bench/storage/__init__.py +++ b/mlos_bench/mlos_bench/storage/__init__.py @@ -2,7 +2,58 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Interfaces to the storage backends for OS Autotune.""" +""" +Interfaces to the storage backends for mlos_bench. + +Storage backends (for instance :py:mod:`~mlos_bench.storage.sql`) are used to store +and retrieve the results of experiments and implement a persistent queue for +:py:mod:`~mlos_bench.schedulers`. + +The :py:class:`~mlos_bench.storage.base_storage.Storage` class is the main interface +and provides the ability to + +- Create or reload a new :py:class:`~.Storage.Experiment` with one or more + associated :py:class:`~.Storage.Trial` instances which are used by the + :py:mod:`~mlos_bench.schedulers` during ``mlos_bench`` run time to execute + `Trials`. + + In MLOS terms, an *Experiment* is a group of *Trials* that share the same scripts + and target system. + + A *Trial* is a single run of the target system with a specific *Configuration* + (e.g., set of tunable parameter values). + (Note: other systems may call this a *sample*) + +- Retrieve the :py:class:`~mlos_bench.storage.base_trial_data.TrialData` results + with the :py:attr:`~mlos_bench.storage.base_experiment_data.ExperimentData.trials` + property on a :py:class:`~mlos_bench.storage.base_experiment_data.ExperimentData` + instance via the :py:class:`~.Storage` instance's + :py:attr:`~mlos_bench.storage.base_storage.Storage.experiments` property. 
+ + These can be especially useful with :py:mod:`mlos_viz` for interactive exploration + in a Jupyter Notebook interface, for instance. + +The :py:func:`.from_config` :py:mod:`.storage_factory` function can be used to get a +:py:class:`.Storage` instance from a +:py:attr:`~mlos_bench.config.schemas.config_schemas.ConfigSchema.STORAGE` type json +config. + +Example +------- +TODO: Add example usage. + +See Also +-------- +mlos_bench.storage.base_storage : Base interface for backends. +mlos_bench.storage.base_experiment_data : Base interface for ExperimentData. +mlos_bench.storage.base_trial_data : Base interface for TrialData. + +Notes +----- +- See `sqlite-autotuning notebooks + `_ + for additional examples. +""" from mlos_bench.storage.base_storage import Storage from mlos_bench.storage.storage_factory import from_config diff --git a/mlos_bench/mlos_bench/storage/base_experiment_data.py b/mlos_bench/mlos_bench/storage/base_experiment_data.py index 97f946ef92b..098c1bca26c 100644 --- a/mlos_bench/mlos_bench/storage/base_experiment_data.py +++ b/mlos_bench/mlos_bench/storage/base_experiment_data.py @@ -2,7 +2,29 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Base interface for accessing the stored benchmark experiment data.""" +""" +Base interface for accessing the stored benchmark experiment data. + +An experiment is a collection of trials that are run with a given set of scripts and +target system. + +Each trial is associated with a configuration (e.g., set of tunable parameters), but +multiple trials may use the same config (e.g., for repeat run variability analysis). + +See Also +-------- +ExperimentData.results_df : + Retrieves a pandas DataFrame of the Experiment's trials' results data. +ExperimentData.trials : + Retrieves a dictionary of the Experiment's trials' data. +ExperimentData.tunable_configs : + Retrieves a dictionary of the Experiment's sampled configs data. 
+ExperimentData.tunable_config_trial_groups : + Retrieves a dictionary of the Experiment's trials' data, grouped by shared + tunable config. +mlos_bench.storage.base_trial_data.TrialData : + Base interface for accessing the stored benchmark trial data. +""" from abc import ABCMeta, abstractmethod from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple @@ -78,7 +100,7 @@ def objectives(self) -> Dict[str, Literal["min", "max"]]: Returns ------- - objectives : Dict[str, objective] + objectives : Dict[str, Literal["min", "max"]] A dictionary of the experiment's objective names (optimization_targets) and their directions (e.g., min or max). """ diff --git a/mlos_bench/mlos_bench/storage/base_storage.py b/mlos_bench/mlos_bench/storage/base_storage.py index 867c4e0bc02..a6b3a1aa90a 100644 --- a/mlos_bench/mlos_bench/storage/base_storage.py +++ b/mlos_bench/mlos_bench/storage/base_storage.py @@ -2,15 +2,31 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Base interface for saving and restoring the benchmark data.""" +""" +Base interface for saving and restoring the benchmark data. + +See Also +-------- +mlos_bench.storage.base_storage.Storage.experiments : + Retrieves a dictionary of the Experiments' data. +mlos_bench.storage.base_experiment_data.ExperimentData.results_df : + Retrieves a pandas DataFrame of the Experiment's trials' results data. +mlos_bench.storage.base_experiment_data.ExperimentData.trials : + Retrieves a dictionary of the Experiment's trials' data. +mlos_bench.storage.base_experiment_data.ExperimentData.tunable_configs : + Retrieves a dictionary of the Experiment's sampled configs data. +mlos_bench.storage.base_experiment_data.ExperimentData.tunable_config_trial_groups : + Retrieves a dictionary of the Experiment's trials' data, grouped by shared + tunable config. +mlos_bench.storage.base_trial_data.TrialData : + Base interface for accessing the stored benchmark trial data. 
+""" import logging from abc import ABCMeta, abstractmethod from datetime import datetime from types import TracebackType -from typing import Any, Dict, Iterator, List, Optional, Tuple, Type - -from typing_extensions import Literal +from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Type from mlos_bench.config.schemas import ConfigSchema from mlos_bench.dict_templater import DictTemplater @@ -258,7 +274,7 @@ def load_telemetry(self, trial_id: int) -> List[Tuple[datetime, str, Any]]: Returns ------- - metrics : List[Tuple[datetime, str, Any]] + metrics : List[Tuple[datetime.datetime, str, Any]] Telemetry data. """ @@ -298,7 +314,7 @@ def pending_trials( Parameters ---------- - timestamp : datetime + timestamp : datetime.datetime The time in UTC to check for scheduled trials. running : bool If True, include the trials that are already running. @@ -323,7 +339,7 @@ def new_trial( ---------- tunables : TunableGroups Tunable parameters to use for the trial. - ts_start : Optional[datetime] + ts_start : Optional[datetime.datetime] Timestamp of the trial start (can be in the future). config : dict Key/value pairs of additional non-tunable parameters of the trial. @@ -360,7 +376,7 @@ def _new_trial( ---------- tunables : TunableGroups Tunable parameters to use for the trial. - ts_start : Optional[datetime] + ts_start : Optional[datetime.datetime] Timestamp of the trial start (can be in the future). config : dict Key/value pairs of additional non-tunable parameters of the trial. @@ -460,7 +476,7 @@ def update( ---------- status : Status Status of the experiment run. - timestamp: datetime + timestamp: datetime.datetime Timestamp of the status and metrics. metrics : Optional[Dict[str, Any]] One or several metrics of the experiment run. @@ -499,9 +515,9 @@ def update_telemetry( ---------- status : Status Current status of the trial. - timestamp: datetime + timestamp: datetime.datetime Timestamp of the status (but not the metrics). 
- metrics : List[Tuple[datetime, str, Any]] + metrics : List[Tuple[datetime.datetime, str, Any]] Telemetry data. """ _LOG.info("Store telemetry: %s :: %s %d records", self, status, len(metrics)) diff --git a/mlos_bench/mlos_bench/storage/base_trial_data.py b/mlos_bench/mlos_bench/storage/base_trial_data.py index 4782aa92b36..b4b5936a290 100644 --- a/mlos_bench/mlos_bench/storage/base_trial_data.py +++ b/mlos_bench/mlos_bench/storage/base_trial_data.py @@ -2,7 +2,12 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Base interface for accessing the stored benchmark trial data.""" +""" +Base interface for accessing the stored benchmark trial data. + +A single trial is a single run of an experiment with a given configuration (e.g., set of +tunable parameters). +""" from abc import ABCMeta, abstractmethod from datetime import datetime from typing import TYPE_CHECKING, Any, Dict, Optional diff --git a/mlos_bench/mlos_bench/storage/base_tunable_config_data.py b/mlos_bench/mlos_bench/storage/base_tunable_config_data.py index 62751deb8e9..c925fa7b0ec 100644 --- a/mlos_bench/mlos_bench/storage/base_tunable_config_data.py +++ b/mlos_bench/mlos_bench/storage/base_tunable_config_data.py @@ -2,7 +2,12 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Base interface for accessing the stored benchmark (tunable) config data.""" +""" +Base interface for accessing the stored benchmark (tunable) config data. + +Note: a configuration in this context is the set of tunable parameter values and can +be used by one or more trials. 
+""" from abc import ABCMeta, abstractmethod from typing import Any, Dict, Optional diff --git a/mlos_bench/mlos_bench/storage/base_tunable_config_trial_group_data.py b/mlos_bench/mlos_bench/storage/base_tunable_config_trial_group_data.py index c01c7544b39..d17fc74a9db 100644 --- a/mlos_bench/mlos_bench/storage/base_tunable_config_trial_group_data.py +++ b/mlos_bench/mlos_bench/storage/base_tunable_config_trial_group_data.py @@ -2,7 +2,12 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Base interface for accessing the stored benchmark config trial group data.""" +""" +Base interface for accessing the stored benchmark config trial group data. + +Since a single config may be used by multiple trials, we can group them together for +easier analysis. +""" from abc import ABCMeta, abstractmethod from typing import TYPE_CHECKING, Any, Dict, Optional @@ -120,5 +125,5 @@ def results_df(self) -> pandas.DataFrame: See Also -------- - ExperimentData.results + :py:attr:`mlos_bench.storage.base_experiment_data.ExperimentData.results_df` """ diff --git a/mlos_bench/mlos_bench/storage/sql/__init__.py b/mlos_bench/mlos_bench/storage/sql/__init__.py index 9d749ed35d1..84c357e7d62 100644 --- a/mlos_bench/mlos_bench/storage/sql/__init__.py +++ b/mlos_bench/mlos_bench/storage/sql/__init__.py @@ -2,7 +2,30 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Interfaces to the SQL-based storage backends for OS Autotune.""" +"""Interfaces to the SQL-based storage backends for mlos_bench using `SQLAlchemy +`_. + +In general any SQL system supported by SQLAlchemy can be used, but the default is a +local SQLite instance. 
+ +Although the schema is defined (and printable) by the +:py:mod:`mlos_bench.storage.sql.schema` module so direct queries are possible, users +are expected to interact with the data using the +:py:class:`~mlos_bench.storage.sql.experiment_data.ExperimentSqlData` and +:py:class:`~mlos_bench.storage.sql.trial_data.TrialSqlData` interfaces, which can be +obtained from the initial :py:class:`.SqlStorage` instance obtained by +:py:func:`mlos_bench.storage.storage_factory.from_config`. + +Examples +-------- +TODO: Add example usage. + +Notes +----- +See the `mlos_bench/config/storage +`_ +tree for some configuration examples. +""" from mlos_bench.storage.sql.storage import SqlStorage __all__ = [ diff --git a/mlos_bench/mlos_bench/storage/sql/common.py b/mlos_bench/mlos_bench/storage/sql/common.py index 918ed54ff2a..ab827f2d997 100644 --- a/mlos_bench/mlos_bench/storage/sql/common.py +++ b/mlos_bench/mlos_bench/storage/sql/common.py @@ -6,7 +6,8 @@ from typing import Dict, Optional import pandas -from sqlalchemy import Engine, Integer, and_, func, select +from sqlalchemy import Integer, and_, func, select +from sqlalchemy.engine import Engine from mlos_bench.environments.status import Status from mlos_bench.storage.base_experiment_data import ExperimentData diff --git a/mlos_bench/mlos_bench/storage/sql/experiment.py b/mlos_bench/mlos_bench/storage/sql/experiment.py index 56a3f260498..e128f64b223 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment.py @@ -2,7 +2,9 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Saving and restoring the benchmark data using SQLAlchemy.""" +""":py:class:`.Storage.Experiment` interface implementation for saving and restoring +the benchmark experiment data using `SQLAlchemy `_ backend. 
+""" import hashlib import logging @@ -10,7 +12,8 @@ from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple from pytz import UTC -from sqlalchemy import Connection, CursorResult, Engine, Table, column, func, select +from sqlalchemy import Connection, CursorResult, Table, column, func, select +from sqlalchemy.engine import Engine from mlos_bench.environments.status import Status from mlos_bench.storage.base_storage import Storage diff --git a/mlos_bench/mlos_bench/storage/sql/experiment_data.py b/mlos_bench/mlos_bench/storage/sql/experiment_data.py index 9103322ae89..f24ed82268b 100644 --- a/mlos_bench/mlos_bench/storage/sql/experiment_data.py +++ b/mlos_bench/mlos_bench/storage/sql/experiment_data.py @@ -2,12 +2,15 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""An interface to access the experiment benchmark data stored in SQL DB.""" +"""An interface to access the benchmark experiment data stored in SQL DB using the +:py:class:`.ExperimentData` interface. +""" import logging from typing import Dict, Literal, Optional import pandas -from sqlalchemy import Engine, Integer, String, func +from sqlalchemy import Integer, String, func +from sqlalchemy.engine import Engine from mlos_bench.storage.base_experiment_data import ExperimentData from mlos_bench.storage.base_trial_data import TrialData diff --git a/mlos_bench/mlos_bench/storage/sql/schema.py b/mlos_bench/mlos_bench/storage/sql/schema.py index 3900568b75d..e6e36ea2d14 100644 --- a/mlos_bench/mlos_bench/storage/sql/schema.py +++ b/mlos_bench/mlos_bench/storage/sql/schema.py @@ -2,7 +2,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""DB schema definition.""" +""" +DB schema definition for the :py:class:`~mlos_bench.storage.sql.storage.SqlStorage` +backend. + +Notes +----- +The SQL statements are generated by SQLAlchemy, but can be obtained using +``repr`` or ``str`` (e.g., via ``print()``) on this object. 
+The ``mlos_bench`` CLI will do this automatically if the logging level is set to +``DEBUG``. +""" import logging from typing import Any, List @@ -11,7 +21,6 @@ Column, DateTime, Dialect, - Engine, Float, ForeignKeyConstraint, Integer, @@ -23,6 +32,7 @@ UniqueConstraint, create_mock_engine, ) +from sqlalchemy.engine import Engine _LOG = logging.getLogger(__name__) diff --git a/mlos_bench/mlos_bench/storage/sql/storage.py b/mlos_bench/mlos_bench/storage/sql/storage.py index 3a272ff19cb..495b38e6546 100644 --- a/mlos_bench/mlos_bench/storage/sql/storage.py +++ b/mlos_bench/mlos_bench/storage/sql/storage.py @@ -21,7 +21,9 @@ class SqlStorage(Storage): - """An implementation of the Storage interface using SQLAlchemy backend.""" + """An implementation of the :py:class:`~.Storage` interface using SQLAlchemy + backend. + """ def __init__( self, diff --git a/mlos_bench/mlos_bench/storage/sql/trial.py b/mlos_bench/mlos_bench/storage/sql/trial.py index 5942912efd2..0951e702647 100644 --- a/mlos_bench/mlos_bench/storage/sql/trial.py +++ b/mlos_bench/mlos_bench/storage/sql/trial.py @@ -2,13 +2,16 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Saving and updating benchmark data using SQLAlchemy backend.""" +""":py:class:`.Storage.Trial` interface implementation for saving and restoring +the benchmark trial data using `SQLAlchemy `_ backend. +""" + import logging from datetime import datetime from typing import Any, Dict, List, Literal, Optional, Tuple -from sqlalchemy import Connection, Engine +from sqlalchemy.engine import Connection, Engine from sqlalchemy.exc import IntegrityError from mlos_bench.environments.status import Status diff --git a/mlos_bench/mlos_bench/storage/sql/trial_data.py b/mlos_bench/mlos_bench/storage/sql/trial_data.py index ac57b7b5c03..60027f24194 100644 --- a/mlos_bench/mlos_bench/storage/sql/trial_data.py +++ b/mlos_bench/mlos_bench/storage/sql/trial_data.py @@ -2,12 +2,14 @@ # Copyright (c) Microsoft Corporation. 
# Licensed under the MIT License. # -"""An interface to access the benchmark trial data stored in SQL DB.""" +"""An interface to access the benchmark trial data stored in SQL DB using the +:py:class:`.TrialData` interface. +""" from datetime import datetime from typing import TYPE_CHECKING, Optional import pandas -from sqlalchemy import Engine +from sqlalchemy.engine import Engine from mlos_bench.environments.status import Status from mlos_bench.storage.base_trial_data import TrialData diff --git a/mlos_bench/mlos_bench/storage/sql/tunable_config_data.py b/mlos_bench/mlos_bench/storage/sql/tunable_config_data.py index 40225039be5..97fb5d3d0b6 100644 --- a/mlos_bench/mlos_bench/storage/sql/tunable_config_data.py +++ b/mlos_bench/mlos_bench/storage/sql/tunable_config_data.py @@ -2,10 +2,12 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""An interface to access the tunable config data stored in SQL DB.""" +"""An interface to access the tunable config data stored in a SQL DB using the +:py:class:`.TunableConfigData` interface. +""" import pandas -from sqlalchemy import Engine +from sqlalchemy.engine import Engine from mlos_bench.storage.base_tunable_config_data import TunableConfigData from mlos_bench.storage.sql.schema import DbSchema diff --git a/mlos_bench/mlos_bench/storage/sql/tunable_config_trial_group_data.py b/mlos_bench/mlos_bench/storage/sql/tunable_config_trial_group_data.py index 0e8c022e7f0..6fac71be15f 100644 --- a/mlos_bench/mlos_bench/storage/sql/tunable_config_trial_group_data.py +++ b/mlos_bench/mlos_bench/storage/sql/tunable_config_trial_group_data.py @@ -2,12 +2,15 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""An interface to access the tunable config trial group data stored in SQL DB.""" +"""An interface to access the tunable config trial group data stored in a SQL DB using +the :py:class:`.TunableConfigTrialGroupData` interface. 
+""" from typing import TYPE_CHECKING, Dict, Optional import pandas -from sqlalchemy import Engine, Integer, func +from sqlalchemy import Integer, func +from sqlalchemy.engine import Engine from mlos_bench.storage.base_tunable_config_data import TunableConfigData from mlos_bench.storage.base_tunable_config_trial_group_data import ( diff --git a/mlos_bench/mlos_bench/storage/storage_factory.py b/mlos_bench/mlos_bench/storage/storage_factory.py index ea0201717d4..8980323a781 100644 --- a/mlos_bench/mlos_bench/storage/storage_factory.py +++ b/mlos_bench/mlos_bench/storage/storage_factory.py @@ -2,7 +2,15 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Factory method to create a new Storage instance from configs.""" +""" +Factory method to create a new :py:class:`.Storage` instance from a +:py:attr:`~mlos_bench.config.schemas.config_schemas.ConfigSchema.STORAGE` type json +config. + +See Also +-------- +mlos_bench.storage : For example usage. +""" from typing import Any, Dict, List, Optional diff --git a/mlos_bench/mlos_bench/tests/config/schemas/optimizers/test-cases/good/partial/grid_search_opt_minimal.jsonc b/mlos_bench/mlos_bench/tests/config/schemas/optimizers/test-cases/good/partial/grid_search_opt_minimal.jsonc new file mode 100644 index 00000000000..297e3a62c8e --- /dev/null +++ b/mlos_bench/mlos_bench/tests/config/schemas/optimizers/test-cases/good/partial/grid_search_opt_minimal.jsonc @@ -0,0 +1,4 @@ +{ + "class": "mlos_bench.optimizers.grid_search_optimizer.GridSearchOptimizer" + // no config required +} diff --git a/mlos_bench/mlos_bench/tests/event_loop_context_test.py b/mlos_bench/mlos_bench/tests/event_loop_context_test.py index eb92c4c1326..09f94eeb91d 100644 --- a/mlos_bench/mlos_bench/tests/event_loop_context_test.py +++ b/mlos_bench/mlos_bench/tests/event_loop_context_test.py @@ -10,10 +10,9 @@ from asyncio import AbstractEventLoop from threading import Thread from types import TracebackType -from typing import 
Optional, Type +from typing import Literal, Optional, Type import pytest -from typing_extensions import Literal from mlos_bench.event_loop_context import EventLoopContext diff --git a/mlos_bench/mlos_bench/tunables/tunable.py b/mlos_bench/mlos_bench/tunables/tunable.py index 4d2781ad11b..120be58238d 100644 --- a/mlos_bench/mlos_bench/tunables/tunable.py +++ b/mlos_bench/mlos_bench/tunables/tunable.py @@ -406,7 +406,7 @@ def in_range(self, value: Union[int, float, str, None]) -> bool: @property def category(self) -> Optional[str]: - """Get the current value of the tunable as a number.""" + """Get the current value of the tunable as a string.""" if self.is_categorical: return nullable(str, self._current_value) else: @@ -556,7 +556,7 @@ def range(self) -> Union[Tuple[int, int], Tuple[float, float]]: Returns ------- - range : (number, number) + range : Union[Tuple[int, int], Tuple[float, float]] A 2-tuple of numbers that represents the range of the tunable. Numbers can be int or float, depending on the type of the tunable. """ diff --git a/mlos_bench/mlos_bench/tunables/tunable_groups.py b/mlos_bench/mlos_bench/tunables/tunable_groups.py index 45d8a02f9c9..99772acfd05 100644 --- a/mlos_bench/mlos_bench/tunables/tunable_groups.py +++ b/mlos_bench/mlos_bench/tunables/tunable_groups.py @@ -178,7 +178,7 @@ def __iter__(self) -> Generator[Tuple[Tunable, CovariantTunableGroup], None, Non Returns ------- - [(tunable, group), ...] : iter(Tunable, CovariantTunableGroup) + [(tunable, group), ...] : Generator[Tuple[Tunable, CovariantTunableGroup], None, None] An iterator over all tunables in all groups. Each element is a 2-tuple of an instance of the Tunable parameter and covariant group it belongs to. 
""" diff --git a/mlos_bench/mlos_bench/util.py b/mlos_bench/mlos_bench/util.py index 945359bcddd..e7cd5a89862 100644 --- a/mlos_bench/mlos_bench/util.py +++ b/mlos_bench/mlos_bench/util.py @@ -39,9 +39,17 @@ from mlos_bench.services.base_service import Service from mlos_bench.storage.base_storage import Storage -# BaseTypeVar is a generic with a constraint of the three base classes. BaseTypeVar = TypeVar("BaseTypeVar", "Environment", "Optimizer", "Scheduler", "Service", "Storage") +"""BaseTypeVar is a generic with a constraint of the main base classes (e.g., +:py:class:`~mlos_bench.environments.base_environment.Environment`, +:py:class:`~mlos_bench.optimizers.base_optimizer.Optimizer`, +:py:class:`~mlos_bench.schedulers.base_scheduler.Scheduler`, +:py:class:`~mlos_bench.services.base_service.Service`, +:py:class:`~mlos_bench.storage.base_storage.Storage`, etc.). +""" + BaseTypes = Union["Environment", "Optimizer", "Scheduler", "Service", "Storage"] +"""Similar to :py:data:`.BaseTypeVar`, BaseTypes is a Union of the main base classes.""" # Adjusted from https://github.com/python/cpython/blob/v3.11.10/Lib/distutils/util.py#L308 @@ -50,8 +58,16 @@ def strtobool(val: str) -> bool: """ Convert a string representation of truth to true (1) or false (0). - True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values are 'n', 'no', - 'f', 'false', 'off', and '0'. Raises ValueError if 'val' is anything else. + Parameters + ---------- + val : str + True values are 'y', 'yes', 't', 'true', 'on', and '1'; + False values are 'n', 'no', 'f', 'false', 'off', and '0'. + + Raises + ------ + ValueError + If 'val' is anything else. 
""" val = val.lower() if val in {"y", "yes", "t", "true", "on", "1"}: @@ -64,7 +80,7 @@ def strtobool(val: str) -> bool: def preprocess_dynamic_configs(*, dest: dict, source: Optional[dict] = None) -> dict: """ - Replaces all $name values in the destination config with the corresponding value + Replaces all ``$name`` values in the destination config with the corresponding value from the source config. Parameters @@ -353,7 +369,7 @@ def utcify_timestamp(timestamp: datetime, *, origin: Literal["utc", "local"]) -> Parameters ---------- - timestamp : datetime + timestamp : datetime.datetime A timestamp to convert to UTC. Note: The original datetime may or may not have tzinfo associated with it. @@ -367,7 +383,7 @@ def utcify_timestamp(timestamp: datetime, *, origin: Literal["utc", "local"]) -> Returns ------- - datetime + datetime.datetime A datetime with zoneinfo in UTC. """ if timestamp.tzinfo is not None or origin == "local": diff --git a/mlos_core/mlos_core/__init__.py b/mlos_core/mlos_core/__init__.py index b8a72cef92c..13b1bf2af82 100644 --- a/mlos_core/mlos_core/__init__.py +++ b/mlos_core/mlos_core/__init__.py @@ -2,7 +2,112 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Basic initializer module for the mlos_core package.""" +""" +mlos_core is a wrapper around other OSS tuning libraries to provide a consistent +interface for autotuning experimentation. + +:py:mod:`mlos_core` can be installed from `pypi `_ +with ``pip install mlos-core`` from and provides the main +:py:mod:`Optimizer ` portions of the MLOS project for use with +autotuning purposes. +Although it is generally intended to be used with :py:mod:`mlos_bench` to help +automate the generation of ``(config, score)`` pairs to register with the Optimizer, +it can be used independently as well. 
+ +To do this it provides a small set of wrapper classes around other OSS tuning +libraries in order to provide a consistent interface so that the rest of the code +using it can easily exchange one optimizer for another (or even stack them). + +Specifically: + +- :py:class:`~mlos_core.optimizers.optimizer.BaseOptimizer` is the base class for all Optimizers + + Its core methods are: + + - :py:meth:`~mlos_core.optimizers.optimizer.BaseOptimizer.suggest` which returns a + new configuration to evaluate + - :py:meth:`~mlos_core.optimizers.optimizer.BaseOptimizer.register` which registers + a "score" for an evaluated configuration with the Optimizer + + Each operates on Pandas :py:class:`DataFrames ` as the lingua + franca for data science. + +- :py:meth:`mlos_core.optimizers.OptimizerFactory.create` is a factory function + that creates a new :py:type:`~mlos_core.optimizers.ConcreteOptimizer` instance + + To do this it uses the :py:class:`~mlos_core.optimizers.OptimizerType` enum to + specify which underlying optimizer to use (e.g., + :py:class:`~mlos_core.optimizers.OptimizerType.FLAML` or + :py:class:`~mlos_core.optimizers.OptimizerType.SMAC`). + +Examples +-------- +>>> # Import the necessary classes. +>>> import pandas +>>> from ConfigSpace import ConfigurationSpace, UniformIntegerHyperparameter +>>> from mlos_core.optimizers import OptimizerFactory, OptimizerType +>>> from mlos_core.spaces.adapters import SpaceAdapterFactory, SpaceAdapterType +>>> # Create a simple ConfigurationSpace with a single integer hyperparameter. +>>> cs = ConfigurationSpace(seed=1234) +>>> _ = cs.add(UniformIntegerHyperparameter("x", lower=0, upper=10)) +>>> # Create a new optimizer instance using the SMAC optimizer. +>>> opt_args = {"seed": 1234, "max_trials": 100} +>>> space_adpaters_kwargs = {} # no additional args for this example +>>> opt = OptimizerFactory.create( +... parameter_space=cs, +... optimization_targets=["y"], +... optimizer_type=OptimizerType.SMAC, +... 
optimizer_kwargs=opt_args, +... space_adapter_type=SpaceAdapterType.IDENTITY, # or LLAMATUNE +... space_adapter_kwargs=space_adpaters_kwargs, +... ) +>>> # Get a new configuration suggestion. +>>> (config_df, _metadata_df) = opt.suggest() +>>> # Examine the suggested configuration. +>>> assert len(config_df) == 1 +>>> config_df.iloc[0] +x 3 +Name: 0, dtype: int64 +>>> # Register the configuration and its corresponding target value +>>> score = 42 # a made up score +>>> scores_df = pandas.DataFrame({"y": [score]}) +>>> opt.register(configs=config_df, scores=scores_df) +>>> # Get a new configuration suggestion. +>>> (config_df, _metadata_df) = opt.suggest() +>>> config_df.iloc[0] +x 10 +Name: 0, dtype: int64 +>>> score = 7 # a better made up score +>>> # Optimizers minimize by convention, so a lower score is better +>>> # You can use a negative score to maximize values instead +>>> # +>>> # Convert it to a DataFrame again +>>> scores_df = pandas.DataFrame({"y": [score]}) +>>> opt.register(configs=config_df, scores=scores_df) +>>> # Get the best observations. +>>> (configs_df, scores_df, _contexts_df) = opt.get_best_observations() +>>> # The default is to only return one +>>> assert len(configs_df) == 1 +>>> assert len(scores_df) == 1 +>>> configs_df.iloc[0] +x 10 +Name: 1, dtype: int64 +>>> scores_df.iloc[0] +y 7 +Name: 1, dtype: int64 + +Notes +----- +See `mlos_core/README.md +`_ +for additional documentation and examples in the source tree. +""" from mlos_core.version import VERSION __version__ = VERSION + + +if __name__ == "__main__": + import doctest + + doctest.testmod() diff --git a/mlos_core/mlos_core/optimizers/__init__.py b/mlos_core/mlos_core/optimizers/__init__.py index e9f402c1878..3b00fa00cee 100644 --- a/mlos_core/mlos_core/optimizers/__init__.py +++ b/mlos_core/mlos_core/optimizers/__init__.py @@ -2,7 +2,31 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
# -"""Basic initializer module for the mlos_core optimizers.""" +""" +Initializer module for the mlos_core optimizers. + +Optimizers are the main component of the :py:mod:`mlos_core` package. +They act as a wrapper around other OSS tuning libraries to provide a consistent API +interface to allow experimenting with different autotuning algorithms. + +The :class:`~mlos_core.optimizers.optimizer.BaseOptimizer` class is the base class +for all Optimizers and provides the core +:py:meth:`~mlos_core.optimizers.optimizer.BaseOptimizer.suggest` and +:py:meth:`~mlos_core.optimizers.optimizer.BaseOptimizer.register` methods. + +This module also provides a simple :py:class:`~.OptimizerFactory` class to +:py:meth:`~.OptimizerFactory.create` an Optimizer. + +Examples +-------- +TODO: Add example usage here. + +Notes +----- +See `mlos_core/optimizers/README.md +`_ +for additional documentation and examples in the source tree. +""" from enum import Enum from typing import List, Optional, TypeVar @@ -16,6 +40,8 @@ from mlos_core.spaces.adapters import SpaceAdapterFactory, SpaceAdapterType __all__ = [ + "OptimizerType", + "ConcreteOptimizer", "SpaceAdapterType", "OptimizerFactory", "BaseOptimizer", @@ -26,34 +52,50 @@ class OptimizerType(Enum): - """Enumerate supported MlosCore optimizers.""" + """Enumerate supported mlos_core optimizers.""" RANDOM = RandomOptimizer - """An instance of RandomOptimizer class will be used.""" + """An instance of :class:`~mlos_core.optimizers.random_optimizer.RandomOptimizer` + class will be used. + """ FLAML = FlamlOptimizer - """An instance of FlamlOptimizer class will be used.""" + """An instance of :class:`~mlos_core.optimizers.flaml_optimizer.FlamlOptimizer` + class will be used. + """ SMAC = SmacOptimizer - """An instance of SmacOptimizer class will be used.""" + """An instance of + :class:`~mlos_core.optimizers.bayesian_optimizers.smac_optimizer.SmacOptimizer` + class will be used. 
+ """ # To make mypy happy, we need to define a type variable for each optimizer type. # https://github.com/python/mypy/issues/12952 # ConcreteOptimizer = TypeVar('ConcreteOptimizer', *[member.value for member in OptimizerType]) # To address this, we add a test for complete coverage of the enum. + ConcreteOptimizer = TypeVar( "ConcreteOptimizer", RandomOptimizer, FlamlOptimizer, SmacOptimizer, ) +""" +Type variable for concrete optimizer classes. + +(e.g., :class:`~mlos_core.optimizers.bayesian_optimizers.smac_optimizer.SmacOptimizer`, etc.) +""" DEFAULT_OPTIMIZER_TYPE = OptimizerType.FLAML +"""Default optimizer type to use if none is specified.""" class OptimizerFactory: - """Simple factory class for creating BaseOptimizer-derived objects.""" + """Simple factory class for creating + :class:`~mlos_core.optimizers.optimizer.BaseOptimizer`-derived objects. + """ # pylint: disable=too-few-public-methods diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py index a39a5516e83..cfdde1656dd 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/bayesian_optimizer.py @@ -29,11 +29,11 @@ def surrogate_predict( Parameters ---------- - configs : pd.DataFrame + configs : pandas.DataFrame Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. - context : pd.DataFrame + context : pandas.DataFrame Not Yet Implemented. """ pass # pylint: disable=unnecessary-pass # pragma: no cover @@ -51,11 +51,11 @@ def acquisition_function( Parameters ---------- - configs : pd.DataFrame + configs : pandas.DataFrame Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. - context : pd.DataFrame + context : pandas.DataFrame Not Yet Implemented. 
""" pass # pylint: disable=unnecessary-pass # pragma: no cover diff --git a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py index fc801d7d05c..b41016b013a 100644 --- a/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py +++ b/mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py @@ -3,9 +3,12 @@ # Licensed under the MIT License. # """ -Contains the wrapper class for SMAC Bayesian optimizers. +Contains the wrapper class for the :py:class:`.SmacOptimizer`. -See Also: +Notes +----- +See the `SMAC3 Documentation `_ for +more details. """ from logging import warning @@ -84,8 +87,8 @@ def __init__( Number of points evaluated at start to bootstrap the optimizer. Default depends on max_trials and number of parameters and max_ratio. Note: it can sometimes be useful to set this to 1 when pre-warming the - optimizer from historical data. - See Also: mlos_bench.optimizer.bulk_register + optimizer from historical data. See Also: + :py:meth:`mlos_bench.optimizers.base_optimizer.Optimizer.bulk_register` max_ratio : Optional[int] Maximum ratio of max_trials to be random configs to be evaluated @@ -93,10 +96,10 @@ def __init__( Useful if you want to explicitly control the number of random configs evaluated at start. - use_default_config: bool + use_default_config : bool Whether to use the default config for the first trial after random initialization. - n_random_probability: float + n_random_probability : float Probability of choosing to evaluate a random configuration during optimization. Defaults to `0.1`. Setting this to a higher value favors exploration over exploitation. """ @@ -193,6 +196,7 @@ def __init__( if max_ratio is not None: assert isinstance(max_ratio, float) and 0.0 <= max_ratio <= 1.0 initial_design_args["max_ratio"] = max_ratio + self._max_ratio = max_ratio # Use the default InitialDesign from SMAC. 
# (currently SBOL instead of LatinHypercube due to better uniformity @@ -232,6 +236,18 @@ def __del__(self) -> None: # Best-effort attempt to clean up, in case the user forgets to call .cleanup() self.cleanup() + @property + def max_ratio(self) -> Optional[float]: + """ + Gets the `max_ratio` parameter used in :py:meth:`constructor <.__init__>` of this + SmacOptimizer. + + Returns + ------- + Optional[float] + """ + return self._max_ratio + @property def n_random_init(self) -> int: """ @@ -240,7 +256,10 @@ def n_random_init(self) -> int: Note: This may not be equal to the value passed to the initializer, due to logic present in the SMAC. - See Also: max_ratio + + See Also + -------- + :py:attr:`.max_ratio` Returns ------- diff --git a/mlos_core/mlos_core/optimizers/flaml_optimizer.py b/mlos_core/mlos_core/optimizers/flaml_optimizer.py index e5272f103ec..6c0b3a25400 100644 --- a/mlos_core/mlos_core/optimizers/flaml_optimizer.py +++ b/mlos_core/mlos_core/optimizers/flaml_optimizer.py @@ -2,7 +2,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Contains the FlamlOptimizer class.""" +""" +Contains the :py:class:`.FlamlOptimizer` class. + +Notes +----- +See the `Flaml Documentation `_ for more +details. +""" from typing import Dict, List, NamedTuple, Optional, Tuple, Union from warnings import warn diff --git a/mlos_core/mlos_core/optimizers/optimizer.py b/mlos_core/mlos_core/optimizers/optimizer.py index d7da71ae864..84f0f8fab61 100644 --- a/mlos_core/mlos_core/optimizers/optimizer.py +++ b/mlos_core/mlos_core/optimizers/optimizer.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License.
# -"""Contains the BaseOptimizer abstract class.""" +"""Contains the :py:class:`.BaseOptimizer` abstract class.""" import collections from abc import ABCMeta, abstractmethod @@ -18,7 +18,10 @@ class BaseOptimizer(metaclass=ABCMeta): - """Optimizer abstract base class defining the basic interface.""" + """Optimizer abstract base class defining the basic interface: + :py:meth:`~.BaseOptimizer.suggest`, + :py:meth:`~.BaseOptimizer.register`, + """ # pylint: disable=too-many-instance-attributes @@ -39,15 +42,23 @@ def __init__( The parameter space to optimize. optimization_targets : List[str] The names of the optimization targets to minimize. + To maximize a target, use the negative of the target when registering scores. objective_weights : Optional[List[float]] Optional list of weights of optimization targets. space_adapter : BaseSpaceAdapter The space adapter class to employ for parameter space transformations. """ self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space + """The parameter space to optimize.""" + self.optimizer_parameter_space: ConfigSpace.ConfigurationSpace = ( parameter_space if space_adapter is None else space_adapter.target_parameter_space ) + """ + The parameter space actually used by the optimizer. + + (in case a :py:mod:`SpaceAdapter ` is used) + """ if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space: raise ValueError("Given parameter space differs from the one given to space adapter") @@ -84,16 +95,16 @@ def register( Parameters ---------- - configs : pd.DataFrame + configs : pandas.DataFrame Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. - scores : pd.DataFrame + scores : pandas.DataFrame Scores from running the configs. The index is the same as the index of the configs. - context : pd.DataFrame + context : pandas.DataFrame Not Yet Implemented. 
- metadata : Optional[pd.DataFrame] + metadata : Optional[pandas.DataFrame] Metadata returned by the backend optimizer's suggest method. """ # Do some input validation. @@ -134,13 +145,13 @@ def _register( Parameters ---------- - configs : pd.DataFrame + configs : pandas.DataFrame Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. - scores : pd.DataFrame + scores : pandas.DataFrame Scores from running the configs. The index is the same as the index of the configs. - context : pd.DataFrame + context : pandas.DataFrame Not Yet Implemented. """ pass # pylint: disable=unnecessary-pass # pragma: no cover @@ -157,7 +168,7 @@ def suggest( Parameters ---------- - context : pd.DataFrame + context : pandas.DataFrame Not Yet Implemented. defaults : bool Whether or not to return the default config instead of an optimizer guided one. @@ -165,10 +176,10 @@ def suggest( Returns ------- - configuration : pd.DataFrame + configuration : pandas.DataFrame Pandas dataframe with a single row. Column names are the parameter names. - metadata : Optional[pd.DataFrame] + metadata : Optional[pandas.DataFrame] The metadata associated with the given configuration used for evaluations. Backend optimizer specific. """ @@ -203,15 +214,15 @@ def _suggest( Parameters ---------- - context : pd.DataFrame + context : pandas.DataFrame Not Yet Implemented. Returns ------- - configuration : pd.DataFrame + configuration : pandas.DataFrame Pandas dataframe with a single row. Column names are the parameter names. - metadata : Optional[pd.DataFrame] + metadata : Optional[pandas.DataFrame] The metadata associated with the given configuration used for evaluations. Backend optimizer specific. """ @@ -232,12 +243,12 @@ def register_pending( Parameters ---------- - configs : pd.DataFrame + configs : pandas.DataFrame Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. 
- context : pd.DataFrame + context : pandas.DataFrame Not Yet Implemented. - metadata : Optional[pd.DataFrame] + metadata : Optional[pandas.DataFrame] Metadata returned by the backend optimizer's suggest method. """ pass # pylint: disable=unnecessary-pass # pragma: no cover @@ -248,7 +259,7 @@ def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.Data Returns ------- - observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] + observations : Tuple[pandas.DataFrame, pandas.DataFrame, Optional[pandas.DataFrame]] A triplet of (config, score, context) DataFrames of observations. """ if len(self._observations) == 0: @@ -281,7 +292,7 @@ def get_best_observations( Returns ------- - observations : Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]] + observations : Tuple[pandas.DataFrame, pandas.DataFrame, Optional[pandas.DataFrame]] A triplet of best (config, score, context) DataFrames of best observations. """ if len(self._observations) == 0: diff --git a/mlos_core/mlos_core/optimizers/random_optimizer.py b/mlos_core/mlos_core/optimizers/random_optimizer.py index 661a48a373c..a086c9d8042 100644 --- a/mlos_core/mlos_core/optimizers/random_optimizer.py +++ b/mlos_core/mlos_core/optimizers/random_optimizer.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Contains the RandomOptimizer class.""" +"""RandomOptimizer class.""" from typing import Optional, Tuple from warnings import warn @@ -14,13 +14,9 @@ class RandomOptimizer(BaseOptimizer): """ - Optimizer class that produces random suggestions. Useful for baseline comparison - against Bayesian optimizers. + Optimizer class that produces random suggestions. - Parameters - ---------- - parameter_space : ConfigSpace.ConfigurationSpace - The parameter space to optimize. + Useful for baseline comparison against Bayesian optimizers. 
""" def _register( @@ -38,11 +34,11 @@ def _register( Parameters ---------- - configs : pd.DataFrame + configs : pandas.DataFrame Dataframe of configs / parameters. The columns are parameter names and the rows are the configs. - scores : pd.DataFrame + scores : pandas.DataFrame Scores from running the configs. The index is the same as the index of the configs. context : None diff --git a/mlos_core/mlos_core/spaces/__init__.py b/mlos_core/mlos_core/spaces/__init__.py index 8de6887783d..cc81a5dcc49 100644 --- a/mlos_core/mlos_core/spaces/__init__.py +++ b/mlos_core/mlos_core/spaces/__init__.py @@ -2,4 +2,4 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Space adapters and converters init file.""" +"""Space adapters and converters.""" diff --git a/mlos_core/mlos_core/spaces/adapters/__init__.py b/mlos_core/mlos_core/spaces/adapters/__init__.py index 1645ac9cb45..608993af500 100644 --- a/mlos_core/mlos_core/spaces/adapters/__init__.py +++ b/mlos_core/mlos_core/spaces/adapters/__init__.py @@ -2,7 +2,34 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Basic initializer module for the mlos_core space adapters.""" +""" +Basic initializer module for the mlos_core space adapters. + +Space adapters provide a mechanism for automatic transformation of the original +:py:class:`ConfigSpace.ConfigurationSpace` provided to the optimizer into a new +space that is more suitable for the optimizer. + +By default the :py:class:`.IdentityAdapter` is used, which does not perform any +transformation. +But, for instance, the :py:class:`.LlamaTuneAdapter` can be used to automatically +transform the space to a lower dimensional one. + +See the :py:mod:`mlos_bench.optimizers.mlos_core_optimizer` module for more +information on how to do this with :py:mod:`mlos_bench`. + +This module provides a simple :py:class:`.SpaceAdapterFactory` class to +:py:meth:`~.SpaceAdapterFactory.create` space adapters. 
+ +Examples +-------- +TODO: Add example usage here. + +Notes +----- +See `mlos_core/spaces/adapters/README.md +`_ +for additional documentation and examples in the source tree. +""" from enum import Enum from typing import Optional, TypeVar @@ -13,19 +40,22 @@ from mlos_core.spaces.adapters.llamatune import LlamaTuneAdapter __all__ = [ + "ConcreteSpaceAdapter", "IdentityAdapter", "LlamaTuneAdapter", + "SpaceAdapterFactory", + "SpaceAdapterType", ] class SpaceAdapterType(Enum): - """Enumerate supported MlosCore space adapters.""" + """Enumerate supported mlos_core space adapters.""" IDENTITY = IdentityAdapter - """A no-op adapter will be used.""" + """A no-op adapter (:class:`.IdentityAdapter`) will be used.""" LLAMATUNE = LlamaTuneAdapter - """An instance of LlamaTuneAdapter class will be used.""" + """An instance of :class:`.LlamaTuneAdapter` class will be used.""" # To make mypy happy, we need to define a type variable for each optimizer type. @@ -40,10 +70,15 @@ class SpaceAdapterType(Enum): IdentityAdapter, LlamaTuneAdapter, ) +"""Type variable for concrete SpaceAdapter classes (e.g., +:class:`~mlos_core.spaces.adapters.identity_adapter.IdentityAdapter`, etc.) +""" class SpaceAdapterFactory: - """Simple factory class for creating BaseSpaceAdapter-derived objects.""" + """Simple factory class for creating + :class:`~mlos_core.spaces.adapters.adapter.BaseSpaceAdapter`-derived objects. + """ # pylint: disable=too-few-public-methods diff --git a/mlos_core/mlos_core/spaces/adapters/adapter.py b/mlos_core/mlos_core/spaces/adapters/adapter.py index 2d48a14c317..b4b53b73a08 100644 --- a/mlos_core/mlos_core/spaces/adapters/adapter.py +++ b/mlos_core/mlos_core/spaces/adapters/adapter.py @@ -2,7 +2,18 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Contains the BaseSpaceAdapter abstract class.""" +""" +Contains the BaseSpaceAdapter abstract class. 
+ +As mentioned in :py:mod:`mlos_core.spaces.adapters`, the space adapters provide a +mechanism for automatic transformation of the original +:py:class:`ConfigSpace.ConfigurationSpace` provided to the Optimizer into a new +space for the Optimizer to search over. + +Its main APIs are the :py:meth:`~.BaseSpaceAdapter.transform` and +:py:meth:`~.BaseSpaceAdapter.inverse_transform` methods, which are used to translate +configurations from one space to another. +""" from abc import ABCMeta, abstractmethod @@ -47,18 +58,19 @@ def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace: def transform(self, configuration: pd.DataFrame) -> pd.DataFrame: """ Translates a configuration, which belongs to the target parameter space, to the - original parameter space. This method is called by the `suggest` method of the - `BaseOptimizer` class. + original parameter space. This method is called by the + :py:meth:`~mlos_core.optimizers.optimizer.BaseOptimizer.suggest` method of the + :py:class:`~mlos_core.optimizers.optimizer.BaseOptimizer` class. Parameters ---------- - configuration : pd.DataFrame + configuration : pandas.DataFrame Pandas dataframe with a single row. Column names are the parameter names of the target parameter space. Returns ------- - configuration : pd.DataFrame + configuration : pandas.DataFrame Pandas dataframe with a single row, containing the translated configuration. Column names are the parameter names of the original parameter space. """ @@ -69,19 +81,20 @@ def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame: """ Translates a configuration, which belongs to the original parameter space, to the target parameter space. This method is called by the `register` method of - the `BaseOptimizer` class, and performs the inverse operation of - `BaseSpaceAdapter.transform` method.
+ the :py:class:`~mlos_core.optimizers.optimizer.BaseOptimizer` class, and + performs the inverse operation of :py:meth:`~.BaseSpaceAdapter.transform` + method. Parameters ---------- - configurations : pd.DataFrame + configurations : pandas.DataFrame Dataframe of configurations / parameters, which belong to the original parameter space. The columns are the parameter names the original parameter space and the rows are the configurations. Returns ------- - configurations : pd.DataFrame + configurations : pandas.DataFrame Dataframe of the translated configurations / parameters. The columns are the parameter names of the target parameter space and the rows are the configurations. diff --git a/mlos_core/mlos_core/spaces/adapters/llamatune.py b/mlos_core/mlos_core/spaces/adapters/llamatune.py index 5a39f863a56..625dd886d08 100644 --- a/mlos_core/mlos_core/spaces/adapters/llamatune.py +++ b/mlos_core/mlos_core/spaces/adapters/llamatune.py @@ -2,7 +2,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Implementation of LlamaTune space adapter.""" +""" +Implementation of LlamaTune space adapter. + +LlamaTune is a technique that transforms the original parameter space into a +lower-dimensional space to try and improve the sample efficiency of the underlying +optimizer by making use of the inherent parameter sensitivity correlations in most +systems. + +See Also: `LlamaTune: Sample-Efficient DBMS Configuration Tuning +`_. +""" import os from typing import Dict, List, Optional, Union from warnings import warn @@ -53,11 +63,11 @@ def __init__( # pylint: disable=too-many-arguments ---------- orig_parameter_space : ConfigSpace.ConfigurationSpace The original (user-provided) parameter space to optimize. - num_low_dims: int + num_low_dims : int Number of dimensions used in the low-dimensional parameter search space. 
- special_param_values_dict: Optional[dict] + special_param_values_dict : Optional[dict] Dictionary of special - max_unique_values_per_param: Optional[int]: + max_unique_values_per_param : Optional[int] Number of unique values per parameter. Used to discretize the parameter space. If `None` space discretization is disabled. """ diff --git a/mlos_core/mlos_core/spaces/converters/__init__.py b/mlos_core/mlos_core/spaces/converters/__init__.py index 2360bda24f8..3abebbfbe54 100644 --- a/mlos_core/mlos_core/spaces/converters/__init__.py +++ b/mlos_core/mlos_core/spaces/converters/__init__.py @@ -2,4 +2,11 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Space converters init file.""" +""" +Space converters init file. + +Space converters are helper functions that translate a +:py:class:`ConfigSpace.ConfigurationSpace` that :py:mod:`mlos_core` Optimizers take +as input to the underlying Optimizer's parameter description language (in case it +doesn't use :py:class:`ConfigSpace.ConfigurationSpace`). +""" diff --git a/mlos_core/mlos_core/spaces/converters/flaml.py b/mlos_core/mlos_core/spaces/converters/flaml.py index 71370853e4a..d0dc5e9b67b 100644 --- a/mlos_core/mlos_core/spaces/converters/flaml.py +++ b/mlos_core/mlos_core/spaces/converters/flaml.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
# -"""Contains space converters for FLAML.""" +"""Contains space converters for :py:mod:`~mlos_core.optimizers.flaml_optimizer`.""" import sys from typing import TYPE_CHECKING, Dict @@ -22,7 +22,10 @@ FlamlDomain: TypeAlias = flaml.tune.sample.Domain +"""Flaml domain type alias.""" + FlamlSpace: TypeAlias = Dict[str, flaml.tune.sample.Domain] +"""Flaml space type alias - a `Dict[str, FlamlDomain]`""" def configspace_to_flaml_space( diff --git a/mlos_core/mlos_core/spaces/converters/util.py b/mlos_core/mlos_core/spaces/converters/util.py index 4393890595e..de0edb7cd1b 100644 --- a/mlos_core/mlos_core/spaces/converters/util.py +++ b/mlos_core/mlos_core/spaces/converters/util.py @@ -16,16 +16,19 @@ def monkey_patch_hp_quantization(hp: Hyperparameter) -> Hyperparameter: Monkey-patch quantization into the Hyperparameter. Temporary workaround to dropped quantization support in ConfigSpace 1.0 - See Also: + + Notes + ----- + See . Parameters ---------- - hp : Hyperparameter + hp : ConfigSpace.hyperparameters.Hyperparameter ConfigSpace hyperparameter to patch. Returns ------- - hp : Hyperparameter + hp : ConfigSpace.hyperparameters.Hyperparameter Patched hyperparameter. """ if not isinstance(hp, NumericalHyperparameter): @@ -72,12 +75,12 @@ def monkey_patch_cs_quantization(cs: ConfigurationSpace) -> ConfigurationSpace: Parameters ---------- - cs : ConfigurationSpace + cs : ConfigSpace.ConfigurationSpace ConfigSpace to patch. Returns ------- - cs : ConfigurationSpace + cs : ConfigSpace.ConfigurationSpace Patched ConfigSpace.
""" for hp in cs.values(): diff --git a/mlos_core/mlos_core/tests/spaces/spaces_test.py b/mlos_core/mlos_core/tests/spaces/spaces_test.py index b98d40d6270..a54359be906 100644 --- a/mlos_core/mlos_core/tests/spaces/spaces_test.py +++ b/mlos_core/mlos_core/tests/spaces/spaces_test.py @@ -85,7 +85,7 @@ def sample(self, config_space: OptimizerSpace, n_samples: int = 1) -> npt.NDArra ---------- config_space : CS.ConfigurationSpace Configuration space to sample from. - n_samples : int, optional + n_samples : int Number of samples to use, by default 1. """ diff --git a/mlos_core/mlos_core/util.py b/mlos_core/mlos_core/util.py index 027bfd0e35b..2e1c382a31a 100644 --- a/mlos_core/mlos_core/util.py +++ b/mlos_core/mlos_core/util.py @@ -21,7 +21,7 @@ def config_to_dataframe(config: Configuration) -> pd.DataFrame: Returns ------- - pd.DataFrame + pandas.DataFrame A DataFrame with a single row, containing the config's parameters. """ return pd.DataFrame([dict(config)]) @@ -56,14 +56,14 @@ def normalize_config( Parameters ---------- - config_space : ConfigurationSpace + config_space : ConfigSpace.ConfigurationSpace The parameter space to use. config : dict The configuration to convert. Returns ------- - cs_config: Configuration + cs_config: ConfigSpace.Configuration A valid ConfigSpace configuration with inactive parameters removed. """ cs_config = Configuration(config_space, values=config, allow_inactive_with_values=True) diff --git a/mlos_viz/README.md b/mlos_viz/README.md index e4c5c907426..dd744442c28 100644 --- a/mlos_viz/README.md +++ b/mlos_viz/README.md @@ -1,4 +1,4 @@ -# mlos_viz +# mlos-viz The [`mlos_viz`](./) module is an aid to visualizing experiment benchmarking and optimization results generated and stored by [`mlos_bench`](../mlos_bench/). 
diff --git a/mlos_viz/mlos_viz/__init__.py b/mlos_viz/mlos_viz/__init__.py index 1dfc795d437..8450e32b12b 100644 --- a/mlos_viz/mlos_viz/__init__.py +++ b/mlos_viz/mlos_viz/__init__.py @@ -2,8 +2,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""mlos_viz is a framework to help visualizing, explain, and gain insights from results -from the mlos_bench framework for benchmarking and optimization automation. +""" +mlos_viz is a framework to help visualize, explain, and gain insights from results +from the :py:mod:`mlos_bench` framework for benchmarking and optimization automation. + +Its main entrypoint is the :py:func:`plot` function, which can be used to +automatically visualize :py:class:`~.ExperimentData` from :py:mod:`mlos_bench` using +other libraries for automatic data correlation and visualization like +:external:py:func:`dabl `. """ from enum import Enum @@ -63,12 +69,12 @@ def plot( ---------- exp_data: ExperimentData The experiment data to plot. - results_df : Optional["pandas.DataFrame"] - Optional results_df to plot. - If not provided, defaults to exp_data.results_df property. + results_df : Optional[pandas.DataFrame] + Optional `results_df` to plot. + If not provided, defaults to :py:attr:`.ExperimentData.results_df` property. objectives : Optional[Dict[str, Literal["min", "max"]]] Optional objectives to plot. - If not provided, defaults to exp_data.objectives property. + If not provided, defaults to :py:attr:`.ExperimentData.objectives` property. plotter_method: MlosVizMethod The method to use for visualizing the experiment results. filter_warnings: bool diff --git a/mlos_viz/mlos_viz/base.py b/mlos_viz/mlos_viz/base.py index 0c6d58cd7f8..e7a52f1bd8c 100644 --- a/mlos_viz/mlos_viz/base.py +++ b/mlos_viz/mlos_viz/base.py @@ -222,13 +222,14 @@ def limit_top_n_configs( exp_data : Optional[ExperimentData] The ExperimentData (e.g., obtained from the storage layer) to operate on.
results_df : Optional[pandas.DataFrame] - The results dataframe to augment, by default None to use the results_df property. - objectives : Iterable[str], optional + The results dataframe to augment, by default None to use + :py:attr:`.ExperimentData.results_df` property. + objectives : Iterable[str] Which result column(s) to use for sorting the configs, and in which direction ("min" or "max"). - By default None to automatically select the experiment objectives. - top_n_configs : int, optional - How many configs to return, including the default, by default 20. + By default None to automatically select the :py:attr:`.ExperimentData.objectives`. + top_n_configs : int + How many configs to return, including the default, by default 10. method: Literal["mean", "median", "p50", "p75", "p90", "p95", "p99"] = "mean", Which statistical method to use when sorting the config groups before determining the cutoff, by default "mean". @@ -348,12 +349,12 @@ def plot_optimizer_trends( ---------- exp_data : ExperimentData The ExperimentData (e.g., obtained from the storage layer) to plot. - results_df : Optional["pandas.DataFrame"] + results_df : Optional[pandas.DataFrame] Optional results_df to plot. - If not provided, defaults to exp_data.results_df property. + If not provided, defaults to :py:attr:`.ExperimentData.results_df` property. objectives : Optional[Dict[str, Literal["min", "max"]]] Optional objectives to plot. - If not provided, defaults to exp_data.objectives property. + If not provided, defaults to :py:attr:`.ExperimentData.objectives` property. """ (results_df, obj_cols) = expand_results_data_args(exp_data, results_df, objectives) (results_df, groupby_columns, groupby_column) = _add_groupby_desc_column(results_df) @@ -430,7 +431,8 @@ def plot_top_n_configs( ) -> None: # pylint: disable=too-many-locals """ - Plots the top-N configs along with the default config for the given ExperimentData. 
+ Plots the top-N configs along with the default config for the given + :py:class:`.ExperimentData`. Intended to be used from a Jupyter notebook. @@ -438,16 +440,17 @@ def plot_top_n_configs( ---------- exp_data: ExperimentData The experiment data to plot. - results_df : Optional["pandas.DataFrame"] + results_df : Optional[pandas.DataFrame] Optional results_df to plot. - If not provided, defaults to exp_data.results_df property. + If not provided, defaults to :py:attr:`.ExperimentData.results_df` property. objectives : Optional[Dict[str, Literal["min", "max"]]] Optional objectives to plot. - If not provided, defaults to exp_data.objectives property. + If not provided, defaults to :py:attr:`.ExperimentData.objectives` property. with_scatter_plot : bool Whether to also add scatter plot to the output figure. kwargs : dict - Remaining keyword arguments are passed along to the limit_top_n_configs function. + Remaining keyword arguments are passed along to the + :py:func:`limit_top_n_configs` function. """ (results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives) top_n_config_args = _get_kwarg_defaults(limit_top_n_configs, **kwargs) diff --git a/mlos_viz/mlos_viz/dabl.py b/mlos_viz/mlos_viz/dabl.py index 3f8ac640ad9..918390fdbca 100644 --- a/mlos_viz/mlos_viz/dabl.py +++ b/mlos_viz/mlos_viz/dabl.py @@ -2,7 +2,14 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # -"""Small wrapper functions for dabl plotting functions via mlos_bench data.""" +""" +Small wrapper functions for plotting :py:mod:`mlos_bench` data via +:external:py:func:`dabl.plot`. + +Notes +----- +See `dabl `_ for more information on the dabl library. +""" import warnings from typing import Dict, Literal, Optional @@ -20,13 +27,14 @@ def plot( objectives: Optional[Dict[str, Literal["min", "max"]]] = None, ) -> None: """ - Plots the Experiment results data using dabl. 
+ Plots the :py:class:`~mlos_bench.storage.base_storage.Storage.Experiment` results + data using :external:py:func:`dabl.plot`. Parameters ---------- exp_data : ExperimentData The ExperimentData (e.g., obtained from the storage layer) to plot. - results_df : Optional["pandas.DataFrame"] + results_df : Optional[pandas.DataFrame] Optional results_df to plot. If not provided, defaults to exp_data.results_df property. objectives : Optional[Dict[str, Literal["min", "max"]]] diff --git a/mlos_viz/mlos_viz/util.py b/mlos_viz/mlos_viz/util.py index cefc3080d9c..2e537021125 100644 --- a/mlos_viz/mlos_viz/util.py +++ b/mlos_viz/mlos_viz/util.py @@ -22,14 +22,14 @@ def expand_results_data_args( Parameters ---------- - exp_data : Optional[ExperimentData], optional + exp_data : Optional[ExperimentData] ExperimentData to operate on. - results_df : Optional[pandas.DataFrame], optional + results_df : Optional[pandas.DataFrame] Optional results_df argument. - Defaults to exp_data.results_df property. - objectives : Optional[Dict[str, Literal["min", "max"]]], optional + If not provided, defaults to :py:attr:`.ExperimentData.results_df` property. + objectives : Optional[Dict[str, Literal["min", "max"]]] Optional objectives set to operate on. - Defaults to exp_data.objectives property. + If not provided, defaults to :py:attr:`.ExperimentData.objectives` property. Returns ------- diff --git a/pyproject.toml b/pyproject.toml index 5464aa60b65..b5014df8a87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,3 +68,18 @@ disable = [ [tool.pylint.string] check-quote-consistency = true check-str-concat-over-line-jumps = true + +# Tell the vscode python extension to ignore some autogenerated files. 
+[tool.pyright] +exclude = [ + ".git", + ".mypy_cache", + ".pytest_cache", + "**/node_modules", + "**/__pycache__", + "**/*.egg-info", + "doc/source/autoapi", + "doc/build/html", + "doc/build/doctrees", + "htmlcov", +] diff --git a/setup.cfg b/setup.cfg index cd3bdbb9916..f8b0c945f2d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,14 +39,15 @@ addopts = -l --ff --nf -n auto + --doctest-modules # --dist loadgroup # --log-level=DEBUG # Moved these to Makefile (coverage is expensive and we only need it in the pipelines generally). #--cov=mlos_core --cov-report=xml -testpaths = mlos_core mlos_bench +testpaths = mlos_core mlos_bench mlos_viz # Ignore some upstream deprecation warnings. filterwarnings = - ignore:.*(get_hyperparam|get_dictionary).*:DeprecationWarning:smac:0 + ignore:.*(get_hyperparam|get_dictionary|get_parents_of|(list\(.*values\(\)\))).*:DeprecationWarning:smac:0 ignore:.*(Please leave at default or explicitly set .size=None).*:DeprecationWarning:smac:0 ignore:.*(Trying to register a configuration that was not previously suggested).*:UserWarning:.*llamatune.*:0 ignore:.*(DISPLAY environment variable is set).*:UserWarning:.*conftest.*:0