From f47bf3451cdf337a346e555ef38e9962d5beb811 Mon Sep 17 00:00:00 2001
From: Mikhail Sveshnikov <mike0sv@gmail.com>
Date: Thu, 6 Oct 2022 15:58:58 +0300
Subject: [PATCH] Add docstrings to extensions (#413)

* Fix docstrings

* comments from mlem.ai PR

* ignore mocks

* fix tests and update metavars

* dot

* feedback

* no dots

* add exts tests

* Add docstrings to extensions

Co-authored-by: Alexander Guschin <1aguschin@gmail.com>
---
 mlem/cli/main.py                    |  3 +++
 mlem/cli/serve.py                   |  3 +--
 mlem/contrib/bitbucketfs.py         |  5 +++++
 mlem/contrib/callable.py            |  5 +++++
 mlem/contrib/catboost.py            |  5 +++++
 mlem/contrib/docker/__init__.py     |  4 +++-
 mlem/contrib/dvc.py                 |  5 +++++
 mlem/contrib/fastapi.py             |  5 +++++
 mlem/contrib/github.py              |  5 +++++
 mlem/contrib/gitlabfs.py            |  5 +++++
 mlem/contrib/heroku/__init__.py     |  5 +++++
 mlem/contrib/kubernetes/__init__.py |  3 +++
 mlem/contrib/lightgbm.py            |  6 +++++
 mlem/contrib/numpy.py               |  5 +++++
 mlem/contrib/onnx.py                |  5 +++++
 mlem/contrib/pandas.py              |  6 +++++
 mlem/contrib/pip/__init__.py        |  6 +++++
 mlem/contrib/rabbitmq.py            |  5 +++++
 mlem/contrib/sagemaker/__init__.py  |  5 +++++
 mlem/contrib/sklearn.py             |  5 +++++
 mlem/contrib/tensorflow.py          |  6 +++++
 mlem/contrib/torch.py               |  7 ++++++
 mlem/contrib/xgboost.py             |  6 +++++
 mlem/ext.py                         | 20 +++++++++++++++++
 tests/test_ext.py                   | 35 ++++++++++++++++++++++++++++-
 25 files changed, 166 insertions(+), 4 deletions(-)

diff --git a/mlem/cli/main.py b/mlem/cli/main.py
index 5d54cac0..55aaedf5 100644
--- a/mlem/cli/main.py
+++ b/mlem/cli/main.py
@@ -47,6 +47,9 @@
 PATH_METAVAR = "path"
 COMMITISH_METAVAR = "commitish"
 
+PATH_METAVAR = "path"
+COMMITISH_METAVAR = "commitish"
+
 
 class MlemFormatter(HelpFormatter):
     def write_heading(self, heading: str) -> None:
diff --git a/mlem/cli/serve.py b/mlem/cli/serve.py
index 8b1f8979..10fb010a 100644
--- a/mlem/cli/serve.py
+++ b/mlem/cli/serve.py
@@ -26,8 +26,7 @@
 
 serve = Typer(
     name="serve",
-    help="""Create an API from model methods using a server implementation.
-    """,
+    help="""Create an API from model methods using a server implementation.""",
     cls=mlem_group("runtime"),
     subcommand_metavar="server",
 )
diff --git a/mlem/contrib/bitbucketfs.py b/mlem/contrib/bitbucketfs.py
index b25da96a..e8209ce7 100644
--- a/mlem/contrib/bitbucketfs.py
+++ b/mlem/contrib/bitbucketfs.py
@@ -1,3 +1,8 @@
+"""BitBucket URI support
+Extension type: uri
+
+Implementation of `BitbucketFileSystem` and `BitbucketResolver`
+"""
 import posixpath
 from typing import ClassVar, List, Optional
 from urllib.parse import quote_plus, urljoin, urlparse, urlsplit
diff --git a/mlem/contrib/callable.py b/mlem/contrib/callable.py
index fb08bb54..44513234 100644
--- a/mlem/contrib/callable.py
+++ b/mlem/contrib/callable.py
@@ -1,3 +1,8 @@
+"""MLEM Models from arbitrary callables
+Extension type: model
+
+ModelType implementation to turn any python callable into MLEM Model
+"""
 import posixpath
 from collections import defaultdict
 from importlib import import_module
diff --git a/mlem/contrib/catboost.py b/mlem/contrib/catboost.py
index a3fe35c1..ff069b2c 100644
--- a/mlem/contrib/catboost.py
+++ b/mlem/contrib/catboost.py
@@ -1,3 +1,8 @@
+"""CatBoost Models Support
+Extension type: model
+
+Implementations of ModelType and ModelIO for `CatBoostClassifier` and `CatBoostRegressor`
+"""
 import os
 import posixpath
 import tempfile
diff --git a/mlem/contrib/docker/__init__.py b/mlem/contrib/docker/__init__.py
index d4daad67..b8e65dad 100644
--- a/mlem/contrib/docker/__init__.py
+++ b/mlem/contrib/docker/__init__.py
@@ -1,4 +1,6 @@
-"""
+"""Docker builds support
+Extension type: deployment
+
 Building docker images from the model
 or packing all necessary things to do that in a folder
 """
diff --git a/mlem/contrib/dvc.py b/mlem/contrib/dvc.py
index ee44e6b7..e19192be 100644
--- a/mlem/contrib/dvc.py
+++ b/mlem/contrib/dvc.py
@@ -1,3 +1,8 @@
+"""DVC Support
+Extension type: storage
+
+Support for storing artifacts with DVC
+"""
 import contextlib
 import os.path
 import posixpath
diff --git a/mlem/contrib/fastapi.py b/mlem/contrib/fastapi.py
index 905096eb..807e671f 100644
--- a/mlem/contrib/fastapi.py
+++ b/mlem/contrib/fastapi.py
@@ -1,3 +1,8 @@
+"""FastAPI serving
+Extension type: serving
+
+FastAPIServer implementation
+"""
 import logging
 from collections.abc import Callable
 from types import ModuleType
diff --git a/mlem/contrib/github.py b/mlem/contrib/github.py
index f71800db..097a5b16 100644
--- a/mlem/contrib/github.py
+++ b/mlem/contrib/github.py
@@ -1,3 +1,8 @@
+"""Github URI support
+Extension type: uri
+
+Implementation of `GithubResolver`
+"""
 import pathlib
 import posixpath
 import re
diff --git a/mlem/contrib/gitlabfs.py b/mlem/contrib/gitlabfs.py
index 4dd2b1a9..a80e9255 100644
--- a/mlem/contrib/gitlabfs.py
+++ b/mlem/contrib/gitlabfs.py
@@ -1,3 +1,8 @@
+"""Gitlab URI support
+Extension type: uri
+
+Implementation of `GitlabFileSystem` and `GitlabResolver`
+"""
 import posixpath
 from typing import ClassVar, Optional
 from urllib.parse import quote_plus, urlparse, urlsplit
diff --git a/mlem/contrib/heroku/__init__.py b/mlem/contrib/heroku/__init__.py
index e69de29b..d8f00fee 100644
--- a/mlem/contrib/heroku/__init__.py
+++ b/mlem/contrib/heroku/__init__.py
@@ -0,0 +1,5 @@
+"""Heroku Deployments support
+Extension type: deployment
+
+Implements MlemEnv, MlemDeployment and DeployState to work with heroku.com
+"""
diff --git a/mlem/contrib/kubernetes/__init__.py b/mlem/contrib/kubernetes/__init__.py
index e69de29b..b6b1b1ae 100644
--- a/mlem/contrib/kubernetes/__init__.py
+++ b/mlem/contrib/kubernetes/__init__.py
@@ -0,0 +1,3 @@
+"""Kubernetes Deployments support
+Extension type: deployment
+"""
diff --git a/mlem/contrib/lightgbm.py b/mlem/contrib/lightgbm.py
index eac70965..4914d9db 100644
--- a/mlem/contrib/lightgbm.py
+++ b/mlem/contrib/lightgbm.py
@@ -1,3 +1,9 @@
+"""LightGBM models support
+Extension type: model
+
+ModelType and ModelIO implementations for `lightgbm.Booster` as well as
+LightGBMDataType with Reader and Writer for `lightgbm.Dataset`
+"""
 import os
 import posixpath
 import tempfile
diff --git a/mlem/contrib/numpy.py b/mlem/contrib/numpy.py
index 5ea7ddf5..a3206008 100644
--- a/mlem/contrib/numpy.py
+++ b/mlem/contrib/numpy.py
@@ -1,3 +1,8 @@
+"""Numpy data types support
+Extension type: data
+
+DataType, Reader and Writer implementations for `np.ndarray` and `np.number` primitives
+"""
 from types import ModuleType
 from typing import Any, ClassVar, Iterator, List, Optional, Tuple, Type, Union
 
diff --git a/mlem/contrib/onnx.py b/mlem/contrib/onnx.py
index df3a8526..27156070 100644
--- a/mlem/contrib/onnx.py
+++ b/mlem/contrib/onnx.py
@@ -1,3 +1,8 @@
+"""ONNX models support
+Extension type: model
+
+ModelType and ModelIO implementations for `onnx.ModelProto`
+"""
 from typing import Any, ClassVar, List, Optional, Union
 
 import numpy as np
diff --git a/mlem/contrib/pandas.py b/mlem/contrib/pandas.py
index 9bd4c164..85ce4481 100644
--- a/mlem/contrib/pandas.py
+++ b/mlem/contrib/pandas.py
@@ -1,3 +1,9 @@
+"""Pandas data types support
+Extension type: data
+
+DataType, Reader and Writer implementations for `pd.DataFrame` and `pd.Series`
+ImportHook implementation for files saved with pandas
+"""
 import os.path
 import posixpath
 import re
diff --git a/mlem/contrib/pip/__init__.py b/mlem/contrib/pip/__init__.py
index e69de29b..b5d60c10 100644
--- a/mlem/contrib/pip/__init__.py
+++ b/mlem/contrib/pip/__init__.py
@@ -0,0 +1,6 @@
+"""Python Package builds support
+Extension type: build
+
+Contains two Builder implementations: `pip` to create a directory with
+Python Package from model and `whl` to create a wheel file with Python Package
+"""
diff --git a/mlem/contrib/rabbitmq.py b/mlem/contrib/rabbitmq.py
index 90a6a6ff..2a90cda3 100644
--- a/mlem/contrib/rabbitmq.py
+++ b/mlem/contrib/rabbitmq.py
@@ -1,3 +1,8 @@
+"""RabbitMQ serving
+Extension type: serving
+
+RabbitMQServer implementation
+"""
 import json
 from time import time
 from typing import Callable, ClassVar, Optional
diff --git a/mlem/contrib/sagemaker/__init__.py b/mlem/contrib/sagemaker/__init__.py
index e69de29b..cdd63793 100644
--- a/mlem/contrib/sagemaker/__init__.py
+++ b/mlem/contrib/sagemaker/__init__.py
@@ -0,0 +1,5 @@
+"""SageMaker Deployments support
+Extension type: deployment
+
+Implements MlemEnv, MlemDeployment and DeployState to work with AWS SageMaker
+"""
diff --git a/mlem/contrib/sklearn.py b/mlem/contrib/sklearn.py
index b81d8000..66eae66b 100644
--- a/mlem/contrib/sklearn.py
+++ b/mlem/contrib/sklearn.py
@@ -1,3 +1,8 @@
+"""Scikit-Learn models support
+Extension type: model
+
+ModelType implementations for any sklearn-compatible classes as well as `Pipeline`
+"""
 from typing import Any, ClassVar, List, Optional, Union
 
 import sklearn
diff --git a/mlem/contrib/tensorflow.py b/mlem/contrib/tensorflow.py
index 83c9455d..813e1398 100644
--- a/mlem/contrib/tensorflow.py
+++ b/mlem/contrib/tensorflow.py
@@ -1,3 +1,9 @@
+"""Tensorflow models support
+Extension type: model
+
+ModelType and ModelIO implementations for `tf.keras.Model`
+DataType, Reader and Writer implementations for `tf.Tensor`
+"""
 import posixpath
 import tempfile
 from typing import Any, ClassVar, Iterator, List, Optional, Tuple
diff --git a/mlem/contrib/torch.py b/mlem/contrib/torch.py
index 76d9837b..9f379684 100644
--- a/mlem/contrib/torch.py
+++ b/mlem/contrib/torch.py
@@ -1,3 +1,10 @@
+"""Torch models support
+Extension type: model
+
+ModelType and ModelIO implementations for `torch.nn.Module`
+ImportHook for importing files saved with `torch.save`
+DataType, Reader and Writer implementations for `torch.Tensor`
+"""
 from typing import Any, ClassVar, Iterator, List, Optional, Tuple
 
 import torch
diff --git a/mlem/contrib/xgboost.py b/mlem/contrib/xgboost.py
index 56b85aed..571d9622 100644
--- a/mlem/contrib/xgboost.py
+++ b/mlem/contrib/xgboost.py
@@ -1,3 +1,9 @@
+"""XGBoost models support
+Extension type: model
+
+ModelType and ModelIO implementations for `xgboost.Booster` as well as
+DataType, Reader and Writer implementations for `xgboost.DMatrix`
+"""
 import os
 import posixpath
 import tempfile
diff --git a/mlem/ext.py b/mlem/ext.py
index 31150828..58660dcb 100644
--- a/mlem/ext.py
+++ b/mlem/ext.py
@@ -4,6 +4,7 @@
 """
 import importlib
 import logging
+import re
 import sys
 from types import ModuleType
 from typing import Callable, Dict, List, Optional, Union
@@ -109,6 +110,12 @@ class ExtensionLoader:
         Extension("mlem.contrib.gitlabfs", [], True),
         Extension("mlem.contrib.bitbucketfs", [], True),
         Extension("mlem.contrib.sagemaker", ["sagemaker", "boto3"], False),
+        Extension("mlem.contrib.dvc", ["dvc"], False),
+        Extension(
+            "mlem.contrib.heroku", ["fastapi", "uvicorn", "docker"], False
+        ),
+        Extension("mlem.contrib.pip", [], False),
+        Extension("mlem.contrib.kubernetes", ["kubernetes", "docker"], False),
     )
 
     _loaded_extensions: Dict[Extension, ModuleType] = {}
@@ -258,6 +265,19 @@ def load_extensions(*exts: str):
         ExtensionLoader.load(ext)
 
 
+def get_ext_type(ext: Union[str, Extension]) -> str:
+    """Get extension type declared in the extension module docstring
+
+    Raises ValueError if there is no `Extension type: <type>` declaration
+    """
+    ext_module = ext.module if isinstance(ext, Extension) else ext
+    doc = import_module(ext_module).__doc__ or ""
+    search = re.search(r"Extension type: (\w+)", doc)
+    if search is None:
+        raise ValueError(f"{ext_module} extension doesn't define its type")
+    return search.group(1)
+
+
 # Copyright 2019 Zyfra
 # Copyright 2021 Iterative
 #
diff --git a/tests/test_ext.py b/tests/test_ext.py
index 3de02447..7dddff31 100644
--- a/tests/test_ext.py
+++ b/tests/test_ext.py
@@ -1,8 +1,12 @@
+import os
 import re
+from importlib import import_module
 from pathlib import Path
 
-from mlem import ExtensionLoader
+import pytest
+
 from mlem.config import MlemConfig, MlemConfigBase
+from mlem.ext import ExtensionLoader, get_ext_type
 from mlem.utils.entrypoints import (
     MLEM_CONFIG_ENTRY_POINT,
     MLEM_ENTRY_POINT,
@@ -85,3 +89,32 @@ def test_all_ext_has_pip_extra():
         assert name in extras
         ext_extras = extras[name]
         assert set(reqs) == {re.split("[~=]", r)[0] for r in ext_extras}
+
+
+def test_all_ext_registered():
+    """Each module or package in mlem/contrib must be a builtin extension"""
+    from mlem import contrib
+
+    contrib_dir = os.path.dirname(contrib.__file__)
+    expected = set()
+    for entry in os.listdir(contrib_dir):
+        if entry.startswith("__"):
+            continue
+        stem = entry[:-3] if entry.endswith(".py") else entry
+        expected.add(f"mlem.contrib.{stem}")
+    assert set(ExtensionLoader.builtin_extensions) == expected
+
+
+@pytest.mark.parametrize("mod", ExtensionLoader.builtin_extensions.keys())
+def test_all_ext_docstring(mod):
+    """Check extension module docstring
+
+    Every builtin extension must have a module docstring that declares
+    one of the known extension types
+    """
+    known_types = {"build", "data", "deployment", "model"}
+    known_types |= {"serving", "storage", "uri"}
+
+    doc = import_module(mod).__doc__
+    assert doc is not None
+    assert get_ext_type(mod) in known_types