diff --git a/content/docs/extensions/build/index.md b/content/docs/extensions/build/index.md new file mode 100644 index 00000000..327051ca --- /dev/null +++ b/content/docs/extensions/build/index.md @@ -0,0 +1,7 @@ +# Build extensions + +Build extensions add new types of builders to use with `build` +[API](/doc/api-reference/build) and [CLI](/doc/command-reference/build) commands + +Typically they will implement [Builder](/doc/user-guide/mlem-abcs#builder) +interface diff --git a/content/docs/extensions/build/pip.md b/content/docs/extensions/build/pip.md new file mode 100644 index 00000000..2166895a --- /dev/null +++ b/content/docs/extensions/build/pip.md @@ -0,0 +1,102 @@ +# Python Package Builds Support + +Contains two Builder implementations: `pip` to create a directory with Python +Package from model and `whl` to create a wheel file with Python Package + +## Description + +**TODO** + +## Examples + +### Creating Python package from model using API + +```python +from mlem.api import build + +build(builder="pip", + model="https://github.com/iterative/example-mlem-get-started/rf", + package_name="my_model_package", + target="./build" +) + +# ! pip install ./build +import my_model_package + +data = ... 
+my_model_package.predict(data) +``` + +### Creating Python wheel package from model using CLI + +```cli +$ mlem build whl -m https://github.com/iterative/example-mlem-get-started/rf \ + --package_name my_model_package --target ./build --version 1.0.0 +$ pip install ./build/my_model_package-1.0.0-py3-none-any.whl +``` + +### Creating wheel builder declaration and using it with CLI + +```cli +$ mlem declare builder whl whl_conf --package_name my_model_package \ + --target ./build --author mike0sv --email mike0sv@gmail.com --version 1.0.0 +$ mlem build --load whl_conf \ + --model https://github.com/iterative/example-mlem-get-started/rf +$ pip install ./build/my_model_package-1.0.0-py3-none-any.whl +``` + +## Implementation reference + +### `class PipBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `pip` + + Create a directory python package + +**Fields**: + +- `package_name: str` _(required)_ - Name of python package + +- `target: str` _(required)_ - Path to save result + +- `python_version: str` - Required python version + +- `short_description: str = ""` - short_description + +- `url: str = ""` - url + +- `email: str = ""` - author's email + +- `author: str = ""` - author's name + +- `version: str = "0.0.0"` - package version + +--- + +### `class WhlBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `whl` + + Create a wheel with python package + +**Fields**: + +- `package_name: str` _(required)_ - Name of python package + +- `target: str` _(required)_ - Path to save result + +- `python_version: str` - Required python version + +- `short_description: str = ""` - short_description + +- `url: str = ""` - url + +- `email: str = ""` - author's email + +- `author: str = ""` - author's name + +- `version: str = "0.0.0"` - package version diff --git a/content/docs/extensions/data/index.md b/content/docs/extensions/data/index.md new file mode 100644 index 00000000..23a1d679 --- /dev/null +++ b/content/docs/extensions/data/index.md @@ -0,0 
+1,11 @@ +# Data extensions + +Data extensions add support for new types of data objects that MLEM can convert +into MLEM data objects in [`save` API method](/doc/api-reference/save) + +Typically they will implement [DataType](/doc/user-guide/mlem-abcs#datatype), +[DataReader](/doc/user-guide/mlem-abcs#datareader) and +[DataWriter](/doc/user-guide/mlem-abcs#datawriter) interfaces. + +Some also implement [ImportHook](/doc/user-guide/mlem-abcs#importhook) to +support [importing](/doc/user-guide/importing) files of some format. diff --git a/content/docs/extensions/data/numpy.md b/content/docs/extensions/data/numpy.md new file mode 100644 index 00000000..951e7e3c --- /dev/null +++ b/content/docs/extensions/data/numpy.md @@ -0,0 +1,113 @@ +# Numpy Data Types Support + +DataType, Reader and Writer implementations for `np.ndarray` and `np.number` +primitives + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[numpy] +# or +pip install numpy +``` + +## Examples + +### Saving and loading numpy array + +```python +import numpy as np + +from mlem.api import save, load + + +data = np.zeros((100,)) + +save(data, "array") + +data = load("array") +``` + +## Implementation reference + +### `class NumpyArrayReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `numpy` + + DataReader implementation for numpy ndarray + +**Fields**: + +- `data_type: DataType` _(required)_ - Resulting data type + +--- + +### `class NumpyNumberReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `numpy_number` + + Read np.number objects + +**Fields**: + +- `data_type: NumpyNumberType` _(required)_ - Resulting data type + +--- + +### `class NumpyNdarrayType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `ndarray` + + DataType implementation for `np.ndarray` + +**Fields**: + +- `dtype: str` _(required)_ - Data type of elements + +--- + +### `class NumpyNumberType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `number` 
+ + numpy.number DataType + +**Fields**: + +- `dtype: str` _(required)_ - `numpy.number` type name as string + +--- + +### `class NumpyArrayWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `numpy` + + DataWriter implementation for numpy ndarray + +**No fields** + +--- + +### `class NumpyNumberWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `numpy_number` + + Write np.number objects + +**No fields** diff --git a/content/docs/extensions/data/pandas.md b/content/docs/extensions/data/pandas.md new file mode 100644 index 00000000..0d98162a --- /dev/null +++ b/content/docs/extensions/data/pandas.md @@ -0,0 +1,119 @@ +# Pandas Data Types Support + +DataType, Reader and Writer implementations for `pd.DataFrame` and `pd.Series` +ImportHook implementation for files saved with pandas + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[pandas] +# or +pip install pandas +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class PandasReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `pandas` + + DataReader for pandas dataframes + +**Fields**: + +- `data_type: DataFrameType` _(required)_ - Resulting data type + +- `format: str` _(required)_ - name of pandas-supported format + +--- + +### `class PandasSeriesReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `pandas_series` + + DataReader for pandas series + +**Fields**: + +- `data_type: SeriesType` _(required)_ - Resulting data type + +- `format: str` _(required)_ - name of pandas-supported format + +--- + +### `class DataFrameType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `dataframe` + + :class:`.DataType` implementation for `pandas.DataFrame` + +**No fields** + +--- + +### `class SeriesType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `series` + + :class:`.DataType` implementation for `pandas.Series` objects which + stores them as built-in Python dicts + 
+**No fields** + +--- + +### `class PandasWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `pandas` + + DataWriter for pandas dataframes + +**Fields**: + +- `format: str` _(required)_ - name of pandas-supported format + +--- + +### `class PandasSeriesWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `pandas_series` + + DataWriter for pandas series + +**Fields**: + +- `format: str` _(required)_ - name of pandas-supported format + +--- + +### `class PandasImport` + +**MlemABC parent type**: `import` + +**MlemABC type**: `pandas` + + Import files as pd.DataFrame + +**No fields** diff --git a/content/docs/extensions/deployment/docker.md b/content/docs/extensions/deployment/docker.md new file mode 100644 index 00000000..15cd509b --- /dev/null +++ b/content/docs/extensions/deployment/docker.md @@ -0,0 +1,192 @@ +# Docker Builds Support + +Building docker images from the model or packing all necessary things to do that +in a folder + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[docker] +# or +pip install docker +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class DockerImageBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `docker` + + Build docker image from model + +**Fields**: + +- `image: DockerImage` _(required)_ - Image parameters + +- `server: Server` - Server to use + +- `args: DockerBuildArgs = DockerBuildArgs()` - Additional docker arguments + +- `env: DockerEnv = DockerEnv()` - Where to build and push image. 
Defaults to + local docker daemon + +- `force_overwrite: bool = False` - Ignore existing image with same name + +- `push: bool = True` - Push image to registry after it is built + +--- + +### `class DockerDirBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `docker_dir` + + Create a directory with docker context to build docker image + +**Fields**: + +- `target: str` _(required)_ - Path to save result + +- `server: Server` - Server to use + +- `args: DockerBuildArgs = DockerBuildArgs()` - Additional docker arguments + +--- + +### `class DockerContainerState` + +**MlemABC parent type**: `deploy_state` + +**MlemABC type**: `docker_container` + + State of docker container deployment + +**Fields**: + +- `model_hash: str` - hash of deployed model meta + +- `image: DockerImage` - Built image + +- `container_name: str` - Name of container + +- `container_id: str` - Started container id + +--- + +### `class DockerContainer` + +**MlemABC parent type**: `deployment` + +**MlemABC type**: `docker_container` + + MlemDeployment implementation for docker containers + +**Fields**: + +- `server: Server` - Server to use + +- `args: DockerBuildArgs = DockerBuildArgs()` - Additional docker arguments + +- `state_manager: StateManager` - State manager used + +- `container_name: str` - Name to use for container + +- `image_name: str` - Name to use for image + +- `rm: bool = True` - Remove container on stop + +--- + +### `class DockerIORegistry` + +**MlemABC parent type**: `docker_registry` + +**MlemABC type**: `docker_io` + + The class represents docker.io registry. + +**No fields** + +--- + +### `class DockerRegistry` + +**MlemABC parent type**: `docker_registry` + +**MlemABC type**: `local` + + Registry for docker images. 
This is the default implementation that + represents registry of the docker daemon + +**No fields** + +--- + +### `class RemoteRegistry` + +**MlemABC parent type**: `docker_registry` + +**MlemABC type**: `remote` + + DockerRegistry implementation for official Docker Registry (as in + https://docs.docker.com/registry/) + +**Fields**: + +- `host: str` - Address of the registry + +--- + +### `class DockerEnv` + +**MlemABC parent type**: `env` + +**MlemABC type**: `docker` + + MlemEnv implementation for docker environment + +**Fields**: + +- `registry: DockerRegistry = DockerRegistry()` - Default registry to push + images to + +- `daemon: DockerDaemon = host=''` - Docker daemon parameters + +--- + +### `class DockerBuildArgs` + + Container for DockerBuild arguments + +**Fields**: + +- `python_version: str = "3.9.13"` - Python version to use default: version of + running interpreter + +- `run_cmd: str = "sh run.sh"` - command to run in container + +- `package_install_cmd: str = "apt-get update && apt-get -y upgrade && apt-get install --no-install-recommends -y"` - + command to install packages. Default is apt-get, change it for other package + manager + +- `package_clean_cmd: str = "&& apt-get clean && rm -rf /var/lib/apt/lists/*"` - + command to clean after package installation + +- `mlem_whl: str` - a path to mlem .whl file. If it is empty, mlem will be + installed from pip + +- `platform: str` - platform to build docker for, see + docs.docker.com/desktop/multi-arch/ diff --git a/content/docs/extensions/deployment/heroku.md b/content/docs/extensions/deployment/heroku.md new file mode 100644 index 00000000..340be690 --- /dev/null +++ b/content/docs/extensions/deployment/heroku.md @@ -0,0 +1,130 @@ +# Heroku Deployments Support + +Implements MlemEnv, MlemDeployment and DeployState to work with heroku.com + +## Description + +To create applications on Heroku platform all you need is Heroku API key. + +
+ +### ⚙️How to obtain Heroku API key + +- Go to [heroku.com](http://heroku.com) +- Sign up or log in with an existing account +- Go to account settings by clicking your profile picture on the main page +- Find the API Key section and reveal the existing key or re-generate it + +
+ +You can either set `HEROKU_API_KEY` environment variable or use [Heroku CLI](https://devcenter.heroku.com/articles/heroku-cli) +to run `heroku login`. + +> You can also set API token via `--api_key` option to some commands, but this +> may have security issues + +## Requirements + +```bash +pip install mlem[heroku] +# or +pip install fastapi uvicorn docker +``` + +## Examples + +### Deploying model to heroku from CLI + +```cli +$ mlem deployment run heroku_app \ + --model https://github.com/iterative/example-mlem-get-started/rf \ + --target heroku \ + --app_name example-mlem-get-started-app +``` + +## Implementation reference + +### `class HerokuState` + +**MlemABC parent type**: `deploy_state` + +**MlemABC type**: `heroku` + + State of heroku deployment + +**Fields**: + +- `model_hash: str` - hash of deployed model meta + +- `app: HerokuAppMeta` - Created heroku app + +- `image: DockerImage` - Built docker image + +--- + +### `class HerokuDeployment` + +**MlemABC parent type**: `deployment` + +**MlemABC type**: `heroku` + + Heroku App + +**Fields**: + +- `app_name: str` _(required)_ - Heroku application name + +- `state_manager: StateManager` - State manager used + +- `region: str = "us"` - Heroku region + +- `stack: str = "container"` - Stack to use + +- `team: str` - Heroku team + +--- + +### `class HerokuRemoteRegistry` + +**MlemABC parent type**: `docker_registry` + +**MlemABC type**: `heroku` + + Heroku docker registry + +**Fields**: + +- `host: str = "registry.heroku.com"` - Registry host + +- `api_key: str` - HEROKU_API_KEY + +--- + +### `class HerokuEnv` + +**MlemABC parent type**: `env` + +**MlemABC type**: `heroku` + + Heroku Account + +**Fields**: + +- `api_key: str` - HEROKU_API_KEY - advised to set via env variable or + `heroku login` + +--- + +### `class HerokuServer` + +**MlemABC parent type**: `server` + +**MlemABC type**: `_heroku` + + Special FastAPI server to pickup port from env PORT + +**Fields**: + +- `host: str = "0.0.0.0"` - Network interface to use + +- `port: int = 8080` - Port to
use diff --git a/content/docs/extensions/deployment/index.md b/content/docs/extensions/deployment/index.md new file mode 100644 index 00000000..330dac75 --- /dev/null +++ b/content/docs/extensions/deployment/index.md @@ -0,0 +1,12 @@ +# Deployment extensions + +Deployment extensions add support for new target platforms to deploy your models +to. They are used with [`deploy` API method](/doc/api-reference/deploy) and +[deployment CLI commands](/doc/command-reference/deployment). + +Typically they will implement [MlemEnv](/doc/user-guide/mlem-abcs#mlemenv), +[MlemDeployment](/doc/user-guide/mlem-abcs#mlemdeployment) and +[DeployState](/doc/user-guide/mlem-abcs#deploystate) interfaces. + +Some also implement specific [Server](/doc/user-guide/mlem-abcs#server) or +[Builder](/doc/user-guide/mlem-abcs#builder) interfaces diff --git a/content/docs/extensions/deployment/kubernetes.md b/content/docs/extensions/deployment/kubernetes.md new file mode 100644 index 00000000..27bb2cd5 --- /dev/null +++ b/content/docs/extensions/deployment/kubernetes.md @@ -0,0 +1,156 @@ +# Kubernetes Deployments Support + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[kubernetes] +# or +pip install kubernetes docker +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class K8sYamlBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `kubernetes` + + MlemBuilder implementation for building Kubernetes manifests/yamls + +**Fields**: + +- `target: str` _(required)_ - Target path for the manifest/yaml + +- `namespace: str = "mlem"` - Namespace to create kubernetes resources such as + pods, service in + +- `image_name: str = "ml"` - Name of the docker image to be deployed + +- `image_uri: str = "ml:latest"` - URI of the docker image to be deployed + +- `image_pull_policy: ImagePullPolicy = "Always"` - Image pull policy for the + docker image to be deployed + +- `port: int = 8080` - Port where the service should be available + +- 
`service_type: ServiceType = NodePortService()` - Type of service by which + endpoints of the model are exposed + +--- + +### `class K8sDeploymentState` + +**MlemABC parent type**: `deploy_state` + +**MlemABC type**: `kubernetes` + + DeployState implementation for Kubernetes deployments + +**Fields**: + +- `model_hash: str` - hash of deployed model meta + +- `image: DockerImage` - Docker Image being used for Deployment + +- `deployment_name: str` - Name of Deployment + +--- + +### `class K8sDeployment` + +**MlemABC parent type**: `deployment` + +**MlemABC type**: `kubernetes` + + MlemDeployment implementation for Kubernetes deployments + +**Fields**: + +- `namespace: str = "mlem"` - Namespace to create kubernetes resources such as + pods, service in + +- `image_name: str = "ml"` - Name of the docker image to be deployed + +- `image_uri: str = "ml:latest"` - URI of the docker image to be deployed + +- `image_pull_policy: ImagePullPolicy = "Always"` - Image pull policy for the + docker image to be deployed + +- `port: int = 8080` - Port where the service should be available + +- `service_type: ServiceType = NodePortService()` - Type of service by which + endpoints of the model are exposed + +- `state_manager: StateManager` - State manager used + +- `server: Server` - Type of Server to use, with options such as FastAPI, + RabbitMQ etc. 
+ +- `registry: DockerRegistry = DockerRegistry()` - Docker registry + +- `daemon: DockerDaemon = host=''` - Docker daemon + +- `kube_config_file_path: str` - Path for kube config file of the cluster + +--- + +### `class K8sEnv` + +**MlemABC parent type**: `env` + +**MlemABC type**: `kubernetes` + + MlemEnv implementation for Kubernetes Environments + +**Fields**: + +- `registry: DockerRegistry` - Docker registry + +--- + +### `class ClusterIPService` + +**MlemABC parent type**: `k8s_service_type` + +**MlemABC type**: `clusterip` + + ClusterIP Service implementation for service inside a Kubernetes + Cluster + +**No fields** + +--- + +### `class LoadBalancerService` + +**MlemABC parent type**: `k8s_service_type` + +**MlemABC type**: `loadbalancer` + + LoadBalancer Service implementation for service inside a Kubernetes + Cluster + +**No fields** + +--- + +### `class NodePortService` + +**MlemABC parent type**: `k8s_service_type` + +**MlemABC type**: `nodeport` + + NodePort Service implementation for service inside a Kubernetes Cluster + +**No fields** diff --git a/content/docs/extensions/deployment/sagemaker.md b/content/docs/extensions/deployment/sagemaker.md new file mode 100644 index 00000000..8cf5bf40 --- /dev/null +++ b/content/docs/extensions/deployment/sagemaker.md @@ -0,0 +1,162 @@ +# Sagemaker Deployments Support + +Implements MlemEnv, MlemDeployment and DeployState to work with AWS SageMaker + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[sagemaker] +# or +pip install sagemaker boto3 +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class SagemakerClient` + +**MlemABC parent type**: `client` + +**MlemABC type**: `sagemaker` + + Client to make SageMaker requests + +**Fields**: + +- `endpoint_name: str` _(required)_ - Name of SageMaker Endpoint + +- `aws_vars: AWSVars` _(required)_ - AWS Configuration + +- `signature: Signature` _(required)_ - Signature of deployed method + +--- + +### `class 
SagemakerDeployState` + +**MlemABC parent type**: `deploy_state` + +**MlemABC type**: `sagemaker` + + State of SageMaker deployment + +**Fields**: + +- `model_hash: str` - hash of deployed model meta + +- `image: DockerImage` - Built image + +- `image_tag: str` - Built image tag + +- `model_location: str` - Location of uploaded model + +- `endpoint_name: str` - Name of SageMaker endpoint + +- `endpoint_model_hash: str` - Hash of deployed model + +- `method_signature: Signature` - Signature of deployed method + +- `region: str` - AWS Region + +--- + +### `class SagemakerDeployment` + +**MlemABC parent type**: `deployment` + +**MlemABC type**: `sagemaker` + + SageMaker Deployment + +**Fields**: + +- `state_manager: StateManager` - State manager used + +- `method: str = "predict"` - Model method to be deployed + +- `image_tag: str` - Name of the docker image to use + +- `use_prebuilt: bool = False` - Use pre-built docker image. If True, image_name + should be set + +- `model_arch_location: str` - Path on s3 to store model archive (excluding + bucket) + +- `model_name: str` - Name for SageMaker Model + +- `endpoint_name: str` - Name for SageMaker Endpoint + +- `initial_instance_count: int = 1` - Initial instance count for Endpoint + +- `instance_type: str = "ml.t2.medium"` - Instance type for Endpoint + +- `accelerator_type: str` - The size of the Elastic Inference (EI) instance to + use + +--- + +### `class ECRegistry` + +**MlemABC parent type**: `docker_registry` + +**MlemABC type**: `ecr` + + ECR registry + +**Fields**: + +- `account: str` _(required)_ - AWS Account + +- `region: str` _(required)_ - AWS Region + +- `host: str` - Address of the registry + +--- + +### `class SagemakerEnv` + +**MlemABC parent type**: `env` + +**MlemABC type**: `sagemaker` + + SageMaker environment + +**Fields**: + +- `role: str` - Default role + +- `account: str` - Default account + +- `region: str` - Default region + +- `bucket: str` - Default bucket + +- `profile: str` - Default 
profile + +- `ecr_repository: str` - Default ECR repository + +--- + +### `class SageMakerServer` + +**MlemABC parent type**: `server` + +**MlemABC type**: `_sagemaker` + + Server to use inside SageMaker containers + +**Fields**: + +- `host: str = "0.0.0.0"` - Host to use + +- `port: int = 8080` - Port to use + +- `method: str = "predict"` - Method to expose diff --git a/content/docs/extensions/index.md b/content/docs/extensions/index.md new file mode 100644 index 00000000..ad9a3009 --- /dev/null +++ b/content/docs/extensions/index.md @@ -0,0 +1 @@ +# MLEM Extensions diff --git a/content/docs/extensions/model/callable.md b/content/docs/extensions/model/callable.md new file mode 100644 index 00000000..91a33612 --- /dev/null +++ b/content/docs/extensions/model/callable.md @@ -0,0 +1,45 @@ +# Mlem Models From Arbitrary Callables + +[ModelType](/doc/user-guide/mlem-abcs#modeltype) implementation to turn any +python callable into MLEM Model + +## Description + +**TODO** + +## Examples + +```python + +``` + +## Implementation reference + +### `class PickleModelIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `pickle` + + ModelIO for pickle-able models + When model is dumped, recursively checks objects if they can be dumped + with ModelIO instead of pickling + So, if you use function that internally calls tensorflow model, this + tensorflow model will be dumped with + tensorflow code and not pickled + +**No fields** + +--- + +### `class CallableModelType` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `callable` + + ModelType implementation for arbitrary callables + +**Fields**: + +- `io: ModelIO` _(required)_ - Model IO diff --git a/content/docs/extensions/model/catboost.md b/content/docs/extensions/model/catboost.md new file mode 100644 index 00000000..f9ddd557 --- /dev/null +++ b/content/docs/extensions/model/catboost.md @@ -0,0 +1,53 @@ +# Catboost Models Support + +Implementations of [ModelType](/doc/user-guide/mlem-abcs#modeltype) and 
+[ModelIO](/doc/user-guide/mlem-abcs#modelio) for `CatBoostClassifier` and +`CatBoostRegressor` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[catboost] +# or +pip install catboost +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class CatBoostModelIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `catboost_io` + + :class:`mlem.core.model.ModelIO` for CatBoost models. + +**Fields**: + +- `model_type: CBType = "reg"` - Type of catboost model + +--- + +### `class CatBoostModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `catboost` + + :class:`mlem.core.model.ModelType` for CatBoost models. + `.model` attribute is a `catboost.CatBoostClassifier` or + `catboost.CatBoostRegressor` instance + +**Fields**: + +- `io: ModelIO = CatBoostModelIO()` - Model IO diff --git a/content/docs/extensions/model/index.md b/content/docs/extensions/model/index.md new file mode 100644 index 00000000..48fd1d41 --- /dev/null +++ b/content/docs/extensions/model/index.md @@ -0,0 +1,10 @@ +# Model extensions + +Model extensions add support for new types of models that MLEM can convert into MLEM +model objects in [`save` API method](/doc/api-reference/save) + +Typically they will implement [ModelType](/doc/user-guide/mlem-abcs#modeltype) +and [ModelIO](/doc/user-guide/mlem-abcs#modelio) interfaces. + +Some also implement [DataType](/doc/user-guide/mlem-abcs#datatype) interface if +specific data objects are needed for model to work. 
diff --git a/content/docs/extensions/model/lightgbm.md b/content/docs/extensions/model/lightgbm.md new file mode 100644 index 00000000..64343bf0 --- /dev/null +++ b/content/docs/extensions/model/lightgbm.md @@ -0,0 +1,96 @@ +# Lightgbm Models Support + +[ModelType](/doc/user-guide/mlem-abcs#modeltype) and +[ModelIO](/doc/user-guide/mlem-abcs#modelio) implementations for +`lightgbm.Booster` as well as LightGBMDataType with Reader and Writer for +`lightgbm.Dataset` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[lightgbm] +# or +pip install lightgbm +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class LightGBMDataReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `lightgbm` + + Wrapper reader for lightgbm.Dataset objects + +**Fields**: + +- `data_type: LightGBMDataType` _(required)_ - Resulting data type + +- `inner: DataReader` _(required)_ - Inner reader + +--- + +### `class LightGBMDataType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `lightgbm` + + :class:`.DataType` implementation for `lightgbm.Dataset` type + + :param inner: :class:`.DataType` instance for underlying data + +**Fields**: + +- `inner: DataType` _(required)_ - Inner DataType + +--- + +### `class LightGBMDataWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `lightgbm` + + Wrapper writer for lightgbm.Dataset objects + +**No fields** + +--- + +### `class LightGBMModelIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `lightgbm_io` + + :class:`.ModelIO` implementation for `lightgbm.Booster` type + +**Fields**: + +- `model_file_name: str = "model.lgb"` - Filename to use + +--- + +### `class LightGBMModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `lightgbm` + + :class:`.ModelType` implementation for `lightgbm.Booster` type + +**Fields**: + +- `io: ModelIO = LightGBMModelIO()` - LightGBMModelIO diff --git a/content/docs/extensions/model/onnx.md 
b/content/docs/extensions/model/onnx.md new file mode 100644 index 00000000..77b97a89 --- /dev/null +++ b/content/docs/extensions/model/onnx.md @@ -0,0 +1,49 @@ +# Onnx Models Support + +[ModelType](/doc/user-guide/mlem-abcs#modeltype) and +[ModelIO](/doc/user-guide/mlem-abcs#modelio) implementations for +`onnx.ModelProto` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[onnx] +# or +pip install onnx +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class ModelProtoIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `model_proto` + + IO for ONNX model object + +**No fields** + +--- + +### `class ONNXModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `onnx` + + :class:`mlem.core.model.ModelType` implementation for `onnx` models + +**Fields**: + +- `io: ModelIO = ModelProtoIO()` - Model IO diff --git a/content/docs/extensions/model/sklearn.md b/content/docs/extensions/model/sklearn.md new file mode 100644 index 00000000..30dedd1a --- /dev/null +++ b/content/docs/extensions/model/sklearn.md @@ -0,0 +1,69 @@ +# Scikit-Learn Models Support + +[ModelType](/doc/user-guide/mlem-abcs#modeltype) implementations for any +sklearn-compatible classes as well as `Pipeline` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[sklearn] +# or +pip install scikit-learn +``` + +## Examples + +### Saving and loading Scikit-Learn model + +```python +from sklearn.datasets import load_iris +from sklearn.ensemble import RandomForestClassifier + +from mlem.api import save, load + + +data, y = load_iris(return_X_y=True, as_frame=True) +rf = RandomForestClassifier() +rf.fit(data, y) + +save( + rf, + "rf", + sample_data=data, +) + +rf = load("rf") +rf.predict(data) +``` + +## Implementation reference + +### `class SklearnModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `sklearn` + + ModelType implementation for `scikit-learn` models + +**Fields**: + +- `io: 
ModelIO = SimplePickleIO()` - IO + +--- + +### `class SklearnPipelineType` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `sklearn_pipeline` + + ModelType implementation for `scikit-learn` pipelines + +**Fields**: + +- `io: ModelIO = SimplePickleIO()` - IO diff --git a/content/docs/extensions/model/tensorflow.md b/content/docs/extensions/model/tensorflow.md new file mode 100644 index 00000000..8eef66ce --- /dev/null +++ b/content/docs/extensions/model/tensorflow.md @@ -0,0 +1,92 @@ +# Tensorflow Models Support + +[ModelType](/doc/user-guide/mlem-abcs#modeltype) and +[ModelIO](/doc/user-guide/mlem-abcs#modelio) implementations for +`tf.keras.Model` DataType, Reader and Writer implementations for `tf.Tensor` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[tensorflow] +# or +pip install tensorflow +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class TFTensorReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `tf_tensor` + + Read tensorflow tensors from np format + +**Fields**: + +- `data_type: DataType` _(required)_ - Resulting data type + +--- + +### `class TFTensorDataType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `tf_tensor` + + DataType implementation for `tensorflow.Tensor` + +**Fields**: + +- `dtype: str` _(required)_ - Data type of `tensorflow.Tensor` objects in data + +--- + +### `class TFTensorWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `tf_tensor` + + Write tensorflow tensors to np format + +**No fields** + +--- + +### `class TFKerasModelIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `tf_keras` + + IO for Tensorflow Keras models (:class:`tensorflow.keras.Model` + objects) + +**Fields**: + +- `save_format: str` - `tf` for custom net classes and `h5` otherwise + +--- + +### `class TFKerasModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `tf_keras` + + :class:`.ModelType` 
implementation for Tensorflow Keras models + +**Fields**: + +- `io: ModelIO = TFKerasModelIO()` - IO diff --git a/content/docs/extensions/model/torch.md b/content/docs/extensions/model/torch.md new file mode 100644 index 00000000..92686b9b --- /dev/null +++ b/content/docs/extensions/model/torch.md @@ -0,0 +1,104 @@ +# Torch Models Support + +[ModelType](/doc/user-guide/mlem-abcs#modeltype) and +[ModelIO](/doc/user-guide/mlem-abcs#modelio) implementations for +`torch.nn.Module` ImportHook for importing files saved with `torch.save` +DataType, Reader and Writer implementations for `torch.Tensor` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[torch] +# or +pip install torch +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class TorchTensorReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `torch` + + Read torch tensors + +**Fields**: + +- `data_type: DataType` _(required)_ - Resulting data type + +--- + +### `class TorchTensorDataType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `torch` + + DataType implementation for `torch.Tensor` + +**Fields**: + +- `dtype: str` _(required)_ - Type name of `torch.Tensor` elements + +--- + +### `class TorchTensorWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `torch` + + Write torch tensors + +**No fields** + +--- + +### `class TorchModelImport` + +**MlemABC parent type**: `import` + +**MlemABC type**: `torch` + + Import torch models saved with `torch.save` + +**No fields** + +--- + +### `class TorchModelIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `torch_io` + + IO for PyTorch models + +**Fields**: + +- `is_jit: bool = False` - Is model jit compiled + +--- + +### `class TorchModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `torch` + + :class:`.ModelType` implementation for PyTorch models + +**Fields**: + +- `io: ModelIO = TorchModelIO()` - TorchModelIO diff --git 
a/content/docs/extensions/model/xgboost.md b/content/docs/extensions/model/xgboost.md new file mode 100644 index 00000000..91937c9b --- /dev/null +++ b/content/docs/extensions/model/xgboost.md @@ -0,0 +1,67 @@ +# Xgboost Models Support + +[ModelType](/doc/user-guide/mlem-abcs#modeltype) and +[ModelIO](/doc/user-guide/mlem-abcs#modelio) implementations for +`xgboost.Booster` as well as DataType, Reader and Writer implementations for +`xgboost.DMatrix` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[xgboost] +# or +pip install xgboost +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class DMatrixDataType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `xgboost_dmatrix` + + DataType implementation for xgboost.DMatrix type + +**Fields**: + +- `is_from_list: bool` _(required)_ - Whether DMatrix can be constructed from + list + +--- + +### `class XGBoostModelIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `xgboost_io` + + :class:`~.ModelIO` implementation for XGBoost models + +**Fields**: + +- `model_file_name: str = "model.xgb"` - Filename to use + +--- + +### `class XGBoostModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `xgboost` + + :class:`~.ModelType` implementation for XGBoost models + +**Fields**: + +- `io: ModelIO = XGBoostModelIO()` - Model IO diff --git a/content/docs/extensions/serving/fastapi.md b/content/docs/extensions/serving/fastapi.md new file mode 100644 index 00000000..8bd0e243 --- /dev/null +++ b/content/docs/extensions/serving/fastapi.md @@ -0,0 +1,74 @@ +# Fastapi Serving + +FastAPIServer implementation + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[fastapi] +# or +pip install fastapi uvicorn +``` + +## Examples + +### Running FastAPI model server from code + +```python +from mlem.api import serve + +serve(model="https://github.com/iterative/example-mlem-get-started/rf", + server="fastapi", + host="0.0.0.0", + 
port=8000, +) +``` + +### Running FastAPI model server from CLI + +```cli +$ mlem serve fastapi \ + --model https://github.com/iterative/example-mlem-get-started/rf \ + --host 0.0.0.0 --port 8000 + +``` + +### Applying data to running FastAPI server from API + +```python +from mlem.api import apply_remote + +apply_remote("http", + "https://github.com/iterative/example-mlem-get-started/iris.csv", + method="predict", + host="0.0.0.0", + port=8000 +) +``` + +### Applying data to running FastAPI server from CLI + +```cli +$ mlem apply-remote http --method predict --host 0.0.0.0 --port 8000 \ + --data https://github.com/iterative/example-mlem-get-started/iris.csv +``` + +## Implementation reference + +### `class FastAPIServer` + +**MlemABC parent type**: `server` + +**MlemABC type**: `fastapi` + + Serves model with http + +**Fields**: + +- `host: str = "0.0.0.0"` - Network interface to use + +- `port: int = 8080` - Port to use diff --git a/content/docs/extensions/serving/index.md b/content/docs/extensions/serving/index.md new file mode 100644 index 00000000..78b0773f --- /dev/null +++ b/content/docs/extensions/serving/index.md @@ -0,0 +1,7 @@ +# Serving extensions + +Serving extensions add new types of servers to use with `serve` +[API](/doc/api-reference/serve) and [CLI](/doc/command-reference/serve) commands. + +Typically they will implement [Server](/doc/user-guide/mlem-abcs#server) and +[Client](/doc/user-guide/mlem-abcs#client) interfaces. diff --git a/content/docs/extensions/serving/rabbitmq.md b/content/docs/extensions/serving/rabbitmq.md new file mode 100644 index 00000000..824cd26b --- /dev/null +++ b/content/docs/extensions/serving/rabbitmq.md @@ -0,0 +1,64 @@ +# Rabbitmq Serving + +RabbitMQServer implementation + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[rmq] +# or +pip install pika +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class RabbitMQClient` + +**MlemABC parent type**: `client` + 
+**MlemABC type**: `rmq` + + Access models served with rmq server + +**Fields**: + +- `host: str` _(required)_ - Host of RMQ instance + +- `port: int` _(required)_ - Port of RMQ instance + +- `exchange: str = ""` - RMQ exchange to use + +- `queue_prefix: str = ""` - Queue prefix + +- `timeout: float = 0` - Time to wait for response. 0 means indefinite + +--- + +### `class RabbitMQServer` + +**MlemABC parent type**: `server` + +**MlemABC type**: `rmq` + + RMQ server that consumes requests and produces model predictions + from/to RMQ instance + +**Fields**: + +- `host: str` _(required)_ - Host of RMQ instance + +- `port: int` _(required)_ - Port of RMQ instance + +- `exchange: str = ""` - RMQ exchange to use + +- `queue_prefix: str = ""` - Queue prefix diff --git a/content/docs/extensions/storage/dvc.md b/content/docs/extensions/storage/dvc.md new file mode 100644 index 00000000..1f1ab02a --- /dev/null +++ b/content/docs/extensions/storage/dvc.md @@ -0,0 +1,61 @@ +# Dvc Support + +Support for storing artifacts with DVC + +## Description + +To enable DVC support for artifacts loading you need to configure DVCStorage as +your default storage like this: + +```cli +$ mlem config set core.storage.type dvc +``` + +You need to do this before you save anything with mlem + +## Requirements + +```bash +pip install mlem[dvc] +# or +pip install dvc +``` + +## Examples + +```python + +``` + +## Implementation reference + +### `class DVCArtifact` + +**MlemABC parent type**: `artifact` + +**MlemABC type**: `dvc` + + Local artifact that can be also read from DVC cache + +**Fields**: + +- `uri: str` _(required)_ - Local path to file + +- `size: int` _(required)_ - size in bytes + +- `hash: str` _(required)_ - md5 hash + +--- + +### `class DVCStorage` + +**MlemABC parent type**: `storage` + +**MlemABC type**: `dvc` + + User-managed dvc storage, which means user should + track corresponding files with dvc manually. 
+ +**Fields**: + +- `uri: str = ""` - Base storage path diff --git a/content/docs/extensions/storage/index.md b/content/docs/extensions/storage/index.md new file mode 100644 index 00000000..bddd45c5 --- /dev/null +++ b/content/docs/extensions/storage/index.md @@ -0,0 +1,7 @@ +# Storage extensions + +Storage extensions add support for new types of locations that MLEM can use to +store artifacts in [`save` API method](/doc/api-reference/save). + +Typically they will implement [Storage](/doc/user-guide/mlem-abcs#storage) and +[Artifact](/doc/user-guide/mlem-abcs#artifact) interfaces. diff --git a/content/docs/extensions/uri/bitbucketfs.md b/content/docs/extensions/uri/bitbucketfs.md new file mode 100644 index 00000000..aa7f0c85 --- /dev/null +++ b/content/docs/extensions/uri/bitbucketfs.md @@ -0,0 +1,25 @@ +# Bitbucket Uri Support + +Implementation of `BitbucketFileSystem` and `BitbucketResolver` + +## Description + +**TODO** + +## Examples + +```python + +``` + +## Implementation reference + +### `class BitBucketResolver` + +**MlemABC parent type**: `resolver` + +**MlemABC type**: `bitbucket` + + Resolve bitbucket URIs + +**No fields** diff --git a/content/docs/extensions/uri/github.md b/content/docs/extensions/uri/github.md new file mode 100644 index 00000000..5e614bd3 --- /dev/null +++ b/content/docs/extensions/uri/github.md @@ -0,0 +1,33 @@ +# Github Uri Support + +Implementation of `GithubResolver` + +## Description + +This extension does not require additional setup and works out-of-the-box. All +URIs starting with `https://github.com` will be resolved. 
+ +The `rev` option is supported; you can specify it separately or as a part of the +URI, like this: `https://github.com/<user>/<repo>/tree/<rev>/path` + +## Examples + +```python +from mlem.api import load + +model = load("https://github.com/iterative/example-mlem-get-started/rf", + rev="main" +) +``` + +## Implementation reference + +### `class GithubResolver` + +**MlemABC parent type**: `resolver` + +**MlemABC type**: `github` + + Resolve https://github.com URLs + +**No fields** diff --git a/content/docs/extensions/uri/gitlabfs.md b/content/docs/extensions/uri/gitlabfs.md new file mode 100644 index 00000000..3c4f1342 --- /dev/null +++ b/content/docs/extensions/uri/gitlabfs.md @@ -0,0 +1,25 @@ +# Gitlab Uri Support + +Implementation of `GitlabFileSystem` and `GitlabResolver` + +## Description + +**TODO** + +## Examples + +```python + +``` + +## Implementation reference + +### `class GitlabResolver` + +**MlemABC parent type**: `resolver` + +**MlemABC type**: `gitlab` + + Resolve https://gitlab.com URIs + +**No fields** diff --git a/content/docs/extensions/uri/index.md b/content/docs/extensions/uri/index.md new file mode 100644 index 00000000..0fd10222 --- /dev/null +++ b/content/docs/extensions/uri/index.md @@ -0,0 +1,7 @@ +# URI Resolver extensions + +URI Resolver extensions add support for different URI patterns that MLEM will +understand whenever you reference any MLEM object or project. + +Typically they will implement the +[URIResolver](/doc/user-guide/mlem-abcs#uriresolver) interface. 
diff --git a/content/docs/sidebar.json b/content/docs/sidebar.json index e54e9b12..54eda082 100644 --- a/content/docs/sidebar.json +++ b/content/docs/sidebar.json @@ -126,6 +126,167 @@ } ] }, + { + "slug": "extensions", + "label": "Extensions", + "source": "extensions/index.md", + "children": [ + { + "slug": "model", + "label": "Models", + "source": "model/index.md", + "children": [ + { + "slug": "sklearn", + "label": "Sklearn", + "source": "model/sklearn.md" + }, + { + "slug": "onnx", + "label": "Onnx", + "source": "model/onnx.md" + }, + { + "slug": "tensorflow", + "label": "Tensorflow", + "source": "model/tensorflow.md" + }, + { + "slug": "torch", + "label": "Torch", + "source": "model/torch.md" + }, + { + "slug": "catboost", + "label": "Catboost", + "source": "model/catboost.md" + }, + { + "slug": "lightgbm", + "label": "Lightgbm", + "source": "model/lightgbm.md" + }, + { + "slug": "xgboost", + "label": "Xgboost", + "source": "model/xgboost.md" + }, + { + "slug": "callable", + "label": "Callable", + "source": "model/callable.md" + } + ] + }, + { + "slug": "data", + "label": "Data", + "source": "data/index.md", + "children": [ + { + "slug": "numpy", + "label": "Numpy", + "source": "data/numpy.md" + }, + { + "slug": "pandas", + "label": "Pandas", + "source": "data/pandas.md" + } + ] + }, + { + "slug": "build", + "label": "Builders", + "source": "build/index.md", + "children": [ + { + "slug": "pip", + "label": "Pip", + "source": "build/pip.md" + } + ] + }, + { + "slug": "serving", + "label": "Serving", + "source": "serving/index.md", + "children": [ + { + "slug": "fastapi", + "label": "Fastapi", + "source": "serving/fastapi.md" + }, + { + "slug": "rabbitmq", + "label": "Rabbitmq", + "source": "serving/rabbitmq.md" + } + ] + }, + { + "slug": "deployment", + "label": "Deployments", + "source": "deployment/index.md", + "children": [ + { + "slug": "heroku", + "label": "Heroku", + "source": "deployment/heroku.md" + }, + { + "slug": "sagemaker", + "label": "Sagemaker", 
+ "source": "deployment/sagemaker.md" + }, + { + "slug": "docker", + "label": "Docker", + "source": "deployment/docker.md" + }, + { + "slug": "kubernetes", + "label": "Kubernetes", + "source": "deployment/kubernetes.md" + } + ] + }, + { + "slug": "uri", + "label": "URI Resolving", + "source": "uri/index.md", + "children": [ + { + "slug": "github", + "label": "Github", + "source": "uri/github.md" + }, + { + "slug": "gitlabfs", + "label": "Gitlabfs", + "source": "uri/gitlabfs.md" + }, + { + "slug": "bitbucketfs", + "label": "Bitbucketfs", + "source": "uri/bitbucketfs.md" + } + ] + }, + { + "slug": "storage", + "label": "Artifact Storage", + "source": "storage/index.md", + "children": [ + { + "slug": "dvc", + "label": "Dvc", + "source": "storage/dvc.md" + } + ] + } + ] + }, { "slug": "command-reference", "label": "Command Reference", diff --git a/scripts/docs/bootstrap_extensions.py b/scripts/docs/bootstrap_extensions.py new file mode 100644 index 00000000..d946229a --- /dev/null +++ b/scripts/docs/bootstrap_extensions.py @@ -0,0 +1,322 @@ +import importlib +import inspect +import json +import os.path +import re +import string +import textwrap +from dataclasses import dataclass +from typing import Any, Iterator, List, Tuple, Type + +from pydantic import BaseModel, ValidationError +from pydantic.fields import ModelField +from pydantic.typing import display_as_type, get_args, is_union +from typing_extensions import get_origin + +from mlem.cli.utils import get_field_help +from mlem.core.base import MlemABC +from mlem.ext import Extension, ExtensionLoader, get_ext_type +from mlem.utils.entrypoints import load_entrypoints +from scripts.docs.utils import get_sections, replace_sections + +SIDEBAR_PATH = "../../content/docs/sidebar.json" +EXTENSIONS_SLUG = "extensions" +EXTENSIONS_DIR = "../../content/docs/extensions" + +DOC_REPLACEMENTS = { + "ModelType": "[ModelType](/doc/user-guide/mlem-abcs#modeltype)", + "ModelIO": "[ModelIO](/doc/user-guide/mlem-abcs#modelio)" +} + 
def add_extension_to_sidebar(type_, slug, label, source, sidebar_path=None):
    """Add an extension page to the docs sidebar JSON unless it is already there.

    The sidebar file holds a list of nodes; the node whose ``slug`` equals
    ``EXTENSIONS_SLUG`` holds one child per extension group, and each group
    holds one child per extension page.

    Args:
        type_: slug of the extension group to add the page to.
        slug: slug of the new page; nothing is written if the group already
            has a child with this slug.
        label: label stored for the new page.
        source: source path stored for the new page.
        sidebar_path: sidebar file to update. Defaults to ``SIDEBAR_PATH``,
            which is relative to the current working directory.

    Raises:
        IndexError: if the sidebar has no ``EXTENSIONS_SLUG`` node, or that
            node has no ``type_`` group.
    """
    path = SIDEBAR_PATH if sidebar_path is None else sidebar_path
    with open(path, "r") as f:
        data = json.load(f)

    def node_by_slug(nodes, wanted):
        # IndexError is what the previous ``[...][0]`` lookup raised; keep the
        # type but say which slug is missing instead of "list index out of range".
        for node in nodes:
            if node["slug"] == wanted:
                return node
        raise IndexError(f"no sidebar entry with slug {wanted!r} in {path}")

    extensions = node_by_slug(data, EXTENSIONS_SLUG)
    children = node_by_slug(extensions["children"], type_)["children"]
    if any(child["slug"] == slug for child in children):
        return

    children.append({"slug": slug, "label": label, "source": source})
    with open(path, "w") as f:
        json.dump(data, f, indent=2)
        # json.dump does not terminate the file; end it with a newline so the
        # regenerated sidebar does not show up as "no newline at end of file".
        f.write("\n")
def with_prev_and_next(iterable):
    """Yield a ``(previous, current, next)`` triple for every item of *iterable*.

    ``previous`` is ``None`` for the first item and ``next`` is ``""`` for the
    last one. An empty iterable yields nothing.
    """
    # Private sentinel: items that are themselves ``None`` must not be
    # mistaken for "no item seen yet".
    unset = object()
    prev = None
    current = unset
    for nxt in iterable:
        if current is not unset:
            yield prev, current, nxt
            prev = current
        current = nxt
    if current is not unset:
        # The last item is still pending: its predecessor is ``prev``.
        yield prev, current, ""


def smart_wrap(value: str, width: int, subsequent_indent: str = ""):
    """Wrap *value* to *width* columns without breaking at protected spaces.

    A space is protected (never used as a line break) when it lies inside a
    span opened by one of the quote characters ``'``, ``"`` or `````, or when
    no word has started yet (at the beginning of the text and right after a
    closing quote, until a character followed by an ASCII letter is seen).
    Protected spaces are temporarily replaced by a NUL character, the text is
    wrapped with ``textwrap.fill``, and the NULs are turned back into spaces.

    Args:
        value: text to wrap.
        width: maximum line width passed to ``textwrap.fill``.
        subsequent_indent: prefix for every line but the first.

    Returns:
        The wrapped text; ``""`` for empty input.
    """
    SPECIAL = "\0"
    QUOTES = "'\"`"
    quotes_open = {q: False for q in QUOTES}
    chars = []
    new_word = False
    for prev, c, nxt in with_prev_and_next(value):
        # NOTE: ``nxt`` is "" for the last character and "" is "in" any
        # string, so the last character always counts as starting a word.
        if nxt in string.ascii_letters:
            new_word = True
        if quotes_open.get(c):
            # Closing quote: leave the quoted span and wait for the next word.
            quotes_open[c] = False
            chars.append(c)
            new_word = False
            continue
        if any(quotes_open.values()) or new_word is False:
            chars.append(SPECIAL if c == " " else c)
            continue
        if c in QUOTES and prev == " ":
            # Only a quote preceded by a space opens a span, so apostrophes
            # inside words are left alone.
            quotes_open[c] = True
        chars.append(c)

    return textwrap.fill("".join(chars), width=width,
                         subsequent_indent=subsequent_indent,
                         break_on_hyphens=False,
                         break_long_words=False).replace(SPECIAL, " ")
def get_extension_impls(ext: Extension):
    """Render the implementation-reference markdown for one extension.

    Every entrypoint returned by ``load_entrypoints()`` whose module is
    ``ext.module`` or one of its submodules is loaded and described with
    ``get_impl_description``. The extra model types those descriptions report
    are then described once each with ``get_model_description``, in the order
    they were first seen, so repeated runs produce the same document.

    Returns:
        All descriptions joined with a markdown horizontal rule.
    """
    # Match on a dotted boundary so that ``pkg.foo`` does not also pick up
    # entrypoints that live in ``pkg.foobar``.
    submodule_prefix = ext.module + "."
    sections = []
    # A dict is used as an insertion-ordered set: iterating a ``set`` of
    # classes gives an arbitrary order that can change between runs.
    extra_models = {}
    for entry in load_entrypoints().values():
        module_name = entry.ep.module_name
        if module_name != ext.module and not module_name.startswith(submodule_prefix):
            continue
        text, models = get_impl_description(entry.ep.load())
        sections.append(text)
        extra_models.update(dict.fromkeys(models))
    sections.extend(get_model_description(model) for model in extra_models)
    return "\n---\n\n".join(sections)
def _section_pattern(section_name: str, section_prefix: str):
    """Compile a regex for one section: its heading, then its body as group 1.

    The heading must start a line, and the body runs up to (not including)
    the next line starting with *section_prefix*, or to the end of the text.
    Name and prefix are matched literally.
    """
    heading = re.escape(section_prefix) + re.escape(section_name)
    stop = re.escape(section_prefix)
    return re.compile(rf"^{heading}(.*?)(?=^{stop}|\Z)",
                      flags=re.MULTILINE | re.DOTALL)


def get_section(content: str, section_name: str, section_prefix: str = "## "):
    """Return the body of the first *section_name* section in *content*.

    The body is everything after the heading text up to the next heading with
    the same prefix (or the end of *content*), including surrounding newlines.
    Returns ``None`` when there is no such section.
    """
    match = _section_pattern(section_name, section_prefix).search(content)
    if match is None:
        return None
    return match.group(1)


def get_sections(path: str, *sections, section_prefix: str = "## "):
    """Read the file at *path* and return ``{name: body}`` for *sections*.

    Sections that are missing or have an empty body are left out.
    """
    with open(path, "r") as f:
        content = f.read()
    res = {s: get_section(content, s, section_prefix) for s in sections}
    return {s: v for s, v in res.items() if v}


def replace_section(data: str, section_name: str, new_value: str,
                    section_prefix: str = "## ") -> str:
    """Return *data* with the body of *section_name* replaced by *new_value*.

    *new_value* is inserted verbatim right after the heading text, so it
    should carry its own leading and trailing newlines (as the bodies
    returned by ``get_section`` do). *data* is returned unchanged when the
    section is not found.
    """
    replacement = f"{section_prefix}{section_name}{new_value}"
    # A callable replacement is used so that backslashes in hand-written
    # content (``\d``, ``\1``, ...) are kept literally instead of being
    # parsed as ``re.sub`` template escapes.
    return _section_pattern(section_name, section_prefix).sub(
        lambda _match: replacement, data)


def replace_sections(data: str, sections: Dict[str, str],
                     section_prefix: str = "## ") -> str:
    """Apply ``replace_section`` to *data* for every ``name: body`` pair."""
    for s, v in sections.items():
        data = replace_section(data, s, v, section_prefix)
    return data