diff --git a/content/docs/api-reference/apply.md b/content/docs/api-reference/apply.md index cd3ea540..0159f70b 100644 --- a/content/docs/api-reference/apply.md +++ b/content/docs/api-reference/apply.md @@ -1,6 +1,6 @@ # mlem.api.apply() -Apply provided model against provided data +Apply provided model against provided data. ```py def apply( @@ -9,8 +9,6 @@ def apply( method: str = None, output: str = None, target_project: str = None, - index: bool = None, - external: bool = None, batch_size: Optional[int] = None, ) -> Optional[Any] ``` @@ -33,16 +31,19 @@ required. ## Parameters -- **`model`** (required) - MLEM model (a MlemModel object). +- **`model`** (required) - MLEM model. - **`data`** (required) - Input to the model. -- `method` (optional) - Which model method to use. If None, use the only method - model has. If more than one is available, will fail. -- `output` (optional) - If value is provided, assume its path and save output +- `method` (optional) - Which model method to use. If None, use the only method + model has. If more than one is available, will fail. +- `output` (optional) - If value is provided, assume it's a path and save output there. -- `target_project` (optional) - The path to project to save the results to. -- `index` (optional) - Whether to index saved output in MLEM root folder. -- `external` (optional) - Whether to save result outside mlem dir. -- `batch_size` (optional) - If data is to be loaded and applied in batches. +- `target_project` (optional) - Path to MLEM project to save the result to. +- `batch_size` (optional) - If provided, will process data in batches of given + size. + +## Returns + +If `output=None`, returns results for given data. Otherwise returns None. 
## Exceptions diff --git a/content/docs/api-reference/apply_remote.md b/content/docs/api-reference/apply_remote.md index 3a6f306a..cdabba86 100644 --- a/content/docs/api-reference/apply_remote.md +++ b/content/docs/api-reference/apply_remote.md @@ -1,6 +1,6 @@ # mlem.api.apply_remote() -Apply deployed model (possibly remote) against provided data. +Apply provided model against provided data. ```py def apply_remote( @@ -9,7 +9,6 @@ def apply_remote( method: str = None, output: str = None, target_project: str = None, - index: bool = False, **client_kwargs, ) -> Optional[Any] ``` @@ -34,14 +33,16 @@ knows how to make requests to the deployed model. - **`client`** (required) - The client to access methods of deployed model. - **`data`** (required) - Input to the model. -- `method` (optional) - Which model method to use. If None, use the only method - model has. If more than one is available, will fail. -- `output` (optional) - If value is provided, assume its path and save output +- `method` (optional) - Which model method to use. If None, use the only + method model has. If more than one is available, will fail. +- `output` (optional) - If value is provided, assume it's a path and save output there. -- `target_project` (optional) - The path to project to save the results to. -- `index` (optional) - Whether to index saved output in MLEM root folder. -- `client_kwargs` (optional) - Keyword arguments for the underlying client - implementation being used. +- `target_project` (optional) - Path to MLEM project to save the result to. +- `**client_kwargs` (optional) - Additional arguments to pass to client. + +## Returns + +If `output=None`, returns results for given data. Otherwise returns None. 
## Exceptions diff --git a/content/docs/api-reference/build.md b/content/docs/api-reference/build.md index 9be2b87a..bc6a0fb7 100644 --- a/content/docs/api-reference/build.md +++ b/content/docs/api-reference/build.md @@ -1,8 +1,7 @@ # mlem.api.build() -Build a [MLEM model](/doc/user-guide/mlem-abcs#modeltype) in pip-ready format, a -built package using whl, docker-build-ready folder or directly build a docker -image. +Pack model into something useful, such as docker image, Python package or +something else. ```py def build( @@ -27,33 +26,56 @@ This API is the underlying mechanism for the programmatically create ship-able assets from MlemModels such as pip-ready packages, Docker images, etc. - +
-The arguments supplied to this method can be found with `mlem types`: +### The arguments supplied to this method can be found with `mlem types` command ```cli $ mlem types builder pip +Type mlem.contrib.pip.base.PipBuilder +MlemABC parent type: builder +MlemABC type: pip +MlemObject type name: builder +Create a directory python package +Fields: [required] package_name: str + Name of python package [required] target: str -[not required] templates_dir: str = [] + Path to save result +[not required] templates_dir: List[str] = [] + list of directories to look for jinja templates +[not required] templates_dir.0: str = None + Element of templates_dir [not required] python_version: str = None + Required python version [not required] short_description: str = "" + short_description [not required] url: str = "" + url [not required] email: str = "" + author's email [not required] author: str = "" + author's name [not required] version: str = "0.0.0" -[not required] additional_setup_kwargs: typing.Any = {} + package version +[not required] additional_setup_kwargs: Dict[str, any] = {} + additional parameters for setup() +[not required] additional_setup_kwargs.key: any = None + Element of additional_setup_kwargs ``` - +
## Parameters -- **`builder`** (required) - Builder to use. Out-of-the-box supported string - values are ['whl', 'pip', 'docker_dir', 'docker']. +- **`builder`** (required) - Builder to use. - **`model`** (required) - The model to build. -- `builder_kwargs` (optional) - Keyword arguments for the underlying builder - being used. +- `builder_kwargs` (optional) - Additional keyword arguments to pass to the + builder. + +## Returns + +The result of the build, different for different builders. ## Exceptions diff --git a/content/docs/api-reference/clone.md b/content/docs/api-reference/clone.md index 086addcd..9c1ab570 100644 --- a/content/docs/api-reference/clone.md +++ b/content/docs/api-reference/clone.md @@ -1,7 +1,7 @@ # mlem.api.clone() -Clones MLEM object from `path` to `target` and returns Python representation for -the created object. +Clones [MLEM Object](/doc/user-guide/basic-concepts) from `path` to `target` and +returns Python representation for the created object. ```py def clone( @@ -14,8 +14,6 @@ def clone( target_fs: Optional[str] = None, follow_links: bool = True, load_value: bool = False, - index: bool = None, - external: bool = None, ) -> MlemObject ``` @@ -37,20 +35,21 @@ target. ## Parameters - **`path`** (required) - Path to the object. Could be local path or path inside - a Git repo. + a git repo. - **`target`** (required) - Path to save the copy of initial object to. - `project` (optional) - URL to project if object is located there. -- `rev` (optional) - revision, could be Git commit SHA, branch name or tag. +- `rev` (optional) - revision, could be git commit SHA, branch name or tag. - `fs` (optional) - filesystem to load object from - `target_project` (optional) - path to project to save cloned object to - `target_fs` (optional) - target filesystem -- `follow_links` (optional) - If object we read is a MLEM link, whether to load - the actual object link points to. Defaults to True. 
-- `load_value` (optional) - Load actual Python object incorporated in MlemMeta - object. Defaults to False. -- `index` (optional) - Whether to index output in .mlem directory -- `external` (optional) - whether to put object inside mlem dir in target - project +- `follow_links` (optional) - If object we read is a MLEM link, whether to + load the actual object link points to. Defaults to True. +- `load_value` (optional) - Load actual python object incorporated in + MlemObject. Defaults to False. + +## Returns + +`MlemObject`: Copy of initial object saved to `target`. ## Exceptions diff --git a/content/docs/api-reference/deploy.md b/content/docs/api-reference/deploy.md index 49c47a6b..515ab2f1 100644 --- a/content/docs/api-reference/deploy.md +++ b/content/docs/api-reference/deploy.md @@ -1,29 +1,21 @@ # mlem.api.deploy() -Deploy a model to target environment. Can use existing deployment declaration or -create a new one on-the-fly. +Deploy a model to a target environment. Can use an existing deployment +declaration or create a new one on-the-fly. ```py def deploy( - deploy_meta_or_path: Union[MlemDeploy, str], - model: Union[MlemModel, str] = None, + deploy_meta_or_path: Union[MlemDeployment, str], + model: Union[MlemModel, str], env: Union[MlemEnv, str] = None, project: Optional[str] = None, + rev: Optional[str] = None, fs: Optional[AbstractFileSystem] = None, - external: bool = None, - index: bool = None, + env_kwargs: Dict[str, Any] = None, **deploy_kwargs, -) -> MlemDeploy +) -> MlemDeployment ``` -[//]: # '### Usage:' -[//]: # -[//]: # '```py' -[//]: # 'from mlem.api import deploy' -[//]: # -[//]: # '#TODO' -[//]: # '```' - ## Description This API is the underlying mechanism for the @@ -32,17 +24,18 @@ provides a programmatic way to create deployments for a target environment. 
## Parameters -- **`deploy_meta_or_path`** (required) - Path to deployment meta (will be - created if it does not exist) -- `model` (optional) - Path to model -- `env` (optional) - Path to target environment -- `project` (optional) - Path to MLEM project -- `fs` (optional) - filesystem to load deploy meta from. If not provided, will - be inferred from `deploy_meta_or_path` -- `external` (optional) - Save result not in mlem dir, but directly in project -- `index` (optional) - Whether to index output in .mlem directory -- `deploy_kwargs` (optional) - Configuration for new deployment meta if it does - not exist +- **`deploy_meta_or_path`** (required) - MlemDeployment object or path to it. +- **`model`** (required) - The model to deploy. +- `env` (optional) - The environment to deploy to. +- `project` (optional) - Path to mlem project where to load obj from. +- `rev` (optional) - Revision if object is stored in git repo. +- `fs` (optional) - Filesystem to use to load the object. +- `env_kwargs` (optional) - Additional kwargs to pass to the environment. +- `deploy_kwargs` (optional) - Additional kwargs to pass to the deployment. + +## Returns + +`MlemDeployment`: The deployment object. ## Exceptions diff --git a/content/docs/api-reference/import_object.md b/content/docs/api-reference/import_object.md index 295fcbf4..bc0ab146 100644 --- a/content/docs/api-reference/import_object.md +++ b/content/docs/api-reference/import_object.md @@ -1,7 +1,7 @@ # mlem.api.import_object() -Try to load an object as MLEM model (or dataset) and return it, optionally -saving to the specified target location. +Try to load an object as MLEM model (or data) and return it, optionally saving +to the specified target location. ```py def import_object( @@ -14,8 +14,6 @@ def import_object( target_fs: Optional[AbstractFileSystem] = None, type_: Optional[str] = None, copy_data: bool = True, - external: bool = None, - index: bool = None, ) ``` @@ -47,19 +45,21 @@ command. 
## Parameters -- **`path`** (required) - Path of file to import. -- `project` (optional) - Path to MLEM project. -- `rev` (optional) - revision, could be Git commit SHA, branch name or tag. -- `fs` (optional) - FileSystem for the `path` argument -- `target` (optional) - Path to save MLEM object into. -- `target_project` (optional) - Path to MLEM project for `target`. -- `target_fs` (optional) - FileSystem for the `target` argument -- `type_` (optional) - Specify how to read file. Available types: ['pickle', - 'pandas']. Defaults to auto-infer. -- `copy_data` (optional) - Whether to create a copy of file in target location - or just link existing file. Defaults to True. -- `external` (optional) - Save result directly to `target` (not inside `.mlem/`) -- `index` (optional) - Whether to index output in `.mlem/` directory +- **`path`** (required) - Path to the object to import. +- `project` (optional) - Path to mlem project where to load obj from. +- `rev` (optional) - Revision if object is stored in git repo. +- `fs` (optional) - Filesystem to use to load the object. +- `target` (optional) - Where to store the imported object. +- `target_project` (optional) - If provided, treat `target` as object name and + dump object in this MLEM Project. +- `target_fs` (optional) - Filesystem to use to save the object. +- `type_` (optional) - Type of the object to import. If not provided, will try + to infer from the object itself. +- `copy_data` (optional) - Whether to copy data to the target location. + +## Returns + +`MlemObject`: Imported object. ## Exceptions diff --git a/content/docs/api-reference/init.md b/content/docs/api-reference/init.md index 62d96d90..06bb273d 100644 --- a/content/docs/api-reference/init.md +++ b/content/docs/api-reference/init.md @@ -1,28 +1,22 @@ # mlem.api.init() -Creates and populates the `.mlem/` directory in `path`. +Creates [MLEM config](/doc/user-guide/configuration) in `path`. 
```py def init(path: str = ".") -> None ``` -### Usage: - -```py -from mlem.api import init - -init(path) -``` - ## Description -Initializes a MLEM project by creating a `.mlem/` directory inside the given -path. A new and empty `config.yaml` is also created inside it. +Initializes a MLEM project by creating a new and empty `.mlem.yaml` file. ## Parameters -- **`path`** (required) - location of the target where a MLEM project has to be - initialized i.e. a `.mlem/` folder has to be created. `.` by default +- `path` (optional) - Path to create config in. Defaults to current directory. + +## Returns + +None ## Exceptions diff --git a/content/docs/api-reference/link.md b/content/docs/api-reference/link.md index a5de8ef8..dbb30374 100644 --- a/content/docs/api-reference/link.md +++ b/content/docs/api-reference/link.md @@ -9,7 +9,6 @@ def link( rev: Optional[str] = None, target: Optional[str] = None, target_project: Optional[str] = None, - external: Optional[bool] = None, follow_links: bool = True, absolute: bool = False, ) -> MlemLink @@ -27,7 +26,6 @@ link_obj = link( model_path, target=link_name, target_project=os.getcwd(), - external=False, ) ``` @@ -42,16 +40,19 @@ their aliases for all future purposes. ## Parameters - **`source`** (required) - The object to create link from. -- `source_project` (optional) - Path to mlem project where to load obj from. -- `rev` (optional) - Revision if object is stored in Git repo. +- `source_project` (optional) - Path to mlem project where to load obj from +- `rev` (optional) - Revision if object is stored in git repo. - `target` (optional) - Where to store the link object. -- `target_project` (optional) - If provided, treat `target` as link name and - dump link in MLEM DIR. -- `external` (optional) - Whether to save link outside mlem dir. 
-- `follow_links` (optional) - Whether to make link to the underlying object if +- `target_project` (optional) - If provided, treat `target` as link name and dump + link in MLEM DIR +- `follow_links` (optional) - Whether to make link to the underlying object if `source` is itself a link. Defaults to True. - `absolute` (optional) - Whether to make link absolute or relative to mlem - project. Defaults to False. + project + +## Returns + +`MlemLink`: Link object to the `source`. ## Exceptions diff --git a/content/docs/api-reference/load.md b/content/docs/api-reference/load.md index 1ca1af14..ea07eb7f 100644 --- a/content/docs/api-reference/load.md +++ b/content/docs/api-reference/load.md @@ -1,6 +1,6 @@ # mlem.api.load() -Load Python object saved by MLEM +Load python object saved by MLEM. ```py def load( @@ -31,11 +31,15 @@ MLEM. ## Parameters - **`path`** (required) - Path to the object. Could be local path or path inside - a Git repo. + a git repo. - `project` (optional) - URL to project if object is located there. -- `rev` (optional) - revision, could be Git commit SHA, branch name or tag. +- `rev` (optional) - revision, could be git commit SHA, branch name or tag. - `follow_links` (optional) - If object we read is a MLEM link, whether to load - the actual object link points to. Defaults to True. + the actual object link points to. Defaults to True. + +## Returns + +`Any`: Python object saved by MLEM ## Exceptions diff --git a/content/docs/api-reference/load_meta.md b/content/docs/api-reference/load_meta.md index a4aaa380..da5e918f 100644 --- a/content/docs/api-reference/load_meta.md +++ b/content/docs/api-reference/load_meta.md @@ -1,6 +1,6 @@ # mlem.api.load_meta() -Loads MlemObject from a given path +Load MlemObject. ```py def load_meta( @@ -12,7 +12,7 @@ def load_meta( fs: Optional[AbstractFileSystem] = None, *, force_type: Optional[Type[T]] = None, -) -> MlemObject +) -> T ``` ### Usage: @@ -36,18 +36,22 @@ Python object. 
## Parameters - **`path`** (required) - Path to the object. Could be local path or path inside - a Git repo. + a git repo. - `project` (optional) - URL to project if object is located there. -- `rev` (optional) - revision, could be Git commit SHA, branch name or tag. +- `rev` (optional) - revision, could be git commit SHA, branch name or tag. - `follow_links` (optional) - If object we read is a MLEM link, whether to load - the actual object link points to. Defaults to True. -- `load_value` (optional) - Load actual Python object incorporated in + the actual object link points to. Defaults to True. +- `load_value` (optional) - Load actual python object incorporated in MlemObject. Defaults to False. - `fs` (optional) - filesystem to load from. If not provided, will be inferred from path - `force_type` (optional) - type of meta to be loaded. Defaults to MlemObject (any mlem meta) +## Returns + +`MlemObject`: Saved MlemObject + ## Exceptions - `WrongMetaType` - Thrown if the loaded meta object has a different type than diff --git a/content/docs/api-reference/ls.md b/content/docs/api-reference/ls.md deleted file mode 100644 index 87ddf3a8..00000000 --- a/content/docs/api-reference/ls.md +++ /dev/null @@ -1,53 +0,0 @@ -# mlem.api.ls() - -Get a view of the MLEM project by listing all of its MLEM Objects - -```py -def ls( - project: str = ".", - rev: Optional[str] = None, - fs: Optional[AbstractFileSystem] = None, - type_filter: Union[ - Type[MlemObject], Iterable[Type[MlemObject]], None - ] = None, - include_links: bool = True, -) -> Dict[Type[MlemObject], List[MlemObject]] -``` - -### Usage: - -```py -from mlem.api import ls - -objects = ls(".", rev=None, type_filter=None, include_links=True) -``` - -## Description - -Populates a dictionary where keys are different `types` of -[MlemObjects](/doc/user-guide/basic-concepts#mlem-objects) and values are a -collection of MlemObjects of that type. 
This API is internally used by the CLI -command [list](/doc/command-reference/list). - -## Parameters - -- **`project`** (required) - Path or URL to project -- `rev` (optional) - revision, could be Git commit SHA, branch name or tag. -- `fs` (optional) - filesystem to load from. If not provided, will be inferred - from project -- `type_filter` (optional) - type of objects to be listed (eg: models / dataset - / etc.) -- `include_links` (optional) - whether to include links while fetching the list - of MlemObjects. Defaults to True - -## Exceptions - -None - -## Examples - -```py -from mlem.api import ls - -objects = ls(".") -``` diff --git a/content/docs/api-reference/save.md b/content/docs/api-reference/save.md index 1d0755e1..4584defc 100644 --- a/content/docs/api-reference/save.md +++ b/content/docs/api-reference/save.md @@ -1,6 +1,6 @@ # mlem.api.save() -Saves given object to a given path +Saves given object to a given path. ```py def save( @@ -8,9 +8,7 @@ def save( path: str, project: Optional[str] = None, sample_data=None, - fs: Union[str, AbstractFileSystem] = None, - index: bool = None, - external: Optional[bool] = None, + fs: Optional[AbstractFileSystem] = None, params: Dict[str, str] = None, ) -> MlemObject ``` @@ -32,17 +30,19 @@ systems (eg: `S3`). 
The function returns and saves the object as a ## Parameters - **`obj`** (required) - Object to dump -- **`path`** (required) - If not located on LocalFileSystem, then should be uri - or `fs` argument should be provided -- `project` (optional) - path to mlem project -- `sample_data` (optional) - If the object is a model or function, you can - provide input data sample, so MLEM will include its schema in the model's - metafile +- **`path`** (required) - If not located on LocalFileSystem, then should be + uri or `fs` argument should be provided +- `project` (optional) - path to mlem project +- `sample_data` (optional) - If the object is a model or function, you + can provide input data sample, so MLEM will include its schema in the model's + metadata - `fs` (optional) - FileSystem for the `path` argument -- `index` (optional) - Whether to add object to mlem project index -- `external` (optional) - Save result directly to `path` (not inside `.mlem/`) - `params` (optional) - arbitrary params for object +## Returns + +`MlemObject`: The saved object. + ## Exceptions - `MlemObjectNotFound` - Thrown if we can't find MLEM object @@ -62,5 +62,5 @@ train.columns = train.columns.astype(str) model = DecisionTreeClassifier().fit(train, target) path = os.path.join(os.getcwd(), "saved-model") -save(model, path, sample_data=train, index=False) +save(model, path, sample_data=train) ``` diff --git a/content/docs/api-reference/serve.md b/content/docs/api-reference/serve.md index fc1b2098..fd9ec638 100644 --- a/content/docs/api-reference/serve.md +++ b/content/docs/api-reference/serve.md @@ -4,9 +4,7 @@ Serve a model by exposing its methods as endpoints. ```py def serve( - model: MlemModel, - server: Union[Server, str], - **server_kwargs + model: Union[str, MlemModel], server: Union[Server, str], **server_kwargs ) ``` @@ -27,11 +25,13 @@ easily make requests (for inference or otherwise) against the served model. ## Parameters -- **`model`** (required) - The model (a MlemModel object) to serve. 
-- **`server`** (required) - Which server implementation to use. Out-of-the-box - supported ones are ['fastapi', 'rmq', 'heroku'] -- `server_kwargs` (optional) - Keyword arguments for the underlying server - implementation being used. +- **`model`** (required) - The model to serve. +- **`server`** (required) - Server implementation to use. Out-of-the-box supported one is "fastapi". +- `server_kwargs` (optional) - Additional kwargs to pass to the server. + +## Returns + +None + ## Exceptions diff --git a/content/docs/command-reference/apply-remote.md b/content/docs/command-reference/apply-remote.md index 7288e853..c9e35166 100644 --- a/content/docs/command-reference/apply-remote.md +++ b/content/docs/command-reference/apply-remote.md @@ -1,17 +1,21 @@ # apply-remote Apply a deployed-model (possibly remotely) to data. The results will be saved as -a MLEM object to `output` if provided. Otherwise, it will be printed to -`stdout`. +a [MLEM Object](/doc/user-guide/basic-concepts) to `output` if provided. +Otherwise, it will be printed to `stdout`. ## Synopsis ```usage -usage: mlem apply-remote [options] [subtype] data - -arguments: -[SUBTYPE] Type of client. Choices: ['http', 'rmq'] -DATA Path to dataset object [required] +usage: mlem apply-remote [-d ] [-p ] [--rev ] + [-o ] [--tp ] [-m ] + [--json] [-f ] [-h] + [ [client options] | --load ] + +Builtin clients: +- http +- rmq +- sagemaker ``` ## Description @@ -27,18 +31,18 @@ clients are `http` and `rmq` - which are used to launch requests against the ## Options -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `--rev TEXT`: Repo revision to use [default: (none)] -- `-o, --output TEXT`: Where to store the outputs. 
-- `--target-project, --tp TEXT`: Project to save target to [default: (none)] -- `-m, --method TEXT`: Which model method is to be applied [default: predict] -- `--index / --no-index`: Whether to index output in .mlem directory -- `--json`: Output as json -- `-l, --load TEXT`: File to load client config from -- `-c, --conf TEXT`: Options for client in format `field.name=value` -- `-f, --file_conf TEXT`: File with options for client in format +- `-d `, `--data ` - Path to MLEM dataset [required] +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `--rev ` - Repo revision to use [default: (none)] +- `-o `, `--output ` - Where to save model outputs +- `--tp `, `--target-project ` - Project to save target to [default: + (none)] +- `-m `, `--method ` - Which model method is to be applied [default: + predict] +- `--json` - Output as json +- `-f `, `--file_conf ` - File with options for client in format `field.name=path_to_config` -- `-h, --help`: Show this message and exit. +- `-h`, `--help` - Show this message and exit. ## Example: Apply a locally hosted model to a local dataset @@ -48,5 +52,5 @@ this) and a local MLEM dataset `mydataset`, run the following command to infer the entire dataset with the model and save the output result to `myprediction` ```cli -$ mlem apply-remote http mydataset --conf host="127.0.0.1" --conf port=3000 --output myprediction +$ mlem apply-remote http mydataset --host="127.0.0.1" --port=3000 --output myprediction ``` diff --git a/content/docs/command-reference/apply.md b/content/docs/command-reference/apply.md index f697b59a..6299495f 100644 --- a/content/docs/command-reference/apply.md +++ b/content/docs/command-reference/apply.md @@ -1,16 +1,21 @@ # apply -Apply a model to data. The result will be saved as a MLEM object to `output` if -provided. Otherwise, it will be printed to `stdout`. +Apply a model to data. The result will be saved as a +[MLEM Object](/doc/user-guide/basic-concepts) to `output` if provided. 
+Otherwise, it will be printed to `stdout`. ## Synopsis ```usage -usage: mlem apply [options] model data +usage: mlem apply [-p ] [--rev ] [-o ] + [-m ] [--dp ] [--dr ] + [-i] [--it ] [-b ] [--json] + [-h] + model data arguments: -MODEL Path to model object [required] -DATA Path to dataset object [required] + model Path to model object + data Path to data object ``` ## Description @@ -29,20 +34,20 @@ datasets. ## Options -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `--rev TEXT`: Repo revision to use [default: (none)] -- `-o, --output TEXT`: Where to store the outputs. -- `-m, --method TEXT`: Which model method is to be applied [default: predict] -- `--data-project, --dr TEXT`: Project with data -- `--data-rev TEXT`: Revision of data -- `-i, --import`: Try to import data on-the-fly -- `--import-type, --it TEXT`: Specify how to read data file for import. - Available types: ['pandas', 'pickle'] -- `-b, --batch_size INTEGER`: Batch size for reading data in batches. -- `--index / --no-index`: Whether to index output in .mlem directory -- `-e, --external`: Save result not in .mlem, but directly in project -- `--json`: Output as json -- `-h, --help`: Show this message and exit. +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `--rev ` - Repo revision to use [default: (none)] +- `-o `, `--output ` - Where to save model outputs +- `-m `, `--method ` - Which model method is to be applied [default: + predict] +- `--dp `, `--data-project ` - Project with data +- `--dr `, `--data-rev ` - Revision of data +- `-i`, `--import` - Try to import data on-the-fly +- `--it `, `--import-type ` - Specify how to read data file for + import. Available types: ['pandas', 'pickle', 'torch'] +- `-b `, `--batch_size ` - Batch size for reading data in + batches +- `--json` - Output as json +- `-h`, `--help` - Show this message and exit. 
## Examples diff --git a/content/docs/command-reference/build.md b/content/docs/command-reference/build.md index b5fd8aa1..40375ff3 100644 --- a/content/docs/command-reference/build.md +++ b/content/docs/command-reference/build.md @@ -1,16 +1,20 @@ # build -Build models to create re-usable, ship-able entities such as a Docker image or -Python package. +Build models into re-usable assets you can distribute and use in production, +such as a Docker image or Python package. ## Synopsis ```usage -usage: mlem build [options] model [subtype] - -arguments: -MODEL Path to model [required] -[SUBTYPE] Type of build. Choices: ['whl', 'pip', 'docker_dir', 'docker'] +usage: mlem build [-m ] [-p ] [--rev ] + [-f ] [-h] + [ [builder options] | --load ] + +Builtin builders: +- docker +- docker_dir +- pip +- whl ``` ## Description @@ -21,32 +25,14 @@ images. ## Options -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `--rev TEXT`: Repo revision to use [default: (none)] -- `-l, --load TEXT`: File to load builder config from -- `-c, --conf TEXT`: Options for builder in format `field.name=value` -- `-f, --file_conf TEXT`: File with options for builder in format +- `-m `, `--model ` - Path to MLEM model [required] +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `--rev ` - Repo revision to use [default: (none)] +- `-f `, `--file_conf ` - File with options for builder in format `field.name=path_to_config` -- `-h, --help`: Show this message and exit. +- `-h`, `--help` - Show this message and exit. ## Examples -Build a Docker image from a model - -```cli -$ mlem build mymodel docker --conf server.type=fastapi --conf image.name=myimage -``` - -Create a `docker_dir` builder config called `build_dock`, and use it to package -a model - -```cli -$ mlem declare builder docker_dir --conf server=fastapi --conf target=build build_dock -... - -$ mlem build mymodel --load build_dock -... 
-``` - -For a detailed example using python-package, see the get-started guide -[building example](/doc/get-started/building). +For examples, please refer to [Get Started](/doc/get-started/building) or +[User Guide](/doc/user-guide/building). diff --git a/content/docs/command-reference/checkenv.md b/content/docs/command-reference/checkenv.md new file mode 100644 index 00000000..33fbfd54 --- /dev/null +++ b/content/docs/command-reference/checkenv.md @@ -0,0 +1,27 @@ +# checkenv + +Check that current Python environment satisfies object requirements. + +## Synopsis + +```usage +usage: mlem checkenv [-p ] [--rev ] [-h] + path + +arguments: + path Path to object +``` + +## Options + +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `--rev ` - Repo revision to use [default: (none)] +- `-h`, `--help` - Show this message and exit. + +## Examples + +```cli +$ mlem checkenv rf +⏳️ Loading meta from rf.mlem +✅ Requirements are satisfied! +``` diff --git a/content/docs/command-reference/clone.md b/content/docs/command-reference/clone.md index 070b8a09..4d0d542d 100644 --- a/content/docs/command-reference/clone.md +++ b/content/docs/command-reference/clone.md @@ -1,16 +1,18 @@ # clone -Copy a [MLEM Object](/doc/user-guide/basic-concepts#mlem-objects) from `uri` and -saves a copy of it to `target` path. +Copy a [MLEM Object](/doc/user-guide/basic-concepts) from `uri` and save a copy +of it to `target` path. ## Synopsis ```usage -usage: mlem clone [options] uri target +usage: mlem clone [-p ] [--rev ] + [--tp ] [-h] + uri target arguments: -URI URI to object you want to clone [required] -TARGET Path to store the downloaded object. [required] + uri URI to object you want to clone + target Path to store the downloaded object. ``` ## Description @@ -22,12 +24,11 @@ repository. 
## Options -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `--rev TEXT`: Repo revision to use [default: (none)] -- `--target-project, --tp TEXT`: Project to save target to [default: (none)] -- `-e, --external`: Save result not in .mlem, but directly in project -- `--link / --no-link`: Whether to create link for output in .mlem directory -- `--help`: Show this message and exit. +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `--rev ` - Repo revision to use [default: (none)] +- `--tp `, `--target-project ` - Project to save target to [default: + (none)] +- `-h`, `--help` - Show this message and exit. ## Examples diff --git a/content/docs/command-reference/config.md b/content/docs/command-reference/config.md new file mode 100644 index 00000000..ae5c146c --- /dev/null +++ b/content/docs/command-reference/config.md @@ -0,0 +1,16 @@ +# config + +Manipulate +[MLEM config](/doc/user-guide/configuration). + +## Synopsis + +```usage +usage: mlem config [-h] +``` + +## Options + +- `-h`, `--help` - Show this message and exit. + +## Examples diff --git a/content/docs/command-reference/declare.md b/content/docs/command-reference/declare.md index fef69f57..b99fe718 100644 --- a/content/docs/command-reference/declare.md +++ b/content/docs/command-reference/declare.md @@ -1,17 +1,21 @@ # declare -Declares a new [MLEM Object](/doc/user-guide/basic-concepts#mlem-objects) -metafile from config args and config files. +Declares a new [MLEM Object](/doc/user-guide/basic-concepts) +metafile from config args and config files. 
## Synopsis ```usage -usage: mlem declare [options] object_type [subtype] path - -arguments: -OBJECT_TYPE Type of metafile to declare [required] -[SUBTYPE] Subtype of MLEM object [default: ] -PATH Where to save object [required] +usage: mlem declare [-h] + [ [subtype options] | --load ] + +Builtin subtypes: +- builder +- client +- deployment +- docker_registry +- env +- server ``` ## Description @@ -28,12 +32,7 @@ check out the last example [here](/doc/command-reference/types#examples) ## Options -- `-c, --conf TEXT`: Values for object fields in format - `field.nested.name=value` -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `-e, --external`: Save result not in .mlem, but directly in project -- `--index / --no-index`: Whether to index output in .mlem directory -- `-h, --help`: Show this message and exit. +- `-h`, `--help` - Show this message and exit. ## Examples @@ -45,11 +44,11 @@ $ mlem types env heroku [not required] api_key: str = None # Declare the heroku env -$ mlem declare env heroku production --conf api_key="mlem_heroku_staging" -πŸ’Ύ Saving env to .mlem/env/staging.mlem +$ mlem declare env heroku production --api_key mlem_heroku_staging +πŸ’Ύ Saving env to staging.mlem # Print the contents of the new heroku env metafile -$ cat .mlem/env/staging.mlem +$ cat staging.mlem api_key: mlem_heroku_staging object_type: env type: heroku diff --git a/content/docs/command-reference/deployment/apply.md b/content/docs/command-reference/deployment/apply.md index 0c6a897a..b789111e 100644 --- a/content/docs/command-reference/deployment/apply.md +++ b/content/docs/command-reference/deployment/apply.md @@ -5,11 +5,14 @@ Apply a deployed model to data. 
## Synopsis ```usage -usage: mlem deployment apply [options] path data +usage: mlem deployment apply [-p ] [--rev ] + [--dp ] [--dr ] [-o ] + [--tp ] [-m ] [--json] [-h] + path data arguments: -PATH Path to deployment meta [required] -DATA Path to data object [required] + path Path to deployment meta + data Path to data object ``` ## Description @@ -21,16 +24,17 @@ gathered and returned, also as a MLEM Object. ## Options -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `--rev TEXT`: Repo revision to use [default: (none)] -- `--data-project, --dr TEXT`: Project with data -- `--data-rev TEXT`: Revision of data -- `-o, --output TEXT`: Where to store the outputs. -- `--target-project, --tp TEXT`: Project to save target to [default: (none)] -- `-m, --method TEXT`: Which model method is to be applied [default: predict] -- `--index / --no-index`: Whether to index output in .mlem directory -- `--json`: Output as json -- `-h, --help`: Show this message and exit. +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `--rev ` - Repo revision to use [default: (none)] +- `--dp `, `--data-project ` - Project with data +- `--dr `, `--data-rev ` - Revision of data +- `-o `, `--output ` - Where to store the outputs. +- `--tp `, `--target-project ` - Project to save target to [default: + (none)] +- `-m `, `--method ` - Which model method is to be applied [default: + predict] +- `--json` - Output as json +- `-h`, `--help` - Show this message and exit. ## Example: Apply a dataset on a deployed model diff --git a/content/docs/command-reference/deployment/index.md b/content/docs/command-reference/deployment/index.md index 08cc5ae1..f7ad6eae 100644 --- a/content/docs/command-reference/deployment/index.md +++ b/content/docs/command-reference/deployment/index.md @@ -1,18 +1,19 @@ # deployment -A set of commands to set up and manage deployments. 
+A set of commands to set up and manage deployments ## Synopsis ```usage -usage: mlem deployment [options] COMMAND [ARGS]... - -arguments: -COMMAND - apply Apply method of deployed service - run Deploy a model to target environment - status Print status of deployed service - remove Stop and destroy deployed instance +usage: mlem deployment [-h] + command + +subcommands: + remove Stop and destroy deployed instance. + status Print status of deployed service. + wait Wait for status of deployed service + apply Apply a deployed model to data. + run Deploy a model to a target environment. ``` ## Description @@ -35,4 +36,9 @@ of bulk inferring data on the served model. ## Options -- `-h, --help`: Show this message and exit. +- `-h`, `--help` - Show this message and exit. + +## Examples + +For examples, please refer to [Get Started](/doc/get-started/deploying) or +[User Guide](/doc/user-guide/deploying). diff --git a/content/docs/command-reference/deployment/remove.md b/content/docs/command-reference/deployment/remove.md index c2ec20f6..8dcb65e8 100644 --- a/content/docs/command-reference/deployment/remove.md +++ b/content/docs/command-reference/deployment/remove.md @@ -5,10 +5,11 @@ Stop and destroy deployed instance. ## Synopsis ```usage -usage: mlem deployment remove [options] path +usage: mlem deployment remove [-p ] [-h] + path arguments: -PATH Path to deployment meta [required] + path Path to deployment meta ``` ## Description @@ -19,8 +20,8 @@ deleting its associated runtime resources. ## Options -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `-h, --help`: Show this message and exit. +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `-h`, `--help` - Show this message and exit. 
## Example: Stop and destroy a deployment diff --git a/content/docs/command-reference/deployment/run.md b/content/docs/command-reference/deployment/run.md index c3ffc808..bce80f98 100644 --- a/content/docs/command-reference/deployment/run.md +++ b/content/docs/command-reference/deployment/run.md @@ -6,10 +6,9 @@ declaration or create a new one on-the-fly. ## Synopsis ```usage -usage: mlem deployment run [options] path - -arguments: -PATH Path to deployment meta (will be created if it does not exist) [required] +usage: mlem deployment run [-l ] [-m ] [--mp ] + [--mr ] [-p ] + [--rev ] [-h] ``` ## Description @@ -21,13 +20,13 @@ options (see below). ## Options -- `-m, --model TEXT`: Path to model -- `-t, --env TEXT`: Path to target environment -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `-e, --external`: Save result not in .mlem, but directly in project -- `--index / --no-index`: Whether to index output in .mlem directory -- `-c, --conf TEXT`: Configuration for new deployment meta if it does not exist -- `-h, --help`: Show this message and exit. +- `-l `, `--load ` - File to load deployment config from +- `-m `, `--model ` - Path to MLEM model +- `--mp `, `--model-project ` - Project with model +- `--mr `, `--model-rev ` - Revision of model +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `--rev ` - Repo revision to use [default: (none)] +- `-h`, `--help` - Show this message and exit. ## Example: Create a new deployment from scratch @@ -35,10 +34,10 @@ Here, we define an environment and then run a deployment on it, providing the deployment configuration on-the-fly ```cli -$ mlem declare env heroku staging --conf api_key=... +$ mlem declare env heroku staging --api_key=... ... -$ mlem deployment run service_name --model model --env staging --conf name=my_service +$ mlem deployment run service_name --model model --env staging --name=my_service ... 
``` @@ -50,10 +49,10 @@ deployment with a simple concise command which uses the existing pre-configured deployment declaration ```cli -$ mlem declare env heroku staging --conf api_key=... +$ mlem declare env heroku staging --api_key=... ... -$ mlem declare deployment heroku service_name --conf app_name=my_service --conf model=model --conf env=staging +$ mlem declare deployment heroku service_name --app_name=my_service --model=model --env=staging ... $ mlem deploy run service_name diff --git a/content/docs/command-reference/deployment/status.md b/content/docs/command-reference/deployment/status.md index cff6f553..b68bb594 100644 --- a/content/docs/command-reference/deployment/status.md +++ b/content/docs/command-reference/deployment/status.md @@ -5,10 +5,11 @@ Print status of deployed service. ## Synopsis ```usage -usage: mlem deployment status [options] path +usage: mlem deployment status [-p ] [-h] + path arguments: -PATH Path to deployment meta [required] + path Path to deployment meta ``` ## Description @@ -29,8 +30,8 @@ The possible statuses for deployments using the `heroku` target platform is: ## Options -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `-h, --help`: Show this message and exit. +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `-h`, `--help` - Show this message and exit. 
## Example: Get the status of a deployment diff --git a/content/docs/command-reference/deployment/wait.md b/content/docs/command-reference/deployment/wait.md new file mode 100644 index 00000000..1f55cd1d --- /dev/null +++ b/content/docs/command-reference/deployment/wait.md @@ -0,0 +1,41 @@ +# deployment wait + +Wait for status of deployed service + +## Synopsis + +```usage +usage: mlem deployment wait [-p ] + [-s <[unknown|not_deployed|starting|crashed|stopped|running]>] + [-i <[unknown|not_deployed|starting|crashed|stopped|running]>] + [-p ] [-t ] [-h] + path + +arguments: + path Path to deployment meta +``` + +## Description + +TODO + +## Options + +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `-s <[unknown|not_deployed|starting|crashed|stopped|running]>`, + `--status <[unknown|not_deployed|starting|crashed|stopped|running]>` - + statuses to wait for [default: DeployStatus.RUNNING] +- `-i <[unknown|not_deployed|starting|crashed|stopped|running]>`, + `--intermediate <[unknown|not_deployed|starting|crashed|stopped|running]>` - + Possible intermediate statuses +- `-p `, `--poll-timeout ` - Timeout between attempts [default: + 1.0] +- `-t `, `--times ` - Number of attempts. 0 -> indefinite + [default: 0] +- `-h`, `--help` - Show this message and exit. + +## Example: Wait for deployment setup (after mlem deployment run command) + +```cli +$ mlem deployment wait service_name -i starting +``` diff --git a/content/docs/command-reference/import.md b/content/docs/command-reference/import.md index 98503f4e..6779b4c2 100644 --- a/content/docs/command-reference/import.md +++ b/content/docs/command-reference/import.md @@ -1,15 +1,18 @@ # import -Create a `.mlem` metafile for a model or data in any file or directory. +Create a `.mlem` [MLEM Object](/doc/user-guide/basic-concepts) for a model or +data in any file or directory. 
## Synopsis ```usage -usage: mlem import [options] uri target +usage: mlem import [-p ] [--rev ] + [--tp ] [--copy] [--type ] [-h] + uri target arguments: -URI File to import [required] -TARGET Path to save MLEM object [required] + uri File to import + target Path to save MLEM object ``` ## Description @@ -24,16 +27,15 @@ load those models/datasets into object for subsequent usage in MLEM context. ## Options -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `--rev TEXT`: Repo revision to use [default: (none)] -- `--target-project, --tp TEXT`: Project to save target to [default: (none)] -- `--copy / --no-copy`: Whether to create a copy of file in target location or - just link existing file [default: copy] -- `--type TEXT`: Specify how to read file Available types: ['pandas', 'pickle'] - [default: (auto infer)] -- `--index / --no-index`: Whether to index output in .mlem directory -- `-e, --external`: Save result not in .mlem, but directly in project -- `-h, --help`: Show this message and exit. +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `--rev ` - Repo revision to use [default: (none)] +- `--tp `, `--target-project ` - Project to save target to [default: + (none)] +- `--copy` / `--no-copy` - Whether to create a copy of file in target location + or just link existing file [default: copy] +- `--type ` - Specify how to read file Available types: ['pandas', + 'pickle', 'torch'] [default: (auto infer)] +- `-h`, `--help` - Show this message and exit. 
## Examples @@ -54,6 +56,10 @@ $ mlem import data/model.pkl data/imported_model Create a MLEM model from remote `.pkl` (pickle) file ```cli -$ mlem import .mlem/model/rf --project https://github.com/iterative/example-mlem-get-started --rev main data/imported_model --type pickle -πŸ’Ύ Saving model to .mlem/model/data/imported_model.mlem +$ mlem import models/rf \ + --project https://github.com/iterative/example-mlem-get-started \ + --rev main \ + data/imported_model \ + --type pickle +πŸ’Ύ Saving model to data/imported_model.mlem ``` diff --git a/content/docs/command-reference/index.md b/content/docs/command-reference/index.md index 0c69204b..6853b18c 100644 --- a/content/docs/command-reference/index.md +++ b/content/docs/command-reference/index.md @@ -7,9 +7,9 @@ For a list of all commands, type `mlem -h` ## Typical MLEM workflow -- Initialize a MLEM project in a Git Repo with - [mlem init](/doc/command-reference/init). -- Save Models and Data with MLEM. +First, save models with MLEM using [mlem.api.save](/doc/api-reference/save). +Second, productionize them as you want: + - Load and Apply models with [mlem apply](/doc/command-reference/apply). - Build models into Python packages or Docker images with [mlem build](/doc/command-reference/build). diff --git a/content/docs/command-reference/init.md b/content/docs/command-reference/init.md index dfaa6cec..128504dc 100644 --- a/content/docs/command-reference/init.md +++ b/content/docs/command-reference/init.md @@ -1,38 +1,27 @@ # init -Initialize a MLEM project. +Initialize a [MLEM project](/doc/user-guide/project-structure). ## Synopsis ```usage -usage: mlem init [options] [path] +usage: mlem init [-h] + path arguments: -[path] Location (file path or URL) to initialize a MLEM project + path Where to init project ``` ## Description -This creates a `.mlem/` directory and an empty `config.yaml` file in the desired -project `path`, which defaults to the current working directory (`.`). 
- -The existence of a valid `.mlem/` directory in any location (including [remote]) -enables all of MLEM's functions. Specifically, it allows for storing references -to MLEM objects found in the project (required by `mlem list`) as well as to -[integrate with DVC](/doc/use-cases/dvc). - - - -We recommend initializing MLEM projects inside Git repositories to track changes -and manage them using standard Git workflows. - - - -[remote]: /doc/user-guide/remote-objects +This creates a `.mlem.yaml` file in the desired project `path`, which defaults +to the current working directory (`.`). `.mlem.yaml` is a config file that +configures MLEM project, including the +[integration with DVC](/doc/user-guide/dvc). ## Options -- `-h, --help`: Show this message and exit. +- `-h`, `--help` - Show this message and exit. ## Examples diff --git a/content/docs/command-reference/link.md b/content/docs/command-reference/link.md index 205c9ebd..b7a859b6 100644 --- a/content/docs/command-reference/link.md +++ b/content/docs/command-reference/link.md @@ -1,17 +1,19 @@ # link Create a link (read alias) for an existing -[MLEM Object](/doc/user-guide/basic-concepts#mlem-objects), including from -remote MLEM projects. +[MLEM Object](/doc/user-guide/basic-concepts), including from remote +[[MLEM project](/doc/user-guide/project-structure)](/doc/user-guide/project-structure). ## Synopsis ```usage -usage: mlem link [options] source target +usage: mlem link [--sp ] [--rev ] + [--tp ] [--f] [--abs] [-h] + source target arguments: -SOURCE URI of the object you are creating a link to [required] -TARGET Path to save link object [required] + source URI of the MLEM object you are creating a link to + target Path to save link object ``` ## Description @@ -26,15 +28,16 @@ to incorporate them in the local workspace. 
## Options -- `--source-project, --sp TEXT`: Project for source object -- `--rev TEXT`: Repo revision to use [default: (none)] -- `--target-project, --tp TEXT`: Project to save target to [default: (none)] -- `-e, --external`: Save result not in .mlem, but directly in project -- `--follow-links, --f / --no-follow-links, --nf`: If True, first follow links - while reading {source} before creating this link. [default: follow-links] -- `--absolute, --abs / --relative, --rel`: Which path to linked object to +- `--sp `, `--source-project ` - Project for source object +- `--rev ` - Repo revision to use [default: (none)] +- `--tp `, `--target-project ` - Project to save target to [default: + (none)] +- `--follow-links`, `--f` / `--no-follow-links`, `--nf` - If True, first follow + links while reading {source} before creating this link. [default: follow- + links] +- `--absolute`, `--abs` / `--relative`, `--rel` - Which path to linked object to specify: absolute or relative. [default: relative] -- `-h, --help`: Show this message and exit. +- `-h`, `--help` - Show this message and exit. ## Examples diff --git a/content/docs/command-reference/list.md b/content/docs/command-reference/list.md deleted file mode 100644 index 64396b99..00000000 --- a/content/docs/command-reference/list.md +++ /dev/null @@ -1,50 +0,0 @@ -# list - -List [MLEM objects](/doc/user-guide/basic-concepts#mlem-objects) inside a MLEM -project (location should be [initialized](/doc/command-reference/init)). - -> Aliased to `mlem ls` - -## Synopsis - -```usage -usage: mlem list [options] [project] - -arguments: [PROJECT] Project to list from [default: (current directory)] -``` - -## Description - -Produces a view of the MLEM project listing -[MLEM objects](/doc/user-guide/basic-concepts#mlem-objects) like models, -datasets, and links. - -Running the command without an explicit `project` argument defaults to the -current working directory. 
The `project` argument can take a local path, or -point to a remote repository (e.g. GitHub). - -This command also supports additional options, allowing filtering of MLEM -Objects by type, producing JSON output, selectively displaying -[links](/doc/user-guide/linking) and choosing a particular revision in case of -remote repositories. - -## Options - -- `-t, --type [all|link|model|dataset|env|deployment|packager]`: Type of objects - to list [default: all] -- `--rev TEXT`: Repo revision to use [default: (none)] -- `+l, --links / -l, --no-links`: Whether to include links [default: +l] -- `--json`: Output as json -- `--help`: Show this message and exit. - -## Examples - -List MLEM objects on a remote GitHub repository - -```cli -$ mlem list https://github.com/iterative/example-mlem-get-started -Models: -- rf -Datasets: -- iris.csv -``` diff --git a/content/docs/command-reference/pprint.md b/content/docs/command-reference/pprint.md index ba2a775f..33a51fa3 100644 --- a/content/docs/command-reference/pprint.md +++ b/content/docs/command-reference/pprint.md @@ -1,15 +1,17 @@ # pprint Display all details about a specific -[MLEM object](/doc/user-guide/basic-concepts#mlem-objects) from an existing MLEM -project. +[MLEM Object](/doc/user-guide/basic-concepts) from an existing MLEM project. ## Synopsis ```usage -usage: mlem pprint [options] path +usage: mlem pprint [-p ] [--rev ] [-f] + [--json] [-h] + path -arguments: PATH Path to object [required] +arguments: + path Path to object ``` ## Description @@ -21,25 +23,19 @@ specific information such as `methods` for a `model` or `reader` for `data`. Since only one specific object is printed, a `PATH` to the specific MLEM object is always required. - - -You can use [`mlem list`](/doc/command-reference/list) to list MLEM objects. 
- - - ## Options -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `--rev TEXT`: Repo revision to use [default: (none)] -- `-f, --follow-links`: If specified, follow the link to the actual object. -- `--json`: Output as json -- `--help`: Show this message and exit. +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `--rev ` - Repo revision to use [default: (none)] +- `-f`, `--follow-links` - If specified, follow the link to the actual object. +- `--json` - Output as json +- `-h`, `--help` - Show this message and exit. ## Example: Showing local model ```cli $ mlem pprint rf -⏳️ Loading meta from .mlem/model/rf.mlem +⏳️ Loading meta from rf.mlem {'artifacts': {'data': {'hash': 'a61a1fa54893dcebe6fa448df81a1418', 'size': 163651, 'type': 'dvc', @@ -55,7 +51,7 @@ $ mlem pprint rf ```cli $ mlem pprint https://github.com/iterative/example-mlem-get-started/iris.csv --rev 4-pack -⏳️ Loading meta from https://github.com/iterative/example-mlem-get-started/tree/4-pack/.mlem/data/iris.csv.mlem +⏳️ Loading meta from https://github.com/iterative/example-mlem-get-started/tree/4-pack/data/iris.csv.mlem {'artifacts': {'data': {'hash': '45109f850511f9474665f2c26f4c79f3', 'size': 2470, 'uri': 'iris.csv'}}, diff --git a/content/docs/command-reference/serve.md b/content/docs/command-reference/serve.md index 3d30a2ca..147c1894 100644 --- a/content/docs/command-reference/serve.md +++ b/content/docs/command-reference/serve.md @@ -1,16 +1,17 @@ # serve -Locally deploy the model using a server implementation and expose its methods as -endpoints. +Create an API from model methods using a server implementation. ## Synopsis ```usage -usage: mlem serve [options] model [subtype] +usage: mlem serve [-m ] [-p ] [--rev ] + [-f ] [-h] + [ [server options] | --load ] -arguments: -MODEL Model to create service from [required] -[SUBTYPE] Server type. 
Choices: ['fastapi', 'heroku', 'rmq'] [default: ] +Builtin servers: +- fastapi +- rmq ``` ## Description @@ -30,29 +31,14 @@ built-in client, or common HTTP clients, such as [`curl`](https://curl.se/) and ## Options -- `-p, --project TEXT`: Path to MLEM project [default: (none)] -- `--rev TEXT`: Repo revision to use [default: (none)] -- `-l, --load TEXT`: File to load server config from -- `-c, --conf TEXT`: Options for server in format `field.name=value` -- `-f, --file_conf TEXT`: File with options for server in format +- `-m `, `--model ` - Path to MLEM model [required] +- `-p `, `--project ` - Path to MLEM project [default: (none)] +- `--rev ` - Repo revision to use [default: (none)] +- `-f `, `--file_conf ` - File with options for server in format `field.name=path_to_config` -- `--help`: Show this message and exit. - -## Example: FastAPI HTTP server - -Easily serve a model from a remote GitHub repository on a local FastAPI HTTP -server - -```cli -$ mlem serve https://github.com/iterative/example-mlem-get-started/rf fastapi --conf port=3000 -Starting fastapi server... -πŸ–‡οΈ Adding route for /predict -πŸ–‡οΈ Adding route for /predict_proba -πŸ–‡οΈ Adding route for /sklearn_predict -πŸ–‡οΈ Adding route for /sklearn_predict_proba -Checkout openapi docs at -INFO: Started server process [6083] -INFO: Waiting for application startup. -INFO: Application startup complete. -INFO: Uvicorn running on http://0.0.0.0:3000 (Press CTRL+C to quit) -``` +- `-h`, `--help` - Show this message and exit. + +## Examples + +For examples, please refer to [Get Started](/doc/get-started/serving) or +[User Guide](/doc/user-guide/serving). diff --git a/content/docs/command-reference/types.md b/content/docs/command-reference/types.md index bca7192d..4a2ef8ad 100644 --- a/content/docs/command-reference/types.md +++ b/content/docs/command-reference/types.md @@ -6,11 +6,13 @@ subtype is not provided, list all available MLEM types. 
## Synopsis ```usage -usage: mlem types [options] [abc] [sub_type] +usage: mlem types [-h] + [abc] [sub_type] arguments: -[ABC] Subtype to list implementations. List subtypes if not provided -[SUB_TYPE] Type of `meta` subtype + [abc] Subtype to list implementations. List subtypes if not + provided. + [sub_type] Type of `meta` subtype ``` ## Description @@ -21,14 +23,14 @@ different implementations available for a specific `SUB_TYPE` (argument). This can be useful, for example, to see which types of servers are supported for hosting and serving a model (see [Examples](#examples)). -Check out [MLEM ABCs](/doc/user-guide/mlem-abcs) for a list of abstract base -classes that subclass `mlem.core.base.MlemABC`. These constitute the building -blocks of MLEM, and can be subclassed to add new functionalities and +Check out [MLEM ABCs](/doc/object-reference/mlem-abcs) for a list of abstract +base classes that subclass `mlem.core.base.MlemABC`. These constitute the +building blocks of MLEM, and can be subclassed to add new functionalities and capabilities. ## Options -- `-h, --help`: Show this message and exit. +- `-h`, `--help` - Show this message and exit. ## Examples diff --git a/content/docs/get-started/applying.md b/content/docs/get-started/applying.md index cdb5dd13..62466d2e 100644 --- a/content/docs/get-started/applying.md +++ b/content/docs/get-started/applying.md @@ -1,102 +1,53 @@ -# Applying models +# Using models -## Evaluating the model +Once you saved the model with MLEM, you can load it to use in Python runtime, or +ask to generate predictions for a dataset stored on the disk from the command +line, like in a batch scoring. 
-Now, we can use MLEM to apply the model against some data and calculate some -metrics: - -```py -# evaluate.py -import json - -from sklearn import metrics -from sklearn.datasets import load_iris - -from mlem.api import apply - - -def main(): - data, y_true = load_iris(return_X_y=True, as_frame=True) - y_pred = apply("rf", data, method="predict_proba") - roc_auc = metrics.roc_auc_score(y_true, y_pred, multi_class="ovr") - - with open("metrics.json", "w") as fd: - json.dump({"roc_auc": roc_auc}, fd, indent=4) +## Loading model in Python +Now, we can use MLEM to load the model and calculate some metrics: +```py +# predict.py +from mlem.api import load -if __name__ == "__main__": - main() +model = load("models/rf") +y_pred = model.predict_proba([[0, 1, 2, 3]]) +print(y_pred) ``` -Here we use the `apply` function that handles loading of the model for us. But -you can always load your model with `mlem.api.load()` and call any method -manually. - Now, let's run the script ```cli -$ python evaluate.py -$ cat metrics.json -{ - "roc_auc": 1.0 -} -``` - -
- -### ⛳ [Evaluation](https://github.com/iterative/example-mlem-get-started/tree/4-eval) - -```cli -$ git add metrics.json -$ git commit -m "Evaluate model" -$ git diff 4-eval +$ python predict.py +[[0.3, 0.3, 0.4]] ``` -
+We see that the prediction was successfully printed to stdout. -## Applying from CLI +## Batch scoring in CLI -You can also apply your models directly from CLI. For that to work, your data -should be in a file that is supported by -[MLEM import](/doc/user-guide/importing) or you should have the -[data saved with MLEM ](/doc/user-guide/datasets). - -Let's create an example file and run `mlem apply` +In a batch scoring scenario, you often want to apply your model to a dataset +from the command line. Let's see how MLEM can help by creating an example file +and running `mlem apply`: ```cli $ echo "sepal length (cm),sepal width (cm),petal length (cm),petal width (cm) 0,1,2,3" > new_data.csv -$ mlem apply rf new_data.csv -i --it pandas[csv] -o prediction + +$ mlem apply models/rf new_data.csv -i --it pandas[csv] ⏳️ Importing object from new_data.csv -⏳️ Loading model from .mlem/model/rf.mlem +⏳️ Loading model from models/rf.mlem 🍏 Applying `predict` method... -💾 Saving dataset to .mlem/dataset/prediction.mlem +[[0.3, 0.3, 0.4]] ``` -Or, if you save your data like this: - -```py -from sklearn.datasets import load_iris -from mlem.api import save - - -def main(): - data, _ = load_iris(return_X_y=True, as_frame=True) - save(data, "iris.csv") - +`-i` and `--it pandas[csv]` tell MLEM it's a csv file that should be read with +Pandas. For that to work, your data should be in a format that is supported by +[MLEM import](/doc/user-guide/importing). You can learn more about specifying +these arguments on the `mlem apply` page. -if __name__ == '__main__': - main() -``` - -You can just reference it by name: - -```cli -$ mlem apply rf iris.csv -o prediction -⏳️ Loading dataset from .mlem/dataset/iris.csv.mlem -⏳️ Loading model from .mlem/model/rf.mlem -🍏 Applying `predict` method... -💾 Saving dataset to .mlem/dataset/prediction.mlem -``` +Alternatively, you could save the [data with MLEM](/doc/user-guide/data) to use +`mlem apply` on it.
diff --git a/content/docs/get-started/building.md b/content/docs/get-started/building.md index 30e4718c..382a89cf 100644 --- a/content/docs/get-started/building.md +++ b/content/docs/get-started/building.md @@ -1,124 +1,67 @@ -# Exporting models (building) - -Saving and loading models is fun, but the real value of a model is how you can -use it. To make it easier to get models to production, MLEM has 3 related -functionalities: building, serving, and deploying. We’ll start with building. +# Building models Building is a way to “bake” your model into something usable in production like -a Docker image, or export your model into another format. For this tutorial we -will create a pip-ready package from our model. You can see the full list of -available builders [here](/doc/user-guide/mlem-abcs#builder). - -## Creating Python package - -To create a `build/` directory with pip package run this command: - -```cli -$ mlem build rf pip -c target=build/ -c package_name=example_mlem_get_started -⏳️ Loading model from .mlem/model/rf.mlem -💼 Written `example_mlem_get_started` package data to `build` -``` - -In this command, we specified that we want to build `rf` model with `pip` -builder and then provided two arguments, `target` is the directory where the -builder will write all the files and `package_name` is the name of our package. - -
- -### ⚙️ About builders and arguments - -There are more types of builders and each one has it’s own set of available -arguments. They are listed [here](/doc/user-guide/mlem-abcs#builder), but for -quick reference you can run `mlem types builder` for list of builders and -`mlem types builder pip` for list of available arguments - -
- -## Exploring Python package - -Let’s see what we’ve got - -```cli -$ tree build/ -build/ -├── MANIFEST.in -├── example_mlem_get_started -│   ├── __init__.py -│   ├── model -│   └── model.mlem -├── requirements.txt -└── setup.py -``` +a Docker image, a Python package, or export your model into another format. -As you can see, the builder generated all the files necessary for a python -package. This includes sources, requirements, -[setup.py](https://docs.python.org/3/distutils/setupscript.html), and the model -itself. +Building may use Serving functionality under the hood: e.g. if you build a +Docker image with a model, MLEM can add FastAPI or RabbitMQ there so you can +do online serving as well. -## Using Python package +Let's see how it works by building a Docker image with the model. -Now you can distribute and install the package. Its code declares all the same -methods our model had, so you can try to use it like this: - -```py -import example_mlem_get_started - -example_mlem_get_started.predict(df) -``` - -## Pre-configured builders - -Alternatively, you can pre configure your builder in the form of yaml file -either manually or via `mlem declare` command which uses the same interface with -multiple `-c` options like this: +## Building Docker image ```cli -$ mlem declare builder pip pip_config \ - -c target=build/ -c package_name=example_mlem_get_started -💾 Saving builder to .mlem/builder/pip_config.mlem -$ cat .mlem/builder/pip_config.mlem -object_type: builder -package_name: example_mlem_get_started -target: build/ -type: pip +$ mlem build docker docker-builder.mlem \ + --model models/rf \ + --image.name mlem-model +⏳️ Loading model from models/rf.mlem +🛠 Building MLEM wheel file... +💼 Adding model files... +🛠 Generating dockerfile... +💼 Adding sources... +💼 Generating requirements file... +🛠 Building docker image mlem-model:latest...
+✅ Built docker image mlem-model:latest ``` -Now you can use this config as a value for `--load` option in `mlem build` - -```cli -$ mlem build rf -l pip_config -⏳️ Loading builder from .mlem/builder/pip_config.mlem -⏳️ Loading model from .mlem/model/rf.mlem -💼 Written `example_mlem_get_started` package data to `build` -``` +`docker-builder.mlem` is going to have a +[builder specification](/doc/user-guide/building) (we call it declaration).
-### ⛳ [Add builder config](https://github.com/iterative/example-mlem-get-started/tree/4-pack) +### `$ cat docker-builder.mlem` -```cli -$ git add .mlem/packager/pip_config.mlem -$ git commit -m "Add package config" -$ git diff 4-pack +```yaml +image: + name: rf-docker +object_type: builder +server: + type: fastapi +type: docker ```
-Also, you can do all of this programmatically via Python API: +## Using Docker image -```py -from mlem.api import build, load_meta +Now you can distribute and run the Docker image. Let's run the container: -build("pip", "rf", target="build", package_name="example_mlem_get_started") -build(load_meta("pip_config"), "rf") +```cli +$ docker run -p 8080:8080 mlem-model:latest +Starting fastapi server... +🖇️ Adding route for /predict +🖇️ Adding route for /predict_proba +🖇️ Adding route for /sklearn_predict +🖇️ Adding route for /sklearn_predict_proba +Checkout openapi docs at +INFO: Started server process [16696] +INFO: Waiting for application startup. +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit) ``` -
- -### ⚙️ Remote builder config - -Like every other MLEM object, builders can be read from remote repos. Try - -`mlem build rf -l https://github.com/iterative/example-mlem-get-started/pip_config` - -
+As we can see, this is just a FastAPI server. Just like we saw in +[the previous section](/doc/get-started/serving) of the Get Started, we can now +open http://localhost:8080/docs in the browser and query the model with +`curl` or `mlem apply-remote`. diff --git a/content/docs/get-started/deploying.md b/content/docs/get-started/deploying.md index 88a859a8..11414ad8 100644 --- a/content/docs/get-started/deploying.md +++ b/content/docs/get-started/deploying.md @@ -4,18 +4,20 @@ You can create deployments in the cloud from your models. This uses building and serving functions under the hood. For example, Heroku deployment combines Docker image building with FastAPI serving. - +MLEM can deploy to platforms such as +[Sagemaker](/doc/user-guide/deploying/sagemaker) and +[Kubernetes](/doc/user-guide/deploying/kubernetes) (see the full list in +[User Guide](/doc/user-guide/deploying)). For the Get Started, we'll use Heroku +as the example. -This functionality is experimental and is subject to change. +## Deploying to Heroku - +To create applications on the Heroku platform, all you need is a Heroku API key. -## Defining target environment - -To deploy something somewhere, we need to define this “somewhere” first, or in -MLEM terms, declare a `target environment` object. It will contain all the -information needed to access it. In the case of Heroku, all we need is an API -key. +You can either set the `HEROKU_API_KEY` environment variable or use +[Heroku CLI](https://devcenter.heroku.com/articles/heroku-cli) to run +`heroku login`. To allow MLEM to push the Docker image built to Heroku Docker +Registry, you'll also need to execute `heroku container:login`.
@@ -28,164 +30,68 @@ key.
-To declare a new target env, run - -```cli -$ mlem declare env heroku staging -c api_key= -πŸ’Ύ Saving env to .mlem/env/staging.mlem -``` - - - -MLEM will attempt to use the `HEROKU_API_KEY` environment variable if no -`api_key` argument is provided. - - - -## Defining deployment - -Now, as we defined our target env, we can deploy our model there. Deployments -are also MLEM objects, which means that they need to have their definition. - -To create one for Heroku, we once again will use `declare` command to configure -our deployment. We use `example-mlem-get-started-app` for the app name, but you -can change it to something unique: - -```cli -$ mlem declare deployment heroku myservice \ - -c app_name=example-mlem-get-started-app \ - -c model=rf \ - -c env=staging -πŸ’Ύ Saving deployment to .mlem/deployment/myservice.mlem -``` - -
- -### ⛳ [Create deployment definition](https://github.com/iterative/example-mlem-get-started/tree/5-deploy-meta) +After authenticating with Heroku, we can run the deployment command: -```cli -$ git add .mlem/env/staging.mlem .mlem/deployment/myservice.mlem -$ git commit -m "Add env and deploy meta" -$ git diff 5-deploy-meta ``` - -
- -Now we can actually run the deployment process (this can take a while): - -```cli -$ mlem deployment run myservice -⏳️ Loading deployment from .mlem/deployment/myservice.mlem -πŸ”— Loading link to .mlem/env/staging.mlem -πŸ”— Loading link to .mlem/model/rf.mlem -πŸ’Ύ Updating deployment at .mlem/deployment/myservice.mlem -πŸ› Creating Heroku App example-mlem-get-started-app -πŸ’Ύ Updating deployment at .mlem/deployment/myservice.mlem +$ mlem deployment run heroku app.mlem \ + --model models/rf \ + --app_name example-mlem-get-started-app +⏳️ Loading model from models/rf.mlem +⏳️ Loading deployment from app.mlem πŸ›  Creating docker image for heroku + πŸ›  Building MLEM wheel file... πŸ’Ό Adding model files... πŸ›  Generating dockerfile... πŸ’Ό Adding sources... πŸ’Ό Generating requirements file... πŸ›  Building docker image registry.heroku.com/example-mlem-get-started-app/web... βœ… Built docker image registry.heroku.com/example-mlem-get-started-app/web - πŸ”Ό Pushed image registry.heroku.com/example-mlem-get-started-app/web to remote registry at host registry.heroku.com -πŸ’Ύ Updating deployment at .mlem/deployment/myservice.mlem -πŸ›  Releasing app my-mlem-service formation -πŸ’Ύ Updating deployment at .mlem/deployment/myservice.mlem + πŸ”Ό Pushing image registry.heroku.com/example-mlem-get-started-app/web to registry.heroku.com + βœ… Pushed image registry.heroku.com/example-mlem-get-started-app/web to registry.heroku.com +πŸ›  Releasing app example-mlem-get-started-app formation βœ… Service example-mlem-get-started-app is up. You can check it out at https://example-mlem-get-started-app.herokuapp.com/ ``` - - -You can also define and run the deployment on-the-fly using `-c` options for -`mlem deployment run`, e.g.: - -```cli -$ mlem deployment run myservice \ - -m model -t staging \ - -c app_name=example-mlem-get-started-app -``` - - +Deployment specification (we call it +[declaration](/doc/command-reference/declare)) was saved to `app.mlem`. 
Using +it, you can re-deploy the same app with a different model.
-### β›³ [Service deployed](https://github.com/iterative/example-mlem-get-started/tree/8-deploy-create) +### See app.mlem contents -```cli -$ git add .mlem/deployment/myservice.mlem -$ git commit -m "Deploy service" -$ git diff 8-deploy-service +```yaml +$ cat app.mlem +app_name: example-mlem-get-started-app +object_type: deployment +type: heroku ```
+Beside `app.mlem`, there is one more file that was saved: `app.mlem.state`. It +contains the information about the deployment we just created, including which +MLEM model we used, the URL of the deployment and other useful information. You +can learn more about state files in [User Guide](/doc/user-guide/deploying). + ## Making requests The application is now live on Heroku. You can go [here](http://example-mlem-get-started-app.herokuapp.com) and see the same -OpenAPI documentation. For details on it, refer to the **Serving** section. You -can also try to do some requests: - -```py -from mlem.api import load -from mlem.runtime.client.base import HTTPClient - -client = HTTPClient(host="http://example-mlem-get-started-app.herokuapp.com", port=80) -res = client.predict(load("test_x.csv")) -``` - -Also, you can create a client using deployment meta object: - -```py -from mlem.api import load +OpenAPI documentation. To learn how to easily send requests to your model with +MLEM, refer to the [User Guide](/doc/user-guide/deploying). -service = load("myservice") -client = service.state.get_client() -res = client.predict(load("test_x.csv")) -``` - -There is also the remote counterpart of `apply` command. It will send requests -to your service instead of loading model into memory. There are two options to -achieve this in CLI: using the service address or the deploy meta. - -```cli -$ mlem apply-remote http test_x.csv -c host=http://example-mlem-get-started-app.herokuapp.com -c port=80 --json -[1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0] - -$ mlem deployment apply myservice test_x.csv --json -[1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0] -``` +## What's next? - +That's it! Thanks for checking out the tool. 
If you have any questions or +suggestions for us, please reach out to us on +[Discord](https://discord.com/channels/485586884165107732/903647230655881226) or +create a new [GitHub issue](https://github.com/iterative/mlem/issues) in our +repo 🙌. -You don’t even need to have the deployment metadata locally: - -```cli -$ mlem deployment apply --json \ - https://github.com/iterative/example-mlem-get-started/myservice \ - https://github.com/iterative/example-mlem-get-started/test_x.csv -``` - - - -## Managing deployment - -Finally, you can check the status of your service with: - -```cli -$ mlem deployment status myservice -running -``` - -And stop your service with - -```cli -$ mlem deployment remove myservice -⏳️ Loading deployment from .mlem/deployment/myservice.mlem -🔗 Loading link to .mlem/env/staging.mlem -🔻 Deleting my-mlem-service heroku app -💾 Updating deployment at .mlem/deployment/myservice.mlem -``` +If you would like to destroy the deployment now, you can find the instructions +[here](/doc/user-guide/deploying). -Note, that it will not delete the deployment definition, just update its state. +Please proceed to [Use Cases](/doc/use-cases) if you want to see high-level +scenarios MLEM can cover, or go to [User Guide](/doc/user-guide) to see more +details or short tutorials on how to use specific features of MLEM. 
diff --git a/content/docs/get-started/index.md b/content/docs/get-started/index.md index 4010fd41..905cfe9a 100644 --- a/content/docs/get-started/index.md +++ b/content/docs/get-started/index.md @@ -4,97 +4,164 @@ description: 'Learn how you can use MLEM to easily manage and deploy models' # Get Started -Assuming MLEM is already [installed](/doc/install) in your active python -environment, let's initialize it by running `mlem init` inside a Git project: +We assume MLEM is already [installed](/doc/install) in your active Python +environment, as well as `pandas`, `sklearn`, `fastapi`, `uvicorn` and `docker` +(if not, you can follow the instructions below).
### βš™οΈ Expand for setup instructions -If you want to follow along with this tutorial and try MLEM, you can use our -[example repo](https://github.com/iterative/example-mlem-get-started). You'll -need to [fork] it first (so you can push models). Then clone it locally: - -[fork]: https://docs.github.com/en/get-started/quickstart/fork-a-repo - -```cli -$ git clone -$ cd example-mlem-get-started -``` - -Next let's create an isolated virtual environment to cleanly install all the -requirements (including MLEM) there: +Let's create a separate folder and an isolated virtual environment to cleanly +install all the requirements (including MLEM): ```cli +$ mkdir mlem-get-started +$ cd mlem-get-started $ python3 -m venv .venv $ source .venv/bin/activate -$ pip install -r requirements.txt +$ pip install pandas scikit-learn mlem[fastapi,heroku] ```
-```cli -$ mlem init -``` +## Saving your ML model -A few [internal files](/doc/user-guide/project-structure) will be created: +To enable all kinds of productionization scenarios supported by MLEM, we first +need to save a machine learning model with MLEM. -```cli -$ tree .mlem -.mlem -└─── config.yaml +Let's take a look and run the following Python script: + +```py +from sklearn.datasets import load_iris +from sklearn.ensemble import RandomForestClassifier + +from mlem.api import save + + +data, y = load_iris(return_X_y=True, as_frame=True) +rf = RandomForestClassifier( + n_jobs=2, + random_state=42, +) +rf.fit(data, y) +save( + rf, + "models/rf", + sample_data=data, +) ``` -Now you’re ready to MLEM! +Here, we load a well-known +[Iris flower dataset](https://archive.ics.uci.edu/ml/datasets/iris) with +scikit-learn and train a simple classifier. But instead of pickling the model, +we save it with MLEM (check out the full list of supported +[ML frameworks](/doc/object-reference/model)). -In our -[example repository](https://github.com/iterative/example-mlem-get-started), -you'll find tags for each step we take in the different sections of this -tutorial. You can just see what is going on there or reproduce everything -yourself and compare. In the different `Get Started` sections, those tags will -be marked with β›³Β emoji. Click on it to expand the section and see the `git` -commands to run if you are following along. Just like this Git tag that -concludes this section: +## Productionization -
+Now MLEM can do the heavy lifting for us, enabling all these scenarios in a +couple lines of code: + +- **[Apply model](/doc/get-started/applying)** - load model in Python or get + prediction in command line. +- **[Serve model](/doc/get-started/serving)** - create a service from your model + for online serving. +- **[Build model](/doc/get-started/building)** - export model into Python + packages, Docker images, etc. +- **[Deploy model](/doc/get-started/deploying)** - deploy your model to Heroku, + Sagemaker, Kubernetes, etc. -# ⛳ MLEM init +## Codification -Tag: -[1-mlem-init](https://github.com/iterative/example-mlem-get-started/tree/1-mlem-init) +Let's see what we got when we saved a model with MLEM. ```cli -$ git add .mlem -$ git status -Changes to be committed: - new file: .mlem/config.yaml - ... -$ git commit -m "Initialize MLEM" +$ tree models/ +models +├── rf +└── rf.mlem ``` -To compare your results with the tag you can also run the following +The model binary was saved to `models/rf`, along with some metadata about it in +`models/rf.mlem`. We refer to this as "Codification". -```cli -$ git diff 1-mlem-init -``` +The `.mlem` file is a bit long, but it contains all the metadata we need to use +the model later: -The output will be empty if you have the same files staged/committed +1. Model methods: `predict` and `predict_proba` +2. Input data schema: describes the data frame (Iris dataset) +3. Requirements: `sklearn`, `numpy` and `pandas`, with specific versions -
+Note that we didn't specify any of this information. MLEM investigates the +object (even if it's complex) and finds out all of this! -MLEM’s features can be grouped around those common functional use cases. We’ll -explore them one by one in the next few pages: - -- **[Saving models](/doc/get-started/saving)** (try this next) is the base layer - of MLEM for machine learning models and datasets. -- **[Applying models](/doc/get-started/applying)** explains how to load and - apply models -- **[Exporting models (building)](/doc/get-started/building)** describes how - models can be built into Python packages, Docker images, etc. -- **[Serving models](/doc/get-started/serving)** shows how to create a service - from your model -- **[Deploying models](/doc/get-started/deploying)** shows how you can deploy - your model with MLEM. - -More examples on how to use MLEM in different scenarios can be found in -[Use Cases](/doc/use-cases) section +
+ +### Click to see the contents of the `rf.mlem` metafile. + +```yaml +artifacts: + data: + hash: 5a38e5d68b9b9e69e9e894bcc9b8a601 + size: 163651 + uri: rf +model_type: + methods: + predict: + args: + - name: data + type_: + columns: + - sepal length (cm) + - sepal width (cm) + - petal length (cm) + - petal width (cm) + dtypes: + - float64 + - float64 + - float64 + - float64 + index_cols: [] + type: dataframe + name: predict + returns: + dtype: int64 + shape: + - null + type: ndarray + predict_proba: + args: + - name: data + type_: + columns: + - sepal length (cm) + - sepal width (cm) + - petal length (cm) + - petal width (cm) + dtypes: + - float64 + - float64 + - float64 + - float64 + index_cols: [] + type: dataframe + name: predict_proba + returns: + dtype: float64 + shape: + - null + - 3 + type: ndarray + type: sklearn +object_type: model +requirements: + - module: sklearn + version: 1.1.2 + - module: numpy + version: 1.22.4 + - module: pandas + version: 1.5.0 +``` + +
diff --git a/content/docs/get-started/saving.md b/content/docs/get-started/saving.md deleted file mode 100644 index feb47574..00000000 --- a/content/docs/get-started/saving.md +++ /dev/null @@ -1,217 +0,0 @@ -# Saving models - -After initializing MLEM we have an empty project (except for the config file), -but soon we'll save something with MLEM to fill it up. - -## Training the model - -To save models with MLEM you just need to use `mlem.api.save()` method instead -of some other way you saved your model before. Let's take a look at the -following Python script: - -```py -# train.py -from sklearn.datasets import load_iris -from sklearn.ensemble import RandomForestClassifier - -from mlem.api import save - - -def main(): - data, y = load_iris(return_X_y=True, as_frame=True) - rf = RandomForestClassifier( - n_jobs=2, - random_state=42, - ) - rf.fit(data, y) - - save( - rf, - "rf", - sample_data=data, - ) - - -if __name__ == "__main__": - main() - -``` - -Here we load well-known iris dataset with sklearn and train a simple classifier. -But instead of pickling the model we saved it with MLEM. - -Now let's run this script and see how we save the model. - -```cli -$ python train.py -... - -$ tree .mlem/model/ -.mlem/model -β”œβ”€β”€ rf -└── rf.mlem -``` - - - -By default, MLEM saves your files to `.mlem/` directory, but that can be -changed. See [Project Structure](/doc/user-guide/project-structure) for more -details. - - - -The model was saved along with some metadata about it: `rf` containing the model -binary and a `rf.mlem` metafile containing information about it. Let's take a -look at it: - -
- -### `$ cat .mlem/model/rf.mlem` - -```yaml -artifacts: - data: - hash: 59440b4398b8d45d8ad64d8d407cfdf9 - size: 993 - uri: logreg -model_type: - methods: - predict: - args: - - name: data - type_: - columns: - - '' - - sepal length (cm) - - sepal width (cm) - - petal length (cm) - - petal width (cm) - dtypes: - - int64 - - float64 - - float64 - - float64 - - float64 - index_cols: - - '' - type: dataframe - name: predict - returns: - dtype: int64 - shape: - - null - type: ndarray - predict_proba: - args: - - name: data - type_: - columns: - - '' - - sepal length (cm) - - sepal width (cm) - - petal length (cm) - - petal width (cm) - dtypes: - - int64 - - float64 - - float64 - - float64 - - float64 - index_cols: - - '' - type: dataframe - name: predict_proba - returns: - dtype: float64 - shape: - - null - - 3 - type: ndarray - sklearn_predict: - args: - - name: X - type_: - columns: - - '' - - sepal length (cm) - - sepal width (cm) - - petal length (cm) - - petal width (cm) - dtypes: - - int64 - - float64 - - float64 - - float64 - - float64 - index_cols: - - '' - type: dataframe - name: predict - returns: - dtype: int64 - shape: - - null - type: ndarray - sklearn_predict_proba: - args: - - name: X - type_: - columns: - - '' - - sepal length (cm) - - sepal width (cm) - - petal length (cm) - - petal width (cm) - dtypes: - - int64 - - float64 - - float64 - - float64 - - float64 - index_cols: - - '' - type: dataframe - name: predict_proba - returns: - dtype: float64 - shape: - - null - - 3 - type: ndarray - type: sklearn -object_type: model -requirements: - - module: sklearn - version: 1.0.2 - - module: pandas - version: 1.4.1 - - module: numpy - version: 1.22.3 -``` - -
- -It's a bit long, but we can see all that we need to use the model later: - -1. Model methods: `predict` and `predict_proba` -2. Input data schema that describes the DataFrame with the iris dataset -3. Requirements: `sklearn`, `numpy`, `pandas` with particular versions we need - to run this model. - -Note that we didn't specify requirements: MLEM investigates the object you're -saving (even if it's a complex one) and finds out all requirements needed! - -
- -### β›³ Train - -Tag: -[2-train](https://github.com/iterative/example-mlem-get-started/tree/2-train) - -```cli -$ git add .mlem/model -$ git commit -m "Train the model" -$ git diff 2-train -``` - -
diff --git a/content/docs/get-started/serving.md b/content/docs/get-started/serving.md index e4e1eb88..8c6d56d9 100644 --- a/content/docs/get-started/serving.md +++ b/content/docs/get-started/serving.md @@ -1,81 +1,40 @@ # Serving models -For online serving, you can create a server from your model. We will try out -FastAPI server. All available server implementations are listed -[here](/doc/user-guide/mlem-abcs#server). +If you want to serve your model online, MLEM can do that for you using FastAPI +or, for example, RabbitMQ. We'll check out how it works with FastAPI since +serving models via REST API is quite common. ## Running server -To start up FastAPI server run: +To start up a FastAPI server run: ```cli -$ mlem serve rf fastapi -⏳️ Loading model from .mlem/model/rf.mlem +$ mlem serve fastapi --model models/rf +⏳️ Loading model from models/rf.mlem Starting fastapi server... -πŸ–‡οΈ Adding route for /predict -πŸ–‡οΈ Adding route for /predict_proba -πŸ–‡οΈ Adding route for /sklearn_predict -πŸ–‡οΈ Adding route for /sklearn_predict_proba +πŸ–‡οΈ Adding route for /predict +πŸ–‡οΈ Adding route for /predict_proba +πŸ–‡οΈ Adding route for /sklearn_predict +πŸ–‡οΈ Adding route for /sklearn_predict_proba Checkout openapi docs at -INFO: Started server process [2917] +INFO: Started server process [16696] INFO: Waiting for application startup. INFO: Application startup complete. INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit) ``` -Servers automatically create endpoints from model methods with payload schemas -corresponding to serialized dataset types. +Servers automatically create endpoints from model methods using `sample_data` +argument provided to [mlem.api.save](/doc/api-reference/save). + +That's it! You can check out the other available server implementations in +[User Guide](/doc/user-guide/serving/). 
## Making requests -You can open Swagger UI (OpenAPI) at +While serving the model, you can open Swagger UI (OpenAPI) at [http://localhost:8080/docs](http://localhost:8080/docs) to check out OpenAPI spec and query examples. -Each server implementation also has its client implementation counterpart, in -the case of FastAPI server it’s HTTPClient. Clients can be used to make requests -to servers. Since a server also exposes the model interface description, the -client will know what methods are available and handle serialization and -deserialization for you. You can use them via CLI: - -```cli -$ mlem apply-remote http test_x.csv -c host="0.0.0.0" -c port=8080 --json -[1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0] -``` - -or via Python API: - -```py -from mlem.api import load -from mlem.runtime.client.base import HTTPClient - -client = HTTPClient(host="localhost", port=8080) -res = client.predict(load("test_x.csv")) -``` - -
- -### πŸ’‘ Or query the model directly with curl - -```cli -$ curl -X 'POST' \ - 'http://localhost:8080/predict_proba' \ - -H 'accept: application/json' \ - -H 'Content-Type: application/json' \ - -d '{ - "data": { - "values": [ - { - "": 0, - "sepal length (cm)": 0, - "sepal width (cm)": 0, - "petal length (cm)": 0, - "petal width (cm)": 0 - } - ] - } - }' -[[0.92,0.04,0.04]] -``` - -
+MLEM can help you with not only serving, but also querying the model. Using MLEM +API or CLI you can send requests to your model without implementing the "client" +part. Learn more about this in [User Guide](/doc/user-guide/serving/). diff --git a/content/docs/object-reference/build/docker.md b/content/docs/object-reference/build/docker.md new file mode 100644 index 00000000..f1bc3b76 --- /dev/null +++ b/content/docs/object-reference/build/docker.md @@ -0,0 +1,68 @@ +# docker + +## `class DockerDirBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `docker_dir` + + Create a directory with docker context to build docker image + +**Fields**: + +- `target: str` _(required)_ - Path to save result + +- `server: Server` - Server to use + +- `args: DockerBuildArgs = DockerBuildArgs()` - Additional docker arguments + +--- + +## `class DockerImageBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `docker` + + Build docker image from model + +**Fields**: + +- `image: DockerImage` _(required)_ - Image parameters + +- `server: Server` - Server to use + +- `args: DockerBuildArgs = DockerBuildArgs()` - Additional docker arguments + +- `env: DockerEnv = DockerEnv()` - Where to build and push image. Defaults to + local docker daemon + +- `force_overwrite: bool = False` - Ignore existing image with same name + +- `push: bool = True` - Push image to registry after it is built + +--- + +## `class DockerBuildArgs` + + Container for DockerBuild arguments + +**Fields**: + +- `python_version: str = "3.10.5"` - Python version to use default: version of + running interpreter + +- `run_cmd: str = "sh run.sh"` - command to run in container + +- `package_install_cmd: str = "apt-get update && apt-get -y upgrade && apt-get install --no-install-recommends -y"` - + command to install packages. 
Default is apt-get, change it for other package + manager + +- `package_clean_cmd: str = "&& apt-get clean && rm -rf /var/lib/apt/lists/*"` - + command to clean after package installation + +- `mlem_whl: str` - a path to mlem .whl file. If it is empty, mlem will be + installed from pip + +- `platform: str` - platform to build docker for, see + docs.docker.com/desktop/multi-arch/ diff --git a/content/docs/object-reference/build/index.md b/content/docs/object-reference/build/index.md new file mode 100644 index 00000000..64bb4202 --- /dev/null +++ b/content/docs/object-reference/build/index.md @@ -0,0 +1,12 @@ +# Build extensions + +`mlem build` functionality is used to export models to another format that can +be used downstream: Docker image, Python package, or something else. See +[User Guide](/doc/user-guide/building) for more details on how this works. + +Build extensions add new types of builders to use with `build` +[API](/doc/api-reference/build) and [CLI](/doc/command-reference/build) +commands. + +Typically, they will implement the [Builder](/doc/object-reference/mlem-abcs#builder) +interface. 
diff --git a/content/docs/object-reference/build/pip.md b/content/docs/object-reference/build/pip.md new file mode 100644 index 00000000..5fd7ec5b --- /dev/null +++ b/content/docs/object-reference/build/pip.md @@ -0,0 +1,55 @@ +# pip + +## `class PipBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `pip` + + Create a directory python package + +**Fields**: + +- `package_name: str` _(required)_ - Name of python package + +- `target: str` _(required)_ - Path to save result + +- `python_version: str` - Required python version + +- `short_description: str = ""` - short_description + +- `url: str = ""` - url + +- `email: str = ""` - author's email + +- `author: str = ""` - author's name + +- `version: str = "0.0.0"` - package version + +--- + +## `class WhlBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `whl` + + Create a wheel with python package + +**Fields**: + +- `package_name: str` _(required)_ - Name of python package + +- `target: str` _(required)_ - Path to save result + +- `python_version: str` - Required python version + +- `short_description: str = ""` - short_description + +- `url: str = ""` - url + +- `email: str = ""` - author's email + +- `author: str = ""` - author's name + +- `version: str = "0.0.0"` - package version diff --git a/content/docs/object-reference/build/requirements.md b/content/docs/object-reference/build/requirements.md new file mode 100644 index 00000000..a32bd104 --- /dev/null +++ b/content/docs/object-reference/build/requirements.md @@ -0,0 +1,15 @@ +# requirements + +## `class RequirementsBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `requirements` + + MlemBuilder implementation for building requirements + +**Fields**: + +- `target: str` - Target path for requirements + +- `req_type: str = "installable"` - Type of requirements, example: unix diff --git a/content/docs/object-reference/build/venv.md b/content/docs/object-reference/build/venv.md new file mode 100644 index 
00000000..fcc2db9f --- /dev/null +++ b/content/docs/object-reference/build/venv.md @@ -0,0 +1,36 @@ +# venv + +## `class CondaBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `conda` + + MlemBuilder implementation for building conda environments + +**Fields**: + +- `target: str = "venv"` - Name of the virtual environment + +- `python_version: str = "3.9"` - The python version to use + +- `current_env: bool = False` - Whether to install in the current conda env + +--- + +## `class VenvBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `venv` + + MlemBuilder implementation for building virtual environments + +**Fields**: + +- `target: str = "venv"` - Name of the virtual environment + +- `no_cache: bool = False` - Disable cache + +- `current_env: bool = False` - Whether to install in the current virtual env, + must be active diff --git a/content/docs/object-reference/data/builtin.md b/content/docs/object-reference/data/builtin.md new file mode 100644 index 00000000..ef7d74e3 --- /dev/null +++ b/content/docs/object-reference/data/builtin.md @@ -0,0 +1,225 @@ +# builtin + +## `class ArrayReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `array` + + Reader for lists with single element type + +**Fields**: + +- `data_type: ArrayType` _(required)_ - Resulting data type + +--- + +## `class ArrayType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `array` + + DataType for lists with elements of the same type such as [1, 2, 3, 4, + 5] + +**Fields**: + +- `dtype: DataType` _(required)_ - DataType of elements + +- `size: int` - Size of the list + +--- + +## `class ArrayWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `array` + + Writer for lists with single element type + +**No fields** + +--- + +## `class DictReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `dict` + + Reader for dicts + +**Fields**: + +- `data_type: DictType` _(required)_ - Resulting data type + 
+--- + +## `class DictType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `dict` + + DataType for dict with fixed set of keys + +**No fields** + +--- + +## `class DictWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `dict` + + Writer for dicts + +**No fields** + +--- + +## `class DynamicDictReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `d_dict` + + Read dicts without fixed set of keys + +**Fields**: + +- `data_type: DynamicDictType` _(required)_ - Resulting data type + +--- + +## `class DynamicDictType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `d_dict` + + Dynamic DataType for dict without fixed set of keys + +**Fields**: + +- `key_type: PrimitiveType` _(required)_ - DataType for key (primitive) + +- `value_type: DataType` _(required)_ - DataType for value + +--- + +## `class DynamicDictWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `d_dict` + + Write dicts without fixed set of keys + +**No fields** + +--- + +## `class ListType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `list` + + DataType for list with separate type for each element + such as [1, False, 3.2, "mlem", None] + +**No fields** + +--- + +## `class PrimitiveReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `primitive` + + Reader for primitive types + +**Fields**: + +- `data_type: PrimitiveType` _(required)_ - Resulting data type + +--- + +## `class PrimitiveType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `primitive` + + DataType for int, str, bool, complex and float types + +**Fields**: + +- `ptype: str` _(required)_ - Name of builtin type + +--- + +## `class PrimitiveWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `primitive` + + Writer for primitive types + +**No fields** + +--- + +## `class TupleType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `tuple` + + DataType for tuple type + +**No fields** + 
+--- + +## `class UnspecifiedDataType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `unspecified` + + Special data type for cases when it's not provided + +**No fields** + +--- + +## `class _TupleLikeReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `tuple_like` + + Reader for tuple-like data + +**Fields**: + +- `data_type: _TupleLikeType` _(required)_ - Resulting data type + +--- + +## `class _TupleLikeWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `tuple_like` + + Writer for tuple-like data + +**No fields** diff --git a/content/docs/object-reference/data/index.md b/content/docs/object-reference/data/index.md new file mode 100644 index 00000000..a3390da4 --- /dev/null +++ b/content/docs/object-reference/data/index.md @@ -0,0 +1,15 @@ +# Data extensions + +Converting a dataset into a MLEM data object enables methods like `mlem apply`, as +we have seen in the [User Guide](/doc/user-guide/data). + +Data extensions add support for new types of data objects that MLEM can convert +into MLEM data objects in the [`save` API method](/doc/api-reference/save). + +Typically, they will implement the +[DataType](/doc/object-reference/mlem-abcs#datatype), +[DataReader](/doc/object-reference/mlem-abcs#datareader) and +[DataWriter](/doc/object-reference/mlem-abcs#datawriter) interfaces. + +Some also implement [ImportHook](/doc/object-reference/mlem-abcs#importhook) to +support [importing](/doc/user-guide/importing) files of some format. 
diff --git a/content/docs/object-reference/data/lightgbm.md b/content/docs/object-reference/data/lightgbm.md new file mode 100644 index 00000000..ae273afa --- /dev/null +++ b/content/docs/object-reference/data/lightgbm.md @@ -0,0 +1,43 @@ +# lightgbm + +## `class LightGBMDataReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `lightgbm` + + Wrapper reader for lightgbm.Dataset objects + +**Fields**: + +- `data_type: LightGBMDataType` _(required)_ - Resulting data type + +- `inner: DataReader` _(required)_ - Inner reader + +--- + +## `class LightGBMDataType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `lightgbm` + + :class:`.DataType` implementation for `lightgbm.Dataset` type + + :param inner: :class:`.DataType` instance for underlying data + +**Fields**: + +- `inner: DataType` _(required)_ - Inner DataType + +--- + +## `class LightGBMDataWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `lightgbm` + + Wrapper writer for lightgbm.Dataset objects + +**No fields** diff --git a/content/docs/object-reference/data/numpy.md b/content/docs/object-reference/data/numpy.md new file mode 100644 index 00000000..0142298a --- /dev/null +++ b/content/docs/object-reference/data/numpy.md @@ -0,0 +1,79 @@ +# numpy + +## `class NumpyArrayReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `numpy` + + DataReader implementation for numpy ndarray + +**Fields**: + +- `data_type: DataType` _(required)_ - Resulting data type + +--- + +## `class NumpyArrayWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `numpy` + + DataWriter implementation for numpy ndarray + +**No fields** + +--- + +## `class NumpyNdarrayType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `ndarray` + + DataType implementation for `np.ndarray` + +**Fields**: + +- `dtype: str` _(required)_ - Data type of elements + +--- + +## `class NumpyNumberReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: 
`numpy_number` + + Read np.number objects + +**Fields**: + +- `data_type: NumpyNumberType` _(required)_ - Resulting data type + +--- + +## `class NumpyNumberType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `number` + + numpy.number DataType + +**Fields**: + +- `dtype: str` _(required)_ - `numpy.number` type name as string + +--- + +## `class NumpyNumberWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `numpy_number` + + Write np.number objects + +**No fields** diff --git a/content/docs/object-reference/data/pandas.md b/content/docs/object-reference/data/pandas.md new file mode 100644 index 00000000..d5631ec2 --- /dev/null +++ b/content/docs/object-reference/data/pandas.md @@ -0,0 +1,84 @@ +# pandas + +## `class DataFrameType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `dataframe` + + :class:`.DataType` implementation for `pandas.DataFrame` + +**No fields** + +--- + +## `class PandasReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `pandas` + + DataReader for pandas dataframes + +**Fields**: + +- `data_type: DataFrameType` _(required)_ - Resulting data type + +- `format: str` _(required)_ - name of pandas-supported format + +--- + +## `class PandasSeriesReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `pandas_series` + + DataReader for pandas series + +**Fields**: + +- `data_type: SeriesType` _(required)_ - Resulting data type + +- `format: str` _(required)_ - name of pandas-supported format + +--- + +## `class PandasSeriesWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `pandas_series` + + DataWriter for pandas series + +**Fields**: + +- `format: str` _(required)_ - name of pandas-supported format + +--- + +## `class PandasWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `pandas` + + DataWriter for pandas dataframes + +**Fields**: + +- `format: str` _(required)_ - name of pandas-supported format + +--- + +## `class 
SeriesType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `series` + + :class:`.DataType` implementation for `pandas.Series` objects which + stores them as built-in Python dicts + +**No fields** diff --git a/content/docs/object-reference/data/tensorflow.md b/content/docs/object-reference/data/tensorflow.md new file mode 100644 index 00000000..c5b188ef --- /dev/null +++ b/content/docs/object-reference/data/tensorflow.md @@ -0,0 +1,39 @@ +# tensorflow + +## `class TFTensorDataType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `tf_tensor` + + DataType implementation for `tensorflow.Tensor` + +**Fields**: + +- `dtype: str` _(required)_ - Data type of `tensorflow.Tensor` objects in data + +--- + +## `class TFTensorReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `tf_tensor` + + Read tensorflow tensors from np format + +**Fields**: + +- `data_type: DataType` _(required)_ - Resulting data type + +--- + +## `class TFTensorWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `tf_tensor` + + Write tensorflow tensors to np format + +**No fields** diff --git a/content/docs/object-reference/data/torch.md b/content/docs/object-reference/data/torch.md new file mode 100644 index 00000000..2b4d777c --- /dev/null +++ b/content/docs/object-reference/data/torch.md @@ -0,0 +1,39 @@ +# torch + +## `class TorchTensorDataType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `torch` + + DataType implementation for `torch.Tensor` + +**Fields**: + +- `dtype: str` _(required)_ - Type name of `torch.Tensor` elements + +--- + +## `class TorchTensorReader` + +**MlemABC parent type**: `data_reader` + +**MlemABC type**: `torch` + + Read torch tensors + +**Fields**: + +- `data_type: DataType` _(required)_ - Resulting data type + +--- + +## `class TorchTensorWriter` + +**MlemABC parent type**: `data_writer` + +**MlemABC type**: `torch` + + Write torch tensors + +**No fields** diff --git 
a/content/docs/object-reference/data/xgboost.md b/content/docs/object-reference/data/xgboost.md new file mode 100644 index 00000000..28f4ade1 --- /dev/null +++ b/content/docs/object-reference/data/xgboost.md @@ -0,0 +1,14 @@ +# xgboost + +## `class DMatrixDataType` + +**MlemABC parent type**: `data_type` + +**MlemABC type**: `xgboost_dmatrix` + + DataType implementation for xgboost.DMatrix type + +**Fields**: + +- `is_from_list: bool` _(required)_ - Whether DMatrix can be constructed from + list diff --git a/content/docs/object-reference/deployment/docker.md b/content/docs/object-reference/deployment/docker.md new file mode 100644 index 00000000..1adcaefb --- /dev/null +++ b/content/docs/object-reference/deployment/docker.md @@ -0,0 +1,90 @@ +# docker + +## `class DockerContainer` + +**MlemABC parent type**: `deployment` + +**MlemABC type**: `docker_container` + + MlemDeployment implementation for docker containers + +**Fields**: + +- `server: Server` - Server to use + +- `args: DockerBuildArgs = DockerBuildArgs()` - Additional docker arguments + +- `state_manager: StateManager` - State manager used + +- `container_name: str` - Name to use for container + +- `image_name: str` - Name to use for image + +- `rm: bool = True` - Remove container on stop + +--- + +## `class DockerContainerState` + +**MlemABC parent type**: `deploy_state` + +**MlemABC type**: `docker_container` + + State of docker container deployment + +**Fields**: + +- `declaration: MlemDeployment` _(required)_ - Deployment declaration used + +- `model_hash: str` - Hash of deployed model meta + +- `model_link: TypedMlemLink` - Link to deployed model + +- `image: DockerImage` - Built image + +- `container_name: str` - Name of container + +- `container_id: str` - Started container id + +--- + +## `class DockerEnv` + +**MlemABC parent type**: `env` + +**MlemABC type**: `docker` + + MlemEnv implementation for docker environment + +**Fields**: + +- `registry: DockerRegistry = DockerRegistry()` - Default 
registry to push + images to + +- `daemon: DockerDaemon = DockerDaemon()` - Docker daemon parameters + +--- + +## `class DockerBuildArgs` + + Container for DockerBuild arguments + +**Fields**: + +- `python_version: str = "3.10.5"` - Python version to use default: version of + running interpreter + +- `run_cmd: str = "sh run.sh"` - command to run in container + +- `package_install_cmd: str = "apt-get update && apt-get -y upgrade && apt-get install --no-install-recommends -y"` - + command to install packages. Default is apt-get, change it for other package + manager + +- `package_clean_cmd: str = "&& apt-get clean && rm -rf /var/lib/apt/lists/*"` - + command to clean after package installation + +- `mlem_whl: str` - a path to mlem .whl file. If it is empty, mlem will be + installed from pip + +- `platform: str` - platform to build docker for, see + docs.docker.com/desktop/multi-arch/ diff --git a/content/docs/object-reference/deployment/heroku.md b/content/docs/object-reference/deployment/heroku.md new file mode 100644 index 00000000..0dacc4bc --- /dev/null +++ b/content/docs/object-reference/deployment/heroku.md @@ -0,0 +1,58 @@ +# heroku + +## `class HerokuDeployment` + +**MlemABC parent type**: `deployment` + +**MlemABC type**: `heroku` + + Heroku App + +**Fields**: + +- `app_name: str` _(required)_ - Heroku application name + +- `state_manager: StateManager` - State manager used + +- `region: str = "us"` - Heroku region + +- `stack: str = "container"` - Stack to use + +- `team: str` - Heroku team + +--- + +## `class HerokuEnv` + +**MlemABC parent type**: `env` + +**MlemABC type**: `heroku` + + Heroku Account + +**Fields**: + +- `api_key: str` - HEROKU_API_KEY - advised to set via env variable or + `heroku login` + +--- + +## `class HerokuState` + +**MlemABC parent type**: `deploy_state` + +**MlemABC type**: `heroku` + + State of heroku deployment + +**Fields**: + +- `declaration: MlemDeployment` _(required)_ - Deployment declaration used + +- `model_hash: str` - 
Hash of deployed model meta + +- `model_link: TypedMlemLink` - Link to deployed model + +- `app: HerokuAppMeta` - Created heroku app + +- `image: DockerImage` - Built docker image diff --git a/content/docs/object-reference/deployment/index.md b/content/docs/object-reference/deployment/index.md new file mode 100644 index 00000000..f642cbee --- /dev/null +++ b/content/docs/object-reference/deployment/index.md @@ -0,0 +1,16 @@ +# Deployment extensions + +`mlem deploy` functionality is used to deploy models to target platforms like +Heroku, Kubernetes or AWS Sagemaker. See [User Guide](/doc/user-guide/deploying) +for more details on how this works. + +Deployment extensions add support for new target platforms to deploy your models +to. They are used with [`deploy` API method](/doc/api-reference/deploy) and +[deployment CLI commands](/doc/command-reference/deployment). + +Typically they will implement [MlemEnv](/doc/object-reference/mlem-abcs#mlemenv), +[MlemDeployment](/doc/object-reference/mlem-abcs#mlemdeployment) and +[DeployState](/doc/object-reference/mlem-abcs#deploystate) interfaces. + +Some also implement specific [Server](/doc/object-reference/mlem-abcs#server) or +[Builder](/doc/object-reference/mlem-abcs#builder) interfaces. 
diff --git a/content/docs/object-reference/deployment/kubernetes.md b/content/docs/object-reference/deployment/kubernetes.md new file mode 100644 index 00000000..2345661d --- /dev/null +++ b/content/docs/object-reference/deployment/kubernetes.md @@ -0,0 +1,73 @@ +# kubernetes + +## `class K8sDeployment` + +**MlemABC parent type**: `deployment` + +**MlemABC type**: `kubernetes` + + MlemDeployment implementation for Kubernetes deployments + +**Fields**: + +- `namespace: str = "mlem"` - Namespace to create kubernetes resources such as + pods, service in + +- `image_name: str = "ml"` - Name of the docker image to be deployed + +- `image_uri: str = "ml:latest"` - URI of the docker image to be deployed + +- `image_pull_policy: ImagePullPolicy = "Always"` - Image pull policy for the + docker image to be deployed + +- `port: int = 8080` - Port where the service should be available + +- `service_type: ServiceType = NodePortService()` - Type of service by which + endpoints of the model are exposed + +- `state_manager: StateManager` - State manager used + +- `server: Server` - Type of Server to use, with options such as FastAPI, + RabbitMQ etc. 
+ +- `registry: DockerRegistry = DockerRegistry()` - Docker registry + +- `daemon: DockerDaemon = DockerDaemon()` - Docker daemon + +- `kube_config_file_path: str` - Path for kube config file of the cluster + +--- + +## `class K8sDeploymentState` + +**MlemABC parent type**: `deploy_state` + +**MlemABC type**: `kubernetes` + + DeployState implementation for Kubernetes deployments + +**Fields**: + +- `declaration: MlemDeployment` _(required)_ - Deployment declaration used + +- `model_hash: str` - Hash of deployed model meta + +- `model_link: TypedMlemLink` - Link to deployed model + +- `image: DockerImage` - Docker Image being used for Deployment + +- `deployment_name: str` - Name of Deployment + +--- + +## `class K8sEnv` + +**MlemABC parent type**: `env` + +**MlemABC type**: `kubernetes` + + MlemEnv implementation for Kubernetes Environments + +**Fields**: + +- `registry: DockerRegistry` - Docker registry diff --git a/content/docs/object-reference/deployment/sagemaker.md b/content/docs/object-reference/deployment/sagemaker.md new file mode 100644 index 00000000..6b1c37ed --- /dev/null +++ b/content/docs/object-reference/deployment/sagemaker.md @@ -0,0 +1,92 @@ +# sagemaker + +## `class SagemakerDeployState` + +**MlemABC parent type**: `deploy_state` + +**MlemABC type**: `sagemaker` + + State of SageMaker deployment + +**Fields**: + +- `declaration: MlemDeployment` _(required)_ - Deployment declaration used + +- `model_hash: str` - Hash of deployed model meta + +- `model_link: TypedMlemLink` - Link to deployed model + +- `image: DockerImage` - Built image + +- `image_tag: str` - Built image tag + +- `model_location: str` - Location of uploaded model + +- `endpoint_name: str` - Name of SageMaker endpoint + +- `endpoint_model_hash: str` - Hash of deployed model + +- `method_signature: Signature` - Signature of deployed method + +- `region: str` - AWS Region + +- `previous: SagemakerDeployState` - Previous state + +--- + +## `class SagemakerDeployment` + +**MlemABC 
parent type**: `deployment` + +**MlemABC type**: `sagemaker` + + SageMaker Deployment + +**Fields**: + +- `state_manager: StateManager` - State manager used + +- `method: str = "predict"` - Model method to be deployed + +- `image_tag: str` - Name of the docker image to use + +- `use_prebuilt: bool = False` - Use pre-built docker image. If True, image_tag + should be set + +- `model_arch_location: str` - Path on s3 to store model archive (excluding + bucket) + +- `model_name: str` - Name for SageMaker Model + +- `endpoint_name: str` - Name for SageMaker Endpoint + +- `initial_instance_count: int = 1` - Initial instance count for Endpoint + +- `instance_type: str = "ml.t2.medium"` - Instance type for Endpoint + +- `accelerator_type: str` - The size of the Elastic Inference (EI) instance to + use + +--- + +## `class SagemakerEnv` + +**MlemABC parent type**: `env` + +**MlemABC type**: `sagemaker` + + SageMaker environment + +**Fields**: + +- `role: str` - Default role + +- `account: str` - Default account + +- `region: str` - Default region + +- `bucket: str` - Default bucket + +- `profile: str` - Default profile + +- `ecr_repository: str` - Default ECR repository diff --git a/content/docs/user-guide/extending.md b/content/docs/object-reference/extending.md similarity index 87% rename from content/docs/user-guide/extending.md rename to content/docs/object-reference/extending.md index 23ca1058..02d39fe9 100644 --- a/content/docs/user-guide/extending.md +++ b/content/docs/object-reference/extending.md @@ -1,10 +1,10 @@ # Extending MLEM can be extended to support more model types, data types, servers, builders -and basically everything listed [here](/doc/user-guide/mlem-abcs). Most of the -builtin implementations are also extensions located in `mlem.contrib` package. -It allows MLEM to not load their code if it is not used, which is especially -cool because it means their requirements are optional. +and basically everything listed [here](/doc/object-reference/mlem-abcs). 
Most of +the builtin implementations are also extensions located in `mlem.contrib` +package. It allows MLEM to not load their code if it is not used, which is +especially cool because it means their requirements are optional. ## Implementing MlemABC @@ -80,7 +80,7 @@ that you can subclass `MlemConfig` class and list your options there just like any `pydantic` [BaseSettings](https://pydantic-docs.helpmanual.io/usage/settings/) class. In the inner `Config` class you should set `section` option, and after that values -for your configuration will be loaded from `.mlem/config.yaml` from -corresponding section. See +for your configuration will be loaded from `.mlem.yaml` from corresponding +section. See [`PandasConfig`](https://github.com/iterative/mlem/blob/main/mlem/contrib/pandas.py) for example diff --git a/content/docs/object-reference/index.md b/content/docs/object-reference/index.md new file mode 100644 index 00000000..80cd73ec --- /dev/null +++ b/content/docs/object-reference/index.md @@ -0,0 +1,18 @@ +# Object reference + +This section is for you if you want to see the detailed reference for +configuring MLEM Objects (such as +[what parameters are needed to configure a Kubernetes Deployment](/doc/object-reference/deployment/kubernetes)), +or if you're looking to support new functionality (to contribute back to MLEM or +just for your own use) and looking out for examples, such as: + +- adding new [ML framework](/doc/object-reference/model), +- supporting new [data format](/doc/object-reference/data), +- adding another [building target for a model](/doc/object-reference/build), +- implementing new [server to serve a model](/doc/object-reference/serving), +- supporting new [deployment platform](/doc/object-reference/deployment), +- [resolving of URIs](/doc/object-reference/uri) of some special format, or +- [new storage type](/doc/object-reference/storage) to save MLEM objects to. 
+ +You can learn more about this on the +[Extending MLEM](/doc/object-reference/extending) page. diff --git a/content/docs/user-guide/mlem-abcs.md b/content/docs/object-reference/mlem-abcs.md similarity index 100% rename from content/docs/user-guide/mlem-abcs.md rename to content/docs/object-reference/mlem-abcs.md diff --git a/content/docs/object-reference/mlem-objects.md b/content/docs/object-reference/mlem-objects.md new file mode 100644 index 00000000..2a84e894 --- /dev/null +++ b/content/docs/object-reference/mlem-objects.md @@ -0,0 +1,93 @@ +# MLEM Objects + +
+ +### Implementation details + +From a developer's perspective, MLEM Objects are instances of one of the +subclasses of `MlemObject` class. MLEM is using extended +[pydantic](https://pydantic-docs.helpmanual.io/) functionality to save and load +them from files. + +You can get `MlemObject` instance if you use `load_meta` API method instead of +simple `load`. + +See also [MLEM Object API](/doc/api-reference/mlem-object) + +
+ +## Common fields + +Each MLEM Object has an `object_type` field which determines the type of the +object. Specific types may have additional properties, but all MLEM Objects have +the following fields: + +- `params` - arbitrary object with additional parameters +- `location` - if the object is loaded, information about where it came from + +You can check out what methods MLEM Objects have in +[API Reference](/doc/api-reference/mlem-object) + +## MLEM Object Types + +Here are all the builtin MLEM Object types + +Model and Data are special types that can have artifacts, so they have two +additional fields: + +- `artifacts` - a string-to-artifacts mapping, where artifact is an instance of + [`Artifact`](/doc/object-reference/mlem-abcs#artifact) which represents a file + stored somewhere (local/cloud/dvc cache etc) +- `requirements` - a list of + [`Requirement`](/doc/object-reference/mlem-abcs#requirement) which are needed + to use that object in runtime + +### Model + +Represents an ML model, but can be generalized to any model or even any +"function" or any "transformation", thanks to `callable` +[ModelType](/doc/object-reference/mlem-abcs#modeltype). + +**Base class**: `mlem.core.objects.MlemModel` + +**Fields** (in addition to inherited): + +- `model_type` (_lazy_) - + [ModelType](/doc/object-reference/mlem-abcs#modeltype), which is polymorphic + and holds metadata about model's framework, methods and io. + +### Data + +Represents data, which can be used as an input to one of Model's methods. + +**Base class**: `mlem.core.objects.MlemData` + +**Fields** (in addition to inherited): + +- `reader` (_lazy_) - [DataReader](/doc/object-reference/mlem-abcs#datareader) - + how to read saved files and resulting dataset metadata +- `data_type` (_transient_) - + [`DataType`](/doc/object-reference/mlem-abcs#datatype) with dataset value and + metadata (available once data is read) + +### Link + +Represents a link (pointer) to another MLEM Object. 
More on that +[here](/doc/user-guide/linking) + +**Base class**: `mlem.core.objects.MlemLink` + +**Fields** (in addition to inherited): + +- `path` - path to MLEM Object +- `project` - location of MLEM Project with referenced object +- `rev` - revision of the object +- `link_type` - type of the referenced object + +### Other types + +Some of the `MLEM ABCs` are also MLEM Objects. + +- [Builder](/doc/object-reference/mlem-abcs#builder) +- [Target Environment](/doc/object-reference/mlem-abcs#mlemenv) +- [Deployment](/doc/object-reference/mlem-abcs#mlemdeployment) diff --git a/content/docs/object-reference/model/builtin.md b/content/docs/object-reference/model/builtin.md new file mode 100644 index 00000000..884d3702 --- /dev/null +++ b/content/docs/object-reference/model/builtin.md @@ -0,0 +1,11 @@ +# builtin + +## `class SimplePickleIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `simple_pickle` + + IO with simple pickling of python model object + +**No fields** diff --git a/content/docs/object-reference/model/callable.md b/content/docs/object-reference/model/callable.md new file mode 100644 index 00000000..7ba74ed2 --- /dev/null +++ b/content/docs/object-reference/model/callable.md @@ -0,0 +1,30 @@ +# callable + +## `class CallableModelType` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `callable` + + ModelType implementation for arbitrary callables + +**Fields**: + +- `io: ModelIO` _(required)_ - Model IO + +--- + +## `class PickleModelIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `pickle` + + ModelIO for pickle-able models + When model is dumped, recursively checks objects if they can be dumped + with ModelIO instead of pickling + So, if you use function that internally calls tensorflow model, this + tensorflow model will be dumped with + tensorflow code and not pickled + +**No fields** diff --git a/content/docs/object-reference/model/catboost.md b/content/docs/object-reference/model/catboost.md new file mode 
100644 index 00000000..efa940dd --- /dev/null +++ b/content/docs/object-reference/model/catboost.md @@ -0,0 +1,29 @@ +# catboost + +## `class CatBoostModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `catboost` + + :class:`mlem.core.model.ModelType` for CatBoost models. + `.model` attribute is a `catboost.CatBoostClassifier` or + `catboost.CatBoostRegressor` instance + +**Fields**: + +- `io: ModelIO = CatBoostModelIO()` - Model IO + +--- + +## `class CatBoostModelIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `catboost_io` + + :class:`mlem.core.model.ModelIO` for CatBoost models. + +**Fields**: + +- `model_type: CBType = "reg"` - Type of catboost model diff --git a/content/docs/object-reference/model/index.md b/content/docs/object-reference/model/index.md new file mode 100644 index 00000000..a43e1193 --- /dev/null +++ b/content/docs/object-reference/model/index.md @@ -0,0 +1,18 @@ +# Model extensions + +Converting a model into MLEM model enables all kinds of productionization +scenarios as we have seen in [Get Started](/doc/get-started). + +Model extensions add support for new ML frameworks that MLEM can recognize when +you call [`save` API method](/doc/api-reference/save) or pass a ML model to +`mlem.api` methods. Although MLEM can save a model from unknown ML framework +using `pickle`, a special extension for that framework will allow to save the +model in framework-specific format and support special features of that +framework. + +Typically a model extension will implement +[ModelType](/doc/object-reference/mlem-abcs#modeltype) and +[ModelIO](/doc/object-reference/mlem-abcs#modelio) interfaces. + +Some also implement [DataType](/doc/object-reference/mlem-abcs#datatype) +interface if specific data objects are needed for model to work. 
diff --git a/content/docs/object-reference/model/lightgbm.md b/content/docs/object-reference/model/lightgbm.md new file mode 100644 index 00000000..b2e89552 --- /dev/null +++ b/content/docs/object-reference/model/lightgbm.md @@ -0,0 +1,27 @@ +# lightgbm + +## `class LightGBMModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `lightgbm` + + :class:`.ModelType` implementation for `lightgbm.Booster` type + +**Fields**: + +- `io: ModelIO = LightGBMModelIO()` - LightGBMModelIO + +--- + +## `class LightGBMModelIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `lightgbm_io` + + :class:`.ModelIO` implementation for `lightgbm.Booster` type + +**Fields**: + +- `model_file_name: str = "model.lgb"` - Filename to use diff --git a/content/docs/object-reference/model/onnx.md b/content/docs/object-reference/model/onnx.md new file mode 100644 index 00000000..917ec842 --- /dev/null +++ b/content/docs/object-reference/model/onnx.md @@ -0,0 +1,25 @@ +# onnx + +## `class ModelProtoIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `model_proto` + + IO for ONNX model object + +**No fields** + +--- + +## `class ONNXModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `onnx` + + :class:`mlem.core.model.ModelType` implementation for `onnx` models + +**Fields**: + +- `io: ModelIO = ModelProtoIO()` - Model IO diff --git a/content/docs/object-reference/model/sklearn.md b/content/docs/object-reference/model/sklearn.md new file mode 100644 index 00000000..d2534c5b --- /dev/null +++ b/content/docs/object-reference/model/sklearn.md @@ -0,0 +1,27 @@ +# sklearn + +## `class SklearnModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `sklearn` + + ModelType implementation for `scikit-learn` models + +**Fields**: + +- `io: ModelIO = SimplePickleIO()` - IO + +--- + +## `class SklearnPipelineType` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `sklearn_pipeline` + + ModelType implementation for `scikit-learn` 
pipelines + +**Fields**: + +- `io: ModelIO = SimplePickleIO()` - IO diff --git a/content/docs/object-reference/model/tensorflow.md b/content/docs/object-reference/model/tensorflow.md new file mode 100644 index 00000000..841358d5 --- /dev/null +++ b/content/docs/object-reference/model/tensorflow.md @@ -0,0 +1,28 @@ +# tensorflow + +## `class TFKerasModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `tf_keras` + + :class:`.ModelType` implementation for Tensorflow Keras models + +**Fields**: + +- `io: ModelIO = TFKerasModelIO()` - IO + +--- + +## `class TFKerasModelIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `tf_keras` + + IO for Tensorflow Keras models (:class:`tensorflow.keras.Model` + objects) + +**Fields**: + +- `save_format: str` - `tf` for custom net classes and `h5` otherwise diff --git a/content/docs/object-reference/model/torch.md b/content/docs/object-reference/model/torch.md new file mode 100644 index 00000000..ea418769 --- /dev/null +++ b/content/docs/object-reference/model/torch.md @@ -0,0 +1,27 @@ +# torch + +## `class TorchModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `torch` + + :class:`.ModelType` implementation for PyTorch models + +**Fields**: + +- `io: ModelIO = TorchModelIO()` - TorchModelIO + +--- + +## `class TorchModelIO` + +**MlemABC parent type**: `model_io` + +**MlemABC type**: `torch_io` + + IO for PyTorch models + +**Fields**: + +- `is_jit: bool = False` - Is model jit compiled diff --git a/content/docs/object-reference/model/xgboost.md b/content/docs/object-reference/model/xgboost.md new file mode 100644 index 00000000..0d39e7d1 --- /dev/null +++ b/content/docs/object-reference/model/xgboost.md @@ -0,0 +1,27 @@ +# xgboost + +## `class XGBoostModel` + +**MlemABC parent type**: `model_type` + +**MlemABC type**: `xgboost` + + :class:`~.ModelType` implementation for XGBoost models + +**Fields**: + +- `io: ModelIO = XGBoostModelIO()` - Model IO + +--- + +## `class XGBoostModelIO` + 
+**MlemABC parent type**: `model_io` + +**MlemABC type**: `xgboost_io` + + :class:`~.ModelIO` implementation for XGBoost models + +**Fields**: + +- `model_file_name: str = "model.xgb"` - Filename to use diff --git a/content/docs/object-reference/serving/builtin.md b/content/docs/object-reference/serving/builtin.md new file mode 100644 index 00000000..a9446e61 --- /dev/null +++ b/content/docs/object-reference/serving/builtin.md @@ -0,0 +1,53 @@ +# builtin + +## `class HTTPClient` + +**MlemABC parent type**: `client` + +**MlemABC type**: `http` + + Access models served with http-based servers + +**Fields**: + +- `host: str = "0.0.0.0"` - Server host + +- `port: int = 8080` - Server port + +--- + +## `class ModelInterface` + +**MlemABC parent type**: `interface` + +**MlemABC type**: `model` + + Interface that describes model methods + +**No fields** + +--- + +## `class SimpleInterface` + +**MlemABC parent type**: `interface` + +**MlemABC type**: `simple` + + Interface that exposes its own methods that are marked with `expose` + decorator + +**Fields**: + +- `methods: InterfaceDescriptor = InterfaceDescriptor()` - Interface version and + methods + +--- + +## `class InterfaceDescriptor` + + Class docstring missing + +**Fields**: + +- `version: str = "0.2.9.dev16+gb7b6879"` - mlem version diff --git a/content/docs/object-reference/serving/fastapi.md b/content/docs/object-reference/serving/fastapi.md new file mode 100644 index 00000000..1a4952e2 --- /dev/null +++ b/content/docs/object-reference/serving/fastapi.md @@ -0,0 +1,15 @@ +# fastapi + +## `class FastAPIServer` + +**MlemABC parent type**: `server` + +**MlemABC type**: `fastapi` + + Serves model with http + +**Fields**: + +- `host: str = "0.0.0.0"` - Network interface to use + +- `port: int = 8080` - Port to use diff --git a/content/docs/object-reference/serving/index.md b/content/docs/object-reference/serving/index.md new file mode 100644 index 00000000..b2345791 --- /dev/null +++ 
b/content/docs/object-reference/serving/index.md @@ -0,0 +1,12 @@ +# Serving extensions + +`mlem serve` functionality is used to spin up something that can serve models, +like a FastAPI server or RabbitMQ producer/consumer. See +[User Guide](/doc/user-guide/serving) for more details on how this works. + +Serving extensions add new types of servers to use with `serve` +[API](/doc/api-reference/serve) and [CLI](/doc/command-reference/serve) +commands. + +Typically they will implement [Server](/doc/object-reference/mlem-abcs#server) +and [Client](/doc/object-reference/mlem-abcs#client) interfaces. diff --git a/content/docs/object-reference/serving/rabbitmq.md b/content/docs/object-reference/serving/rabbitmq.md new file mode 100644 index 00000000..fe475012 --- /dev/null +++ b/content/docs/object-reference/serving/rabbitmq.md @@ -0,0 +1,42 @@ +# rabbitmq + +## `class RabbitMQClient` + +**MlemABC parent type**: `client` + +**MlemABC type**: `rmq` + + Access models served with rmq server + +**Fields**: + +- `host: str` _(required)_ - Host of RMQ instance + +- `port: int` _(required)_ - Port of RMQ instance + +- `exchange: str = ""` - RMQ exchange to use + +- `queue_prefix: str = ""` - Queue prefix + +- `timeout: float = 0` - Time to wait for response. 
0 means indefinite + +--- + +## `class RabbitMQServer` + +**MlemABC parent type**: `server` + +**MlemABC type**: `rmq` + + RMQ server that consumes requests and produces model predictions + from/to RMQ instance + +**Fields**: + +- `host: str` _(required)_ - Host of RMQ instance + +- `port: int` _(required)_ - Port of RMQ instance + +- `exchange: str = ""` - RMQ exchange to use + +- `queue_prefix: str = ""` - Queue prefix diff --git a/content/docs/object-reference/serving/sagemaker.md b/content/docs/object-reference/serving/sagemaker.md new file mode 100644 index 00000000..693b4ce4 --- /dev/null +++ b/content/docs/object-reference/serving/sagemaker.md @@ -0,0 +1,17 @@ +# sagemaker + +## `class SagemakerClient` + +**MlemABC parent type**: `client` + +**MlemABC type**: `sagemaker` + + Client to make SageMaker requests + +**Fields**: + +- `endpoint_name: str` _(required)_ - Name of SageMaker Endpoint + +- `aws_vars: AWSVars` _(required)_ - AWS Configuration + +- `signature: Signature` _(required)_ - Signature of deployed method diff --git a/content/docs/object-reference/storage/builtin.md b/content/docs/object-reference/storage/builtin.md new file mode 100644 index 00000000..45cdcb77 --- /dev/null +++ b/content/docs/object-reference/storage/builtin.md @@ -0,0 +1,63 @@ +# builtin + +## `class FSSpecArtifact` + +**MlemABC parent type**: `artifact` + +**MlemABC type**: `fsspec` + + Represents a file stored in an fsspec filesystem + +**Fields**: + +- `uri: str` _(required)_ - Path to file + +- `size: int` _(required)_ - size in bytes + +- `hash: str` _(required)_ - md5 hash + +--- + +## `class FSSpecStorage` + +**MlemABC parent type**: `storage` + +**MlemABC type**: `fsspec` + + Represents an fsspec filesystem + +**Fields**: + +- `uri: str` _(required)_ - Path to storage dir + +--- + +## `class LocalArtifact` + +**MlemABC parent type**: `artifact` + +**MlemABC type**: `local` + + Special case for local file + +**Fields**: + +- `uri: str` _(required)_ - Path to file + +- 
`size: int` _(required)_ - size in bytes + +- `hash: str` _(required)_ - md5 hash + +--- + +## `class LocalStorage` + +**MlemABC parent type**: `storage` + +**MlemABC type**: `local` + + Special case for local filesystem + +**Fields**: + +- `uri: str` _(required)_ - Path to storage dir diff --git a/content/docs/object-reference/storage/dvc.md b/content/docs/object-reference/storage/dvc.md new file mode 100644 index 00000000..313e12b1 --- /dev/null +++ b/content/docs/object-reference/storage/dvc.md @@ -0,0 +1,32 @@ +# dvc + +## `class DVCArtifact` + +**MlemABC parent type**: `artifact` + +**MlemABC type**: `dvc` + + Local artifact that can also be read from DVC cache + +**Fields**: + +- `uri: str` _(required)_ - Local path to file + +- `size: int` _(required)_ - size in bytes + +- `hash: str` _(required)_ - md5 hash + +--- + +## `class DVCStorage` + +**MlemABC parent type**: `storage` + +**MlemABC type**: `dvc` + + User-managed dvc storage, which means user should + track corresponding files with dvc manually. + +**Fields**: + +- `uri: str = ""` - Base storage path diff --git a/content/docs/object-reference/storage/index.md b/content/docs/object-reference/storage/index.md new file mode 100644 index 00000000..2cd92c99 --- /dev/null +++ b/content/docs/object-reference/storage/index.md @@ -0,0 +1,7 @@ +# Storage extensions + +Storage extensions add support for new types of location that MLEM can use to +store artifacts in [`save` API method](/doc/api-reference/save). + +Typically they will implement [Storage](/doc/object-reference/mlem-abcs#storage) +and [Artifact](/doc/object-reference/mlem-abcs#artifact) interfaces. 
diff --git a/content/docs/object-reference/uri/bitbucketfs.md b/content/docs/object-reference/uri/bitbucketfs.md new file mode 100644 index 00000000..8bf92e2f --- /dev/null +++ b/content/docs/object-reference/uri/bitbucketfs.md @@ -0,0 +1,11 @@ +# bitbucketfs + +## `class BitBucketResolver` + +**MlemABC parent type**: `resolver` + +**MlemABC type**: `bitbucket` + + Resolve bitbucket URIs + +**No fields** diff --git a/content/docs/object-reference/uri/builtin.md b/content/docs/object-reference/uri/builtin.md new file mode 100644 index 00000000..c878dfea --- /dev/null +++ b/content/docs/object-reference/uri/builtin.md @@ -0,0 +1,11 @@ +# builtin + +## `class FSSpecResolver` + +**MlemABC parent type**: `resolver` + +**MlemABC type**: `fsspec` + + Resolve different fsspec URIs + +**No fields** diff --git a/content/docs/object-reference/uri/github.md b/content/docs/object-reference/uri/github.md new file mode 100644 index 00000000..3b178eb2 --- /dev/null +++ b/content/docs/object-reference/uri/github.md @@ -0,0 +1,11 @@ +# github + +## `class GithubResolver` + +**MlemABC parent type**: `resolver` + +**MlemABC type**: `github` + + Resolve https://github.com URLs + +**No fields** diff --git a/content/docs/object-reference/uri/gitlabfs.md b/content/docs/object-reference/uri/gitlabfs.md new file mode 100644 index 00000000..3520cb71 --- /dev/null +++ b/content/docs/object-reference/uri/gitlabfs.md @@ -0,0 +1,11 @@ +# gitlabfs + +## `class GitlabResolver` + +**MlemABC parent type**: `resolver` + +**MlemABC type**: `gitlab` + + Resolve https://gitlab.com URIs + +**No fields** diff --git a/content/docs/object-reference/uri/index.md b/content/docs/object-reference/uri/index.md new file mode 100644 index 00000000..413fcaaa --- /dev/null +++ b/content/docs/object-reference/uri/index.md @@ -0,0 +1,10 @@ +# URI Resolver extensions + +MLEM CLI commands and API methods can work with +[MLEM objects](/doc/user-guide/basic-concepts) whether they are local or 
+[remote](/doc/user-guide/remote-objects). URI Resolver extensions add support +for different URI patterns that MLEM will understand whenever you reference any +MLEM object or project. + +Typicaly they will implement +[URIResolver](/doc/object-reference/mlem-abcs#uriresolver) interface. diff --git a/content/docs/sidebar.json b/content/docs/sidebar.json index e54e9b12..b14a994d 100644 --- a/content/docs/sidebar.json +++ b/content/docs/sidebar.json @@ -15,26 +15,21 @@ "label": "Get Started", "source": "get-started/index.md", "children": [ - { - "slug": "saving", - "label": "Saving models", - "source": "saving.md" - }, { "slug": "applying", "label": "Applying models", "source": "applying.md" }, - { - "slug": "building", - "label": "Exporting models (building)", - "source": "building.md" - }, { "slug": "serving", "label": "Serving models", "source": "serving.md" }, + { + "slug": "building", + "label": "Building models", + "source": "building.md" + }, { "slug": "deploying", "label": "Deploying models", @@ -47,25 +42,22 @@ "label": "Use Cases", "source": "use-cases/index.md", "children": [ - { - "slug": "dvc", - "label": "Versioning MLEM objects with DVC", - "source": "dvc.md" - }, - { - "slug": "mlem-mr", - "label": "Pure Mlem Model Registry", - "source": "mlem-mr.md" - }, { "slug": "cicd", - "label": "Using in CI/CD", + "label": "Deploying and publishing models in CI/CD", "source": "cicd.md" }, { "slug": "model-registry", "label": "Model Registry", - "source": "model-registry.md" + "source": "model-registry/index.md", + "children": [ + { + "slug": "mlem-mr", + "label": "Pure Mlem Model Registry", + "source": "mlem-mr.md" + } + ] } ] }, @@ -80,44 +72,123 @@ "source": "basic-concepts.md" }, { - "slug": "project-structure", - "label": "Project structure", - "source": "project-structure.md" + "slug": "models", + "label": "Working with models", + "source": "models/index.md", + "children": [] + }, + { + "slug": "serving", + "label": "Serving models", + "source": 
"serving/index.md", + "children": [ + { + "slug": "fastapi", + "label": "FastAPI", + "source": "fastapi.md" + }, + { + "slug": "rabbitmq", + "label": "RabbitMQ", + "source": "rabbitmq.md" + } + ] + }, + { + "slug": "building", + "label": "Building models", + "source": "building/index.md", + "children": [ + { + "slug": "conda", + "label": "Conda Environment", + "source": "conda.md" + }, + { + "slug": "docker", + "label": "Docker", + "source": "docker.md" + }, + { + "slug": "pip", + "label": "Python Packages", + "source": "pip.md" + }, + { + "slug": "requirements", + "label": "Requirements", + "source": "requirements.md" + }, + { + "slug": "venv", + "label": "Virtual Environment", + "source": "venv.md" + } + ] + }, + { + "slug": "deploying", + "label": "Deploying models", + "source": "deploying/index.md", + "children": [ + { + "slug": "heroku", + "label": "Heroku", + "source": "heroku.md" + }, + { + "slug": "docker", + "label": "Docker", + "source": "docker.md" + }, + { + "slug": "kubernetes", + "label": "Kubernetes", + "source": "kubernetes.md" + }, + { + "slug": "sagemaker", + "label": "SageMaker", + "source": "sagemaker.md" + } + ] }, { "slug": "data", "label": "Working with data", - "source": "data.md" + "source": "data/index.md", + "children": [] + }, + { + "slug": "dvc", + "label": "Versioning MLEM objects with DVC", + "source": "dvc.md" + }, + { + "slug": "project-structure", + "label": "Project structure", + "source": "project-structure.md" }, { "slug": "remote-objects", - "label": "Working with remote projects and objects", - "source": "remote-objects.md" + "label": "Remote objects and projects", + "source": "remote-objects/index.md" }, { "slug": "configuration", "label": "Configuration", "source": "configuration.md" }, - { - "slug": "importing", - "label": "Importing existing files", - "source": "importing.md" - }, { "slug": "linking", "label": "Links", "source": "linking.md" }, { - "slug": "mlem-abcs", - "label": "MLEM ABCs", - "source": "mlem-abcs.md" - 
}, - { - "slug": "extending", - "label": "Extending", - "source": "extending.md" + "slug": "importing", + "label": "Importing existing files", + "source": "importing/index.md", + "children": [] }, { "slug": "analytics", @@ -132,30 +203,35 @@ "source": "command-reference/index.md", "children": [ { - "slug": "init", - "label": "init", - "source": "init.md" + "slug": "apply", + "label": "apply", + "source": "apply.md" }, { - "slug": "list", - "label": "list", - "source": "list.md" + "slug": "apply-remote", + "label": "apply-remote", + "source": "apply-remote.md" }, { - "slug": "pprint", - "label": "pprint", - "source": "pprint.md" + "slug": "build", + "label": "build", + "source": "build.md" + }, + { + "slug": "checkenv", + "label": "checkenv", + "source": "checkenv.md" + }, + { + "slug": "clone", + "label": "clone", + "source": "clone.md" }, { "slug": "declare", "label": "declare", "source": "declare.md" }, - { - "slug": "serve", - "label": "serve", - "source": "serve.md" - }, { "slug": "deployment", "label": "deployment", @@ -180,13 +256,23 @@ "slug": "remove", "label": "deployment remove", "source": "remove.md" + }, + { + "slug": "wait", + "label": "deployment wait", + "source": "wait.md" } ] }, { - "slug": "types", - "label": "types", - "source": "types.md" + "slug": "import", + "label": "import", + "source": "import.md" + }, + { + "slug": "init", + "label": "init", + "source": "init.md" }, { "slug": "link", @@ -194,29 +280,19 @@ "source": "link.md" }, { - "slug": "clone", - "label": "clone", - "source": "clone.md" - }, - { - "slug": "import", - "label": "import", - "source": "import.md" - }, - { - "slug": "build", - "label": "build", - "source": "build.md" + "slug": "pprint", + "label": "pprint", + "source": "pprint.md" }, { - "slug": "apply", - "label": "apply", - "source": "apply.md" + "slug": "serve", + "label": "serve", + "source": "serve.md" }, { - "slug": "apply-remote", - "label": "apply-remote", - "source": "apply-remote.md" + "slug": "types", + "label": 
"types", + "source": "types.md" } ] }, @@ -225,15 +301,45 @@ "label": "Python API Reference", "source": "api-reference/index.md", "children": [ + { + "slug": "apply", + "label": "apply()", + "source": "apply.md" + }, + { + "slug": "apply_remote", + "label": "apply_remote()", + "source": "apply_remote.md" + }, + { + "slug": "build", + "label": "build()", + "source": "build.md" + }, + { + "slug": "clone", + "label": "clone()", + "source": "clone.md" + }, + { + "slug": "deploy", + "label": "deploy()", + "source": "deploy.md" + }, + { + "slug": "import_object", + "label": "import_object()", + "source": "import_object.md" + }, { "slug": "init", "label": "init()", "source": "init.md" }, { - "slug": "save", - "label": "save()", - "source": "save.md" + "slug": "link", + "label": "link()", + "source": "link.md" }, { "slug": "load", @@ -246,49 +352,255 @@ "source": "load_meta.md" }, { - "slug": "ls", - "label": "ls()", - "source": "ls.md" + "slug": "save", + "label": "save()", + "source": "save.md" }, { - "slug": "import_object", - "label": "import_object()", - "source": "import_object.md" - }, + "slug": "serve", + "label": "serve()", + "source": "serve.md" + } + ] + }, + { + "slug": "object-reference", + "label": "Object Reference", + "source": "index.md", + "children": [ { - "slug": "link", - "label": "link()", - "source": "link.md" + "slug": "mlem-abcs", + "label": "MLEM ABCs", + "source": "mlem-abcs.md" }, { - "slug": "clone", - "label": "clone()", - "source": "clone.md" + "slug": "mlem-objects", + "label": "MLEM Objects", + "source": "mlem-objects.md" }, { - "slug": "apply", - "label": "apply()", - "source": "apply.md" + "slug": "model", + "label": "Models", + "source": "model/index.md", + "children": [ + { + "slug": "builtin", + "label": "Builtin", + "source": "builtin.md" + }, + { + "slug": "callable", + "label": "Python callables", + "source": "callable.md" + }, + { + "slug": "sklearn", + "label": "Scikit-Learn", + "source": "sklearn.md" + }, + { + "slug": "onnx", + 
"label": "Onnx", + "source": "onnx.md" + }, + { + "slug": "tensorflow", + "label": "TensorFlow", + "source": "tensorflow.md" + }, + { + "slug": "torch", + "label": "Torch", + "source": "torch.md" + }, + { + "slug": "catboost", + "label": "CatBoost", + "source": "catboost.md" + }, + { + "slug": "lightgbm", + "label": "LightGBM", + "source": "lightgbm.md" + }, + { + "slug": "xgboost", + "label": "XGBoost", + "source": "xgboost.md" + } + ] }, { - "slug": "apply_remote", - "label": "apply_remote()", - "source": "apply_remote.md" + "slug": "data", + "label": "Data", + "source": "data/index.md", + "children": [ + { + "slug": "builtin", + "label": "Builtin", + "source": "builtin.md" + }, + { + "slug": "numpy", + "label": "NumPy", + "source": "numpy.md" + }, + { + "slug": "pandas", + "label": "Pandas", + "source": "pandas.md" + }, + { + "slug": "lightgbm", + "label": "LightGBM", + "source": "lightgbm.md" + }, + { + "slug": "torch", + "label": "Torch", + "source": "torch.md" + }, + { + "slug": "tensorflow", + "label": "TensorFlow", + "source": "tensorflow.md" + }, + { + "slug": "xgboost", + "label": "XGBoost", + "source": "xgboost.md" + } + ] }, { "slug": "build", - "label": "build()", - "source": "build.md" + "label": "Builders", + "source": "build/index.md", + "children": [ + { + "slug": "docker", + "label": "Docker", + "source": "docker.md" + }, + { + "slug": "pip", + "label": "Python package", + "source": "pip.md" + }, + { + "slug": "requirements", + "label": "Requirements", + "source": "requirements.md" + }, + { + "slug": "venv", + "label": "Virtual Environment", + "source": "venv.md" + } + ] }, { - "slug": "serve", - "label": "serve()", - "source": "serve.md" + "slug": "serving", + "label": "Serving", + "source": "serving/index.md", + "children": [ + { + "slug": "builtin", + "label": "Builtin", + "source": "builtin.md" + }, + { + "slug": "fastapi", + "label": "FastAPI", + "source": "fastapi.md" + }, + { + "slug": "rabbitmq", + "label": "RabbitMQ", + "source": 
"rabbitmq.md" + }, + { + "slug": "sagemaker", + "label": "SageMaker", + "source": "sagemaker.md" + } + ] }, { - "slug": "deploy", - "label": "deploy()", - "source": "deploy.md" + "slug": "deployment", + "label": "Deployments", + "source": "deployment/index.md", + "children": [ + { + "slug": "heroku", + "label": "Heroku", + "source": "heroku.md" + }, + { + "slug": "sagemaker", + "label": "SageMaker", + "source": "sagemaker.md" + }, + { + "slug": "docker", + "label": "Docker", + "source": "docker.md" + }, + { + "slug": "kubernetes", + "label": "Kubernetes", + "source": "kubernetes.md" + } + ] + }, + { + "slug": "uri", + "label": "URI Resolving", + "source": "uri/index.md", + "children": [ + { + "slug": "builtin", + "label": "Builtin", + "source": "builtin.md" + }, + { + "slug": "github", + "label": "GitHub", + "source": "github.md" + }, + { + "slug": "gitlabfs", + "label": "GitLab", + "source": "gitlabfs.md" + }, + { + "slug": "bitbucketfs", + "label": "BitBucket", + "source": "bitbucketfs.md" + } + ] + }, + { + "slug": "storage", + "label": "Artifact Storage", + "source": "storage/index.md", + "children": [ + { + "slug": "builtin", + "label": "Builtin", + "source": "builtin.md" + }, + { + "slug": "dvc", + "label": "DVC", + "source": "dvc.md" + } + ] + }, + { + "slug": "extending", + "label": "Extending", + "source": "extending.md" } ] }, diff --git a/content/docs/use-cases/cicd.md b/content/docs/use-cases/cicd.md index d05e6815..fe209eb5 100644 --- a/content/docs/use-cases/cicd.md +++ b/content/docs/use-cases/cicd.md @@ -24,7 +24,9 @@ Git tag that kicks off the CI process. 
To make this build process consistent with future deployment, you can create and commit an MLEM declaration: ```cli -$ mlem declare builder pip -c package_name=mypackagename -c target=package build-to-pip +$ mlem declare builder pip build-to-pip \ + --package_name=mypackagename \ + --target=package 💾 Saving builder to build-to-pip.mlem ``` @@ -49,7 +51,7 @@ jobs: - name: build run: | pip3 install -r requirements.txt - mlem build my-model --load build-to-pip.mlem + mlem build --load build-to-pip.mlem --model my-model - name: publish run: | @@ -65,10 +67,10 @@ The deployment scenario is similar. First you need to create environment and deployment declarations, and commit them to Git: ```cli -$ mlem declare env heroku staging -💾 Saving env to staging.mlem - -$ mlem declare deployment heroku myservice -c app_name=mlem-deployed-in-ci -c model=my-model -c env=staging +$ mlem declare deployment heroku myservice \ + --app_name=mlem-deployed-in-ci \ + --model=my-model \ + --env=staging 💾 Saving deployment to myservice.mlem ``` @@ -93,7 +95,7 @@ jobs: - name: pack run: | pip3 install -r requirements.txt - mlem deployment my-model --load myservice.mlem + mlem deployment run --load myservice.mlem --model my-model ``` Learn more about deploying ML models [here](/doc/get-started/deploying). 
diff --git a/content/docs/use-cases/model-registry.md b/content/docs/use-cases/model-registry/index.md similarity index 100% rename from content/docs/use-cases/model-registry.md rename to content/docs/use-cases/model-registry/index.md diff --git a/content/docs/use-cases/mlem-mr.md b/content/docs/use-cases/model-registry/mlem-mr.md similarity index 66% rename from content/docs/use-cases/mlem-mr.md rename to content/docs/use-cases/model-registry/mlem-mr.md index 13c2214a..8db082a6 100644 --- a/content/docs/use-cases/mlem-mr.md +++ b/content/docs/use-cases/model-registry/mlem-mr.md @@ -26,24 +26,7 @@ We need to give some example repo with links here and instead move everything be Let's build an example using [repository from Get Started](https://github.com/iterative/example-mlem-get-started). -That repo already have some models in it: - -```cli -$ mlem ls https://github.com/iterative/example-mlem-get-started -``` - -```yaml -Builders: - - pip_config -Deployments: - - myservice -Envs: - - staging -Models: - - rf -Data: - - iris.csv -``` +That repo already has a `models/rf` model in it. Let's create new repo first: @@ -57,41 +40,29 @@ $ mlem init Let's create some links to them: ```cli -$ mlem link --sp https://github.com/iterative/example-mlem-get-started rf first-model -⏳️ Loading meta from https://github.com/iterative/example-mlem-get-started/tree/main/.mlem/model/rf.mlem -💾 Saving link to .mlem/link/first-model.mlem - -$ mlem link --sp https://github.com/iterative/example-mlem-get-started --rev 5-deploy-meta rf second-model -⏳️ Loading meta from https://github.com/iterative/example-mlem-get-started/tree/5-deploy-meta/.mlem/model/rf.mlem -💾 Saving link to .mlem/link/second-model.mlem -``` - -We've just linked two models from the other repo. 
You can see both if you run: - -```cli -$ mlem ls -``` +$ mlem link --sp https://github.com/iterative/example-mlem-get-started models/rf first-model +⏳️ Loading meta from https://github.com/iterative/example-mlem-get-started/tree/main/models/rf.mlem +πŸ’Ύ Saving link to first-model.mlem -```yaml -Models: - - first-model -> .mlem/model/rf - - second-model -> .mlem/model/rf +$ mlem link --sp https://github.com/iterative/example-mlem-get-started --rev 5-deploy-meta models/rf second-model +⏳️ Loading meta from https://github.com/iterative/example-mlem-get-started/tree/5-deploy-meta/models/rf.mlem +πŸ’Ύ Saving link to second-model.mlem ``` -Let's check out each link: +We've just linked two models from the other repo. Let's check out each link: ```cli -$ cat .mlem/link/first-model.mlem +$ cat first-model.mlem link_type: model object_type: link -path: .mlem/model/rf.mlem +path: models/rf.mlem repo: https://github.com/iterative/example-mlem-get-started/ rev: main -$ cat .mlem/link/second-model.mlem +$ cat second-model.mlem link_type: model object_type: link -path: .mlem/model/rf.mlem +path: models/rf.mlem repo: https://github.com/iterative/example-mlem-get-started/ rev: 7-deploy-meta ``` @@ -99,6 +70,6 @@ rev: 7-deploy-meta Now you can commit those links, push the repo and use it as a model registry: ```cli -$ git add .mlem/link/first-model.mlem .mlem/link/second-model.mlem +$ git add first-model.mlem second-model.mlem $ git commit -m "Add links to models" ``` diff --git a/content/docs/user-guide/analytics.md b/content/docs/user-guide/analytics.md index 582861f0..62196ef8 100644 --- a/content/docs/user-guide/analytics.md +++ b/content/docs/user-guide/analytics.md @@ -44,7 +44,7 @@ HTTPS. MLEM analytics help the entire community, so leaving it on is appreciated. 
However, if you want to opt out of MLEM's analytics, you can disable it via setting an environment variable `MLEM_NO_ANALYTICS=true` or by adding -`no_analytics: true` to `.mlem/config.yaml` +`no_analytics: true` to `.mlem.yaml`. This will disable it for the project. We'll add an option to opt out globally soon. diff --git a/content/docs/user-guide/basic-concepts.md b/content/docs/user-guide/basic-concepts.md index d3a18e26..cbc9b1b3 100644 --- a/content/docs/user-guide/basic-concepts.md +++ b/content/docs/user-guide/basic-concepts.md @@ -25,94 +25,3 @@ name without `.mlem` extension, for example `model.mlem` and `model`, or If an **MLEM Object** has multiple artifacts, they will be stored in a directory with the same name, for example `model.mlem` + `model/data.pkl` + `model/data2.pkl`. - -
- -### Implementation details - -From a developer's perspective, MLEM Objects are instances of one of the -subclasses of `MlemObject` class. MLEM is using extended -[pydantic](https://pydantic-docs.helpmanual.io/) functionality to save and load -them from files. - -You can get `MlemObject` instance if you use `load_meta` API method instead of -simple `load`. - -See also [MLEM Object API](/doc/api-reference/mlem-object) - -
- -## Common fields - -Each MLEM Object has an `object_type` field which determines the type of the -object. Specific types may have additional properties, but all MLEM Objects have -the following fields: - -- `params` - arbitrary object with additional parameters -- `location` - if the object is loaded, information about where it came from - -You can check out what methods MLEM Objects have in -[API Reference](/doc/api-reference/mlem-object) - -## MLEM Object Types - -Here are all the builtin MLEM Object types - -Model and Data are special types that can have artifacts, so they have two -additional fields: - -- `artifacts` - a string-to-artifacts mapping, where artifact is an instance of - [`Artifact`](/doc/user-guide/mlem-abcs#artifact) which represents a file - stored somewhere (local/cloud/dvc cache etc) -- `requirements` - a list of - [`Requirement`](/doc/user-guide/mlem-abcs#requirement) which are needed to use - that object in runtime - -### Model - -Represents an ML model, but can be generalized to any model or even any -"function" or any "transformation", thanks to `callable` -[ModelType](/doc/user-guide/mlem-abcs#modeltype). - -**Base class**: `mlem.core.objects.MlemModel` - -**Fields** (in addition to inherited): - -- `model_type` (_lazy_) - [ModelType](/doc/user-guide/mlem-abcs#modeltype), - which is polymorphic and holds metadata about model's framework, methods and - io. - -### Data - -Represent data, which can be used as an input to one of Model's methods. - -**Base class**: `mlem.core.objects.MlemData` - -**Fields** (in addition to inherited): - -- `reader` (_lazy_) - [DataReader](/doc/user-guide/mlem-abcs#datareader) - how - to read saved files and resulting dataset metadata -- `data_type` (_transient_) - [`DataType`](/doc/user-guide/mlem-abcs#datatype) - with dataset value and metadata (available once data is read) - -### Link - -Represents a link (pointer) to another MLEM Object. 
More on that -[here](/doc/user-guide/linking) - -**Base class**: `mlem.core.objects.MlemLink` - -**Fields** (in addition to inherited): - -- `path` - path to MLEM Object -- `project` - location of MLEM Project with referenced object -- `rev` - revision of the object -- `link_type` - type of the referenced object - -### Other types - -Some of the `MLEM ABCs` are also MLEM Objects. - -- [Builder](/doc/user-guide/mlem-abcs#builder) -- [Target Environment](/doc/user-guide/mlem-abcs#mlemenv) -- [Deployment](/doc/user-guide/mlem-abcs#mlemdeployment) diff --git a/content/docs/user-guide/building/conda.md b/content/docs/user-guide/building/conda.md new file mode 100644 index 00000000..fe4d7f15 --- /dev/null +++ b/content/docs/user-guide/building/conda.md @@ -0,0 +1,136 @@ +# Conda Environments + +Like [virtual environments](/doc/user-guide/building/venv), +[conda environments](https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/environments.html) +follow the same paradigm of isolating dependencies for a package or a model. +But, they exist globally and are saved in a single location. Further, they don't +need to be limited to installation of python packages. + +## Description + +Currently, Conda based requirements cannot be determined automatically. But, one +can pass them manually. + +In addition to installing conda packages, `pip` based packages (gathered from +the model) will also be installed in the `conda` environment. + +## Preparation + +Make sure that `conda` command line utility is installed and is accessible. 
+ +### Generating a new conda environment + +```cli +$ mlem build conda --model model --target newenv \ + --conda_reqs.0.package_name xtensor \ + --conda_reqs.1.package_name openssl +⏳️ Loading model from model.mlem +Collecting package metadata (current_repodata.json): done +Solving environment: done + +## Package Plan ## + + environment location: /path/to/envs/newenv + + added / updated specs: + - python=3.9 + + +The following NEW packages will be INSTALLED: + + ca-certificates pkgs/main/osx-arm64::ca-certificates-2022.07.19-hca03da5_0 None + certifi pkgs/main/osx-arm64::certifi-2022.9.24-py39hca03da5_0 None + libcxx pkgs/main/osx-arm64::libcxx-14.0.6-h848a8c0_0 None + libffi pkgs/main/osx-arm64::libffi-3.4.2-hc377ac9_4 None + ncurses pkgs/main/osx-arm64::ncurses-6.3-h1a28f6b_3 None + openssl pkgs/main/osx-arm64::openssl-1.1.1q-h1a28f6b_0 None + pip pkgs/main/osx-arm64::pip-22.2.2-py39hca03da5_0 None + python pkgs/main/osx-arm64::python-3.9.13-hbdb9e5c_1 None + readline pkgs/main/osx-arm64::readline-8.1.2-h1a28f6b_1 None + setuptools pkgs/main/osx-arm64::setuptools-63.4.1-py39hca03da5_0 None + sqlite pkgs/main/osx-arm64::sqlite-3.39.3-h1058600_0 None + tk pkgs/main/osx-arm64::tk-8.6.12-hb8d0fd4_0 None + tzdata pkgs/main/noarch::tzdata-2022c-h04d1e81_0 None + wheel pkgs/main/noarch::wheel-0.37.1-pyhd3eb1b0_0 None + xz pkgs/main/osx-arm64::xz-5.2.6-h1a28f6b_0 None + zlib pkgs/main/osx-arm64::zlib-1.2.12-h5a0b063_3 None + + +Preparing transaction: done +Verifying transaction: done +Executing transaction: done +# +# To activate this environment, use +# +# $ conda activate /path/to/envs/newenv +# +# To deactivate an active environment, use +# +# $ conda deactivate + +Retrieving notices: ...working... 
done +Collecting package metadata (current_repodata.json): done +Solving environment: done + +## Package Plan ## + + environment location: /path/to/envs/newenv + + added / updated specs: + - conda-forge::openssl + - conda-forge::xtensor + + +The following NEW packages will be INSTALLED: + + xtensor conda-forge/osx-arm64::xtensor-0.24.3-hf86a087_0 None + xtl conda-forge/osx-arm64::xtl-0.7.4-hc021e02_0 None + +The following packages will be UPDATED: + + openssl pkgs/main::openssl-1.1.1q-h1a28f6b_0 --> conda-forge::openssl-1.1.1q-h03a7124_1 None + + +Preparing transaction: done +Verifying transaction: done +Executing transaction: done +Retrieving notices: ...working... done +Collecting scikit-learn==1.0.2 + Using cached scikit_learn-1.0.2-cp39-cp39-macosx_12_0_arm64.whl (6.9 MB) +Collecting pandas==1.4.2 + Using cached pandas-1.4.2-cp39-cp39-macosx_11_0_arm64.whl (10.1 MB) +Collecting numpy==1.22.3 + Using cached numpy-1.22.3-cp39-cp39-macosx_11_0_arm64.whl (12.8 MB) +Collecting threadpoolctl>=2.0.0 + Using cached threadpoolctl-3.1.0-py3-none-any.whl (14 kB) +Collecting scipy>=1.1.0 + Using cached scipy-1.9.2-cp39-cp39-macosx_12_0_arm64.whl (28.6 MB) +Collecting joblib>=0.11 + Using cached joblib-1.2.0-py3-none-any.whl (297 kB) +Collecting pytz>=2020.1 + Using cached pytz-2022.4-py2.py3-none-any.whl (500 kB) +Collecting python-dateutil>=2.8.1 + Using cached python_dateutil-2.8.2-py2.py3-none-any.whl (247 kB) +Collecting six>=1.5 + Using cached six-1.16.0-py2.py3-none-any.whl (11 kB) +Installing collected packages: pytz, threadpoolctl, six, numpy, joblib, scipy, python-dateutil, scikit-learn, pandas +Successfully installed joblib-1.2.0 numpy-1.22.3 pandas-1.4.2 python-dateutil-2.8.2 pytz-2022.4 scikit-learn-1.0.2 scipy-1.9.2 six-1.16.0 threadpoolctl-3.1.0 +``` + +If the `target` is not passed, the default name for the new environment is +`venv`. 
+ +Other options include using: + +- `--python_version 3.7` -- to use a custom python version, by default it is + inferred automatically. +- `--current_env True` -- whether to install the requirements in a currently + activated conda environment. + +While options for passing a list of conda requirements include: + +- `--conda_reqs.0.package_name` -- name of the conda package +- `--conda_reqs.0.spec` -- denotes selectors for a package such as '>=1.8,<2' + (optional) +- `--conda_reqs.0.channel_name` -- denotes the channel from which a package is + to be installed (default is `conda-forge`) diff --git a/content/docs/user-guide/building/docker.md b/content/docs/user-guide/building/docker.md new file mode 100644 index 00000000..ec88fc3c --- /dev/null +++ b/content/docs/user-guide/building/docker.md @@ -0,0 +1,93 @@ +# Docker + +Building a docker image from the model or preparing a folder ready for running +`docker build` in it. + +## Requirements + +```bash +pip install mlem[docker] +# or +pip install docker +``` + +If you want to build images locally, you may want to install +[Docker Desktop](https://www.docker.com/products/docker-desktop/). + +## Preparing a model for running `docker build` + +In the [parent page](/doc/user-guide/building) we've seen how to build a Docker +Image for a model using a pre-configured builder. Now let's see how this docker +image is built. + +To run `docker build` you need to prepare a folder with a `Dockerfile` and all +necessary content (like a model itself). This is what MLEM does under the hood +as a first step inside of `mlem build docker`. For your convenience, this +functionality is exposed as a separate builder: + +```cli +$ mlem build docker_dir --model models/rf \ + --server fastapi --target build +⏳️ Loading model from models/rf.mlem +🛠 Building MLEM wheel file... +💼 Adding model files... +🛠 Generating dockerfile... +💼 Adding sources... +💼 Generating requirements file... 
+``` + +Now everything is ready and written to `build/`. + +## Exploring folder structure + +```cli +$ tree build/ +build/ +├── Dockerfile # instructions for `docker build` +├── mlem-0.2.9-py3-none-any.whl # MLEM wheel installed into the image +├── mlem_requirements.txt # requirements to install MLEM +├── model # model binary +├── model.mlem # MLEM model metafile +├── requirements.txt # requirements to run the model +├── run.sh # script that runs `mlem serve` +└── server.yaml # MLEM server configuration +``` + +As you can see, the builder generated everything needed to build a Docker image: +Dockerfile, model, requirements, and so on. Now we can run `docker build` to +build an actual Docker image: + +```cli +$ docker build . -t mlem-model:latest +[+] Building 70.7s (14/14) FINISHED + => [internal] load build definition from Dockerfile 0.1s + => => transferring dockerfile: 533B 0.0s + => [internal] load .dockerignore 0.1s + => => transferring context: 2B 0.0s + => [internal] load metadata for docker.io/library/python:3.9.5-slim 0.0s + => [1/9] FROM docker.io/library/python:3.9.5-slim 0.1s + => [internal] load build context 0.1s + => => transferring context: 487.14kB 0.1s + => [2/9] WORKDIR /app 0.1s + => [3/9] COPY requirements.txt . 0.0s + => [4/9] RUN pip install -r requirements.txt 44.1s + => [5/9] COPY mlem_requirements.txt . 0.0s + => [6/9] RUN pip install -r mlem_requirements.txt 21.4s + => [7/9] COPY mlem-0.2.9.dev14+gf47bf34-py3-none-any.whl . 0.0s + => [8/9] RUN pip install mlem-0.2.9.dev14+gf47bf34-py3-none-any.whl 1.8s + => [9/9] COPY . ./ 0.0s + => exporting to image 2.8s + => => exporting layers 2.8s + => => writing image sha256:f449c1a69bc4566f61624d75481bf06c52164f05 0.0s + => => naming to docker.io/library/mlem-model:latest 0.0s +``` + +## Running container with a Docker image + +```cli +$ docker run -p 8080:8080 mlem-model:latest +... 
+``` + +Since we're serving the model with FastAPI, now you can open +http://localhost:8080/docs in your browser and see the OpenAPI spec. diff --git a/content/docs/user-guide/building/index.md b/content/docs/user-guide/building/index.md new file mode 100644 index 00000000..4a052347 --- /dev/null +++ b/content/docs/user-guide/building/index.md @@ -0,0 +1,101 @@ +# Building models + +Building is a way to β€œbake” your model into something usable in production like +a Docker image, or export your model into another format or even export the +underlying requirements and dependencies of the model, allowing one to create +virtual environments out of it. You can see the full list of available builders +[here](/doc/object-reference/build). + +To build your MLEM model you need to use either +[CLI](/doc/command-reference/build) or [API](/doc/api-reference/build) `build` +command and provide builder-specific arguments. + +
+ +### βš™οΈ About builders and arguments + +There are different types of builders and each one has it’s own set of available +arguments. You can find them in the nested pages, but for quick reference you +can run `mlem build --help` for list of builders and +`mlem build $BUILDER --help` for list of available arguments. + +
+ +## Pre-configured builders + +In [Get Started](/doc/get-started/building) we demonstrated how to build a +docker image out of the model. Now let's see what is the builder declaration we +mentioned there. You can pre-configure your builder in the form of yaml file +that we call "declaration" either manually or via `mlem declare` command: + +```cli +$ mlem declare builder docker docker_builder.mlem \ + --image.name mlem-model \ + --env.daemon.host "" \ + --server fastapi +πŸ’Ύ Saving builder to docker_builder.mlem +``` + +Let's see the builder declaration: + +```yaml +$ cat docker_builder.mlem +image: + name: mlem-model +object_type: builder +server: + type: fastapi +type: docker +``` + +This declaration basically defines all things you need to build a docker image. +It includes image name, what server you want to serve your model with, and some +optional things like image tag. Now you can use this config as a value for +`--load` option in `mlem build`: + +```cli +$ mlem build --load docker_builder.mlem \ + --model https://github.com/iterative/example-mlem-get-started/rf +⏳️ Loading builder from docker_builder.mlem +⏳️ Loading model from https://github.com/iterative/example-mlem-get-started/rf +πŸ›  Building MLEM wheel file... +πŸ’Ό Adding model files... +πŸ›  Generating dockerfile... +πŸ’Ό Adding sources... +πŸ’Ό Generating requirements file... +πŸ›  Building docker image mlem-model:latest... +βœ… Built docker image mlem-model:latest +``` + +Also, you can do all of this programmatically via Python API: + +```py +from mlem.api import build, load_meta + +build( + "docker", + "https://github.com/iterative/example-mlem-get-started/rf", + image={"name": "build"}, + server="fastapi", + env={"daemon": {"host": ""}}, +) +# or +build( + load_meta("docker_builder"), + "https://github.com/iterative/example-mlem-get-started/rf", +) +``` + +
+ +### βš™οΈ Remote builder config + +Like every other MLEM object, builders can be read from remote repos. Try + +```cli +mlem build \ + --load https://github.com/iterative/example-mlem-get-started/pip_config \ + --model https://github.com/iterative/example-mlem-get-started/rf +``` + +
diff --git a/content/docs/user-guide/building/pip.md b/content/docs/user-guide/building/pip.md new file mode 100644 index 00000000..40559d33 --- /dev/null +++ b/content/docs/user-guide/building/pip.md @@ -0,0 +1,100 @@ +# Python Packages + +There are two builder implementations to create python packages: `pip` to create +a directory with python package from model and `whl` to create a wheel file with +python package. + +## Creating Python package + +To create a `build/` directory with pip package run this command: + +```cli +$ mlem build pip --package_name example_mlem_get_started \ + --target build/ --model rf +⏳️ Loading model from rf.mlem +πŸ’Ό Written `example_mlem_get_started` package data to `build` +``` + +In this command, we specified that we want to build `rf` model with `pip` +builder and provided two arguments, `target` is the directory where the builder +will write all the files and `package_name` is the name of our package. + +There are more arguments you can use, see +[object reference](/doc/object-reference/build/pip) + +## Exploring Python package + +Let’s see what we’ve got + +```cli +$ tree build/ +build/ +β”œβ”€β”€ MANIFEST.in +β”œβ”€β”€ example_mlem_get_started +β”‚Β Β  β”œβ”€β”€ __init__.py +β”‚Β Β  β”œβ”€β”€ model +β”‚Β Β  └── model.mlem +β”œβ”€β”€ requirements.txt +└── setup.py +``` + +As you can see, the builder generated all the files necessary for a python +package. This includes sources, requirements, +[setup.py](https://docs.python.org/3/distutils/setupscript.html), and the model +itself. + +## Using Python package + +Now you can distribute and install the package. 
Its code declares all the same +methods our model had, so you can try to use it like this: + +```py +import example_mlem_get_started + +example_mlem_get_started.predict(df) +``` + +## Examples + +### Creating Python package from model using API + +```python +from mlem.api import build + +build(builder="pip", + model="https://github.com/iterative/example-mlem-get-started/rf", + package_name="my_model_package", + target="./build" +) + +# ! pip install ./build +import my_model_package + +data = ... +my_model_package.predict(data) +``` + +### Creating Python wheel package from model using CLI + +```cli +$ mlem build whl \ + --package_name my_model_package \ + --target ./build \ + --version 1.0.0 \ + --model https://github.com/iterative/example-mlem-get-started/rf +$ pip install ./build/my_model_package-1.0.0-py3-none-any.whl +``` + +### Creating wheel builder declaration and using it with CLI + +```cli +$ mlem declare builder whl whl_conf \ + --package_name my_model_package \ + --target ./build \ + --author mike0sv \ + --email mike0sv@gmail.com \ + --version 1.0.0 +$ mlem build --load whl_conf \ + --model https://github.com/iterative/example-mlem-get-started/rf +$ pip install ./build/my_model_package-1.0.0-py3-none-any.whl +``` diff --git a/content/docs/user-guide/building/requirements.md b/content/docs/user-guide/building/requirements.md new file mode 100644 index 00000000..33e78c01 --- /dev/null +++ b/content/docs/user-guide/building/requirements.md @@ -0,0 +1,132 @@ +# Requirements + +When you have a model saved via MLEM and want to use it, the first step is to +make sure you have the right dependencies and packages. Given a model, MLEM can +get the list of requirements of different types (including Python, Unix, as well +as some others). + +This complements the [checkenv](/doc/command-reference/checkenv) command.
+ +## Pip based requirements + +MLEM can export the installable requirements needed for a model using the +`mlem build` command, where `model` is the path to model saved via `mlem` + +```cli +$ mlem build requirements -m model +⏳️ Loading model from sk-model.mlem +scikit-learn==1.0.2 pandas==1.4.2 numpy==1.22.3 +``` + +Now, it can be used to install the requirements like this: + +```cli +$ pip install $(mlem -q build requirements -m model) +``` + +where `-q` stands for the quiet mode which disables the emoji output. + +One can also save these `requirements` to generate a `requirements.txt` file +using the `--target` option which allows us to pass a path of the file i.e. + +```cli +$ mlem build requirements -m model --target requirements.txt +⏳️ Loading model from model.mlem +πŸ’Ό Materializing requirements... +βœ… Materialized to requirements.txt! +``` + +and now the contents of `requirements.txt` can be checked using + +```cli +$ cat requirements.txt +scikit-learn==1.0.2 +pandas==1.4.2 +numpy==1.22.3 +``` + +This is different from [creating a python package](/doc/user-guide/building/pip) +for the model. + +## Unix based requirements + +Some python libraries require unix based packages underneath to function +correctly. An example is the +[libgomp1](https://packages.debian.org/sid/libgomp1) package required by the +`lightgbm` library. `MLEM` can figure out `unix` based packages for some +supported libraries and these can be used as below: + +To get a list of `unix` based requirements, use the `--req_type` option such as + +`mlem build requirements -m model --req_type unix`. + +The output of above can be used in conjunction like: + +```cli +$ apt-get install $(mlem -q build requirements -m model --req_type unix) +``` + +The `--target` option is not supported for unix based requirements. + +## Custom requirements + +Custom requirements represent local python code such as files, zipped sources, +etc. 
Custom requirements always need the `--target` option since they are +materialized at the target. + +For instance, a function can be saved via `MLEM` at a location `mlem-f` + +```python +# func.py +def f(txt): + print(txt) +``` + +```python +# save.py +from mlem.api import save +from func import f + +saved = save(f, 'mlem-f') +``` + +and the following command could be used to materialize this custom requirement +at a target: + +```cli +$ mlem build requirements -m mlem-f --req_type custom --target ./dir +⏳️ Loading model from mlem-f.mlem +πŸ’Ό Materializing requirements... +βœ… Materialized to ./dir! +``` + +The contents of `dir` can be checked using + +```cli +$ ls dir +func.py +``` + +and with + +```cli +$ cat dir/func.py +def f(txt): + print(txt) +``` + + + +## Conda requirements + +Conda based requirements specify conda packages and cannot be determined +automatically as of now. In any case, the `--target` option is not supported for +it. + +One can manually pass conda requirements to create conda based virtual +environments using the conda builder as discussed +[here](/doc/user-guide/building/conda). diff --git a/content/docs/user-guide/building/venv.md b/content/docs/user-guide/building/venv.md new file mode 100644 index 00000000..4df09adc --- /dev/null +++ b/content/docs/user-guide/building/venv.md @@ -0,0 +1,54 @@ +# Virtual Environments + +Given a model and a list of its dependencies and packages, an environment needs +to be present that has these requirements readily available so as to use the +model. To make sure that different dependencies for different models (or +projects) don't clash, +[virtual environments](https://realpython.com/python-virtual-environments-a-primer/) +are used. + +## Description + +MLEM can create a python virtual environment using requirements gathered from a +model. This naturally extends the functionality of the +[`requirements builder`](/doc/user-guide/building/requirements). 
+ +### Generating a new virtual environment + +```cli +$ mlem build venv -m model --target newenv +⏳️ Loading model from model.mlem +πŸ’Ό Creating virtual env newenv... +πŸ’Ό Installing the required packages... +Collecting scikit-learn==1.0.2 + Using cached scikit_learn-1.0.2-cp39-cp39-macosx_12_0_arm64.whl (6.9 MB) +Collecting pandas==1.4.2 + Using cached pandas-1.4.2-cp39-cp39-macosx_11_0_arm64.whl (10.1 MB) +Collecting numpy==1.22.3 + Using cached numpy-1.22.3-cp39-cp39-macosx_11_0_arm64.whl (12.8 MB) +Collecting joblib>=0.11 + Using cached joblib-1.2.0-py3-none-any.whl (297 kB) +Collecting scipy>=1.1.0 + Using cached scipy-1.9.2-cp39-cp39-macosx_12_0_arm64.whl (28.6 MB) +Collecting threadpoolctl>=2.0.0 + Using cached threadpoolctl-3.1.0-py3-none-any.whl (14 kB) +Collecting pytz>=2020.1 + Using cached pytz-2022.4-py2.py3-none-any.whl (500 kB) +Collecting python-dateutil>=2.8.1 + Using cached python_dateutil-2.8.2-py2.py3-none-any.whl (247 kB) +Collecting six>=1.5 + Using cached six-1.16.0-py2.py3-none-any.whl (11 kB) +Installing collected packages: pytz, threadpoolctl, six, numpy, joblib, scipy, python-dateutil, scikit-learn, pandas +Successfully installed joblib-1.2.0 numpy-1.22.3 pandas-1.4.2 python-dateutil-2.8.2 pytz-2022.4 scikit-learn-1.0.2 scipy-1.9.2 six-1.16.0 threadpoolctl-3.1.0 +βœ… virtual environment `newenv` is ready, activate with `source newenv/bin/activate` +``` + +If the `target` is not passed, the default name for the new environment is +`venv`. + +Other options include using: + +- `--no_cache True` -- to disable caching while fetching packages in creation of + the environment. +- `--current_env True` -- whether to install the requirements in a currently + activated virtual environment. 
diff --git a/content/docs/user-guide/configuration.md b/content/docs/user-guide/configuration.md index 37262bc3..d9d53b5f 100644 --- a/content/docs/user-guide/configuration.md +++ b/content/docs/user-guide/configuration.md @@ -2,8 +2,8 @@ ## Ways to set -MLEM uses `.mlem/config.yaml` file to load configuration from, but it can be -overridden (or set) via corresponding env variable with `MLEM_` prefix. +MLEM uses `.mlem.yaml` file to load configuration from, but it can be overridden +(or set) via corresponding env variable with `MLEM_` prefix. Also, [`mlem config`](/doc/command-reference/config) allows you to manipulate config. @@ -30,5 +30,4 @@ config. ## Extension config Different MLEM extensions can provide additional options that you also can set -via `.mlem/config.yaml` file. Please refer to corresponding extension -documentation. +via `.mlem.yaml` file. Please refer to corresponding extension documentation. diff --git a/content/docs/user-guide/data.md b/content/docs/user-guide/data/index.md similarity index 89% rename from content/docs/user-guide/data.md rename to content/docs/user-guide/data/index.md index fe59469e..098dd0cb 100644 --- a/content/docs/user-guide/data.md +++ b/content/docs/user-guide/data/index.md @@ -3,6 +3,13 @@ You need to save data as [MLEM Objects] so that it is compatible with operations such as [mlem apply](/doc/command-reference/apply). +MLEM supports python collections and primitives, as well as `pandas` and `numpy` +data types and some framework-specific data types like `lightgbm.Dataset` or +`tf.Tensor`. + +You can find the full list and additional documentation +[here](/doc/object-reference/data). + [mlem objects]: /doc/user-guide/basic-concepts#mlem-objects ## Saving data with MLEM @@ -30,10 +37,10 @@ if __name__ == "__main__": ``` Executing this script results in a few data frames saved to disk along with -certain metadata about them in the `.mlem/data` directory: +certain metadata about them in the `.` directory: ``` -.mlem/data +.
β”œβ”€β”€ test_x.csv β”œβ”€β”€ test_x.csv.mlem β”œβ”€β”€ test_y.csv diff --git a/content/docs/user-guide/data/numpy.md b/content/docs/user-guide/data/numpy.md new file mode 100644 index 00000000..21be820f --- /dev/null +++ b/content/docs/user-guide/data/numpy.md @@ -0,0 +1,33 @@ +# Numpy + +DataType, Reader and Writer implementations for `np.ndarray` and `np.number` +primitives + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[numpy] +# or +pip install numpy +``` + +## Examples + +### Saving and loading numpy array + +```python +import numpy as np + +from mlem.api import save, load + + +data = np.zeros((100,)) + +save(data, "array") + +data = load("array") +``` diff --git a/content/docs/user-guide/data/pandas.md b/content/docs/user-guide/data/pandas.md new file mode 100644 index 00000000..1575eee2 --- /dev/null +++ b/content/docs/user-guide/data/pandas.md @@ -0,0 +1,22 @@ +# Pandas + +DataType, Reader and Writer implementations for `pd.DataFrame` and `pd.Series` +ImportHook implementation for files saved with pandas + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[pandas] +# or +pip install pandas +``` + +## Examples + +```python + +``` diff --git a/content/docs/user-guide/deploying/docker.md b/content/docs/user-guide/deploying/docker.md new file mode 100644 index 00000000..766c0dd9 --- /dev/null +++ b/content/docs/user-guide/deploying/docker.md @@ -0,0 +1,64 @@ +# Docker + +[Docker](https://docs.docker.com/get-started/overview/) is a platform for +shipping applications in an isolated environment known as a container. + +Deploying to Docker essentially means running a model inside a Docker Container +locally. + +## Requirements + +```bash +pip install mlem[docker] +# or +pip install docker +``` + +## Description + +Deploying to a docker container involves 2 main steps: + +1. [Build docker](/doc/user-guide/building/docker) image by running + `docker build` under the hood. +2. 
Start the server inside the image by running `docker run` under the hood. + +One can do this via a single line: + +```cli +$ mlem deploy run docker_container deployment.mlem -m model +πŸ’Ύ Saving deployment to deployment.mlem +⏳️ Loading model from model.mlem +πŸ›  Creating docker image mlem-deploy-1666728279 + πŸ›  Building MLEM wheel file... + πŸ’Ό Adding model files... + πŸ›  Generating dockerfile... + πŸ’Ό Adding sources... + πŸ’Ό Generating requirements file... + πŸ›  Building docker image mlem-deploy-1666728279:latest... + βœ… Built docker image mlem-deploy-1666728279:latest +βœ… Container mlem-deploy-1666728455 is up +``` + +### Checking the docker image and container + +One can check the docker image built via `docker image ls` which should give the +following output: + +``` +REPOSITORY TAG IMAGE ID CREATED SIZE +mlem-deploy-1666728279 latest fad02f76dbed 19 seconds ago 734MB +... +``` + +and the running container with `docker container ls`: + +``` +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +0aa976159580 mlem-deploy-1666728279:latest "/bin/sh -c 'sh run.…" 26 seconds ago Up 25 seconds mlem-deploy-1666728455 +... +``` + +### Configurable parameters + +A lot of parameters can be configured, the full list of which can be accessed +using `mlem deploy run docker_container -h`. diff --git a/content/docs/user-guide/deploying/heroku.md b/content/docs/user-guide/deploying/heroku.md new file mode 100644 index 00000000..c76b3dea --- /dev/null +++ b/content/docs/user-guide/deploying/heroku.md @@ -0,0 +1,167 @@ +# Heroku + +[Heroku](https://heroku.com) is the platform suitable for deploy of simple +applications. If you don't have an experience with deploying to external +platforms such as Sagemaker or Kubernetes, we recommend to start with Heroku. + +## Requirements + +```bash +pip install mlem[heroku] +# or +pip install fastapi uvicorn docker +``` + +To create applications on Heroku platform all you need is Heroku API key. 
You +need to either set `HEROKU_API_KEY` environment variable or use +[Heroku CLI](https://devcenter.heroku.com/articles/heroku-cli) to run +`heroku login`. To push your Docker image to Heroku Docker Registry, you'll also +need to execute `heroku container:login`. + +
+ +### βš™οΈHow to obtain Heroku API key + +- Go to [heroku.com](http://heroku.com) +- Sign up or login with existing account +- Go to account settings by clicking your profile picture on the main page +- Find API Key section and reveal existing one or re-generate it + +
+ +> You can also set API token via `--api_key` option to some commands, but this +> may have security issues + +## Defining target environment + +To deploy something somewhere, we need to define this β€œsomewhere” first, or in +MLEM terms, declare a `target environment` object. It will contain all the +information needed to access it. In the case of Heroku, all we need is an API +key. + +To declare a new target env, run + +```cli +$ mlem declare env heroku staging +πŸ’Ύ Saving env to staging.mlem +``` + +## Defining deployment + +Now, as we defined our target env, we can deploy our model there. Deployments +are also MLEM objects, which means that they need to have their definition. + +To create one for Heroku, we once again will use `declare` command to configure +our deployment. We use `example-mlem-get-started-app` for the app name, but you +can change it to something unique: + +```cli +$ mlem declare deployment heroku app \ + --app_name=example-mlem-get-started-app \ + --model=models/rf \ + --env=staging +πŸ’Ύ Saving deployment to app.mlem +``` + +Now we can actually run the deployment process (this can take a while): + +```cli +$ mlem deployment run --load app.mlem +⏳️ Loading model from models/rf.mlem +⏳️ Loading deployment from app.mlem +πŸ›  Creating docker image for heroku + πŸ›  Building MLEM wheel file... + πŸ’Ό Adding model files... + πŸ›  Generating dockerfile... + πŸ’Ό Adding sources... + πŸ’Ό Generating requirements file... + πŸ›  Building docker image registry.heroku.com/example-mlem-get-started-app/web... + βœ… Built docker image registry.heroku.com/example-mlem-get-started-app/web + πŸ”Ό Pushing image registry.heroku.com/example-mlem-get-started-app/web to registry.heroku.com + βœ… Pushed image registry.heroku.com/example-mlem-get-started-app/web to registry.heroku.com +πŸ›  Releasing app example-mlem-get-started-app formation +βœ… Service example-mlem-get-started-app is up. 
You can check it out at https://example-mlem-get-started-app.herokuapp.com/ +``` + + + +You can also define and run the deployment on-the-fly using options for +`mlem deployment run`, e.g.: + +```cli +$ mlem deployment run app \ + -m model -t staging \ + --app_name=example-mlem-get-started-app +``` + + + +## Making requests + +The application is now live on Heroku. You can go +[here](http://example-mlem-get-started-app.herokuapp.com) and see the same +OpenAPI documentation. For details on it, refer to the **Serving** section. You +can also try to do some requests: + +```py +from mlem.api import load +from mlem.runtime.client.base import HTTPClient + +client = HTTPClient(host="http://example-mlem-get-started-app.herokuapp.com", port=80) +res = client.predict(load("test_x.csv")) +``` + +Also, you can create a client using deployment meta object: + +```py +from mlem.api import load + +service = load("app") +client = service.state.get_client() +res = client.predict(load("test_x.csv")) +``` + +There is also the remote counterpart of `apply` command. It will send requests +to your service instead of loading model into memory. There are two options to +achieve this in CLI: using the service address or the deploy meta. 
+ +```cli +$ mlem apply-remote http test_x.csv --host=http://example-mlem-get-started-app.herokuapp.com --port=80 --json +[1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0] + +$ mlem deployment apply app test_x.csv --json +[1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0] +``` + + + +You don’t even need to have the deployment metadata locally: + +```cli +$ mlem deployment apply --json \ + https://github.com/iterative/example-mlem-get-started/myservice \ + https://github.com/iterative/example-mlem-get-started/test_x.csv +``` + + + +## Managing deployment + +Finally, you can check the status of your service with: + +```cli +$ mlem deployment status app +running +``` + +And stop your service with + +```cli +$ mlem deployment remove app +⏳️ Loading deployment from app.mlem +🔗 Loading link to staging.mlem +🔻 Deleting example-mlem-get-started-app heroku app +💾 Updating deployment at app.mlem +``` + +Note, that it will not delete the deployment definition, just update its state. diff --git a/content/docs/user-guide/deploying/index.md b/content/docs/user-guide/deploying/index.md new file mode 100644 index 00000000..796287fa --- /dev/null +++ b/content/docs/user-guide/deploying/index.md @@ -0,0 +1,202 @@ +# Deploying models + +With MLEM you can create and manage deployments of your models in the cloud. +This uses building and serving functionality under the hood. + +Each deployment is a MLEM Object that holds the following parameters: + +- **Target environment parameters** define where you want your model to be + deployed +- **Deployment parameters** are additional parameters for specific deployment + implementation you chose + +Also, each deployment has **state**, which is a snapshot of the actual +state of your deployment. It is created and updated by MLEM during deployment +process to keep track of parameters needed for management.
It is stored +separately from declaration. + +## Simple deployment + +You can try out MLEM deployments with just one command without additional +configuration. You just need your model saved with MLEM and an environment you +want to deploy to + +```yaml +$ mlem deployment run --model --some_option +option_value +``` + +A MLEM Object named `` of type `deployment` will be created and deployed +to target environment. + + + +Also, near `.mlem` file there will be `.mlem.state` file, where MLEM +will dump some parameters during deployment process. + +## Managing deployment + +After deployment process is done you can use MLEM commands to manage it. + +To check status of your deployment run + +```cli +$ mlem deployment status +``` + +To remove deployment, run + +```cli +$ mlem deployment remove +``` + +This will stop the deployment and erase deployment state value + +## Making requests + +You also can create MLEM Client for your deployment to make some requests: + +```python +from mlem.api import load + +service = load("") +client = service.get_client() +res = client.predict(data) +``` + +Or run `deployment apply` from command line: + +```bash +$ mlem deployment apply +``` + +--- + +## Pre-defining deployment + +You can also create deployments without actually running them and later trigger +already configured deployments. For example, this allows you to track deployment +parameters in VCS and use it in CI/CD pipelines more easily. + +To create deployment declaration, we will use `declare` command: + +```bash +$ mlem declare deployment --some_option option_value +``` + +This will create deployment declaration file `.mlem` with all specified +options. 
+ + +💡 You can see available deployment types with `mlem types deployment` and available options with `mlem types deployment ` + + + +Now you can actually run the deployment process just by referencing your +declaration: + +```bash +$ mlem deployment run --load +``` + +## Pre-defining target environment + +If you want to re-use your target environment parameters, you can declare a +separate MLEM Object of type `env` and reference it when creating deployments. +To do this, run + +```yaml +$ mlem declare env --key1 value1 --key2 value2 +``` + +This will create an `env` MLEM Object with name `` that you can reference +in `mlem deployment run` with `--env ` option or in +`mlem declare deployment` with `--env=` option. + +## Setting up remote state manager + +One of the parameters of the deployment is `state_manager`. Before making any +deployments, you should think about which state manager implementation to use. + +If you are a sole collaborator of your project, don't use CI/CD and you don't +plan to run multiple deployment commands in parallel, you should be fine with +the default one. It will save state as local files, you can even commit them to +VCS. + +However, for more advanced usage it will not suffice, because your local files +will not be available from your colleague’s machine/CI/CD runner/etc, and VCS +tracked files have, well, versions, which means state may be inconsistent +between different branches/repo clones. And this is where remote state managers +come into play. + +You can set up a remote filesystem to hold state, which means you'll have +consistent state among all collaborators. A file-locking mechanism will ensure that +no race condition will occur. + +Other remote state manager implementations will be available in future like +databases, key-value stores etc. Please express your interest in them via +issues. + +Setting up remote state manager is a lot like setting DVC remote. All you need +to do is provide uri where you want to store state files.
E.g. for s3 it will +look like this + +```bash +$ mlem config set core.state.uri s3://bucket/path +``` + +Note, that all deployments created after that will use it by default if no +`state_manager` field provided. + +You can also override project-configured state manager with option like +`--state_manager.uri s3://bucket/path` provided to `mlem declare deployment` or +`mlem deployment run` commands. + +## Examples of files + +If you are inside MLEM project, +`mlem declare env --option value` will create a +`.mlem` file with contents + +```yaml +option: value +object_type: env +type: +``` + +You can edit it manually if you need + +State configuration `mlem config set core.state.uri s3://bucket/path` will add +this section to `.mlem.yaml` + +```yaml +core: + state: + uri: s3://bucket/path +``` + +Declaring deployment with +`mlem declare deployment --option value --env ` or running +`mlem deployment run ...` without declaring first will create MLEM Object at +`.mlem` with contents + +```yaml +option: value +env: +object_type: deployment +type: +``` + +Running the deployment will create a state file at `.mlem.state` (or +`s3://bucket/path/.mlem.state/.mlem.state` if you configured state +manager) with contents + +```yaml +model_hash: 65bd55a3bb4bc829abdb02f5cf6f1018 +step1: { step1 metadata } +step2: { step2 metadata } +type: +``` diff --git a/content/docs/user-guide/deploying/kubernetes.md b/content/docs/user-guide/deploying/kubernetes.md new file mode 100644 index 00000000..c12b9bf9 --- /dev/null +++ b/content/docs/user-guide/deploying/kubernetes.md @@ -0,0 +1,366 @@ +# Kubernetes + +To serve models in production in a scalable and failure-safe way, one needs +something more than Heroku. [Kubernetes](https://kubernetes.io/docs/home/) is an +open source container orchestration engine for automating deployment, scaling, +and management of containerized applications. 
+ +Below, we will deploy a model to a kubernetes cluster exposing its prediction +endpoints through a service. + +## Requirements + +```bash +pip install mlem[kubernetes] +# or +pip install kubernetes docker +``` + +### Preparation + +- Make sure you have a Kubernetes cluster accessible, with the corresponding + kubeconfig file available. +- The cluster has access to a docker registry so as to pull docker images. +- Relevant permissions to create resources on the cluster -- deployment, + service, etc. are present. +- Nodes are accessible and reachable, with an external IP address (valid for a + NodePort service, more details to come below). + +One can access a +[basic](https://kubernetes.io/docs/tutorials/kubernetes-basics/) tutorial to +learn about the above terms. + +## Description + +Deploying to a Kubernetes cluster involves 2 main steps: + +1. Build the docker image and upload it to a registry. +2. Create resources on the Kubernetes cluster -- specifically, a `namespace`, a + `deployment` and a `service`. + +Once this is done, one can use the usual workflow of +[`mlem deployment run`](/doc/command-reference/deployment/run) to deploy on +Kubernetes. + +
+ +### βš™οΈ About which cluster to use + +MLEM tries to find the kubeconfig file from the environment variable +`KUBECONFIG` or the default location `~/.kube/config`. + +If you need to use another path, one can pass it with + +`--kube_config_file_path ...` + +
+ + + +You can use `mlem deploy run kubernetes -h` to list all the configurable +parameters. + + + +Most of the configurable parameters in the list above come with sensible +defaults. But at the least, one needs to follow the structure given below: + +```cli +$ mlem deployment run kubernetes service_name \ + --model model \ + --service_type loadbalancer +πŸ’Ύ Saving deployment to service_name.mlem +⏳️ Loading model from model.mlem +πŸ›  Creating docker image ml + πŸ›  Building MLEM wheel file... + πŸ’Ό Adding model files... + πŸ›  Generating dockerfile... + πŸ’Ό Adding sources... + πŸ’Ό Generating requirements file... + πŸ›  Building docker image ml:4ee45dc33804b58ee2c7f2f6be447cda... + βœ… Built docker image ml:4ee45dc33804b58ee2c7f2f6be447cda +namespace created. status='{'conditions': None, 'phase': 'Active'}' +deployment created. status='{'available_replicas': None, + 'collision_count': None, + 'conditions': None, + 'observed_generation': None, + 'ready_replicas': None, + 'replicas': None, + 'unavailable_replicas': None, + 'updated_replicas': None}' +service created. status='{'conditions': None, 'load_balancer': {'ingress': None}}' +βœ… Deployment ml is up in mlem namespace +``` + +where: + +- `service_name` is a name of one's own choice, of which corresponding + `service_name.mlem` and `service_name.mlem.state` files will be created. +- `model` denotes the path to model saved via `mlem`. +- `service_type` is configurable and is passed as `loadbalancer`. The default + value is `nodeport` if not passed. + +### Checking the docker images + +One can check the docker image built via `docker image ls` which should give the +following output: + +``` +REPOSITORY TAG IMAGE ID CREATED SIZE +ml 4ee45dc33804b58ee2c7f2f6be447cda 16cf3d92492f 3 minutes ago 778MB +... 
+``` + +### Checking the kubernetes resources + +Pods created can be checked via `kubectl get pods -A` which should have a pod in +the `mlem` namespace present as shown below: + +``` +NAMESPACE NAME READY STATUS RESTARTS AGE +kube-system coredns-6d4b75cb6d-xp68b 1/1 Running 7 (12m ago) 7d22h +... +kube-system storage-provisioner 1/1 Running 59 (11m ago) 54d +mlem ml-cddbcc89b-zkfhx 1/1 Running 0 5m58s +``` + +By default, all resources are created in the `mlem` namespace. This of course is +configurable using `--namespace prod` where `prod` is the desired namespace +name. + +### Making predictions via MLEM + +One can of course use the +[`mlem deployment apply`](/doc/command-reference/deployment/apply) command to +ping the deployed endpoint to get the predictions back. An example could be: + +```cli +$ mlem deployment apply service_name data --json +[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] +``` + +where `data` is the dataset saved via `mlem`. + +### Deleting the Kubernetes resources + +A model can easily be undeployed using `mlem deploy remove service_name` which +will delete the `pods`, `services` and the `namespace` i.e. clear the resources +from the cluster. The docker image will still persist in the registry though. 
+ +## Swapping the model in deployment + +If you want to change the model that is currently under deployment, run + +```cli +$ mlem deploy run --load service_name --model other-model +``` + +This will build a new docker image corresponding to the `other-model` and will +terminate the existing pod and create a new one, thereby replacing it, without +downtime. + +This can be seen below: + +### Checking the docker images + +``` +REPOSITORY TAG IMAGE ID CREATED SIZE +ml d57e4cacec82ebd72572d434ec148f1d 9bacd4cd9cc0 11 minutes ago 2.66GB +ml 4ee45dc33804b58ee2c7f2f6be447cda 26cb86b55bc4 About an hour ago 778MB +... +``` + +Notice how a new docker image with the tag `d57e4cacec82ebd72572d434ec148f1d` is +built. + +### Checking the deployment process + +``` +⏳️ Loading model from other-model.mlem +⏳️ Loading deployment from service_name.mlem +🛠 Creating docker image ml + 🛠 Building MLEM wheel file... + 💼 Adding model files... + 🛠 Generating dockerfile... + 💼 Adding sources... + 💼 Generating requirements file... + 🛠 Building docker ml:d57e4cacec82ebd72572d434ec148f1d... + ✅ Built docker image ml:d57e4cacec82ebd72572d434ec148f1d +✅ Deployment ml is up in mlem namespace +``` + +Here, an existing deployment i.e. `service_name` is used but with a newer model. +Hence, details of the registry need not be passed again. The contents of +`service_name` can be checked by inspecting the `service_name.mlem` file. + +### Checking the kubernetes resources + +We can see the existing pod being terminated and the new one running in its +place below: + +``` +NAMESPACE NAME READY STATUS RESTARTS AGE +kube-system aws-node-pr8cn 1/1 Running 0 90m +... 
+kube-system kube-proxy-dfxsv 1/1 Running 0 90m +mlem ml-66b9588df5-wmc2v 1/1 Running 0 99s +mlem ml-cddbcc89b-zkfhx 1/1 Terminating 0 60m +``` + +## Example: Using EKS cluster with ECR on AWS + +The deployment to a cloud managed kubernetes cluster such as +[EKS](https://docs.aws.amazon.com/eks/latest/userguide/what-is-eks.html) is +simple and analogous to how it is done in the steps above for a local cluster +(such as minikube). + + + +To set up an EKS cluster, you can simply use [`eksctl`](https://eksctl.io/). + +A simple command such as + +```cli +eksctl create cluster --name cluster-name --region us-east-1 +``` + +will set up an EKS cluster for you with default parameters such as two `m5.large` +worker nodes. + +Other tools such as +[`terraform`](https://learn.hashicorp.com/tutorials/terraform/eks) can also be +used. + + + +The popular docker registry choice to be used with EKS is +[ECR](https://docs.aws.amazon.com/AmazonECR/latest/userguide/what-is-ecr.html) +(Elastic Container Registry). Make sure the EKS cluster has at least read access +to ECR. + +### ECR + +Make sure you have a repository in ECR where docker images can be uploaded. In +the sample screenshot below, there exists a `classifier` repository: + +![alt text](/img/ecr.png) + +### Using MLEM with ECR and EKS + +Provided that the default kubeconfig file (present at `~/.kube/config`) can +communicate with EKS, execute the following command: + +```cli +$ mlem deploy run kubernetes service_name \ + --model model \ + --registry ecr \ + --registry.account 342840881361 \ + --registry.region "us-east-1" \ + --registry.host "342840881361.dkr.ecr.us-east-1.amazonaws.com/classifier" \ + --image_name classifier --service_type loadbalancer +💾 Saving deployment to service_name.mlem +⏳️ Loading model from model.mlem +🛠 Creating docker image classifier + 🛠 Building MLEM wheel file... + 💼 Adding model files... + 🛠 Generating dockerfile... + 💼 Adding sources... + 💼 Generating requirements file... 
+ 🛠 Building docker image 342840881361.dkr.ecr.us-east-1.amazonaws.com/classifier:4ee45dc33804b58ee2c7f2f6be447cda... + 🗝 Logged in to remote registry at host 342840881361.dkr.ecr.us-east-1.amazonaws.com + ✅ Built docker image 342840881361.dkr.ecr.us-east-1.amazonaws.com/classifier:4ee45dc33804b58ee2c7f2f6be447cda + 🔼 Pushing image 342840881361.dkr.ecr.us-east-1.amazonaws.com/classifier:4ee45dc33804b58ee2c7f2f6be447cda to +342840881361.dkr.ecr.us-east-1.amazonaws.com + ✅ Pushed image 342840881361.dkr.ecr.us-east-1.amazonaws.com/classifier:4ee45dc33804b58ee2c7f2f6be447cda to +342840881361.dkr.ecr.us-east-1.amazonaws.com +namespace created. status='{'conditions': None, 'phase': 'Active'}' +deployment created. status='{'available_replicas': None, + 'collision_count': None, + 'conditions': None, + 'observed_generation': None, + 'ready_replicas': None, + 'replicas': None, + 'unavailable_replicas': None, + 'updated_replicas': None}' +service created. status='{'conditions': None, 'load_balancer': {'ingress': None}}' +✅ Deployment classifier is up in mlem namespace +``` + +- Note that the repository name in ECR i.e. `classifier` has to match with the + `image_name` supplied through `--image_name`. + +### Checking the docker images + +One can check the docker image built via `docker image ls` which should give the +following output: + +``` +REPOSITORY TAG IMAGE ID CREATED SIZE +342840881361.dkr.ecr.us-east-1.amazonaws.com/classifier 4ee45dc33804b58ee2c7f2f6be447cda 96afb03ad6f5 2 minutes ago 778MB +... +``` + +This can also be verified in ECR: + +![alt text](/img/ecr_image.png) + +### Checking the kubernetes resources + +Pods created can be checked via `kubectl get pods -A` which should have a pod in +the `mlem` namespace present as shown below: + +``` +NAMESPACE NAME READY STATUS RESTARTS AGE +kube-system aws-node-pr8cn 1/1 Running 0 11m +... 
+kube-system kube-proxy-dfxsv 1/1 Running 0 11m +mlem classifier-687655f977-h7wsl 1/1 Running 0 83s +``` + +Services created can be checked via `kubectl get svc -A` which should look like +the following: + +``` +NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +default kubernetes ClusterIP 10.100.0.1 <none> 443/TCP 20m +kube-system kube-dns ClusterIP 10.100.0.10 <none> 53/UDP,53/TCP 20m +mlem classifier LoadBalancer 10.100.87.16 a069daf48f9f244338a4bf5c60c6b823-1734837081.us-east-1.elb.amazonaws.com 8080:32067/TCP 2m32s +``` + +### Making predictions via mlem or otherwise + +One can clearly visit the External IP of the service `classifier` created by +`mlem` i.e. + +**a069daf48f9f244338a4bf5c60c6b823-1734837081.us-east-1.elb.amazonaws.com:8080** + +using the browser and see the usual FastAPI docs page: + +![alt text](/img/fastapi.png) + +But one can also use the +[`mlem deployment apply`](/doc/command-reference/deployment/apply) command to +ping the deployed endpoint to get the predictions back. An example could be: + +```cli +$ mlem deployment apply service_name data --json +[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] +``` + +i.e. `mlem` knows how to calculate the externally reachable endpoint given the +service type. + + + +The example discussed above deploys a LoadBalancer Service Type, but one +can also use NodePort (which is the default, or set explicitly via `--service_type nodeport`). + +While `mlem` knows how to calculate the externally reachable IP address, make sure +the EC2 machine running the pod has external traffic allowed to it. 
This can be +configured in the inbound rules of the node's security group. + +This can be seen as the last rule being added below: + +![alt text](/img/inbound.png) + + diff --git a/content/docs/user-guide/deploying/sagemaker.md b/content/docs/user-guide/deploying/sagemaker.md new file mode 100644 index 00000000..d57d738a --- /dev/null +++ b/content/docs/user-guide/deploying/sagemaker.md @@ -0,0 +1,126 @@ +# SageMaker + +MLEM SageMaker allows you to deploy MLEM models to AWS SageMaker. You can learn +more about SageMaker +[here](https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html). + +## Requirements + +```bash +pip install mlem[sagemaker] +# or +pip install sagemaker boto3 +``` + +To be able to deploy to SageMaker you need to do some AWS configuration. These are +not MLEM-specific requirements; rather, they are needed for any SageMaker +interaction. + +Here is the list: + +- AWS User Credentials +- SageMaker access for this user (policy + `arn:aws:iam::aws:policy/AmazonSageMakerFullAccess`) +- ECR access for this user (policy + `arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess`) +- AWS IAM Role with SageMaker access +- S3 Access + +You can configure those manually or use existing ones. You can also use +[terraform](https://www.terraform.io/) with +[this template](https://github.com/iterative/mlem/tree/main/mlem/contrib/sagemaker/mlem_sagemaker.tf) +and +[helper script](https://github.com/iterative/mlem/tree/main/mlem/contrib/sagemaker/env_setup.py) +(terraform needs to be installed). + +> This script is not part of the MLEM public API, so you'll need to run it manually +> like this: + +```python +from mlem.contrib.sagemaker.env_setup import sagemaker_terraform + +sagemaker_terraform(export_secret="creds.csv") +``` + +It's recommended to use [aws cli](https://aws.amazon.com/cli/) with a separate +profile configured for MLEM. 
You can also provide credentials with +[AWS environment variables](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html). + +## Configuring and running deployment + +[SageMaker Environment](#class-sagemakerenv) declaration can be used to hold +your SageMaker configuration. + +```cli +$ mlem declare env sagemaker ... --role <role> \ + --account <account> \ + --region <region> \ + --bucket <bucket> \ + --ecr_repository <repository> +``` + +You can also pre-declare [SageMaker Deployment](#class-sagemakerdeployment) +itself. + +```cli +$ mlem declare deployment sagemaker ... --env ... \ + --method predict \ + --instance_type ml.t2.medium +``` + +To run deployment, run + +```cli +$ mlem deployment run ... --model <path> +``` + +### What happens internally + +Once you run this sweet `mlem deployment run ...` command, a number of +things will happen. + +1. If you did not specify a pre-built image, a new docker image will be built. It + will include all of the model's requirements. This image will be pushed to the + configured ECR repository. +2. Model is packaged and uploaded to the configured S3 bucket as per + [this doc](https://docs.aws.amazon.com/sagemaker/latest/dg/realtime-endpoints-deployment.html#realtime-endpoints-deployment-create-model) +3. Endpoint Configuration is created as per + [this doc](https://docs.aws.amazon.com/sagemaker/latest/dg/realtime-endpoints-deployment.html#realtime-endpoints-deployment-create-endpoint-config) +4. Model is deployed thus creating a SageMaker Endpoint + +After this command exits, however, it can take some time on SageMaker's side to +actually run VMs with your model. You can check status with + +```cli +$ mlem deployment status ... +``` + +or block until model is ready with + +```cli +$ mlem deployment wait ... 
-i starting +``` + +## Making requests + +MLEM SageMaker deployments are fully compatible with SageMaker +[InvokeEndpoint](https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_runtime_InvokeEndpoint.html) +API; however, it's a lot easier to use +[MLEM SagemakerClient](#class-sagemakerclient). To obtain one, just call the +`get_client` method on your deployment object. + +```python +from mlem.api import load_meta + +service = load_meta("...") +client = service.get_client() +``` + +You can then use this `client` instance to invoke your model as if it were local. + +```python +data = ... # pd.DataFrame or whatever model.predict accepts +preds = client.predict(data) +``` + +> MLEM does not support batch invocations yet. We will add support for them soon. diff --git a/content/docs/use-cases/dvc.md b/content/docs/user-guide/dvc.md similarity index 91% rename from content/docs/use-cases/dvc.md rename to content/docs/user-guide/dvc.md index fa23495a..9c8b143f 100644 --- a/content/docs/use-cases/dvc.md +++ b/content/docs/user-guide/dvc.md @@ -62,7 +62,7 @@ $ git add .dvcignore Finally, we need to stop Git from keeping already indexed binaries. ```cli -$ git rm -r --cached .mlem +$ git rm -r --cached models data ``` ⛳ @@ -74,7 +74,7 @@ Next, let’s remove artifacts from Git and re-save them, so MLEM can use new storage for them. You don't need to change a single line of code ```cli -$ git rm -r --cached .mlem/ +$ git rm -r --cached models data $ python train.py ``` @@ -82,8 +82,8 @@ Finally, let’s add and commit new metafiles to Git and artifacts to DVC, respectively: ```cli -$ dvc add .mlem/model/rf -$ git add .mlem +$ dvc add models/rf +$ git add models $ git commit -m "Switch to dvc storage" ... @@ -113,10 +113,10 @@ of a pipelines stage. ## Example Let's continue using the example from above. First, let's stop tracking the -artifact `.mlem/model/rf` in DVC and stop ignoring MLEM files in `.dvcignore`. +artifact `models/rf` in DVC and stop ignoring MLEM files in `.dvcignore`. 
```dvc -$ dvc remove .mlem/model/rf.dvc +$ dvc remove models/rf.dvc # we can delete the file since there are no other records # beside one we added above: $ git rm .dvcignore @@ -132,8 +132,8 @@ stages: deps: - train.py outs: - - .mlem/model/rf - - .mlem/model/rf.mlem: + - models/rf + - models/rf.mlem: cache: false ``` @@ -151,5 +151,5 @@ Use `dvc push` to send your updates to remote storage. ``` Now DVC will take care of storing binaries, so you'll need to commit model -metafile (`.mlem/model/rf.mlem`) and `dvc.lock` only. Learn more about +metafile (`models/rf.mlem`) and `dvc.lock` only. Learn more about [DVC](https://dvc.org/doc) and how it can be useful for training your ML models. diff --git a/content/docs/user-guide/importing.md b/content/docs/user-guide/importing/index.md similarity index 90% rename from content/docs/user-guide/importing.md rename to content/docs/user-guide/importing/index.md index 12a415f8..9a9f6a95 100644 --- a/content/docs/user-guide/importing.md +++ b/content/docs/user-guide/importing/index.md @@ -13,4 +13,4 @@ have a live Python object to analyze and tries to recreate it, which may fail.
You can see list of available import implementations -[here](/doc/user-guide/mlem-abcs#importhook). +[here](/doc/object-reference/mlem-abcs#importhook). diff --git a/content/docs/user-guide/linking.md b/content/docs/user-guide/linking.md index b86b972e..920840af 100644 --- a/content/docs/user-guide/linking.md +++ b/content/docs/user-guide/linking.md @@ -8,8 +8,7 @@ anywhere you need to specify MLEM Object bot in API and CLI. Since links are also a type of MLEM Object, they share the same internal logic. -For example, they are saved under the `.mlem/link` directory. To load an -instance of `MlemLink` (and not the object it references) provide +To load an instance of `MlemLink` (and not the object it references) provide `follow_links=False` to `load_meta` method. diff --git a/content/docs/user-guide/models/callable.md b/content/docs/user-guide/models/callable.md new file mode 100644 index 00000000..a58e38bb --- /dev/null +++ b/content/docs/user-guide/models/callable.md @@ -0,0 +1,14 @@ +# Python callables + +[ModelType](/doc/object-reference/mlem-abcs#modeltype) implementation to turn +any python callable into MLEM Model + +## Description + +**TODO** + +## Examples + +```python + +``` diff --git a/content/docs/user-guide/models/catboost.md b/content/docs/user-guide/models/catboost.md new file mode 100644 index 00000000..108e1755 --- /dev/null +++ b/content/docs/user-guide/models/catboost.md @@ -0,0 +1,23 @@ +# Catboost + +Implementations of [ModelType](/doc/object-reference/mlem-abcs#modeltype) and +[ModelIO](/doc/object-reference/mlem-abcs#modelio) for `CatBoostClassifier` and +`CatBoostRegressor` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[catboost] +# or +pip install catboost +``` + +## Examples + +```python + +``` diff --git a/content/docs/user-guide/models/index.md b/content/docs/user-guide/models/index.md new file mode 100644 index 00000000..ce7ca688 --- /dev/null +++ b/content/docs/user-guide/models/index.md @@ -0,0 +1,54 @@ 
+# Working with models + +To be able to use all MLEM features, you need to turn your model into a MLEM +model first. + +The easiest way to do this is to use the [save](/doc/api-reference/save) API method. + +```py +from mlem.api import save + +save(model, "models/mymodel", sample_data=df) +``` + +After that you could work with your model [from CLI](/doc/command-reference) or +call [API methods](/doc/api-reference), passing down a path to the saved model. + +The `model` object can be any supported Python object. MLEM works with all major +popular machine learning frameworks as well as arbitrary Python callables. + +You can find the full list [here](/doc/object-reference/model) as well as additional +documentation. + +For most use cases it's mandatory to provide the `sample_data` argument. Typically, +it is the same data object you provide for your model's `.predict` method. + +MLEM uses it to infer your model's signature which will be needed to build and +deploy it later. + +Additionally, MLEM will automatically infer your model requirements (including +any local code). The determined requirements can be used to +[create virtual environments](/doc/user-guide/building/requirements). + +## Alternative ways to create MLEM model + +Existing model files can be [imported](/doc/user-guide/importing). + +You can also create a MLEM model from your object without saving it. Use `.dump` +to save it later. + +```py +from mlem.core.objects import MlemModel + +mlem_model = MlemModel.from_obj(model, sample_data=df) +mlem_model.dump("models/mymodel") +``` + +This may be useful if you're going to work with a MLEM model from API inside the +same Python process, or you want to persist it somewhere but would like to avoid +re-reading it after `mlem.api.save`. + +Besides, note that you can pass both `model` and `mlem_model` to MLEM +[Python API methods](/doc/api-reference). 
Passing `mlem_model` makes sense when +you're going to use the same model with MLEM multiple times, and want to save some +time on creating a MLEM model object from scratch each time. diff --git a/content/docs/user-guide/models/lightgbm.md b/content/docs/user-guide/models/lightgbm.md new file mode 100644 index 00000000..2cc59d57 --- /dev/null +++ b/content/docs/user-guide/models/lightgbm.md @@ -0,0 +1,24 @@ +# LightGBM + +[ModelType](/doc/object-reference/mlem-abcs#modeltype) and +[ModelIO](/doc/object-reference/mlem-abcs#modelio) implementations for +`lightgbm.Booster` as well as LightGBMDataType with Reader and Writer for +`lightgbm.Dataset` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[lightgbm] +# or +pip install lightgbm +``` + +## Examples + +```python + +``` diff --git a/content/docs/user-guide/models/onnx.md b/content/docs/user-guide/models/onnx.md new file mode 100644 index 00000000..0673f01b --- /dev/null +++ b/content/docs/user-guide/models/onnx.md @@ -0,0 +1,23 @@ +# Onnx + +[ModelType](/doc/object-reference/mlem-abcs#modeltype) and +[ModelIO](/doc/object-reference/mlem-abcs#modelio) implementations for +`onnx.ModelProto` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[onnx] +# or +pip install onnx +``` + +## Examples + +```python + +``` diff --git a/content/docs/user-guide/models/sklearn.md b/content/docs/user-guide/models/sklearn.md new file mode 100644 index 00000000..fa051fcf --- /dev/null +++ b/content/docs/user-guide/models/sklearn.md @@ -0,0 +1,41 @@ +# Scikit-Learn + +[ModelType](/doc/object-reference/mlem-abcs#modeltype) implementations for any +sklearn-compatible classes as well as `Pipeline` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[sklearn] +# or +pip install scikit-learn +``` + +## Examples + +### Saving and loading Scikit-Learn model + +```python +from sklearn.datasets import load_iris +from sklearn.ensemble import RandomForestClassifier + +from 
mlem.api import save, load + + +data, y = load_iris(return_X_y=True, as_frame=True) +rf = RandomForestClassifier() +rf.fit(data, y) + +save( + rf, + "rf", + sample_data=data, +) + +rf = load("rf") +rf.predict(data) +``` diff --git a/content/docs/user-guide/models/tensorflow.md b/content/docs/user-guide/models/tensorflow.md new file mode 100644 index 00000000..11fefca1 --- /dev/null +++ b/content/docs/user-guide/models/tensorflow.md @@ -0,0 +1,23 @@ +# Tensorflow + +[ModelType](/doc/object-reference/mlem-abcs#modeltype) and +[ModelIO](/doc/object-reference/mlem-abcs#modelio) implementations for +`tf.keras.Model` DataType, Reader and Writer implementations for `tf.Tensor` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[tensorflow] +# or +pip install tensorflow +``` + +## Examples + +```python + +``` diff --git a/content/docs/user-guide/models/torch.md b/content/docs/user-guide/models/torch.md new file mode 100644 index 00000000..c9ab4999 --- /dev/null +++ b/content/docs/user-guide/models/torch.md @@ -0,0 +1,24 @@ +# Torch + +[ModelType](/doc/object-reference/mlem-abcs#modeltype) and +[ModelIO](/doc/object-reference/mlem-abcs#modelio) implementations for +`torch.nn.Module` ImportHook for importing files saved with `torch.save` +DataType, Reader and Writer implementations for `torch.Tensor` + +## Description + +**TODO** + +## Requirements + +```bash +pip install mlem[torch] +# or +pip install torch +``` + +## Examples + +```python + +``` diff --git a/content/docs/user-guide/models/xgboost.md b/content/docs/user-guide/models/xgboost.md new file mode 100644 index 00000000..b6b290ad --- /dev/null +++ b/content/docs/user-guide/models/xgboost.md @@ -0,0 +1,24 @@ +# Xgboost + +[ModelType](/doc/object-reference/mlem-abcs#modeltype) and +[ModelIO](/doc/object-reference/mlem-abcs#modelio) implementations for +`xgboost.Booster` as well as DataType, Reader and Writer implementations for +`xgboost.DMatrix` + +## Description + +**TODO** + +## 
Requirements + +```bash +pip install mlem[xgboost] +# or +pip install xgboost +``` + +## Examples + +```python + +``` diff --git a/content/docs/user-guide/project-structure.md b/content/docs/user-guide/project-structure.md index 7a0b1d62..a8af3725 100644 --- a/content/docs/user-guide/project-structure.md +++ b/content/docs/user-guide/project-structure.md @@ -2,14 +2,14 @@ ## MLEM Project -Any directory with a valid `.mlem/` directory is considered a **MLEM Project**. -To create one, use `mlem init` or `mlem.api.init()`. This will also create an -empty `config.yaml` (see [Configuration](/doc/user-guide/configuration)). +Any directory with a valid `.mlem.yaml` file is considered a **MLEM Project**. +To create one, use `mlem init` or `mlem.api.init()`. For more details on +`.mlem.yaml` file see [Configuration](/doc/user-guide/configuration). -Some API and CLI commands like `mlem ls` and `mlem config` require this -execution context. But in general, MLEM can work with `.mlem` files anywhere. +Some API and CLI commands like `mlem config` require this execution context. But +in general, MLEM can work with `.mlem` metafiles anywhere. @@ -18,23 +18,6 @@ projects_ help you better structure and easily address existing data artifacts (especially ML models). And Git allows you to version MLEM objects and configuration options along with code. -## Internal vs. External objects - -By default, any MLEM objects that you save into project will be **internal**, -which means they will be saved under `.mlem/{object type}/`. - -To save objects anywhere, use the `external` flag when saving them or set -`default_external=True` via configuration. **External** objects will be indexed -via links under `.mlem/link/`. - - - -You can also turn this off (via the `link=False` flag), but in that case your -object will not be known to the MLEM project, for example it will not be shown -by `mlem ls`. 
- - - ## Referencing MLEM Objects Everywhere you need to reference any saved MLEM Object, you can do so by @@ -48,19 +31,16 @@ providing those arguments: All of those are saved in `location` field of a MLEM Object. If you didn't provide `project` and/or `rev`, MLEM will try to deduce them from -`path`. `fs` is also can be deduced from `project` or `path`. Also, if you are -referencing object in **MLEM Project**, you can omit `.mlem/{object_type}` from -`path`. +`path`. `fs` can also be deduced from `project` or `path`. Here is the example of how the same object can be referenced -- `path = rf, project = https://github.com/iterative/example-mlem-get-started, rev=main` - - classic -- `path = .mlem/model/rf, project = https://github.com/iterative/example-mlem-get-started, rev=main` - - can also provide full path -- `path = https://github.com/iterative/example-mlem-get-started/tree/main/rf` - - everything could be provided via path (depends on implementation) -- `path = https://github.com/iterative/example-mlem-get-started/.mlem/model/rf` - +- `path = models/rf, project = https://github.com/iterative/example-mlem-get-started, rev=main` - + using the full path inside MLEM project +- `path = https://github.com/iterative/example-mlem-get-started/tree/main/models/rf` - + everything could be provided via path (path format could differ for different + storages) +- `path = https://github.com/iterative/example-mlem-get-started/models/rf` - also can omit `tree/main` since `main` is default. -- `path = rf, fs = GithubFileSystem(org="iterative", repo="example-mlem-get-started", sha="main")` - - API only, can provide pre-configured fs +- `path = models/rf, fs = GithubFileSystem(org="iterative", repo="example-mlem-get-started", sha="main")` - + API only, can provide pre-configured fs. 
diff --git a/content/docs/user-guide/remote-objects/bitbucket.md b/content/docs/user-guide/remote-objects/bitbucket.md new file mode 100644 index 00000000..2f713ae7 --- /dev/null +++ b/content/docs/user-guide/remote-objects/bitbucket.md @@ -0,0 +1 @@ +# BitBucket diff --git a/content/docs/user-guide/remote-objects/clouds.md b/content/docs/user-guide/remote-objects/clouds.md new file mode 100644 index 00000000..577f88ed --- /dev/null +++ b/content/docs/user-guide/remote-objects/clouds.md @@ -0,0 +1 @@ +# Clouds (S3, GCS, AZDB etc) diff --git a/content/docs/user-guide/remote-objects/dvc.md b/content/docs/user-guide/remote-objects/dvc.md new file mode 100644 index 00000000..23b97bf0 --- /dev/null +++ b/content/docs/user-guide/remote-objects/dvc.md @@ -0,0 +1,3 @@ +# DVC + +copy here stuff from dvc use case diff --git a/content/docs/user-guide/remote-objects/github.md b/content/docs/user-guide/remote-objects/github.md new file mode 100644 index 00000000..4cef1d47 --- /dev/null +++ b/content/docs/user-guide/remote-objects/github.md @@ -0,0 +1,6 @@ +# GitHub + +All URIs starting with `https://github.com` will be resolved as GitHub URIs. + +The `rev` option is supported; you can specify it separately or as a part of the URI +like this: `https://github.com/<user>/<repo>/tree/<rev>/path` diff --git a/content/docs/user-guide/remote-objects/gitlab.md b/content/docs/user-guide/remote-objects/gitlab.md new file mode 100644 index 00000000..86beb0f8 --- /dev/null +++ b/content/docs/user-guide/remote-objects/gitlab.md @@ -0,0 +1 @@ +# Gitlab diff --git a/content/docs/user-guide/remote-objects.md b/content/docs/user-guide/remote-objects/index.md similarity index 69% rename from content/docs/user-guide/remote-objects.md rename to content/docs/user-guide/remote-objects/index.md index b3a9302a..b0eb12eb 100644 --- a/content/docs/user-guide/remote-objects.md +++ b/content/docs/user-guide/remote-objects/index.md @@ -16,36 +16,12 @@ operations apply to any [object type] and location. 
## Remote MLEM projects Although you can store MLEM objects in any location such as a Git repo, Cloud -storage, or external drives, creating a MLEM project lets you organize and -[discover](#listing-objects) MLEM objects consistently. +storage, or external drives, creating a MLEM project lets you organize MLEM +objects consistently. To create a MLEM project in a remote location, you can provide its URL or path to `mlem init`. -## Listing objects - -You can list MLEM objects inside a remote MLEM project (e.g. in a Git repo) with -`mlem list`. There's no need to download/clone the project. - -```cli -$ mlem list https://github.com/iterative/example-mlem-get-started -Deployments: - - myservice -Models: - - rf -Envs: - - staging -``` - - - -A [MLEM project] is required as target for `mlem list`. The other operations -(below) work with loose MLEM objects (not in a MLEM project) as well. - -[mlem project]: /doc/command-reference/init - - - ## Loading objects (Python) You can load [MLEM objects] from remote locations inside Python code with @@ -64,8 +40,7 @@ model = load( This fetches the `rf` model [from branch `main`] of the `example-mlem-get-started` repo and loads it to memory. 
-[from branch `main`]: - https://github.com/iterative/example-mlem-get-started/tree/main/.mlem/model +[from branch `main`]: https://github.com/iterative/example-mlem-get-started/ ## Downloading objects @@ -76,14 +51,14 @@ You can download MLEM objects to the local environment in with `mlem clone` $ mlem clone rf \ --project https://github.com/iterative/example-mlem-get-started \ ml_model -⏳️ Loading meta from https://github.com/iterative/example-mlem-get-started/tree/main/.mlem/model/rf.mlem -🐏 Cloning https://github.com/iterative/example-mlem-get-started/tree/main/.mlem/model/rf.mlem -💾 Saving model to .mlem/model/ml_model.mlem +⏳️ Loading meta from https://github.com/iterative/example-mlem-get-started/tree/main/models/rf.mlem +🐏 Cloning https://github.com/iterative/example-mlem-get-started/tree/main/models/rf.mlem +💾 Saving model to ml_model.mlem ``` -This places the `rf` model [from branch `main`] of the -`example-mlem-get-started` repo, renames it to `ml_model`, and places it in the -`.mlem/model` directory. +This copies the `rf` model [from branch `main`] of the +`example-mlem-get-started` repo to the current directory and renames it to +`ml_model`. 
## Cloud storage @@ -100,9 +75,9 @@ Loose objects are typically stored this way because they do not require ```cli $ mlem clone rf s3://example-mlem-get-started/rf -⏳️ Loading meta from .mlem/model/rf.mlem -🐏 Cloning .mlem/model/rf.mlem -💾 Saving model to s3://example-mlem-get-started/.mlem/model/rf.mlem +⏳️ Loading meta from rf.mlem +🐏 Cloning rf.mlem +💾 Saving model to s3://example-mlem-get-started/rf.mlem ``` The `rf` model from S3 bucket `example-mlem-get-started` can also be diff --git a/content/docs/user-guide/serving/fastapi.md b/content/docs/user-guide/serving/fastapi.md new file mode 100644 index 00000000..cd106a45 --- /dev/null +++ b/content/docs/user-guide/serving/fastapi.md @@ -0,0 +1,66 @@ +# FastAPI + +[FastAPI](https://fastapi.tiangolo.com) is a modern, fast (high-performance), +web framework for building APIs with Python 3.7+ based on standard Python type +hints. + +To expose your model to external users via REST API, MLEM can use FastAPI to +serve it. + +## Requirements + +```bash +pip install mlem[fastapi] +# or +pip install fastapi uvicorn +``` + +## Examples + +Since we showed how to use FastAPI on the +[parent page](/doc/user-guide/serving), let's not repeat that, but see a few +different examples instead. 
+ +### Running FastAPI model server from code + +```python +from mlem.api import serve + +serve( + model="https://github.com/iterative/example-mlem-get-started/rf", + server="fastapi", + host="0.0.0.0", + port=8000, +) +``` + +### Running FastAPI model server from CLI + +```cli +$ mlem serve fastapi \ + --model https://github.com/iterative/example-mlem-get-started/rf \ + --host 0.0.0.0 --port 8000 +) +``` + +### Applying data to running FastAPI server from API + +```python +from mlem.api import apply_remote + +apply_remote( + "http", + "https://github.com/iterative/example-mlem-get-started/iris.csv", + method="predict", + host="0.0.0.0", + port=8000, +) +``` + +### Applying data to running FastAPI server from CLI + +```cli +$ mlem apply-remote http \ + --method predict --host 0.0.0.0 --port 8000 \ + --data https://github.com/iterative/example-mlem-get-started/iris.csv +``` diff --git a/content/docs/user-guide/serving/index.md b/content/docs/user-guide/serving/index.md new file mode 100644 index 00000000..44f618ed --- /dev/null +++ b/content/docs/user-guide/serving/index.md @@ -0,0 +1,86 @@ +# Serving models + +For online serving, you can create a server from your model. We will try out +the FastAPI server. All available server implementations are listed in the nested +pages. + +## Running server + +To start up the FastAPI server, run: + +```cli +$ mlem serve fastapi --model https://github.com/iterative/example-mlem-get-started/rf +⏳️ Loading model from https://github.com/iterative/example-mlem-get-started/tree/main/models/rf.mlem +Starting fastapi server... +🖇️ Adding route for /predict +🖇️ Adding route for /predict_proba +🖇️ Adding route for /sklearn_predict +🖇️ Adding route for /sklearn_predict_proba +Checkout openapi docs at +INFO: Started server process [22854] +INFO: Waiting for application startup. +INFO: Application startup complete.
+INFO: Uvicorn running on http://0.0.0.0:8080 (Press CTRL+C to quit) +``` + +Servers automatically create endpoints from model methods with payload schemas +corresponding to serialized dataset types. + +Note that serving the model requires the correct packages to be +installed. You can check out how to create a `venv` with the right packages with +MLEM, or how to serve the model in a +[Docker container](/doc/user-guide/deploying/docker). + +## Making requests + +You can open Swagger UI (OpenAPI) at +[http://localhost:8080/docs](http://localhost:8080/docs) to check out OpenAPI +spec and query examples. + +Each server implementation also has its client implementation counterpart, in +the case of FastAPI server it’s HTTPClient. Clients can be used to make requests +to servers. Since a server also exposes the model interface description, the +client will know what methods are available and handle serialization and +deserialization for you. You can use them via CLI: + +```cli +$ mlem apply-remote http test_x.csv --host="0.0.0.0" --port=8080 --json +[1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0] +``` + +or via Python API: + +```py +from mlem.api import load +from mlem.runtime.client.base import HTTPClient + +client = HTTPClient(host="localhost", port=8080) +res = client.predict(load("test_x.csv")) +``` + +
+ +### 💡 Or query the model directly with curl + +```cli +$ curl -X 'POST' \ + 'http://localhost:8080/predict_proba' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "data": { + "values": [ + { + "": 0, + "sepal length (cm)": 0, + "sepal width (cm)": 0, + "petal length (cm)": 0, + "petal width (cm)": 0 + } + ] + } + }' +[[0.92,0.04,0.04]] +``` + +
diff --git a/content/docs/user-guide/serving/rabbitmq.md b/content/docs/user-guide/serving/rabbitmq.md new file mode 100644 index 00000000..8cc7c760 --- /dev/null +++ b/content/docs/user-guide/serving/rabbitmq.md @@ -0,0 +1,25 @@ +# RabbitMQ + +[RabbitMQ](https://www.rabbitmq.com) is a widely used open source message +broker. + +MLEM allows you to serve your model via RabbitMQ. This means that your model can +run as a service, consuming messages with input data and producing messages with +predictions. + +## Requirements + +```bash +pip install mlem[rmq] +# or +pip install pika +``` + + diff --git a/content/home-slides.js b/content/home-slides.js index 01f89fec..901d887c 100644 --- a/content/home-slides.js +++ b/content/home-slides.js @@ -8,13 +8,13 @@ const terminalSlideData = [ >>> mlem.api.save(model, "dog-bark-translator") >>> -$ tree .mlem/model - .mlem/model/ +$ tree + . β”œβ”€β”€ dog-bark-translator └── dog-bark-translator.mlem `, ` - $ cat .mlem/model/dog-bark-translator.mlem + $ cat dog-bark-translator.mlem type: sklearn methods:   predict: diff --git a/scripts/docs/bootstrap_api.py b/scripts/docs/bootstrap_api.py new file mode 100644 index 00000000..2c2adf03 --- /dev/null +++ b/scripts/docs/bootstrap_api.py @@ -0,0 +1,124 @@ +import inspect +import os +import re +import textwrap +from pathlib import Path + +from utils import replace_section, place_links_in_doc + +DOCS_PATH = Path(__file__).parent.parent.parent / "content" / "docs" +API_DOCS_PATH = str(DOCS_PATH / "api-reference") + + +def get_signature(cmd): + source = inspect.getsource(cmd) + return source.split('"""')[0].strip().strip(":") + + +def get_docs(cmd): + docs = cmd.__doc__ + if not docs: + raise ValueError(f"Command {cmd} has no docstring") + return docs + + +def generate_signature(cmd): + docs = get_docs(cmd) + docs = docs.split("Args:")[0].strip() + docs = re.subn("\s+", " ", docs)[0] + docs = textwrap.fill(docs, width=79) + docs = docs if docs[-1] == "." else f"{docs}." 
+ return f""" +{docs} + +```py +{get_signature(cmd)} +``` +""" + + +def generate_parameters(cmd, sep="\n- "): + # to add "(required)" or "(optional)" + required_params = ( + len(inspect.signature(cmd).parameters) + - len(cmd.__defaults__ or {}) + - len(cmd.__kwdefaults__ or {}) + ) + docs = get_docs(cmd) + docs = docs.split("Args:\n")[1].split("Returns:")[0] + default_spaces = default_spaces = len(docs) - len(docs.lstrip()) + docs = docs.strip() + params = [] + for line in docs.split("\n"): + spaces = len(line) - len(line.lstrip(" ")) + line = line.lstrip(" ") + # if the line is a continuation of a previous one + if spaces > default_spaces: + params[-1] += line + else: + if required_params > 0: + line = "**`" + line.replace(":", "`** (required) -") + required_params -= 1 + else: + line = "`" + line.replace(":", "` (optional) -") + params.append(line) + return sep + sep.join(params) + "\n\n" + + +def generate_returns(cmd, sep="\n- "): + docs = get_docs(cmd) + docs = docs.split("Returns:")[1].strip() + if ":" in docs: + docs = "`" + docs.replace(":", "`:") + # docs = docs.replace("None", "`None`") + return f"\n{docs}\n" + + +def check_command(cmd, name, path): + with open(path, "r", encoding="utf8") as f: + content = f.read() + + if "## Returns" not in content: + content = content.replace("## Exceptions", "## Returns\n\n## Exceptions") + + content = replace_section( + content, + f"mlem.api.{name}()", + place_links_in_doc(generate_signature(cmd)), + section_prefix="#", + ) + # assert "### Usage" in content, "Usage section not found" + content = replace_section(content, "Parameters", generate_parameters(cmd)) + content = replace_section(content, "Returns", generate_returns(cmd)) + + with open(path, "w", encoding="utf8") as f: + f.write(content) + + +def generate_api(): + from mlem import api + + commands = [] + not_commands = [] + for k, v in api.__dict__.items(): + if k.startswith("__") or not callable(v): + not_commands.append(k) + else: + commands.append(v) + + for 
cmd in commands: + name = cmd.__name__ + cmd_path = os.path.join(API_DOCS_PATH, name + ".md") + if not os.path.exists(cmd_path): + print(f"creating {name}") + else: + print(f"checking {name}") + check_command(cmd, name, cmd_path) + + +def main(): + generate_api() + + +if __name__ == "__main__": + main() diff --git a/scripts/docs/bootstrap_cli.py b/scripts/docs/bootstrap_cli.py new file mode 100644 index 00000000..b9446f03 --- /dev/null +++ b/scripts/docs/bootstrap_cli.py @@ -0,0 +1,157 @@ +import json +import os +import re +import subprocess +import textwrap +from typing import Dict, List + +from pydantic import BaseModel, parse_obj_as + +from cli_generate_spec import Opt, Spec +from utils import replace_section, place_links_in_doc + +CLI_DOCS_PATH = "../../content/docs/command-reference" +LINE_WIDTH = 80 + + +def repr_option(option: Opt): + decls = ", ".join(f"`{d} <{option.metavar.lower()}>`" for d in option.decls) + if option.is_flag: + decls = ", ".join(f"`{d}`" for d in option.decls) + if option.secondary: + decls += " / " + ", ".join(f"`{d}`" for d in option.secondary) + return textwrap.fill( + f"- {decls} - {option.help}", width=LINE_WIDTH, subsequent_indent=" " + ) + + +def repr_arg(option: Opt): + margin = 17 + metavar = option.metavar.lower() + option_help = option.help + if option_help.endswith(" [required]"): + option_help = option_help[: -len(" [required]")] + return textwrap.fill( + f" {metavar:{margin}}{option_help}", + width=LINE_WIDTH, + subsequent_indent=" " * (margin + 2), + ) + + +def generate_options(options: List[Opt]): + res = ["", ""] + for option in options: + res.append(repr_option(option)) + return "\n".join(res + ["", ""]) + + +def _gen_usage_string(spec: Spec): + usage = f"usage: mlem {spec.name} " + indent = len(usage) + options = [] + for opt in spec.options: + decl = min(opt.decls, key=len) + metavar = opt.metavar.lower() + if metavar == "boolean": + options.append(f"[{decl}]") + else: + options.append(f"[{decl} <{metavar}>]") + 
max_opts_len = min(45, LINE_WIDTH - indent) + option_lines = [""] + for o in options: + line = f"{option_lines[-1]}{o} " + if len(line) > max_opts_len and option_lines[-1] != "": + option_lines[-1] = option_lines[-1].strip() + option_lines.append(o + " ") + else: + option_lines[-1] = line + option_lines[-1] = option_lines[-1].strip() + options = ("\n" + " " * indent).join(option_lines) + impl = "" + if spec.args.impl_metavar: + impl = f"[<{spec.args.impl_metavar}> [{spec.args.impl_metavar} options] | --load ]" + args = impl + " ".join(a.metavar for a in spec.args.args).lower() + if spec.args.subcommands: + args += "command" + res = f"{usage}{options}" + if args: + res += "\n" + " " * indent + f"{args}" + return res + + +def generate_usage(spec: Spec): + usage = _gen_usage_string(spec) + argspec = spec.args + if argspec.args: + args = "\n".join(repr_arg(a) for a in argspec.args) + args = f"\n\narguments:\n{args}" + else: + args = "" + if argspec.impls: + impls = "\n".join(f"- {c}" for c in argspec.impls) + impls = f"\n\nBuiltin {argspec.impl_metavar}s:\n{impls}" + else: + impls = "" + if argspec.subcommands: + margin = 17 + subcommands = "\n".join( + f" {k:{margin}}{v}" for k, v in argspec.subcommands.items() + ) + subcommands = f"\n\nsubcommands:\n{subcommands}" + else: + subcommands = "" + usage = usage[0].lower() + usage[1:] + return f"\n{usage}{subcommands}{impls}{args}\n" + + +def generate_cli_command(name: str, spec: Spec): + path = os.path.join(CLI_DOCS_PATH, f"{name}.md") + with open(path, "r", encoding="utf8") as f: + data = f.read() + + data = replace_section( + data, + "usage", + generate_usage(spec), + section_prefix="```", + section_prefix_space="", + ) + data = replace_section(data, "Options", generate_options(spec.options)) + + cmd_name = name.replace("/", " ") + if cmd_name.endswith(" index"): + cmd_name = cmd_name[: -len(" index")] + data = replace_section( + data, cmd_name, place_links_in_doc(spec.doc), section_prefix="#" + ) + with open(path, "w", 
encoding="utf8") as f: + f.write(data) + + +class AllSpec(BaseModel): + __root__: Dict[str, Spec] + + +def main(): + with open("spec.json", "r", encoding="utf8") as f: + spec = parse_obj_as(AllSpec, json.load(f)) + + # spec.__root__ = {"apply": spec.__root__["apply"]} + for k, s in spec.__root__.items(): + print(k) + generate_cli_command(k, s) + + os.unlink("spec.json") + + +def run_lint(): + print("Running linter") + subprocess.check_output("yarn run format", shell=True, cwd="../../") + + +if __name__ == "__main__": + from cli_generate_spec import main as spec_main + + spec_main() + main() + run_lint() diff --git a/scripts/docs/bootstrap_obj_reference.py b/scripts/docs/bootstrap_obj_reference.py new file mode 100644 index 00000000..d5a00831 --- /dev/null +++ b/scripts/docs/bootstrap_obj_reference.py @@ -0,0 +1,342 @@ +import inspect +import os.path +import re +import string +import textwrap +from collections import defaultdict +from dataclasses import dataclass +from typing import Any, Dict, Iterator, List, Tuple, Type + +from pydantic import BaseModel, ValidationError +from pydantic.fields import ModelField +from pydantic.typing import display_as_type, get_args, is_union +from typing_extensions import get_origin + +from mlem.cli.utils import get_field_help +from mlem.core.base import MlemABC, load_impl_ext +from mlem.ext import Extension, ExtensionLoader +from mlem.utils.entrypoints import ( + list_abstractions, + list_implementations, + load_entrypoints, +) +from utils import replace_sections + +SIDEBAR_PATH = "../../content/docs/sidebar.json" +REF_SLUG = "object-reference" +REF_DIR = "../../content/docs/object-reference" + +DOC_REPLACEMENTS = { + "ModelType": "[ModelType](/doc/object-reference/mlem-abcs#modeltype)", + "ModelIO": "[ModelIO](/doc/object-reference/mlem-abcs#modelio)", +} + +LINE_WIDTH = 80 + + +def get_extension_doc(module_doc: str): + doc = "\n\n".join(module_doc.split("\n\n")[1:]) + for key, value in DOC_REPLACEMENTS.items(): + doc = 
doc.replace(key, value) + return textwrap.fill( + doc.replace("\n\n", "\n"), width=LINE_WIDTH, break_on_hyphens=False + ) + + +def get_extension_reqs(ext: Extension): + if not ext.reqs: + return "" + extra = ext.extra or ext.module.split(".")[-1] + reqs = " ".join(ext.reqs_packages) + return f"""```bash +pip install mlem[{extra}] +# or +pip install {reqs} +```""" + + +@dataclass +class Field: + name: str + required: bool + type_: str + default: Any + help_: str + + +def iterate_type_fields(cls: Type[BaseModel]) -> Iterator[Field]: + """Recursively get CliTypeFields from BaseModel""" + field: ModelField + for name, field in sorted(cls.__fields__.items(), key=lambda x: not x[1].required): + name = field.alias or name + if ( + issubclass(cls, MlemABC) + and name in cls.__config__.exclude + or field.field_info.exclude + ): + # Skip excluded fields + continue + + field_type = field.outer_type_ + # field.type_ is element type for collections/mappings + + if not isinstance(field_type, type): + # skip too complicated stuff + continue + + yield Field( + name=name, + type_=repr_field_type(field_type), + required=bool(field.required), + default=field.default, + help_=get_field_help(cls, name), + ) + + +def repr_field_type(type_: Type) -> str: + if isinstance(type_, type): + return type_.__name__ + + origin = get_origin(type_) + if is_union(origin): + # get first type for union + generic_args = get_args(type_) + args = ", ".join(repr_field_type(a) for a in generic_args) + return f"Union[{args}]" + if origin is list or origin is dict: + return display_as_type(type_) + + if type_ is Any: + return "Any" + + raise ValueError(f"Unknown type: {type_}") + + +def default_value(fd): + try: + return fd.__class__() + except ValidationError: + return ... 
+ + +def repr_field_default(field: Field) -> Tuple[str, Type]: + fd = field.default + default = f" = {fd}" if fd is not None and fd != "" else "" + if ( + default == " = " + or issubclass(fd.__class__, BaseModel) + and fd == default_value(fd) + ): + default = f" = {fd.__class__.__name__}()" + if isinstance(fd, str): + default = f' = "{fd}"' + add_type = None + if isinstance(fd, BaseModel) and not issubclass(fd.__class__, MlemABC): + add_type = fd.__class__ + return default, add_type + + +def with_prev_and_next(iterable): + prev = None + current = None + for o in iterable: + if current is not None: + yield prev, current, o + prev = current + current = o + yield current, o, "" + + +def smart_wrap(value: str, width: int, subsequent_indent: str = ""): + SPECIAL = "\0" + QUOTES = "'\"`" + quotes_open = {q: False for q in QUOTES} + chars = [] + new_word = False + for prev, c, nxt in with_prev_and_next(value): + if nxt in string.ascii_letters: + new_word = True + if quotes_open.get(c): + quotes_open[c] = False + chars.append(c) + new_word = False + continue + if any(quotes_open.values()) or new_word is False: + chars.append(SPECIAL if c == " " else c) + continue + if c in QUOTES and prev == " ": + quotes_open[c] = True + chars.append(c) + + return textwrap.fill( + "".join(chars), + width=width, + subsequent_indent=subsequent_indent, + break_on_hyphens=False, + break_long_words=False, + ).replace(SPECIAL, " ") + + +def repr_field(field: Field) -> Tuple[str, Type]: + req = " _(required)_" if field.required else "" + default, add_type = repr_field_default(field) + help_ = re.subn(r"\s+", " ", field.help_)[0] + return ( + smart_wrap( + f"- `{field.name}: {field.type_}{default}`{req} - {help_}", + width=LINE_WIDTH, + subsequent_indent=" ", + ), + add_type, + ) + + +def get_impl_docstring(type_): + doc = inspect.cleandoc(type_.__doc__ or "Class docstring missing").strip() + return "\n".join( + f"{textwrap.fill(' ' + line, subsequent_indent=' ', width=LINE_WIDTH - 5)}" + for 
line in doc.splitlines() + ) + + +def get_impl_description(type_: Type[MlemABC]) -> Tuple[str, List[Type]]: + fields_doc = "**No fields**" + fields = list(iterate_type_fields(type_)) + add_types = [] + if fields: + fields_doc = "**Fields**:\n\n" + fds = [] + for f in fields: + fd, add_type = repr_field(f) + fds.append(fd) + if add_type: + add_types.append(add_type) + fields_doc += "\n\n".join(fds) + doc = get_impl_docstring(type_) + return ( + f"""## `class {type_.__name__}` + +**MlemABC parent type**: `{type_.abs_name}` + +**MlemABC type**: `{type_.__get_alias__()}` + +{doc} + +{fields_doc} +""", + add_types, + ) + + +def get_model_description(type_: Type[BaseModel]) -> str: + fields_doc = "**No fields**" + fields = list(iterate_type_fields(type_)) + if fields: + fields_doc = "**Fields**:\n\n" + fields_doc += "\n\n".join(repr_field(f)[0] for f in fields) + doc = get_impl_docstring(type_) + return f"""## `class {type_.__name__}` + +{doc} + +{fields_doc} +""" + + +def get_extension_impls_md(impls: List[Type[MlemABC]]): + add_types = set() + descr = [] + for e in impls: + d, add = get_impl_description(e) + descr.append(d) + add_types.update(add) + for add in add_types: + descr.append(get_model_description(add)) + return "\n---\n\n".join(descr) + + +def get_extension_md(extension: str, impls: List[Type[MlemABC]]) -> str: + implementations = get_extension_impls_md(impls) + return f"""# {extension} + +{implementations}""" + + +def create_ext_impls_page( + section: str, extension: str, impls: List[Type[MlemABC]], overwrite: bool = False +): + filename = f"{section}/{extension.lower()}.md" + path = os.path.join(REF_DIR, filename) + handcrafted = {} + if os.path.exists(path): + if not impls: + os.unlink(path) + if not overwrite: + return + # handcrafted = get_sections(path, "Description", "Examples") + os.unlink(path) + os.makedirs(os.path.dirname(path), exist_ok=True) + + with open(path, "w") as f: + md = get_extension_md(extension, impls) + if handcrafted: + md = 
replace_sections(md, handcrafted) + f.write(md) + # add_extension_to_sidebar(section, extension.lower(), extension.capitalize(), filename) + + +ABC_GROUPS = { + "data": ["data_type", "data_reader", "data_writer"], + "model": ["model_type", "model_io"], + "deployment": ["deployment", "env", "deploy_state"], + "other": ["state", "docker_registry"], + "build": ["builder"], + "serving": ["server", "client", "interface"], + "storage": ["storage", "artifact"], + "hide": ["requirement", "meta"], + "uri": ["resolver"], +} +ABC_GROUPS_MAP = {v: k for k, values in ABC_GROUPS.items() for v in values} + + +def get_impl_extension_name(cls: Type[MlemABC]): + ep_name = f"{cls.abs_name}.{cls.__get_alias__()}" + ep = load_entrypoints().get(ep_name) + if not ep: + return "builtin" + module_name = ep.ep.module_name + for mod in ExtensionLoader.builtin_extensions: + if module_name.startswith(mod): + return mod.split(".")[-1] + raise Exception(f"No ext for {module_name}") + + +def main(): + section_to_ext: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list)) + for abc in list_abstractions(include_hidden=False): + section = ABC_GROUPS_MAP.get(abc) + if section == "hide" or section == "other" or not section: + print("skipping", abc) + continue + # root_cls = MlemABC.abs_types[abc] + for impl in list_implementations(abc, include_hidden=False): + cls = load_impl_ext(abc, impl) + if cls.__is_root__: + continue + ext_name = get_impl_extension_name(cls) + section_to_ext[section][ext_name].append(cls) + + section_to_ext = { + section: { + ext: list(sorted(impls, key=lambda i: i.__name__)) + for ext, impls in ext_to_impls.items() + } + for section, ext_to_impls in section_to_ext.items() + } + for section, ext_to_impls in section_to_ext.items(): + for ext, impls in ext_to_impls.items(): + create_ext_impls_page(section, ext, impls, overwrite=True) + + +if __name__ == "__main__": + main() diff --git a/scripts/docs/cli_generate_spec.py b/scripts/docs/cli_generate_spec.py new file 
mode 100644 index 00000000..1cd162f0 --- /dev/null +++ b/scripts/docs/cli_generate_spec.py @@ -0,0 +1,138 @@ +import json +from typing import Dict, List, Optional + +from click import Command, Context, Group, Option +from pydantic import BaseModel +from typer.main import get_group + +from mlem import cli +from mlem.cli.main import get_cmd_name + +use_group = ["deployment"] +skip = ["dev"] + +abc_group = ["apply-remote", "build", "declare", "serve"] + + +class Opt(BaseModel): + decls: List[str] + secondary: List[str] + metavar: str + help: str + is_flag: bool + + +class Args(BaseModel): + args: List[Opt] + impls: Optional[List[str]] + impl_metavar: Optional[str] + subcommands: Optional[Dict[str, str]] + + +class Spec(BaseModel): + args: Args + options: List[Opt] + doc: str + name: str + + +def get_options(command: Command, ctx): + if command.name not in abc_group: + yield from command.get_params(ctx) + return + + options = None + for subcommand in command.commands.values(): + if options is None: + options = list(get_options(subcommand, ctx)) + continue + new_options = {o.help for o in get_options(subcommand, ctx)} + options = [o for o in options if o.help in new_options] + yield from options + + +def repr_option(option, ctx) -> Opt: + _, help_ = option.get_help_record(ctx) + help_ = help_.replace(" ", " ") # TODO: maybe fix in typer code? 
+ return Opt( + decls=sorted(option.opts, reverse=True), + secondary=sorted(option.secondary_opts, reverse=True), + metavar=option.make_metavar(), + help=help_, + is_flag=option.is_flag if isinstance(option, Option) else False, + ) + + +def generate_options(command: Command, ctx): + res = [] + for option in get_options(command, ctx): + if not isinstance(option, Option): + continue + res.append(repr_option(option, ctx)) + return res + + +def generate_args(command, ctx): + args = [] + for arg in command.get_params(ctx): + if isinstance(arg, Option): + continue + args.append(repr_option(arg, ctx)) + impls = None + metavar = None + subcommands = None + if command.name in abc_group: + impls = list(sorted([c for c in command.commands if not c.startswith("_")])) + metavar = command.subcommand_metavar + args.extend(generate_args(list(command.commands.values())[0], ctx).args) + if command.name in use_group: + subcommands = { + c.name: c.get_short_help_str() for c in command.commands.values() + } + return Args(args=args, impls=impls, impl_metavar=metavar, subcommands=subcommands) + + +def generate_usage(command: Command, ctx): + if command.name not in abc_group: + return command.get_usage(ctx) + subcommand = list(command.commands.values())[0] + subctx = Context(subcommand, parent=ctx, info_name=subcommand.name) + sub_usage = generate_usage(subcommand, subctx) + return sub_usage.replace(subcommand.name, command.subcommand_metavar) + + +def generate_cli_command(command: Command, ctx): + return Spec( + args=generate_args(command, ctx), + options=generate_options(command, ctx), + doc=command.help.split("Documentation")[0].strip(), + name=get_cmd_name(ctx), + ) + + +def main(): + group = get_group(cli.app) + ctx = Context(group, info_name="mlem", help_option_names=["-h", "--help"]) + with ctx: + spec = {} + for name, command in group.commands.items(): + if name in skip: + continue + subctx = Context(command, ctx, info_name=name) + if isinstance(command, Group) and name in 
use_group: + + spec[f"{name}/index"] = generate_cli_command(command, subctx) + for subname, subcommand in command.commands.items(): + subsubctx = Context(subcommand, subctx, info_name=subname) + spec[f"{name}/{subname}"] = generate_cli_command( + subcommand, subsubctx + ) + continue + spec[name] = generate_cli_command(command, subctx) + + with open("spec.json", "w", encoding="utf8") as f: + json.dump({k: v.dict() for k, v in spec.items()}, f, indent=2) + + +if __name__ == "__main__": + main() diff --git a/scripts/docs/generate_sidebar.py b/scripts/docs/generate_sidebar.py new file mode 100644 index 00000000..11019e3c --- /dev/null +++ b/scripts/docs/generate_sidebar.py @@ -0,0 +1,99 @@ +import glob +import json +import os.path +from pathlib import Path +from typing import List + +SIDEBAR_PATH = "../../content/docs/sidebar.json" +DOCS_PATH = os.path.dirname(SIDEBAR_PATH) + +IGNORE = [ + "user-guide/what-is-mlem.md", + "api-reference/mlem-object.md", + "user-guide/models", + "user-guide/data", +] + + +def get_label(path): + for line in path.open(): + if line.startswith("# "): + return line.lstrip("#").strip() + return "" + + +def get_sidebar_paths(nodes, parent=""): + for node in nodes: + if node["source"]: + path = parent + node["source"] + yield path + parent_path = path[: -len("/index.md")] + "/" + else: + parent_path = node["slug"] + "/" + + if "children" in node: + yield from get_sidebar_paths(node["children"], parent=parent_path) + + +def adjust_sidebar(sidebar, to_add: List[str], to_remove: List[str], parent: str = ""): + res = [] + for entry in sidebar: + if entry["source"] and parent + entry["source"] in to_remove: + print("removing", parent + entry["source"]) + continue + + if "children" in entry: + parent_path = ( + parent + entry["source"][: -len("/index.md")] + if entry["source"] + else entry["slug"] + ) + entry["children"] = adjust_sidebar( + entry["children"], to_add, to_remove, parent_path + "/" + ) + + res.append(entry) + for add_root, add_file in 
[os.path.split(a) for a in to_add]: + if add_file == "index.md": + add_root, root_name = os.path.split(add_root) + add_file = os.path.join(root_name, add_file) + if add_root + "/" == parent or (parent == add_root == ""): + path = Path(DOCS_PATH) / add_root / add_file + print("adding", parent, add_root, add_file) + slug = os.path.dirname(add_file) or add_file[: -len(".md")] + res.append( + { + "slug": slug, + "label": get_label(path), + "source": str(add_file), + "children": [], + } + ) + return res + + +def main(): + with open(SIDEBAR_PATH, "r") as f: + sidebar = json.load(f) + + md_files = [ + os.path.relpath(p, DOCS_PATH) + for p in glob.glob(os.path.join(DOCS_PATH, "**", "*.md"), recursive=True) + ] + + sidebar_paths = list(get_sidebar_paths(sidebar)) + + new_files = list(set(md_files).difference(sidebar_paths)) + new_files = [ + f for f in new_files if not any(f.startswith(ignored) for ignored in IGNORE) + ] + removed_files = list(set(sidebar_paths).difference(md_files)) + + sidebar = adjust_sidebar(sidebar, new_files, removed_files) + with open(SIDEBAR_PATH, "w") as f: + json.dump(sidebar, f, indent=2) + f.write("\n") + + +if __name__ == "__main__": + main() diff --git a/scripts/docs/utils.py b/scripts/docs/utils.py new file mode 100644 index 00000000..20547716 --- /dev/null +++ b/scripts/docs/utils.py @@ -0,0 +1,64 @@ +import re +from typing import Dict + + +DOC_AUTO_REPLACE = { + "MLEM Object": "[MLEM Object](/doc/user-guide/basic-concepts)", + "MLEM objects": "[MLEM objects](/doc/user-guide/basic-concepts)", + "MLEM object": "[MLEM Object](/doc/user-guide/basic-concepts)", + "MLEM projects": "[MLEM project](/doc/user-guide/project-structure)", + "MLEM project": "[MLEM project](/doc/user-guide/project-structure)", + "metafile": "[MLEM Object](/doc/user-guide/basic-concepts)", + "MLEM configuration": "[MLEM config](/doc/user-guide/configuration)", + "MLEM config": "[MLEM config](/doc/user-guide/configuration)", +} + + +def place_links_in_doc(doc): + for k, v 
in DOC_AUTO_REPLACE.items(): + doc = doc.replace(k, v) + return f"\n\n{doc}\n\n" + + +def get_section(content: str, section_name: str, section_prefix: str = "## "): + find = re.findall( + f"{section_prefix}{section_name}(.*?)^{section_prefix}", + content, + flags=re.MULTILINE | re.DOTALL, + ) + if not find: + return None + return find[0] + + +def get_sections(path: str, *sections, section_prefix: str = "## "): + with open(path, "r") as f: + content = f.read() + res = {s: get_section(content, s, section_prefix) for s in sections} + return {s: v for s, v in res.items() if v} + + +def replace_section( + data: str, + section_name: str, + new_value: str, + section_prefix: str = "##", + section_prefix_space=" ", +) -> str: + section, n = re.subn( + f"{section_prefix}{section_prefix_space}{section_name}(.*?)^{section_prefix}", + f"{section_prefix}{section_prefix_space}{section_name}{new_value}{section_prefix}", + data, + flags=re.MULTILINE | re.DOTALL, + ) + if n == 0: + raise ValueError(f"Section {section_name} not found") + return section + + +def replace_sections( + data: str, sections: Dict[str, str], section_prefix: str = "## " +) -> str: + for s, v in sections.items(): + data = replace_section(data, s, v, section_prefix) + return data diff --git a/static/img/ecr.png b/static/img/ecr.png new file mode 100644 index 00000000..22830428 Binary files /dev/null and b/static/img/ecr.png differ diff --git a/static/img/ecr_image.png b/static/img/ecr_image.png new file mode 100644 index 00000000..a2683984 Binary files /dev/null and b/static/img/ecr_image.png differ diff --git a/static/img/fastapi.png b/static/img/fastapi.png new file mode 100644 index 00000000..071e28d0 Binary files /dev/null and b/static/img/fastapi.png differ diff --git a/static/img/inbound.png b/static/img/inbound.png new file mode 100644 index 00000000..2dba0b70 Binary files /dev/null and b/static/img/inbound.png differ