diff --git a/content/docs/object-reference/build/requirements.md b/content/docs/object-reference/build/requirements.md new file mode 100644 index 00000000..a32bd104 --- /dev/null +++ b/content/docs/object-reference/build/requirements.md @@ -0,0 +1,15 @@ +# requirements + +## `class RequirementsBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `requirements` + + MlemBuilder implementation for building requirements + +**Fields**: + +- `target: str` - Target path for requirements + +- `req_type: str = "installable"` - Type of requirements, example: unix diff --git a/content/docs/object-reference/build/venv.md b/content/docs/object-reference/build/venv.md new file mode 100644 index 00000000..fcc2db9f --- /dev/null +++ b/content/docs/object-reference/build/venv.md @@ -0,0 +1,36 @@ +# venv + +## `class CondaBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `conda` + + MlemBuilder implementation for building conda environments + +**Fields**: + +- `target: str = "venv"` - Name of the virtual environment + +- `python_version: str = "3.9"` - The python version to use + +- `current_env: bool = False` - Whether to install in the current conda env + +--- + +## `class VenvBuilder` + +**MlemABC parent type**: `builder` + +**MlemABC type**: `venv` + + MlemBuilder implementation for building virtual environments + +**Fields**: + +- `target: str = "venv"` - Name of the virtual environment + +- `no_cache: bool = False` - Disable cache + +- `current_env: bool = False` - Whether to install in the current virtual env, + must be active diff --git a/content/docs/sidebar.json b/content/docs/sidebar.json index 6ce3353b..caab2b9d 100644 --- a/content/docs/sidebar.json +++ b/content/docs/sidebar.json @@ -96,18 +96,33 @@ }, { "slug": "building", - "label": "Exporting models", + "label": "Building models", "source": "building/index.md", "children": [ { - "slug": "pip", - "label": "Python Packages", - "source": "pip.md" + "slug": "conda", + "label": "Conda 
Environment", + "source": "conda.md" }, { "slug": "docker", "label": "Docker", "source": "docker.md" + }, + { + "slug": "pip", + "label": "Python Packages", + "source": "pip.md" + }, + { + "slug": "requirements", + "label": "Requirements", + "source": "requirements.md" + }, + { + "slug": "venv", + "label": "Virtual Environment", + "source": "venv.md" } ] }, @@ -452,15 +467,25 @@ "label": "Builders", "source": "build/index.md", "children": [ + { + "slug": "docker", + "label": "Docker", + "source": "docker.md" + }, { "slug": "pip", "label": "Python package", "source": "pip.md" }, { - "slug": "docker", - "label": "Docker", - "source": "docker.md" + "slug": "requirements", + "label": "Requirements", + "source": "requirements.md" + }, + { + "slug": "venv", + "label": "Virtual Environment", + "source": "venv.md" } ] }, diff --git a/content/docs/user-guide/building/conda.md b/content/docs/user-guide/building/conda.md new file mode 100644 index 00000000..fe4d7f15 --- /dev/null +++ b/content/docs/user-guide/building/conda.md @@ -0,0 +1,136 @@ +# Conda Environments + +Like [virtual environments](/doc/user-guide/building/venv), +[conda environments](https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/environments.html) +follow the same paradigm of isolating dependencies for a package or a model. +But, they exist globally and are saved in a single location. Further, they don't +need to be limited to installation of python packages. + +## Description + +Currently, Conda based requirements cannot be determined automatically. But, one +can pass them manually. + +In addition to installing conda packages, `pip` based packages (gathered from +the model) will also be installed in the `conda` environment. + +## Preparation + +Make sure that `conda` command line utility is installed and is accessible. 
+ +### Generating a new conda environment + +```cli +$ mlem build conda --model model --target newenv \ + --conda_reqs.0.package_name xtensor \ + --conda_reqs.1.package_name openssl +⏳️ Loading model from model.mlem +Collecting package metadata (current_repodata.json): done +Solving environment: done + +## Package Plan ## + + environment location: /path/to/envs/newenv + + added / updated specs: + - python=3.9 + + +The following NEW packages will be INSTALLED: + + ca-certificates pkgs/main/osx-arm64::ca-certificates-2022.07.19-hca03da5_0 None + certifi pkgs/main/osx-arm64::certifi-2022.9.24-py39hca03da5_0 None + libcxx pkgs/main/osx-arm64::libcxx-14.0.6-h848a8c0_0 None + libffi pkgs/main/osx-arm64::libffi-3.4.2-hc377ac9_4 None + ncurses pkgs/main/osx-arm64::ncurses-6.3-h1a28f6b_3 None + openssl pkgs/main/osx-arm64::openssl-1.1.1q-h1a28f6b_0 None + pip pkgs/main/osx-arm64::pip-22.2.2-py39hca03da5_0 None + python pkgs/main/osx-arm64::python-3.9.13-hbdb9e5c_1 None + readline pkgs/main/osx-arm64::readline-8.1.2-h1a28f6b_1 None + setuptools pkgs/main/osx-arm64::setuptools-63.4.1-py39hca03da5_0 None + sqlite pkgs/main/osx-arm64::sqlite-3.39.3-h1058600_0 None + tk pkgs/main/osx-arm64::tk-8.6.12-hb8d0fd4_0 None + tzdata pkgs/main/noarch::tzdata-2022c-h04d1e81_0 None + wheel pkgs/main/noarch::wheel-0.37.1-pyhd3eb1b0_0 None + xz pkgs/main/osx-arm64::xz-5.2.6-h1a28f6b_0 None + zlib pkgs/main/osx-arm64::zlib-1.2.12-h5a0b063_3 None + + +Preparing transaction: done +Verifying transaction: done +Executing transaction: done +# +# To activate this environment, use +# +# $ conda activate /path/to/envs/newenv +# +# To deactivate an active environment, use +# +# $ conda deactivate + +Retrieving notices: ...working... 
done +Collecting package metadata (current_repodata.json): done +Solving environment: done + +## Package Plan ## + + environment location: /path/to/envs/newenv + + added / updated specs: + - conda-forge::openssl + - conda-forge::xtensor + + +The following NEW packages will be INSTALLED: + + xtensor conda-forge/osx-arm64::xtensor-0.24.3-hf86a087_0 None + xtl conda-forge/osx-arm64::xtl-0.7.4-hc021e02_0 None + +The following packages will be UPDATED: + + openssl pkgs/main::openssl-1.1.1q-h1a28f6b_0 --> conda-forge::openssl-1.1.1q-h03a7124_1 None + + +Preparing transaction: done +Verifying transaction: done +Executing transaction: done +Retrieving notices: ...working... done +Collecting scikit-learn==1.0.2 + Using cached scikit_learn-1.0.2-cp39-cp39-macosx_12_0_arm64.whl (6.9 MB) +Collecting pandas==1.4.2 + Using cached pandas-1.4.2-cp39-cp39-macosx_11_0_arm64.whl (10.1 MB) +Collecting numpy==1.22.3 + Using cached numpy-1.22.3-cp39-cp39-macosx_11_0_arm64.whl (12.8 MB) +Collecting threadpoolctl>=2.0.0 + Using cached threadpoolctl-3.1.0-py3-none-any.whl (14 kB) +Collecting scipy>=1.1.0 + Using cached scipy-1.9.2-cp39-cp39-macosx_12_0_arm64.whl (28.6 MB) +Collecting joblib>=0.11 + Using cached joblib-1.2.0-py3-none-any.whl (297 kB) +Collecting pytz>=2020.1 + Using cached pytz-2022.4-py2.py3-none-any.whl (500 kB) +Collecting python-dateutil>=2.8.1 + Using cached python_dateutil-2.8.2-py2.py3-none-any.whl (247 kB) +Collecting six>=1.5 + Using cached six-1.16.0-py2.py3-none-any.whl (11 kB) +Installing collected packages: pytz, threadpoolctl, six, numpy, joblib, scipy, python-dateutil, scikit-learn, pandas +Successfully installed joblib-1.2.0 numpy-1.22.3 pandas-1.4.2 python-dateutil-2.8.2 pytz-2022.4 scikit-learn-1.0.2 scipy-1.9.2 six-1.16.0 threadpoolctl-3.1.0 +``` + +If the `target` is not passed, the default name for the new environment is +`venv`. 
+ +Other options include using: + +- `--python_version 3.7` -- to use a custom python version; by default it is + inferred automatically. +- `--current_env True` -- whether to install the requirements in a currently + activated conda environment. + +Options for passing a list of conda requirements include: + +- `--conda_reqs.0.package_name` -- name of the conda package +- `--conda_reqs.0.spec` -- denotes selectors for a package such as '>=1.8,<2' + (optional) +- `--conda_reqs.0.channel_name` -- denotes the channel from which a package is + to be installed (default is `conda-forge`) diff --git a/content/docs/user-guide/building/index.md b/content/docs/user-guide/building/index.md index 3005510b..4a052347 100644 --- a/content/docs/user-guide/building/index.md +++ b/content/docs/user-guide/building/index.md @@ -1,8 +1,10 @@ -# Exporting models (building) +# Building models Building is a way to “bake” your model into something usable in production like -a Docker image, or export your model into another format. You can see the full -list of available builders [here](/doc/object-reference/build). +a Docker image, export your model into another format, or even export the +underlying requirements and dependencies of the model, allowing one to create +virtual environments out of them. You can see the full list of available builders +[here](/doc/object-reference/build). To build your MLEM model you need to use either [CLI](/doc/command-reference/build) or [API](/doc/api-reference/build) `build` diff --git a/content/docs/user-guide/building/requirements.md b/content/docs/user-guide/building/requirements.md new file mode 100644 index 00000000..33e78c01 --- /dev/null +++ b/content/docs/user-guide/building/requirements.md @@ -0,0 +1,132 @@ +# Requirements + +When you have a model saved via MLEM and want to use it, the first step is to +make sure you have the right dependencies and packages. 
Given a model, MLEM can +get the list of requirements of different types (including Python, Unix, as well +as some others). + +This complements the [checkenv](/doc/command-reference/checkenv) command. + +## Pip based requirements + +MLEM can export the installable requirements needed for a model using the +`mlem build` command, where `model` is the path to a model saved via `mlem`: + +```cli +$ mlem build requirements -m model +⏳️ Loading model from model.mlem +scikit-learn==1.0.2 pandas==1.4.2 numpy==1.22.3 +``` + +Now, it can be used to install the requirements like this: + +```cli +$ pip install $(mlem -q build requirements -m model) +``` + +where `-q` stands for the quiet mode which disables the emoji output. + +One can also save these `requirements` to generate a `requirements.txt` file +using the `--target` option, which allows us to pass the path of the file, i.e. + +```cli +$ mlem build requirements -m model --target requirements.txt +⏳️ Loading model from model.mlem +💼 Materializing requirements... +✅ Materialized to requirements.txt! +``` + +and now the contents of `requirements.txt` can be checked using + +```cli +$ cat requirements.txt +scikit-learn==1.0.2 +pandas==1.4.2 +numpy==1.22.3 +``` + +This is different from [creating a python package](/doc/user-guide/building/pip) +for the model. + +## Unix based requirements + +Some python libraries require unix based packages underneath to function +correctly. An example is the +[libgomp1](https://packages.debian.org/sid/libgomp1) package required by the +`lightgbm` library. `MLEM` can figure out `unix` based packages for some +supported libraries and these can be used as below: + +To get a list of `unix` based requirements, use the `--req_type` option such as + +`mlem build requirements -m model --req_type unix`. 
+ +The output of the above command can be used like this: + +```cli +$ apt-get install $(mlem -q build requirements -m model --req_type unix) +``` + +The `--target` option is not supported for unix based requirements. + +## Custom requirements + +Custom requirements represent local python code such as files, zipped sources, +etc. Custom requirements always need the `--target` option since they are +materialized at the target. + +For instance, a function can be saved via `MLEM` at a location `mlem-f`: + +```python +# func.py +def f(txt): + print(txt) +``` + +```python +# save.py +from mlem.api import save +from func import f + +saved = save(f, 'mlem-f') +``` + +and the following command could be used to materialize this custom requirement +at a target: + +```cli +$ mlem build requirements -m mlem-f --req_type custom --target ./dir +⏳️ Loading model from mlem-f.mlem +💼 Materializing requirements... +✅ Materialized to ./dir! +``` + +The contents of `dir` can be checked using + +```cli +$ ls dir +func.py +``` + +and with + +```cli +$ cat dir/func.py +def f(txt): + print(txt) +``` + + + +## Conda requirements + +Conda based requirements specify conda packages and cannot be determined +automatically as of now. In any case, the `--target` option is not supported for +them. + +One can manually pass conda requirements to create conda based virtual +environments using the conda builder as discussed +[here](/doc/user-guide/building/conda). diff --git a/content/docs/user-guide/building/venv.md b/content/docs/user-guide/building/venv.md new file mode 100644 index 00000000..4df09adc --- /dev/null +++ b/content/docs/user-guide/building/venv.md @@ -0,0 +1,54 @@ +# Virtual Environments + +Given a model and a list of its dependencies and packages, an environment needs +to be present that has these requirements readily available so as to use the +model. 
To make sure that different dependencies for different models (or +projects) don't clash, +[virtual environments](https://realpython.com/python-virtual-environments-a-primer/) +are used. + +## Description + +MLEM can create a python virtual environment using requirements gathered from a +model. This naturally extends the functionality of the +[`requirements builder`](/doc/user-guide/building/requirements). + +### Generating a new virtual environment + +```cli +$ mlem build venv -m model --target newenv +⏳️ Loading model from model.mlem +💼 Creating virtual env newenv... +💼 Installing the required packages... +Collecting scikit-learn==1.0.2 + Using cached scikit_learn-1.0.2-cp39-cp39-macosx_12_0_arm64.whl (6.9 MB) +Collecting pandas==1.4.2 + Using cached pandas-1.4.2-cp39-cp39-macosx_11_0_arm64.whl (10.1 MB) +Collecting numpy==1.22.3 + Using cached numpy-1.22.3-cp39-cp39-macosx_11_0_arm64.whl (12.8 MB) +Collecting joblib>=0.11 + Using cached joblib-1.2.0-py3-none-any.whl (297 kB) +Collecting scipy>=1.1.0 + Using cached scipy-1.9.2-cp39-cp39-macosx_12_0_arm64.whl (28.6 MB) +Collecting threadpoolctl>=2.0.0 + Using cached threadpoolctl-3.1.0-py3-none-any.whl (14 kB) +Collecting pytz>=2020.1 + Using cached pytz-2022.4-py2.py3-none-any.whl (500 kB) +Collecting python-dateutil>=2.8.1 + Using cached python_dateutil-2.8.2-py2.py3-none-any.whl (247 kB) +Collecting six>=1.5 + Using cached six-1.16.0-py2.py3-none-any.whl (11 kB) +Installing collected packages: pytz, threadpoolctl, six, numpy, joblib, scipy, python-dateutil, scikit-learn, pandas +Successfully installed joblib-1.2.0 numpy-1.22.3 pandas-1.4.2 python-dateutil-2.8.2 pytz-2022.4 scikit-learn-1.0.2 scipy-1.9.2 six-1.16.0 threadpoolctl-3.1.0 +✅ virtual environment `newenv` is ready, activate with `source newenv/bin/activate` +``` + +If the `target` is not passed, the default name for the new environment is +`venv`. 
+ +Other options include using: + +- `--no_cache True` -- to disable caching while fetching packages during creation of + the environment. +- `--current_env True` -- whether to install the requirements in a currently + activated virtual environment. diff --git a/content/docs/user-guide/models/index.md b/content/docs/user-guide/models/index.md index 1fa17f57..ce7ca688 100644 --- a/content/docs/user-guide/models/index.md +++ b/content/docs/user-guide/models/index.md @@ -27,7 +27,8 @@ MLEM uses it to infer your model's signature which will be needed to build and deploy it later. Additionally, MLEM will automatically infer your model requirements (including -any local code). +any local code). The determined requirements can be used to +[create virtual environments](/doc/user-guide/building/requirements). ## Alternative ways to create MLEM model