diff --git a/.github/workflows/check-test-release.yml b/.github/workflows/check-test-release.yml index 9a157092..d09cd2ff 100644 --- a/.github/workflows/check-test-release.yml +++ b/.github/workflows/check-test-release.yml @@ -7,6 +7,7 @@ on: env: MLEM_TESTS: "true" + MLEM_DEBUG: "true" jobs: authorize: @@ -58,7 +59,7 @@ jobs: # no HDF5 support installed for tables - os: windows-latest python: "3.9" - fail-fast: true + fail-fast: false steps: - uses: actions/checkout@v3 with: @@ -67,6 +68,10 @@ jobs: - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} + - uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.python }} + activate-environment: true - name: get pip cache dir id: pip-cache-dir run: | @@ -92,6 +97,9 @@ jobs: pip install pre-commit .[tests] - run: pre-commit run pylint -a -v --show-diff-on-failure if: matrix.python != '3.7' + - name: Start minikube + if: matrix.os == 'ubuntu-latest' && matrix.python == '3.9' + uses: medyagh/setup-minikube@master - name: Run tests timeout-minutes: 40 run: pytest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3e7ce75e..49cfcf5b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,8 +1,8 @@ default_language_version: python: python3 repos: - - repo: 'https://github.com/pre-commit/pre-commit-hooks' - rev: v4.0.1 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 hooks: - id: check-added-large-files - id: check-case-conflict @@ -17,8 +17,8 @@ repos: - id: mixed-line-ending - id: sort-simple-yaml - id: trailing-whitespace - - repo: 'https://github.com/pycqa/flake8' - rev: 4.0.1 + - repo: https://github.com/pycqa/flake8 + rev: 5.0.4 hooks: - id: flake8 args: @@ -28,16 +28,16 @@ repos: - flake8-comprehensions - flake8-debugger - flake8-string-format - - repo: 'https://github.com/psf/black' - rev: 22.3.0 + - repo: https://github.com/psf/black + rev: 22.10.0 hooks: - id: black - repo: 'https://github.com/PyCQA/isort' rev: 
5.10.1 hooks: - id: isort - - repo: 'https://github.com/pre-commit/mirrors-mypy' - rev: v0.942 + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.982 hooks: - id: mypy additional_dependencies: @@ -54,7 +54,11 @@ repos: entry: pylint -v language: system types: [ python ] - # - repo: https://github.com/PyCQA/bandit - # rev: '1.7.0' - # hooks: - # - id: bandit + - repo: https://github.com/PyCQA/bandit + rev: 1.7.4 + hooks: + - id: bandit + exclude: tests/ + args: + - -iii # high level + - -lll # high confidence diff --git a/.pylintrc b/.pylintrc index 4ea818e6..9beb1074 100644 --- a/.pylintrc +++ b/.pylintrc @@ -170,7 +170,8 @@ disable=print-statement, redefined-builtin, # TODO: https://github.com/iterative/mlem/issues/60 no-self-use, # TODO: https://github.com/iterative/mlem/issues/60 maybe leave it import-outside-toplevel, - wrong-import-order # handeled by isort + wrong-import-order, # handeled by isort + cannot-enumerate-pytest-fixtures # TODO: https://github.com/iterative/mlem/issues/60 # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option @@ -369,7 +370,7 @@ indent-string=' ' max-line-length=100 # Maximum number of lines in a module. -max-module-lines=1000 +max-module-lines=2000 # Allow the body of a class to be on the same line as the declaration if body # contains single statement. @@ -389,7 +390,7 @@ ignore-comments=yes ignore-docstrings=yes # Ignore imports when computing similarities. -ignore-imports=no +ignore-imports=yes # Ignore function signatures when computing similarities. ignore-signatures=no diff --git a/README.md b/README.md index a05c91f8..62fe1eb8 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ The main reason to use MLEM instead of other tools is to adopt a **GitOps approa ## Usage -This a quick walkthrough showcasing deployment and export functionality of MLEM. 
+This is a quick walkthrough showcasing deployment functionality of MLEM. Please read [Get Started guide](https://mlem.ai/doc/get-started) for a full version. @@ -81,7 +81,7 @@ def main(): save( rf, - "rf", + "models/rf", sample_data=data, ) @@ -89,10 +89,26 @@ if __name__ == "__main__": main() ``` +### Productionization + +We'll show how to deploy your model with MLEM below, but let's briefly mention all the +scenarios that MLEM enables with a couple lines of code: + +- **[Apply model](https://mlem.ai/doc/get-started/applying)** - load model in Python or get + prediction in command line. +- **[Serve model](https://mlem.ai/doc/get-started/serving)** - create a service from your model + for online serving. +- **[Build model](https://mlem.ai/doc/get-started/building)** - export model into Python + packages, Docker images, etc. +- **[Deploy model](https://mlem.ai/doc/get-started/deploying)** - deploy your model to Heroku, + Sagemaker, Kubernetes, etc. + +### Codification + Check out what we have: ```shell -$ ls +$ ls models/ rf rf.mlem $ cat rf.mlem @@ -153,51 +169,6 @@ model_type: - null - 3 type: ndarray - sklearn_predict: - args: - - name: X - type_: - columns: - - sepal length (cm) - - sepal width (cm) - - petal length (cm) - - petal width (cm) - dtypes: - - float64 - - float64 - - float64 - - float64 - index_cols: [] - type: dataframe - name: predict - returns: - dtype: int64 - shape: - - null - type: ndarray - sklearn_predict_proba: - args: - - name: X - type_: - columns: - - sepal length (cm) - - sepal width (cm) - - petal length (cm) - - petal width (cm) - dtypes: - - float64 - - float64 - - float64 - - float64 - index_cols: [] - type: dataframe - name: predict_proba - returns: - dtype: float64 - shape: - - null - - 3 - type: ndarray type: sklearn object_type: model requirements: @@ -213,54 +184,31 @@ requirements: ### Deploying the model If you want to follow this Quick Start, you'll need to sign up on https://heroku.com, -create an API_KEY and populate `HEROKU_API_KEY` env var. 
- -First, create an environment to deploy your model: - -```shell -$ mlem declare env heroku staging -💾 Saving env to staging.mlem -``` +create an API_KEY and populate `HEROKU_API_KEY` env var (or run `heroku login` in command line). +Besides, you'll need to run `heroku container:login`. This will log you in to Heroku +container registry. Now we can [deploy the model with `mlem deploy`](https://mlem.ai/doc/get-started/deploying) (you need to use different `app_name`, since it's going to be published on https://herokuapp.com): ```shell -$ mlem deployment run mydeploy -m rf -t staging -c app_name=mlem-quick-start -⏳️ Loading deployment from .mlem/deployment/myservice.mlem -🔗 Loading link to .mlem/env/staging.mlem -🔗 Loading link to .mlem/model/rf.mlem -💾 Updating deployment at .mlem/deployment/myservice.mlem -🏛 Creating Heroku App example-mlem-get-started -💾 Updating deployment at .mlem/deployment/myservice.mlem +$ mlem deployment run heroku app.mlem \ + --model models/rf \ + --app_name example-mlem-get-started-app +⏳️ Loading model from models/rf.mlem +⏳️ Loading deployment from app.mlem 🛠 Creating docker image for heroku + 🛠 Building MLEM wheel file... 💼 Adding model files... 🛠 Generating dockerfile... 💼 Adding sources... 💼 Generating requirements file... - 🛠 Building docker image registry.heroku.com/example-mlem-get-started/web... - ✅ Built docker image registry.heroku.com/example-mlem-get-started/web - 🔼 Pushed image registry.heroku.com/example-mlem-get-started/web to remote registry at host registry.heroku.com -💾 Updating deployment at .mlem/deployment/myservice.mlem -🛠 Releasing app my-mlem-service formation -💾 Updating deployment at .mlem/deployment/myservice.mlem -✅ Service example-mlem-get-started is up. 
You can check it out at https://mlem-quick-start.herokuapp.com/ -``` - -### Exporting the model - -You could easily [export the model to a different format using `mlem build`](https://mlem.ai/doc/get-started/building): - -``` -$ mlem build rf docker -c server.type=fastapi -c image.name=sklearn-model -⏳️ Loading model from rf.mlem -🛠 Building MLEM wheel file... -💼 Adding model files... -🛠 Generating dockerfile... -💼 Adding sources... -💼 Generating requirements file... -🛠 Building docker image sklearn-model:latest... -✅ Built docker image sklearn-model:latest + 🛠 Building docker image registry.heroku.com/example-mlem-get-started-app/web... + ✅ Built docker image registry.heroku.com/example-mlem-get-started-app/web + 🔼 Pushing image registry.heroku.com/example-mlem-get-started-app/web to registry.heroku.com + ✅ Pushed image registry.heroku.com/example-mlem-get-started-app/web to registry.heroku.com +🛠 Releasing app example-mlem-get-started-app formation +✅ Service example-mlem-get-started-app is up. 
You can check it out at https://example-mlem-get-started-app.herokuapp.com/ ``` ## Contributing diff --git a/mlem/api/__init__.py b/mlem/api/__init__.py index 7244a0d8..83c11a0f 100644 --- a/mlem/api/__init__.py +++ b/mlem/api/__init__.py @@ -11,7 +11,6 @@ import_object, init, link, - ls, serve, ) @@ -19,7 +18,6 @@ "save", "load", "load_meta", - "ls", "clone", "init", "link", diff --git a/mlem/api/commands.py b/mlem/api/commands.py index a30aaa78..da50aa2b 100644 --- a/mlem/api/commands.py +++ b/mlem/api/commands.py @@ -2,7 +2,7 @@ MLEM's Python API """ import posixpath -from typing import Any, Dict, Iterable, List, Optional, Type, Union +from typing import Any, Dict, Optional, Union from fsspec import AbstractFileSystem from fsspec.implementations.local import LocalFileSystem @@ -14,8 +14,7 @@ get_model_meta, parse_import_type_modifier, ) -from mlem.config import CONFIG_FILE_NAME, project_config -from mlem.constants import PREDICT_METHOD_NAME +from mlem.constants import MLEM_CONFIG_FILE_NAME, PREDICT_METHOD_NAME from mlem.core.errors import ( InvalidArgumentError, MlemError, @@ -25,7 +24,7 @@ WrongMethodError, ) from mlem.core.import_objects import ImportAnalyzer, ImportHook -from mlem.core.meta_io import MLEM_DIR, Location, UriResolver, get_fs +from mlem.core.meta_io import Location, get_fs from mlem.core.metadata import load_meta, save from mlem.core.objects import ( MlemBuilder, @@ -56,27 +55,24 @@ def apply( method: str = None, output: str = None, target_project: str = None, - index: bool = None, - external: bool = None, batch_size: Optional[int] = None, ) -> Optional[Any]: """Apply provided model against provided data Args: - model (MlemModel): MLEM model. - data (Any): Input to the model. - method (str, optional): Which model method to use. + model: MLEM model. + data: Input to the model. + method: Which model method to use. If None, use the only method model has. If more than one is available, will fail. 
- output (str, optional): If value is provided, + output: If value is provided, assume it's path and save output there. - index (bool): Whether to index saved output in MLEM root folder. - external (bool): Whether to save result outside mlem dir + target_project: Path to MLEM project to save the result to. + batch_size: If provided, will process data in batches of given size. Returns: If `output=None`, returns results for given data. Otherwise returns None. - """ model = get_model_meta(model) w = model.model_type @@ -103,9 +99,7 @@ def apply( return res if len(res) == 1: res = res[0] - return save( - res, output, project=target_project, external=external, index=index - ) + return save(res, output, project=target_project) def apply_remote( @@ -114,25 +108,24 @@ def apply_remote( method: str = None, output: str = None, target_project: str = None, - index: bool = False, **client_kwargs, ) -> Optional[Any]: """Apply provided model against provided data Args: - client (Client): The client to access methods of deployed model. - data (Any): Input to the model. - method (str, optional): Which model method to use. + client: The client to access methods of deployed model. + data: Input to the model. + method: Which model method to use. If None, use the only method model has. If more than one is available, will fail. - output (str, optional): If value is provided, + output: If value is provided, assume it's path and save output there. - index (bool): Whether to index saved output in MLEM root folder. + target_project: Path to MLEM project to save the result to. + **client_kwargs: Additional arguments to pass to client. Returns: If `output=None`, returns results for given data. Otherwise returns None. 
- """ client = ensure_mlem_object(Client, client, **client_kwargs) if method is not None: @@ -151,7 +144,7 @@ def apply_remote( return res if len(res) == 1: res = res[0] - return save(res, output, project=target_project, index=index) + return save(res, output, project=target_project) def clone( @@ -164,28 +157,24 @@ def clone( target_fs: Optional[str] = None, follow_links: bool = True, load_value: bool = False, - index: bool = None, - external: bool = None, ) -> MlemObject: """Clones MLEM object from `path` to `out` and returns Python representation for the created object Args: - path (str): Path to the object. Could be local path or path inside a git repo. - target (str): Path to save the copy of initial object to. - project (Optional[str], optional): URL to project if object is located there. - rev (Optional[str], optional): revision, could be git commit SHA, branch name or tag. - fs (Optional[AbstractFileSystem], optional): filesystem to load object from - target_project (Optional[str], optional): path to project to save cloned object to - target_fs (Optional[AbstractFileSystem], optional): target filesystem - follow_links (bool, optional): If object we read is a MLEM link, whether to load + path: Path to the object. Could be local path or path inside a git repo. + target: Path to save the copy of initial object to. + project: URL to project if object is located there. + rev: revision, could be git commit SHA, branch name or tag. + fs: filesystem to load object from + target_project: path to project to save cloned object to + target_fs: target filesystem + follow_links: If object we read is a MLEM link, whether to load the actual object link points to. Defaults to True. - load_value (bool, optional): Load actual python object incorporated in MlemObject. Defaults to False. - index: whether to index object in target project - external: wheter to put object inside mlem dir in target project + load_value: Load actual python object incorporated in MlemObject. 
Defaults to False. Returns: - MlemObject: Copy of initial object saved to `out` + MlemObject: Copy of initial object saved to `out`. """ meta = load_meta( path, @@ -202,14 +191,19 @@ def clone( target, fs=target_fs, project=target_project, - index=index, - external=external, ) def init(path: str = ".") -> None: - """Creates .mlem directory in `path`""" - path = posixpath.join(path, MLEM_DIR) + """Creates MLEM config in `path` + + Args: + path: Path to create config in. Defaults to current directory. + + Returns: + None + """ + path = posixpath.join(path, MLEM_CONFIG_FILE_NAME) fs, path = get_fs(path) if fs.exists(path): echo( @@ -252,9 +246,8 @@ def init(path: str = ".") -> None: "" ) ) - fs.makedirs(path) # some fs dont support creating empty dirs - with fs.open(posixpath.join(path, CONFIG_FILE_NAME), "w"): + with fs.open(path, "w"): pass echo( EMOJI_MLEM @@ -273,23 +266,21 @@ def link( rev: Optional[str] = None, target: Optional[str] = None, target_project: Optional[str] = None, - external: Optional[bool] = None, follow_links: bool = True, absolute: bool = False, ) -> MlemLink: """Creates MlemLink for an `source` object and dumps it if `target` is provided Args: - source (Union[str, MlemObject]): The object to create link from. - source_project (str, optional): Path to mlem project where to load obj from - rev (str, optional): Revision if object is stored in git repo. - target (str, optional): Where to store the link object. - target_project (str, optional): If provided, + source: The object to create link from. + source_project: Path to mlem project where to load obj from + rev: Revision if object is stored in git repo. + target: Where to store the link object. + target_project: If provided, treat `target` as link name and dump link in MLEM DIR - follow_links (bool): Whether to make link to the underlying object + follow_links: Whether to make link to the underlying object if `source` is itself a link. Defaults to True. 
- external (bool): Whether to save link outside mlem dir - absolute (bool): Whether to make link absolute or relative to mlem project + absolute: Whether to make link absolute or relative to mlem project Returns: MlemLink: Link object to the `source`. @@ -308,7 +299,6 @@ def link( return source.make_link( target, project=target_project, - external=external, absolute=absolute, ) @@ -318,12 +308,15 @@ def build( model: Union[str, MlemModel], **builder_kwargs, ): - """Pack model in docker-build-ready folder or directly build a docker image. + """Pack model into something useful, such as docker image, Python package or something else. Args: - builder (Union[str, MlemBuilder]): Packager to use. - Out-of-the-box supported string values are "docker_dir" and "docker". - model (Union[str, MlemModel]): The model to build. + builder: Builder to use. + model: The model to build. + builder_kwargs: Additional keyword arguments to pass to the builder. + + Returns: + The result of the build, different for different builders. """ model = get_model_meta(model, load_value=False) return ensure_mlem_object(MlemBuilder, builder, **builder_kwargs).build( @@ -334,11 +327,15 @@ def build( def serve( model: Union[str, MlemModel], server: Union[Server, str], **server_kwargs ): - """Serve model via HTTP/HTTPS. + """Serve a model by exposing its methods as endpoints. Args: - model (Union[str, MlemModel]): The model to serve. - server (Union[Server, str]): Out-of-the-box supported one is "fastapi". + model: The model to serve. + server: Out-of-the-box supported one is "fastapi". + server_kwargs: Additional kwargs to pass to the server. 
+ + Returns: + None """ from mlem.runtime.interface import ModelInterface @@ -359,25 +356,6 @@ def _validate_ls_project(loc: Location, project): mlem_project_exists(loc.project, loc.fs, raise_on_missing=True) -def ls( # pylint: disable=too-many-locals - project: str = ".", - rev: Optional[str] = None, - fs: Optional[AbstractFileSystem] = None, - type_filter: Union[ - Type[MlemObject], Iterable[Type[MlemObject]], None - ] = None, - include_links: bool = True, - ignore_errors: bool = False, -) -> Dict[Type[MlemObject], List[MlemObject]]: - loc = UriResolver.resolve( - "", project=project, rev=rev, fs=fs, find_project=True - ) - _validate_ls_project(loc, project) - return project_config(project, fs).index.list( - loc, type_filter, include_links, ignore_errors - ) - - def import_object( path: str, project: Optional[str] = None, @@ -388,13 +366,27 @@ def import_object( target_fs: Optional[AbstractFileSystem] = None, type_: Optional[str] = None, copy_data: bool = True, - external: bool = None, - index: bool = None, ): """Try to load an object as MLEM model (or data) and return it, optionally saving to the specified target location + + Args: + path: Path to the object to import. + project: Path to mlem project where to load obj from. + rev: Revision if object is stored in git repo. + fs: Filesystem to use to load the object. + target: Where to store the imported object. + target_project: If provided, treat `target` as object name and dump + object in this MLEM Project. + target_fs: Filesystem to use to save the object. + type_: Type of the object to import. If not provided, will try to + infer from the object itself. + copy_data: Whether to copy data to the target location. + + Returns: + MlemObject: Imported object. 
""" - loc = UriResolver.resolve(path, project, rev, fs) + loc = Location.resolve(path, project, rev, fs) echo(EMOJI_LOAD + f"Importing object from {loc.uri_repr}") if type_ is not None: type_, modifier = parse_import_type_modifier(type_) @@ -408,60 +400,77 @@ def import_object( target, fs=target_fs, project=target_project, - index=index, - external=external, ) return meta def deploy( deploy_meta_or_path: Union[MlemDeployment, str], - model: Union[MlemModel, str] = None, + model: Union[MlemModel, str], env: Union[MlemEnv, str] = None, project: Optional[str] = None, + rev: Optional[str] = None, fs: Optional[AbstractFileSystem] = None, - external: bool = None, - index: bool = None, + env_kwargs: Dict[str, Any] = None, **deploy_kwargs, ) -> MlemDeployment: - deploy_path = None + """Deploy a model to a target environment. Can use an existing deployment + declaration or create a new one on-the-fly. + + Args: + deploy_meta_or_path: MlemDeployment object or path to it. + model: The model to deploy. + env: The environment to deploy to. + project: Path to mlem project where to load obj from. + rev: Revision if object is stored in git repo. + fs: Filesystem to use to load the object. + env_kwargs: Additional kwargs to pass to the environment. + deploy_kwargs: Additional kwargs to pass to the deployment. + + Returns: + MlemDeployment: The deployment object. 
+ """ + deploy_meta: MlemDeployment + update = False if isinstance(deploy_meta_or_path, str): - deploy_path = deploy_meta_or_path try: deploy_meta = load_meta( - path=deploy_path, + path=deploy_meta_or_path, project=project, + rev=rev, fs=fs, force_type=MlemDeployment, ) - except MlemObjectNotFound: - deploy_meta = None - + update = True + except MlemObjectNotFound as e: + if env is None: + raise MlemError( + "Please provide model and env args for new deployment" + ) from e + if not deploy_meta_or_path: + raise MlemError("deploy_path cannot be empty") from e + + env_meta = ensure_meta(MlemEnv, env, allow_typename=True) + if isinstance(env_meta, type): + env = None + if env_kwargs: + env = env_meta(**env_kwargs) + deploy_type = env_meta.deploy_type + deploy_meta = deploy_type( + env=env, + **deploy_kwargs, + ) + deploy_meta.dump(deploy_meta_or_path, fs, project) else: deploy_meta = deploy_meta_or_path - if model is not None: - deploy_meta.replace_model(get_model_meta(model)) + update = True - if deploy_meta is None: - if model is None or env is None: - raise MlemError( - "Please provide model and env args for new deployment" - ) - if not deploy_path: - raise MlemError("deploy_path cannot be empty") - model_meta = get_model_meta(model) - env_meta = ensure_meta(MlemEnv, env) - deploy_meta = env_meta.deploy_type( - model=model_meta, - env=env_meta, - env_link=env_meta.make_link(), - model_link=model_meta.make_link(), - **deploy_kwargs, - ) - deploy_meta.dump(deploy_path, fs, project, index, external) + if update: + pass # todo update from deploy_args and env_args # ensuring links are working deploy_meta.get_env() - deploy_meta.get_model() + model_meta = get_model_meta(model) - deploy_meta.run() + deploy_meta.check_unchanged() + deploy_meta.deploy(model_meta) return deploy_meta diff --git a/mlem/api/utils.py b/mlem/api/utils.py index 902b8640..5d941b6b 100644 --- a/mlem/api/utils.py +++ b/mlem/api/utils.py @@ -1,8 +1,10 @@ import re -from typing import Any, Optional, 
Tuple, Type, TypeVar, Union +from typing import Any, Optional, Tuple, Type, TypeVar, Union, overload -from mlem.core.base import MlemABC, build_mlem_object -from mlem.core.errors import InvalidArgumentError +from typing_extensions import Literal + +from mlem.core.base import MlemABC, build_mlem_object, load_impl_ext +from mlem.core.errors import InvalidArgumentError, MlemObjectNotFound from mlem.core.metadata import load, load_meta from mlem.core.objects import MlemData, MlemModel, MlemObject @@ -45,9 +47,41 @@ def get_model_meta( MM = TypeVar("MM", bound=MlemObject) -def ensure_meta(as_class: Type[MM], obj_or_path: Union[str, MM]) -> MM: +@overload +def ensure_meta( + as_class: Type[MM], + obj_or_path: Union[str, MM], + allow_typename: bool = False, +) -> Union[MM, Type[MM]]: + pass + + +@overload +def ensure_meta( + as_class: Type[MM], + obj_or_path: Union[str, MM], + allow_typename: Literal[False] = False, +) -> MM: + pass + + +def ensure_meta( + as_class: Type[MM], + obj_or_path: Union[str, MM], + allow_typename: bool = False, +) -> Union[MM, Type[MM]]: if isinstance(obj_or_path, str): - return load_meta(obj_or_path, force_type=as_class) + try: + return load_meta(obj_or_path, force_type=as_class) + except MlemObjectNotFound: + if allow_typename: + impl = load_impl_ext( + as_class.abs_name, obj_or_path, raise_on_missing=False + ) + if impl is None or not issubclass(impl, as_class): + raise + return impl + raise if isinstance(obj_or_path, as_class): return obj_or_path raise ValueError(f"Cannot get {as_class} from '{obj_or_path}'") diff --git a/mlem/cli/__init__.py b/mlem/cli/__init__.py index 24952f95..5ea9d854 100644 --- a/mlem/cli/__init__.py +++ b/mlem/cli/__init__.py @@ -10,7 +10,7 @@ from mlem.cli.deployment import deployment from mlem.cli.dev import dev from mlem.cli.import_object import import_object -from mlem.cli.info import ls, pretty_print +from mlem.cli.info import pretty_print from mlem.cli.init import init from mlem.cli.link import link from 
mlem.cli.main import app @@ -25,7 +25,6 @@ "build", "pretty_print", "link", - "ls", "clone", "serve", "config", diff --git a/mlem/cli/apply.py b/mlem/cli/apply.py index 5fd0c1b7..830e5454 100644 --- a/mlem/cli/apply.py +++ b/mlem/cli/apply.py @@ -1,18 +1,19 @@ from json import dumps from typing import List, Optional -from typer import Argument, Option +from typer import Argument, Option, Typer from mlem.api import import_object from mlem.cli.main import ( - config_arg, + PATH_METAVAR, + app, mlem_command, - option_conf, + mlem_group, + mlem_group_callback, + option_data, option_data_project, option_data_rev, - option_external, option_file_conf, - option_index, option_json, option_load, option_method, @@ -20,6 +21,13 @@ option_rev, option_target_project, ) +from mlem.cli.utils import ( + abc_fields_parameters, + config_arg, + for_each_impl, + lazy_class_docstring, + make_not_required, +) from mlem.core.data_type import DataAnalyzer from mlem.core.errors import UnsupportedDataBatchLoading from mlem.core.import_objects import ImportHook @@ -29,55 +37,51 @@ from mlem.ui import set_echo from mlem.utils.entrypoints import list_implementations +option_output = Option( + None, + "-o", + "--output", + help="Where to save model outputs", + metavar=PATH_METAVAR, +) +option_import = Option( + False, + "-i", + "--import", + help="Try to import data on-the-fly", +) +option_import_type = Option( + None, + "--import-type", + "--it", + # TODO: change ImportHook to MlemObject to support ext machinery + help=f"Specify how to read data file for import. 
Available types: {list_implementations(ImportHook)}", +) +option_batch_size = Option( + None, + "-b", + "--batch_size", + help="Batch size for reading data in batches", +) + @mlem_command("apply", section="runtime") def apply( - model: str = Argument(..., help="Path to model object"), + model: str = Argument(..., metavar="model", help="Path to model object"), data_path: str = Argument(..., metavar="data", help="Path to data object"), project: Optional[str] = option_project, rev: Optional[str] = option_rev, - output: Optional[str] = Option( - None, "-o", "--output", help="Where to store the outputs." - ), + output: Optional[str] = option_output, method: str = option_method, data_project: Optional[str] = option_data_project, data_rev: Optional[str] = option_data_rev, - import_: bool = Option( - False, - "-i", - "--import", - help="Try to import data on-the-fly", - ), - import_type: str = Option( - None, - "--import-type", - "--it", - # TODO: change ImportHook to MlemObject to support ext machinery - help=f"Specify how to read data file for import. Available types: {list_implementations(ImportHook)}", - ), - batch_size: Optional[int] = Option( - None, - "-b", - "--batch_size", - help="Batch size for reading data in batches.", - ), - index: bool = option_index, - external: bool = option_external, + import_: bool = option_import, + import_type: str = option_import_type, + batch_size: Optional[int] = option_batch_size, json: bool = option_json, ): - """Apply a model to data. 
Resulting data will be saved as MLEM object to `output` if it is provided, otherwise will be printed - - Examples: - Apply local mlem model to local mlem data - $ mlem apply mymodel mydata --method predict --output myprediction - - Apply local mlem model to local data file - $ mlem apply mymodel data.csv --method predict --import --import-type pandas[csv] --output myprediction - - Apply a version of remote model to a version of remote data - $ mlem apply models/logreg --project https://github.com/iterative/example-mlem --rev main - data/test_x --data-project https://github.com/iterative/example-mlem --data-rev main - --method predict --output myprediction + """Apply a model to data. The result will be saved as a MLEM object to `output` if + provided. Otherwise, it will be printed to `stdout`. """ from mlem.api import apply @@ -108,8 +112,6 @@ def apply( data, method=method, output=output, - index=index, - external=external, batch_size=batch_size, ) if output is None and json: @@ -120,38 +122,49 @@ def apply( ) -@mlem_command("apply-remote", section="runtime") -def apply_remote( - subtype: str = Argument( - "", - help=f"Type of client. Choices: {list_implementations(Client)}", - show_default=False, - ), - data: str = Argument(..., help="Path to data object"), - project: Optional[str] = option_project, - rev: Optional[str] = option_rev, - output: Optional[str] = Option( - None, "-o", "--output", help="Where to store the outputs." - ), - target_project: Optional[str] = option_target_project, - method: str = option_method, - index: bool = option_index, - json: bool = option_json, - load: Optional[str] = option_load("client"), - conf: List[str] = option_conf("client"), - file_conf: List[str] = option_file_conf("client"), -): - """Apply a model (deployed somewhere remotely) to data. 
Resulting data will be saved as MLEM object to `output` if it is provided, otherwise will be printed +apply_remote = Typer( + name="apply-remote", + help="""Apply a deployed-model (possibly remotely) to data. The results will be saved as +a MLEM object to `output` if provided. Otherwise, it will be printed to +`stdout`. + """, + cls=mlem_group("runtime"), + subcommand_metavar="client", +) +app.add_typer(apply_remote) - Examples: - Apply hosted mlem model to local mlem data - $ mlem apply-remote http mydata -c host="0.0.0.0" -c port=8080 --output myprediction - """ - client = config_arg(Client, load, subtype, conf, file_conf) + +def _apply_remote( + data, + project, + rev, + method, + output, + target_project, + json, + type_name, + load, + file_conf, + kwargs, +): + client = config_arg( + Client, + load, + type_name, + conf=None, + file_conf=file_conf, + **(kwargs or {}), + ) with set_echo(None if json else ...): result = run_apply_remote( - client, data, project, rev, index, method, output, target_project + client, + data, + project, + rev, + method, + output, + target_project, ) if output is None and json: print( @@ -161,12 +174,75 @@ def apply_remote( ) +@mlem_group_callback(apply_remote, required=["data", "load"]) +def apply_remote_load( + data: str = make_not_required(option_data), + project: Optional[str] = option_project, + rev: Optional[str] = option_rev, + output: Optional[str] = option_output, + target_project: Optional[str] = option_target_project, + method: str = option_method, + json: bool = option_json, + load: Optional[str] = option_load("client"), +): + return _apply_remote( + data, + project, + rev, + method, + output, + target_project, + json, + None, + load, + None, + None, + ) + + +@for_each_impl(Client) +def create_apply_remote(type_name): + @mlem_command( + type_name, + section="clients", + parent=apply_remote, + dynamic_metavar="__kwargs__", + dynamic_options_generator=abc_fields_parameters(type_name, Client), + 
hidden=type_name.startswith("_"), + lazy_help=lazy_class_docstring(Client.abs_name, type_name), + no_pass_from_parent=["file_conf"], + ) + def apply_remote_func( + data: str = option_data, + project: Optional[str] = option_project, + rev: Optional[str] = option_rev, + output: Optional[str] = option_output, + target_project: Optional[str] = option_target_project, + method: str = option_method, + json: bool = option_json, + file_conf: List[str] = option_file_conf("client"), + **__kwargs__, + ): + return _apply_remote( + data, + project, + rev, + method, + output, + target_project, + json, + type_name, + None, + file_conf, + __kwargs__, + ) + + def run_apply_remote( client: Client, data_path: str, project, rev, - index, method, output, target_project, @@ -186,6 +262,5 @@ def run_apply_remote( method=method, output=output, target_project=target_project, - index=index, ) return result diff --git a/mlem/cli/build.py b/mlem/cli/build.py index 4b4b1623..35372e08 100644 --- a/mlem/cli/build.py +++ b/mlem/cli/build.py @@ -1,49 +1,92 @@ from typing import List, Optional -from typer import Argument +from typer import Typer from mlem.cli.main import ( - config_arg, + app, mlem_command, - option_conf, + mlem_group, + mlem_group_callback, option_file_conf, option_load, + option_model, option_project, option_rev, ) +from mlem.cli.utils import ( + abc_fields_parameters, + config_arg, + for_each_impl, + lazy_class_docstring, + make_not_required, +) from mlem.core.metadata import load_meta from mlem.core.objects import MlemBuilder, MlemModel -from mlem.utils.entrypoints import list_implementations + +build = Typer( + name="build", + help=""" + Build models into re-usable assets you can distribute and use in production, +such as a Docker image or Python package. 
+ """, + cls=mlem_group("runtime", aliases=["export"]), + subcommand_metavar="builder", +) +app.add_typer(build) -@mlem_command("build", section="runtime", aliases=["export"]) -def build( - model: str = Argument(..., help="Path to model"), - subtype: str = Argument( - "", - help=f"Type of build. Choices: {list_implementations(MlemBuilder)}", - show_default=False, - ), +@mlem_group_callback(build, required=["model", "load"]) +def build_load( + model: str = make_not_required(option_model), project: Optional[str] = option_project, rev: Optional[str] = option_rev, - load: Optional[str] = option_load("builder"), - conf: List[str] = option_conf("builder"), - file_conf: List[str] = option_file_conf("builder"), + load: str = option_load("builder"), ): - """ - Build/export model - - Examples: - Build docker image from model - $ mlem build mymodel docker -c server.type=fastapi -c image.name=myimage - - Create build docker_dir declaration and build it - $ mlem declare builder docker_dir -c server=fastapi -c target=build build_dock - $ mlem build mymodel --load build_dock - """ from mlem.api.commands import build build( - config_arg(MlemBuilder, load, subtype, conf, file_conf), + config_arg( + MlemBuilder, + load, + None, + conf=None, + file_conf=None, + ), load_meta(model, project, rev, force_type=MlemModel), ) + + +@for_each_impl(MlemBuilder) +def create_build_command(type_name): + @mlem_command( + type_name, + section="builders", + parent=build, + dynamic_metavar="__kwargs__", + dynamic_options_generator=abc_fields_parameters( + type_name, MlemBuilder + ), + hidden=type_name.startswith("_"), + lazy_help=lazy_class_docstring(MlemBuilder.abs_name, type_name), + no_pass_from_parent=["file_conf"], + ) + def build_type( + model: str = option_model, + project: Optional[str] = option_project, + rev: Optional[str] = option_rev, + file_conf: List[str] = option_file_conf("builder"), + **__kwargs__ + ): + from mlem.api.commands import build + + build( + config_arg( + MlemBuilder, + 
None, + type_name, + conf=None, + file_conf=file_conf, + **__kwargs__ + ), + load_meta(model, project, rev, force_type=MlemModel), + ) diff --git a/mlem/cli/checkenv.py b/mlem/cli/checkenv.py index b475ca98..75e45d6b 100644 --- a/mlem/cli/checkenv.py +++ b/mlem/cli/checkenv.py @@ -2,7 +2,12 @@ from typer import Argument -from mlem.cli.main import mlem_command, option_project, option_rev +from mlem.cli.main import ( + PATH_METAVAR, + mlem_command, + option_project, + option_rev, +) from mlem.core.metadata import load_meta from mlem.core.objects import MlemData, MlemModel from mlem.ui import EMOJI_OK, echo @@ -10,19 +15,11 @@ @mlem_command("checkenv", hidden=True) def checkenv( - path: str = Argument(..., help="Path to object"), + path: str = Argument(..., help="Path to object", metavar=PATH_METAVAR), project: Optional[str] = option_project, rev: Optional[str] = option_rev, ): - """Check that current environment satisfies object requrements - - Examples: - Check local object - $ mlem checkenv mymodel - - Check remote object - $ mlem checkenv https://github.com/iterative/example-mlem/models/logreg - """ + """Check that current environment satisfies object requrements""" meta = load_meta(path, project, rev, follow_links=True, load_value=False) if isinstance(meta, (MlemModel, MlemData)): meta.checkenv() diff --git a/mlem/cli/clone.py b/mlem/cli/clone.py index 96f8e3cc..1962edd8 100644 --- a/mlem/cli/clone.py +++ b/mlem/cli/clone.py @@ -4,8 +4,6 @@ from mlem.cli.main import ( mlem_command, - option_external, - option_index, option_project, option_rev, option_target_project, @@ -19,17 +17,9 @@ def clone( project: Optional[str] = option_project, rev: Optional[str] = option_rev, target_project: Optional[str] = option_target_project, - external: Optional[bool] = option_external, - index: Optional[bool] = option_index, ): - """Download MLEM object from `uri` and save it to `target` - - Examples: - Copy remote model to local directory - $ mlem clone models/logreg --project 
https://github.com/iterative/example-mlem --rev main mymodel - - Copy remote model to remote MLEM project - $ mlem clone models/logreg --project https://github.com/iterative/example-mlem --rev main mymodel --tp s3://mybucket/mymodel + """Copy a MLEM Object from `uri` and + saves a copy of it to `target` path. """ from mlem.api.commands import clone @@ -39,6 +29,4 @@ def clone( project=project, rev=rev, target_project=target_project, - external=external, - index=index, ) diff --git a/mlem/cli/config.py b/mlem/cli/config.py index 50ac9002..abfdc4bf 100644 --- a/mlem/cli/config.py +++ b/mlem/cli/config.py @@ -5,9 +5,9 @@ from yaml import safe_dump, safe_load from mlem.cli.main import app, mlem_command, mlem_group, option_project -from mlem.config import CONFIG_FILE_NAME, get_config_cls -from mlem.constants import MLEM_DIR -from mlem.core.base import get_recursively, set_recursively, smart_split +from mlem.config import get_config_cls +from mlem.constants import MLEM_CONFIG_FILE_NAME +from mlem.core.base import SmartSplitDict, get_recursively, smart_split from mlem.core.errors import MlemError from mlem.core.meta_io import get_fs, get_uri from mlem.ui import EMOJI_OK, echo @@ -19,7 +19,7 @@ @config.callback() def config_callback(): - """Manipulate MLEM configuration""" + """Manipulate MLEM configuration.""" @mlem_command("set", parent=config) @@ -33,8 +33,7 @@ def config_set( ): """Set configuration value - Examples: - $ mlem config set pandas.default_format csv + Documentation: """ fs, path = get_fs(project or "") project = find_project_root(path, fs=fs) @@ -42,16 +41,17 @@ def config_set( section, name = name.split(".", maxsplit=1) except ValueError as e: raise MlemError("[name] should contain at least one dot") from e - with fs.open(posixpath.join(project, MLEM_DIR, CONFIG_FILE_NAME)) as f: + config_file_path = posixpath.join(project, MLEM_CONFIG_FILE_NAME) + with fs.open(config_file_path) as f: new_conf = safe_load(f) or {} - new_conf[section] = 
new_conf.get(section, {}) - set_recursively(new_conf[section], smart_split(name, "."), value) + conf = SmartSplitDict(new_conf.get(section, {})) + conf[name] = value + new_conf[section] = conf.build() if validate: config_cls = get_config_cls(section) config_cls(**new_conf[section]) - config_file = posixpath.join(project, MLEM_DIR, CONFIG_FILE_NAME) - with fs.open(config_file, "w", encoding="utf8") as f: + with fs.open(config_file_path, "w", encoding="utf8") as f: safe_dump( new_conf, f, @@ -69,13 +69,11 @@ def config_get( ): """Get configuration value - Examples: - $ mlem config get pandas.default_format - $ mlem config get pandas.default_format --project https://github.com/iterative/example-mlem/ + Documentation: """ fs, path = get_fs(project or "") project = find_project_root(path, fs=fs) - with fs.open(posixpath.join(project, MLEM_DIR, CONFIG_FILE_NAME)) as f: + with fs.open(posixpath.join(project, MLEM_CONFIG_FILE_NAME)) as f: try: echo(get_recursively(safe_load(f), smart_split(name, "."))) except KeyError as e: diff --git a/mlem/cli/declare.py b/mlem/cli/declare.py index d840fb22..c31bdead 100644 --- a/mlem/cli/declare.py +++ b/mlem/cli/declare.py @@ -1,40 +1,196 @@ -from typing import List, Optional +from typing import Any, Dict, Type -from typer import Argument, Option +from typer import Argument, Typer +from yaml import safe_dump -from ..core.base import build_mlem_object -from ..core.objects import MlemObject -from .main import ( - mlem_command, - option_external, - option_index, - option_project, +from ..core.base import MlemABC, build_mlem_object, load_impl_ext +from ..core.meta_io import Location +from ..core.objects import EnvLink, MlemDeployment, MlemObject +from ..utils.entrypoints import list_abstractions, list_implementations +from .main import app, mlem_command, mlem_group, option_project +from .utils import ( + NOT_SET, + CallContext, + CliTypeField, + _option_from_field, + _options_from_model, + abc_fields_parameters, + lazy_class_docstring, 
wrap_build_error, ) +declare = Typer( + name="declare", + help="""Declares a new MLEM Object metafile from config args and config files. + """, + cls=mlem_group("object"), + subcommand_metavar="subtype", +) +app.add_typer(declare) + + +def create_declare_mlem_object(type_name, cls: Type[MlemObject]): + if cls.__is_root__: + typer = Typer( + name=type_name, help=cls.__doc__, cls=mlem_group("Mlem Objects") + ) + declare.add_typer(typer) + + for subtype in list_implementations(MlemObject, cls): + create_declare_mlem_object_subcommand( + typer, subtype, type_name, cls + ) + + +def add_env_params_deployment(subtype, parent_cls: Type[MlemDeployment]): + try: + impl = load_impl_ext(parent_cls.object_type, subtype) + except ImportError: + return lambda ctx: [] + + assert issubclass(impl, MlemDeployment) # just to help mypy + env_impl = impl.env_type + + def add_env(ctx: CallContext): + yield from abc_fields_parameters(subtype, parent_cls)(ctx) + yield from ( + _options_from_model(env_impl, ctx, path="env", force_not_set=True) + ) + yield from ( + _options_from_model(EnvLink, ctx, path="env", force_not_set=True) + ) + yield _option_from_field( + CliTypeField( + path="env", + required=False, + allow_none=False, + type_=str, + help="", + default=NOT_SET, + is_list=False, + is_mapping=False, + ), + "env", + ) + + return add_env + + +def process_env_params_deployments( + subtype, kwargs: Dict[str, Any] +) -> Dict[str, Any]: + env_params = {p[len("env.") :] for p in kwargs if p.startswith("env.")} + if not env_params.issubset({"project", "path", "rev"}): + kwargs["env"] = subtype + return kwargs + + +_add_fields = {"deployment": add_env_params_deployment} +_process_fields = {"deployment": process_env_params_deployments} + + +def add_fields(subtype: str, parent_cls): + return _add_fields.get(parent_cls.object_type, abc_fields_parameters)( + subtype, parent_cls + ) -@mlem_command("declare", section="object") -def declare( - object_type: str = Argument(..., help="Type of metafile 
to create"), - subtype: str = Argument("", help="Subtype of MLEM object"), - conf: Optional[List[str]] = Option( - None, - "-c", - "--conf", - help="Values for object fields in format `field.nested.name=value`", - ), - path: str = Argument(..., help="Where to save object"), - project: str = option_project, - external: bool = option_external, - index: bool = option_index, + +def process_fields(subtype: str, parent_cls, kwargs): + if parent_cls.object_type in _process_fields: + kwargs = _process_fields[parent_cls.object_type](subtype, kwargs) + return kwargs + + +def create_declare_mlem_object_subcommand( + parent: Typer, subtype: str, type_name: str, parent_cls +): + @mlem_command( + subtype, + section="MLEM Objects", + parent=parent, + dynamic_metavar="__kwargs__", + dynamic_options_generator=add_fields(subtype, parent_cls), + hidden=subtype.startswith("_"), + lazy_help=lazy_class_docstring(type_name, subtype), + ) + def subtype_command( + path: str = Argument( + ..., help="Where to save the object (.mlem file)" + ), + project: str = option_project, + **__kwargs__, + ): + __kwargs__ = process_fields(subtype, parent_cls, __kwargs__) + subtype_cls = load_impl_ext(type_name, subtype) + cls = subtype_cls.__type_map__[subtype] + with wrap_build_error(subtype, cls): + meta = build_mlem_object( + cls, subtype, str_conf=None, file_conf=[], **__kwargs__ + ) + meta.dump(path, project=project) + + +for meta_type in list_implementations(MlemObject): + create_declare_mlem_object(meta_type, MlemObject.__type_map__[meta_type]) + + +def create_declare_mlem_abc(abs_name: str): + try: + root_cls = MlemABC.abs_types[abs_name] + except KeyError: + root_cls = None + + typer = Typer( + name=abs_name, + help=root_cls.__doc__ + if root_cls + else f"Create `{abs_name}` configuration", + cls=mlem_group("Subtypes"), + ) + declare.add_typer(typer) + + for subtype in list_implementations(abs_name): + if root_cls is None: + try: + impl = load_impl_ext(abs_name, subtype) + root_cls = 
impl.__parent__ # type: ignore[assignment] + except ImportError: + pass + create_declare_mlem_abc_subcommand(typer, subtype, abs_name, root_cls) + + +def create_declare_mlem_abc_subcommand( + parent: Typer, subtype: str, abs_name: str, root_cls ): - """Creates new mlem object metafile from conf args and config files - - Examples: - Create heroku deployment - $ mlem declare env heroku production -c api_key=<...> - """ - cls = MlemObject.__type_map__[object_type] - with wrap_build_error(subtype, cls): - meta = build_mlem_object(cls, subtype, conf, []) - meta.dump(path, project=project, index=index, external=external) + @mlem_command( + subtype, + section="Subtypes", + parent=parent, + dynamic_metavar="__kwargs__", + dynamic_options_generator=abc_fields_parameters(subtype, root_cls) + if root_cls + else None, + hidden=subtype.startswith("_"), + lazy_help=lazy_class_docstring(abs_name, subtype), + ) + def subtype_command( + path: str = Argument(..., help="Where to save object"), + project: str = option_project, + **__kwargs__, + ): + with wrap_build_error(subtype, root_cls): + obj = build_mlem_object( + root_cls, subtype, str_conf=None, file_conf=[], **__kwargs__ + ) + location = Location.resolve( + path=path, project=project, rev=None, fs=None + ) + with location.fs.open(location.fullpath, "w") as f: + safe_dump(obj.dict(), f) + + +_exposed = {"server", "client", "docker_registry"} +for abs_name in list_abstractions(include_hidden=False): + if abs_name not in _exposed: + continue + create_declare_mlem_abc(abs_name) diff --git a/mlem/cli/deployment.py b/mlem/cli/deployment.py index 9ce50d1a..081ee886 100644 --- a/mlem/cli/deployment.py +++ b/mlem/cli/deployment.py @@ -4,90 +4,145 @@ from typer import Argument, Option, Typer from mlem.cli.apply import run_apply_remote +from mlem.cli.declare import add_env_params_deployment, process_fields from mlem.cli.main import ( app, mlem_command, mlem_group, + mlem_group_callback, option_data_project, option_data_rev, - 
option_external, - option_index, + option_file_conf, option_json, + option_load, option_method, + option_model, + option_model_project, + option_model_rev, option_project, option_rev, option_target_project, ) -from mlem.core.base import parse_string_conf +from mlem.cli.utils import ( + for_each_impl, + lazy_class_docstring, + make_not_required, + wrap_build_error, +) +from mlem.core.base import build_mlem_object from mlem.core.data_type import DataAnalyzer -from mlem.core.errors import DeploymentError +from mlem.core.errors import DeploymentError, MlemObjectNotFound from mlem.core.metadata import load_meta -from mlem.core.objects import MlemDeployment +from mlem.core.objects import ( + DeployState, + DeployStatus, + MlemDeployment, + MlemModel, +) from mlem.ui import echo, no_echo, set_echo deployment = Typer( name="deployment", - help="Manage deployments", + help="A set of commands to set up and manage deployments", cls=mlem_group("runtime", aliases=["deploy"]), ) app.add_typer(deployment) +deploy_run = Typer( + name="run", + help="""Deploy a model to a target environment. Can use an existing deployment + declaration or create a new one on-the-fly. 
+ """, + cls=mlem_group("other"), + subcommand_metavar="deployment", +) +deployment.add_typer(deploy_run) -@mlem_command("run", parent=deployment) -def deploy_run( - path: str = Argument( - ..., - help="Path to deployment meta (will be created if it does not exist)", - ), - model: Optional[str] = Option(None, "-m", "--model", help="Path to model"), - env: Optional[str] = Option( - None, "-t", "--env", help="Path to target environment" - ), + +@mlem_group_callback(deploy_run, required=["model", "load"]) +def deploy_run_callback( + load: str = option_load("deployment"), + model: str = make_not_required(option_model), + model_project: Optional[str] = option_model_project, + model_rev: Optional[str] = option_model_rev, project: Optional[str] = option_project, - external: bool = option_external, - index: bool = option_index, - conf: Optional[List[str]] = Option( - None, - "-c", - "--conf", - help="Configuration for new deployment meta if it does not exist", - ), + rev: Optional[str] = option_rev, ): - """Deploy a model to target environment. Can use existing deployment declaration or create a new one on-the-fly - - Examples: - Create new deployment - $ mlem declare env heroku staging -c api_key=... - $ mlem deploy run service_name -m model -t staging -c name=my_service - - Deploy existing meta - $ mlem declare env heroku staging -c api_key=... - $ mlem declare deployment heroku service_name -c app_name=my_service -c model=model -c env=staging - $ mlem deploy run service_name + """Deploy a model to a target environment. Can use an existing deployment + declaration or create a new one on-the-fly. 
""" from mlem.api.commands import deploy deploy( - path, - model, - env, - project, - external=external, - index=index, - **parse_string_conf(conf or []), + load, + load_meta( + model, project=model_project, rev=model_rev, force_type=MlemModel + ), + project=project, + rev=rev, ) +@for_each_impl(MlemDeployment) +def create_deploy_run_command(type_name): + @mlem_command( + type_name, + section="deployments", + parent=deploy_run, + dynamic_metavar="__kwargs__", + dynamic_options_generator=add_env_params_deployment( + type_name, MlemDeployment + ), + hidden=type_name.startswith("_"), + lazy_help=lazy_class_docstring(MlemDeployment.object_type, type_name), + no_pass_from_parent=["file_conf"], + ) + def deploy_run_command( + path: str = Argument( + ..., help="Where to save the object (.mlem file)" + ), + model: str = make_not_required(option_model), + model_project: Optional[str] = option_model_project, + model_rev: Optional[str] = option_model_rev, + project: Optional[str] = option_project, + file_conf: List[str] = option_file_conf("deployment"), + **__kwargs__, + ): + from mlem.api.commands import deploy + + __kwargs__ = process_fields(type_name, MlemDeployment, __kwargs__) + try: + meta = load_meta(path, project=project, force_type=MlemDeployment) + raise DeploymentError( + f"Deployment meta already exists at {meta.loc}. 
Please use `mlem deployment run --load ...`" + ) + except MlemObjectNotFound: + with wrap_build_error(type_name, MlemDeployment): + meta = build_mlem_object( + MlemDeployment, + type_name, + str_conf=None, + file_conf=file_conf, + **__kwargs__, + ).dump(path, project=project) + deploy( + meta, + load_meta( + model, + project=model_project, + rev=model_rev, + force_type=MlemModel, + ), + project=project, + ) + + @mlem_command("remove", parent=deployment) def deploy_remove( path: str = Argument(..., help="Path to deployment meta"), project: Optional[str] = option_project, ): - """Stop and destroy deployed instance - - Examples: - $ mlem deployment remove service_name - """ + """Stop and destroy deployed instance.""" deploy_meta = load_meta(path, project=project, force_type=MlemDeployment) deploy_meta.remove() @@ -97,11 +152,7 @@ def deploy_status( path: str = Argument(..., help="Path to deployment meta"), project: Optional[str] = option_project, ): - """Print status of deployed service - - Examples: - $ mlem deployment status service_name - """ + """Print status of deployed service.""" with no_echo(): deploy_meta = load_meta( path, project=project, force_type=MlemDeployment @@ -110,6 +161,36 @@ def deploy_status( echo(status) +@mlem_command("wait", parent=deployment) +def deploy_wait( + path: str = Argument(..., help="Path to deployment meta"), + project: Optional[str] = option_project, + statuses: List[DeployStatus] = Option( + [DeployStatus.RUNNING], + "-s", + "--status", + help="statuses to wait for", + ), + intermediate: List[DeployStatus] = Option( + None, "-i", "--intermediate", help="Possible intermediate statuses" + ), + poll_timeout: float = Option( + 1.0, "-p", "--poll-timeout", help="Timeout between attempts" + ), + times: int = Option( + 0, "-t", "--times", help="Number of attempts. 
0 -> indefinite" + ), +): + """Wait for status of deployed service""" + with no_echo(): + deploy_meta = load_meta( + path, project=project, force_type=MlemDeployment + ) + deploy_meta.wait_for_status( + statuses, poll_timeout, times, allowed_intermediate=intermediate + ) + + @mlem_command("apply", parent=deployment) def deploy_apply( path: str = Argument(..., help="Path to deployment meta"), @@ -123,31 +204,28 @@ def deploy_apply( ), target_project: Optional[str] = option_target_project, method: str = option_method, - index: bool = option_index, json: bool = option_json, ): - """Apply method of deployed service - - Examples: - $ mlem deployment apply service_name - """ - + """Apply a deployed model to data.""" with set_echo(None if json else ...): deploy_meta = load_meta( path, project=project, rev=rev, force_type=MlemDeployment ) - if deploy_meta.state is None: + state: DeployState = deploy_meta.get_state() + if ( + state == deploy_meta.state_type(declaration=deploy_meta) + and not deploy_meta.state_type.allow_default + ): raise DeploymentError( f"{deploy_meta.type} deployment has no state. Either {deploy_meta.type} is not deployed yet or has been un-deployed again." 
) - client = deploy_meta.state.get_client() + client = deploy_meta.get_client(state) result = run_apply_remote( client, data, data_project, data_rev, - index, method, output, target_project, diff --git a/mlem/cli/dev.py b/mlem/cli/dev.py index 526bb7ca..00935b35 100644 --- a/mlem/cli/dev.py +++ b/mlem/cli/dev.py @@ -14,7 +14,10 @@ @dev.callback() def dev_callback(): - """Developer utility tools""" + """Developer utility tools + + Documentation: + """ @mlem_command(parent=dev, aliases=["fi"]) @@ -24,8 +27,7 @@ def find_implementations_diff( """Loads `root` module or package and finds implementations of MLEM base classes Shows differences between what was found and what is registered in entrypoints - Examples: - $ mlem dev fi + Documentation: """ exts = {e.entry for e in load_entrypoints().values()} impls = set(find_abc_implementations(root)[MLEM_ENTRY_POINT]) diff --git a/mlem/cli/import_object.py b/mlem/cli/import_object.py index 782bfa84..f40e9c67 100644 --- a/mlem/cli/import_object.py +++ b/mlem/cli/import_object.py @@ -4,8 +4,6 @@ from mlem.cli.main import ( mlem_command, - option_external, - option_index, option_project, option_rev, option_target_project, @@ -26,21 +24,8 @@ def import_object( help="Whether to create a copy of file in target location or just link existing file", ), type_: Optional[str] = Option(None, "--type", help=f"Specify how to read file Available types: {list_implementations(ImportHook)}", show_default="auto infer"), # type: ignore - index: bool = option_index, - external: bool = option_external, ): - """Create MLEM model or data metadata from file/dir - - Examples: - Create MLEM data from local csv - $ mlem import data/data.csv data/imported_data --type pandas[csv] - - Create MLEM model from local pickle file - $ mlem import data/model.pkl data/imported_model - - Create MLEM model from remote pickle file - $ mlem import models/logreg --project https://github.com/iterative/example-mlem --rev no-dvc data/imported_model --type pickle - """ + 
"""Create a `.mlem` metafile for a model or data in any file or directory.""" from mlem.api.commands import import_object import_object( @@ -51,6 +36,4 @@ def import_object( target_project=target_project, copy_data=copy, type_=type_, - external=external, - index=index, ) diff --git a/mlem/cli/info.py b/mlem/cli/info.py index cef6ac6f..9145bee8 100644 --- a/mlem/cli/info.py +++ b/mlem/cli/info.py @@ -4,13 +4,7 @@ from typer import Argument, Option -from mlem.cli.main import ( - Choices, - mlem_command, - option_json, - option_project, - option_rev, -) +from mlem.cli.main import mlem_command, option_json, option_project, option_rev from mlem.core.metadata import load_meta from mlem.core.objects import MLEM_EXT, MlemLink, MlemObject from mlem.ui import echo, set_echo @@ -39,68 +33,6 @@ def _print_objects_of_type(cls: Type[MlemObject], objects: List[MlemObject]): echo("", "-", meta.name, *[link] if link else []) -TYPE_ALIASES = { - "models": "model", -} - - -@mlem_command("list", aliases=["ls"], section="common") -def ls( - type_filter: Choices("all", *MlemObject.non_abstract_subtypes().keys()) = Option( # type: ignore[valid-type] - "all", - "-t", - "--type", - help="Type of objects to list", - ), - project: str = Argument( - "", help="Project to list from", show_default="current directory" - ), - rev: Optional[str] = option_rev, - links: bool = Option( - True, "+l/-l", "--links/--no-links", help="Include links" - ), - json: bool = option_json, - ignore_errors: bool = Option( - False, "-i", "--ignore-errors", help="Ignore corrupted objects" - ), -): - """List MLEM objects of in project - - Examples: - $ mlem list https://github.com/iterative/example-mlem - $ mlem list -t models - """ - from mlem.api.commands import ls - - if type_filter == "all": - types = None - else: - types = MlemObject.__type_map__[ - TYPE_ALIASES.get(type_filter, type_filter) - ] - - objects = ls( - project or ".", - rev=rev, - type_filter=types, - include_links=links, - 
ignore_errors=ignore_errors, - ) - if json: - print( - dumps( - { - cls.object_type: [obj.dict() for obj in objs] - for cls, objs in objects.items() - } - ) - ) - else: - for cls, objs in objects.items(): - _print_objects_of_type(cls, objs) - return {"type_filter": type_filter.value} - - @mlem_command("pprint", hidden=True) def pretty_print( path: str = Argument(..., help="Path to object"), @@ -114,14 +46,8 @@ def pretty_print( ), json: bool = option_json, ): - """Print specified MLEM object - - Examples: - Print local object - $ mlem pprint mymodel - - Print remote object - $ mlem pprint https://github.com/iterative/example-mlem/models/logreg + """Display all details about a specific MLEM Object from an existing MLEM + project. """ with set_echo(None if json else ...): meta = load_meta( diff --git a/mlem/cli/init.py b/mlem/cli/init.py index 1f881e38..0d53720d 100644 --- a/mlem/cli/init.py +++ b/mlem/cli/init.py @@ -1,19 +1,18 @@ from typer import Argument -from mlem.cli.main import mlem_command +from mlem.cli.main import PATH_METAVAR, mlem_command @mlem_command("init", section="common") def init( - path: str = Argument(".", help="Where to init project", show_default=False) + path: str = Argument( + ".", + help="Where to init project", + show_default=False, + metavar=PATH_METAVAR, + ) ): - """Initialize MLEM project - - Examples: - $ mlem init - $ mlem init some/local/path - $ mlem init s3://bucket/path/in/cloud - """ + """Initialize a MLEM project.""" from mlem.api.commands import init init(path) diff --git a/mlem/cli/link.py b/mlem/cli/link.py index 2cdf7c35..b4eb43b1 100644 --- a/mlem/cli/link.py +++ b/mlem/cli/link.py @@ -3,8 +3,8 @@ from typer import Argument, Option from mlem.cli.main import ( + PATH_METAVAR, mlem_command, - option_external, option_rev, option_target_project, ) @@ -12,17 +12,19 @@ @mlem_command("link", section="object") def link( - source: str = Argument(..., help="URI to object you are crating link to"), + source: str = Argument( + ..., 
help="URI of the MLEM object you are creating a link to" + ), target: str = Argument(..., help="Path to save link object"), source_project: Optional[str] = Option( None, "--source-project", "--sp", help="Project for source object", + metavar=PATH_METAVAR, ), rev: Optional[str] = option_rev, target_project: Optional[str] = option_target_project, - external: bool = option_external, follow_links: bool = Option( True, "--follow-links/--no-follow-links", @@ -36,14 +38,8 @@ def link( help="Which path to linked object to specify: absolute or relative.", ), ): - """Create link for MLEM object - - Examples: - Add alias to local object - $ mlem link my_model latest - - Add remote object to your project without copy - $ mlem link models/logreg --source-project https://github.com/iteartive/example-mlem remote_model + """Create a link (read alias) for an existing MLEM Object, including from + remote MLEM projects. """ from mlem.api.commands import link @@ -54,6 +50,5 @@ def link( target=target, target_project=target_project, follow_links=follow_links, - external=external or False, absolute=absolute, ) diff --git a/mlem/cli/main.py b/mlem/cli/main.py index 1d50110f..31a5b03f 100644 --- a/mlem/cli/main.py +++ b/mlem/cli/main.py @@ -1,27 +1,38 @@ -import contextlib +import inspect import logging -import typing as t from collections import defaultdict -from enum import Enum, EnumMeta from functools import partial, wraps from gettext import gettext -from typing import List, Optional, Tuple, Type +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Sequence, + Union, +) +import click import typer -from click import Abort, ClickException, Command, HelpFormatter, pass_context -from click.exceptions import Exit -from pydantic import BaseModel, MissingError, ValidationError, parse_obj_as -from pydantic.error_wrappers import ErrorWrapper +from click import Abort, ClickException, Command, HelpFormatter, Parameter +from click.exceptions import Exit, 
MissingParameter +from pydantic import ValidationError from typer import Context, Option, Typer from typer.core import TyperCommand, TyperGroup -from yaml import safe_load from mlem import LOCAL_CONFIG, version -from mlem.constants import MLEM_DIR, PREDICT_METHOD_NAME -from mlem.core.base import MlemABC, build_mlem_object +from mlem.cli.utils import ( + FILE_CONF_PARAM_NAME, + LOAD_PARAM_NAME, + NOT_SET, + CallContext, + _format_validation_error, + get_extra_keys, +) +from mlem.constants import PREDICT_METHOD_NAME from mlem.core.errors import MlemError -from mlem.core.metadata import load_meta -from mlem.core.objects import MlemObject from mlem.telemetry import telemetry from mlem.ui import ( EMOJI_FAIL, @@ -30,9 +41,13 @@ cli_echo, color, echo, + no_echo, stderr_echo, ) +PATH_METAVAR = "path" +COMMITISH_METAVAR = "commitish" + class MlemFormatter(HelpFormatter): def write_heading(self, heading: str) -> None: @@ -43,18 +58,16 @@ class MlemMixin(Command): def __init__( self, *args, - examples: Optional[str], section: str = "other", aliases: List[str] = None, **kwargs, ): super().__init__(*args, **kwargs) - self.examples = examples self.section = section self.aliases = aliases self.rich_help_panel = section.capitalize() - def collect_usage_pieces(self, ctx: Context) -> t.List[str]: + def collect_usage_pieces(self, ctx: Context) -> List[str]: return [p.lower() for p in super().collect_usage_pieces(ctx)] def get_help(self, ctx: Context) -> str: @@ -68,11 +81,17 @@ def get_help(self, ctx: Context) -> str: self.format_help(ctx, formatter) return formatter.getvalue().rstrip("\n") - def format_epilog(self, ctx: Context, formatter: HelpFormatter) -> None: - super().format_epilog(ctx, formatter) - if self.examples: - with formatter.section("Examples"): - formatter.write(self.examples) + def _get_cmd_name_for_docs_link(self): + ctx = click.get_current_context() + return get_cmd_name(ctx, no_aliases=True, sep="/") + + @staticmethod + def _add_docs_link(help, cmd_name): + 
return ( + help + if "Documentation" in help + else f"{help}\n\nDocumentation: " + ) class MlemCommand( @@ -85,38 +104,118 @@ def __init__( section: str = "other", aliases: List[str] = None, help: Optional[str] = None, + dynamic_options_generator: Callable[ + [CallContext], Iterable[Parameter] + ] = None, + dynamic_metavar: str = None, + lazy_help: Optional[Callable[[], str]] = None, + pass_from_parent: Optional[List[str]] = None, **kwargs, ): - examples, help = _extract_examples(help) + self.dynamic_metavar = dynamic_metavar + self.dynamic_options_generator = dynamic_options_generator + self._help = help + self.lazy_help = lazy_help + self.pass_from_parent = pass_from_parent super().__init__( name=name, section=section, aliases=aliases, - examples=examples, help=help, **kwargs, ) + def make_context( + self, + info_name: Optional[str], + args: List[str], + parent: Optional[Context] = None, + **extra: Any, + ) -> Context: + args_copy = args[:] + ctx = super().make_context(info_name, args, parent, **extra) + if not self.dynamic_options_generator: + return ctx + extra_args = ctx.args + params = ctx.params.copy() + while extra_args: + ctx.params = params + ctx.args = args_copy[:] + with ctx.scope(cleanup=False): + self.parse_args(ctx, args_copy[:]) + params.update(ctx.params) + + if ctx.args == extra_args: + break + extra_args = ctx.args + + return ctx + + def invoke(self, ctx: Context) -> Any: + ctx.params = {k: v for k, v in ctx.params.items() if v != NOT_SET} + return super().invoke(ctx) + + def get_params(self, ctx) -> List["Parameter"]: + regular_options = super().get_params(ctx) + res: List[Parameter] = ( + list( + self.dynamic_options_generator( + CallContext( + ctx.params, + get_extra_keys(ctx.args), + [o.name for o in regular_options], + ) + ) + ) + if self.dynamic_options_generator is not None + else [] + ) + regular_options + + if self.dynamic_metavar is not None: + kw_param = [p for p in res if p.name == self.dynamic_metavar] + if len(kw_param) > 0: + 
res.remove(kw_param[0]) + if self.pass_from_parent is not None: + res = [ + o + for o in res + if o.name not in self.pass_from_parent + or o.name not in ctx.parent.params + or ctx.parent.params[o.name] is None + ] + return res + + @property + def help(self): + cmd_name = self._get_cmd_name_for_docs_link() + if self.lazy_help: + if "/" in cmd_name: + cmd_name = cmd_name[: cmd_name.index("/")] + return self._add_docs_link(self.lazy_help(), cmd_name) + return self._add_docs_link(self._help, cmd_name) + + @help.setter + def help(self, value): + self._help = value + class MlemGroup(MlemMixin, TyperGroup): order = ["common", "object", "runtime", "other"] def __init__( self, - name: t.Optional[str] = None, - commands: t.Optional[ - t.Union[t.Dict[str, Command], t.Sequence[Command]] + name: Optional[str] = None, + commands: Optional[ + Union[Dict[str, Command], Sequence[Command]] ] = None, section: str = "other", aliases: List[str] = None, help: str = None, - **attrs: t.Any, + **attrs: Any, ) -> None: - examples, help = _extract_examples(help) super().__init__( name=name, help=help, - examples=examples, aliases=aliases, section=section, commands=commands, @@ -160,7 +259,7 @@ def format_commands(self, ctx: Context, formatter: HelpFormatter) -> None: ): formatter.write_dl(sections[section]) - def get_command(self, ctx: Context, cmd_name: str) -> t.Optional[Command]: + def get_command(self, ctx: Context, cmd_name: str) -> Optional[Command]: cmd = super().get_command(ctx, cmd_name) if cmd is not None: return cmd @@ -174,6 +273,17 @@ def get_command(self, ctx: Context, cmd_name: str) -> t.Optional[Command]: return cmd return None + @property + def help(self): + cmd_name = self._get_cmd_name_for_docs_link() + if "/" in cmd_name: + cmd_name = cmd_name[: cmd_name.index("/")] + return self._add_docs_link(self._help, cmd_name) + + @help.setter + def help(self, value): + self._help = value + def mlem_group(section, aliases: Optional[List[str]] = None): class 
MlemGroupSection(MlemGroup): @@ -183,25 +293,29 @@ def __init__(self, *args, **kwargs): return MlemGroupSection -class ChoicesMeta(EnumMeta): - def __call__(cls, *names, module=None, qualname=None, type=None, start=1): - if len(names) == 1: - return super().__call__(names[0]) - return super().__call__( - "Choice", - names, - module=module, - qualname=qualname, - type=type, - start=start, +def mlem_group_callback(group: Typer, required: Optional[List[str]] = None): + def decorator(f): + @wraps(f) + def inner(*args, **kwargs): + ctx = click.get_current_context() + if ctx.invoked_subcommand is not None: + return None + if required is not None: + for req in required: + if req not in kwargs or kwargs[req] is None: + param = [ + p + for p in ctx.command.get_params(ctx) + if p.name == req + ][0] + raise MissingParameter(ctx=ctx, param=param) + return f(*args, **kwargs) + + return group.callback(invoke_without_command=True)( + wrap_mlem_cli_call(inner, None) ) - -class Choices(str, Enum, metaclass=ChoicesMeta): - def _generate_next_value_( # pylint: disable=no-self-argument - name, start, count, last_values - ): - return name + return decorator app = Typer( @@ -223,21 +337,15 @@ def mlem_callback( False, "--verbose", "-v", help="Print debug messages" ), traceback: bool = Option(False, "--traceback", "--tb", hidden=True), + quiet: bool = Option(False, "--quiet", "-q", help="Suppress output"), ): """\b MLEM is a tool to help you version and deploy your Machine Learning models: * Serialize any model trained in Python into ready-to-deploy format * Model lifecycle management using Git and GitOps principles * Provider-agnostic deployment - - Examples: - $ mlem init - $ mlem list https://github.com/iterative/example-mlem - $ mlem clone models/logreg --project https://github.com/iterative/example-mlem --rev main logreg - $ mlem link logreg latest - $ mlem apply latest https://github.com/iterative/example-mlem/data/test_x -o pred - $ mlem serve latest fastapi -c port=8001 - $ mlem 
build latest docker_dir -c target=build/ -c server.type=fastapi + \b + Documentation: """ if ctx.invoked_subcommand is None and show_version: with cli_echo(): @@ -246,19 +354,15 @@ def mlem_callback( logger = logging.getLogger("mlem") logger.handlers[0].setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG) - ctx.obj = {"traceback": traceback or LOCAL_CONFIG.DEBUG} + ctx.obj = {"traceback": traceback or LOCAL_CONFIG.DEBUG, "quiet": quiet} -def _extract_examples( - help_str: Optional[str], -) -> Tuple[Optional[str], Optional[str]]: - if help_str is None: - return None, None - try: - examples = help_str.index("Examples:") - except ValueError: - return None, help_str - return help_str[examples + len("Examples:") + 1 :], help_str[:examples] +def get_cmd_name(ctx: Context, no_aliases=False, sep=" "): + pieces = [] + while ctx.parent is not None: + pieces.append(ctx.command.name if no_aliases else ctx.info_name) + ctx = ctx.parent + return sep.join(reversed(pieces)) def mlem_command( @@ -267,72 +371,115 @@ def mlem_command( aliases=None, options_metavar="[options]", parent=app, + mlem_cls=None, + dynamic_metavar=None, + dynamic_options_generator=None, + lazy_help=None, + pass_from_parent: Optional[List[str]] = None, + no_pass_from_parent: Optional[List[str]] = None, **kwargs, ): def decorator(f): - if len(args) > 0: - cmd_name = args[0] + context_settings = kwargs.get("context_settings", {}) + if dynamic_options_generator: + context_settings.update( + {"allow_extra_args": True, "ignore_unknown_options": True} + ) + if no_pass_from_parent is not None: + _pass_from_parent = [ + a + for a in inspect.getfullargspec(f).args + if a not in no_pass_from_parent + ] else: - cmd_name = kwargs.get("name", f.__name__) - - @parent.command( + _pass_from_parent = pass_from_parent + call = wrap_mlem_cli_call(f, _pass_from_parent) + return parent.command( *args, options_metavar=options_metavar, + context_settings=context_settings, **kwargs, - cls=partial(MlemCommand, section=section, 
aliases=aliases), - ) - @wraps(f) - @pass_context - def inner(ctx, *iargs, **ikwargs): - res = {} - error = None - try: - with cli_echo(): - res = f(*iargs, **ikwargs) or {} - res = {f"cmd_{cmd_name}_{k}": v for k, v in res.items()} - except (ClickException, Exit, Abort) as e: - error = f"{e.__class__.__module__}.{e.__class__.__name__}" + cls=partial( + mlem_cls or MlemCommand, + section=section, + aliases=aliases, + dynamic_options_generator=dynamic_options_generator, + dynamic_metavar=dynamic_metavar, + lazy_help=lazy_help, + pass_from_parent=pass_from_parent, + ), + )(call) + + return decorator + + +def wrap_mlem_cli_call(f, pass_from_parent: Optional[List[str]]): + @wraps(f) + def inner(*iargs, **ikwargs): + res = {} + error = None + ctx = click.get_current_context() + cmd_name = get_cmd_name(ctx) + try: + if pass_from_parent is not None: + ikwargs.update( + { + o: ctx.parent.params[o] + for o in pass_from_parent + if o in ctx.parent.params + and (o not in ikwargs or ikwargs[o] is None) + } + ) + with (cli_echo() if not ctx.obj["quiet"] else no_echo()): + res = f(*iargs, **ikwargs) or {} + res = {f"cmd_{cmd_name}_{k}": v for k, v in res.items()} + except (ClickException, Exit, Abort) as e: + error = f"{e.__class__.__module__}.{e.__class__.__name__}" + raise + except MlemError as e: + error = f"{e.__class__.__module__}.{e.__class__.__name__}" + if ctx.obj["traceback"]: raise - except MlemError as e: - error = f"{e.__class__.__module__}.{e.__class__.__name__}" - if ctx.obj["traceback"]: - raise - with stderr_echo(): - echo(EMOJI_FAIL + color(str(e), col=typer.colors.RED)) - raise typer.Exit(1) - except ValidationError as e: - error = f"{e.__class__.__module__}.{e.__class__.__name__}" - if ctx.obj["traceback"]: - raise - msgs = "\n".join(_format_validation_error(e)) - with stderr_echo(): - echo(EMOJI_FAIL + color("Error:\n", "red") + msgs) - raise typer.Exit(1) - except Exception as e: # pylint: disable=broad-except - error = 
f"{e.__class__.__module__}.{e.__class__.__name__}" - if ctx.obj["traceback"]: - raise - with stderr_echo(): - echo( - EMOJI_FAIL - + color( - "Unexpected error: " + str(e), col=typer.colors.RED - ) - ) - echo( - "Please report it here: " + with stderr_echo(): + echo(EMOJI_FAIL + color(str(e), col=typer.colors.RED)) + raise typer.Exit(1) + except ValidationError as e: + error = f"{e.__class__.__module__}.{e.__class__.__name__}" + if ctx.obj["traceback"]: + raise + msgs = "\n".join(_format_validation_error(e)) + with stderr_echo(): + echo(EMOJI_FAIL + color("Error:\n", "red") + msgs) + raise typer.Exit(1) + except Exception as e: # pylint: disable=broad-except + error = f"{e.__class__.__module__}.{e.__class__.__name__}" + if ctx.obj["traceback"]: + raise + with stderr_echo(): + echo( + EMOJI_FAIL + + color( + "Unexpected error: " + str(e), col=typer.colors.RED ) - raise typer.Exit(1) - finally: + ) + echo( + "Please report it here: " + ) + raise typer.Exit(1) + finally: + if error is not None or ctx.invoked_subcommand is None: telemetry.send_cli_call(cmd_name, error=error, **res) - return inner - - return decorator + return inner option_project = Option( - None, "-p", "--project", help="Path to MLEM project", show_default="none" # type: ignore + None, + "-p", + "--project", + help="Path to MLEM project", + metavar=PATH_METAVAR, + show_default="none", # type: ignore ) option_method = Option( PREDICT_METHOD_NAME, @@ -340,54 +487,64 @@ def inner(ctx, *iargs, **ikwargs): "--method", help="Which model method is to be applied", ) -option_rev = Option(None, "--rev", help="Repo revision to use", show_default="none") # type: ignore -option_index = Option( - None, - "--index/--no-index", - help="Whether to index output in .mlem directory", -) -option_external = Option( - None, - "--external", - "-e", - is_flag=True, - help=f"Save result not in {MLEM_DIR}, but directly in project", -) +option_rev = Option(None, "--rev", help="Repo revision to use", show_default="none", 
metavar=COMMITISH_METAVAR) # type: ignore option_target_project = Option( None, "--target-project", "--tp", help="Project to save target to", + metavar=PATH_METAVAR, show_default="none", # type: ignore ) option_json = Option(False, "--json", help="Output as json") option_data_project = Option( None, "--data-project", - "--dr", + "--dp", + metavar=PATH_METAVAR, help="Project with data", ) option_data_rev = Option( None, "--data-rev", + "--dr", help="Revision of data", + metavar=COMMITISH_METAVAR, +) +option_model_project = Option( + None, + "--model-project", + "--mp", + metavar=PATH_METAVAR, + help="Project with model", +) +option_model_rev = Option( + None, + "--model-rev", + "--mr", + help="Revision of model", + metavar=COMMITISH_METAVAR, +) +option_model = Option( + ..., + "-m", + "--model", + help="Path to MLEM model", + metavar=PATH_METAVAR, +) +option_data = Option( + ..., "-d", "--data", help="Path to MLEM dataset", metavar=PATH_METAVAR ) def option_load(type_: str = None): type_ = type_ + " " if type_ is not None else "" - return Option( - None, "-l", "--load", help=f"File to load {type_}config from" - ) - - -def option_conf(type_: str = None): - type_ = f"for {type_} " if type_ is not None else "" return Option( None, - "-c", - "--conf", - help=f"Options {type_}in format `field.name=value`", + "-l", + f"--{LOAD_PARAM_NAME}", + help=f"File to load {type_}config from", + metavar=PATH_METAVAR, ) @@ -396,91 +553,6 @@ def option_file_conf(type_: str = None): return Option( None, "-f", - "--file_conf", + f"--{FILE_CONF_PARAM_NAME}", help=f"File with options {type_}in format `field.name=path_to_config`", ) - - -def _iter_errors( - errors: t.Sequence[t.Any], model: Type, loc: Optional[Tuple] = None -): - for error in errors: - if isinstance(error, ErrorWrapper): - - if loc: - error_loc = loc + error.loc_tuple() - else: - error_loc = error.loc_tuple() - - if isinstance(error.exc, ValidationError): - yield from _iter_errors( - error.exc.raw_errors, error.exc.model, 
error_loc - ) - else: - yield error_loc, model, error.exc - - -def _format_validation_error(error: ValidationError) -> List[str]: - res = [] - for loc, model, exc in _iter_errors(error.raw_errors, error.model): - path = ".".join(loc_part for loc_part in loc if loc_part != "__root__") - field_name = loc[-1] - if field_name not in model.__fields__: - res.append( - f"Unknown field '{field_name}'. Fields available: {', '.join(model.__fields__)}" - ) - continue - field_type = model.__fields__[field_name].type_ - if ( - isinstance(exc, MissingError) - and isinstance(field_type, type) - and issubclass(field_type, BaseModel) - ): - msgs = [ - str(EMOJI_FAIL + f"field `{path}.{f.name}`: {exc}") - for f in field_type.__fields__.values() - if f.required - ] - if msgs: - res.extend(msgs) - else: - res.append(str(EMOJI_FAIL + f"field `{path}`: {exc}")) - else: - res.append(str(EMOJI_FAIL + f"field `{path}`: {exc}")) - return res - - -@contextlib.contextmanager -def wrap_build_error(subtype, model: Type[MlemABC]): - try: - yield - except ValidationError as e: - msgs = "\n".join(_format_validation_error(e)) - raise typer.BadParameter( - f"Error on constructing {subtype} {model.abs_name}:\n{msgs}" - ) from e - - -def config_arg( - model: Type[MlemABC], - load: Optional[str], - subtype: str, - conf: Optional[List[str]], - file_conf: Optional[List[str]], -): - obj: MlemABC - if load is not None: - if issubclass(model, MlemObject): - obj = load_meta(load, force_type=model) - else: - with open(load, "r", encoding="utf8") as of: - obj = parse_obj_as(model, safe_load(of)) - else: - if not subtype: - raise typer.BadParameter( - f"Cannot configure {model.abs_name}: either subtype or --load should be provided" - ) - with wrap_build_error(subtype, model): - obj = build_mlem_object(model, subtype, conf, file_conf) - - return obj diff --git a/mlem/cli/serve.py b/mlem/cli/serve.py index 0ca81c51..10fb010a 100644 --- a/mlem/cli/serve.py +++ b/mlem/cli/serve.py @@ -1,42 +1,88 @@ from typing 
import List, Optional -from typer import Argument +from typer import Typer from mlem.cli.main import ( - config_arg, + app, mlem_command, - option_conf, + mlem_group, + mlem_group_callback, option_file_conf, option_load, + option_model, option_project, option_rev, ) +from mlem.cli.utils import ( + abc_fields_parameters, + config_arg, + for_each_impl, + lazy_class_docstring, + make_not_required, +) from mlem.core.metadata import load_meta from mlem.core.objects import MlemModel from mlem.runtime.server import Server -from mlem.utils.entrypoints import list_implementations + +serve = Typer( + name="serve", + help="""Create an API from model methods using a server implementation.""", + cls=mlem_group("runtime"), + subcommand_metavar="server", +) +app.add_typer(serve) -@mlem_command("serve", section="runtime") -def serve( - model: str = Argument(..., help="Model to create service from"), - subtype: str = Argument( - "", help=f"Server type. Choices: {list_implementations(Server)}" - ), +@mlem_group_callback(serve, required=["model", "load"]) +def serve_load( + model: str = make_not_required(option_model), project: Optional[str] = option_project, rev: Optional[str] = option_rev, load: Optional[str] = option_load("server"), - conf: List[str] = option_conf("server"), - file_conf: List[str] = option_file_conf("server"), ): - """Serve selected model - - Examples: - $ mlem serve https://github.com/iterative/example-mlem/models/logreg fastapi - """ from mlem.api.commands import serve serve( load_meta(model, project, rev, force_type=MlemModel), - config_arg(Server, load, subtype, conf, file_conf), + config_arg( + Server, + load, + None, + conf=None, + file_conf=None, + ), ) + + +@for_each_impl(Server) +def create_serve_command(type_name): + @mlem_command( + type_name, + section="servers", + parent=serve, + dynamic_metavar="__kwargs__", + dynamic_options_generator=abc_fields_parameters(type_name, Server), + hidden=type_name.startswith("_"), + 
lazy_help=lazy_class_docstring(Server.abs_name, type_name), + no_pass_from_parent=["file_conf"], + ) + def serve_command( + model: str = option_model, + project: Optional[str] = option_project, + rev: Optional[str] = option_rev, + file_conf: List[str] = option_file_conf("server"), + **__kwargs__ + ): + from mlem.api.commands import serve + + serve( + load_meta(model, project, rev, force_type=MlemModel), + config_arg( + Server, + None, + type_name, + conf=None, + file_conf=file_conf, + **__kwargs__ + ), + ) diff --git a/mlem/cli/types.py b/mlem/cli/types.py index c3d56381..cba6b318 100644 --- a/mlem/cli/types.py +++ b/mlem/cli/types.py @@ -1,97 +1,117 @@ -from typing import Optional, Type +from typing import Iterator, Optional, Type from pydantic import BaseModel from typer import Argument from mlem.cli.main import mlem_command +from mlem.cli.utils import CliTypeField, iterate_type_fields, parse_type_field from mlem.core.base import MlemABC, load_impl_ext +from mlem.core.errors import MlemError from mlem.core.objects import MlemObject from mlem.ui import EMOJI_BASE, bold, color, echo -from mlem.utils.entrypoints import list_implementations +from mlem.utils.entrypoints import list_abstractions, list_implementations -def explain_type(cls: Type[BaseModel], prefix="", force_not_req=False): - for name, field in sorted( - cls.__fields__.items(), key=lambda x: not x[1].required - ): - if issubclass(cls, MlemObject) and name in MlemObject.__fields__: - continue - if issubclass(cls, MlemABC) and name in cls.__config__.exclude: - continue - fullname = name if not prefix else f"{prefix}.{name}" - module = field.type_.__module__ - type_name = getattr(field.type_, "__name__", str(field.type_)) - if module != "builtins" and "." 
not in type_name: - type_name = f"{module}.{type_name}" - type_name = color(type_name, "yellow") - - if field.required and not force_not_req: - req = color("[required] ", "grey") - else: - req = color("[not required] ", "white") - if not field.required: - default = field.default - if isinstance(default, str): - default = f'"{default}"' - default = f" = {default}" - else: - default = "" - if ( - isinstance(field.type_, type) - and issubclass(field.type_, MlemABC) - and field.type_.__is_root__ - ): - echo( - req - + color(fullname, "green") - + ": One of " - + color(f"mlem types {field.type_.abs_name}", "yellow") +def _add_examples( + generator: Iterator[CliTypeField], + root_cls: Type[BaseModel], + parent_help=None, +): + for field in generator: + field.help = parent_help or field.help + yield field + if field.is_list or field.is_mapping: + key = ".key" if field.is_mapping else ".0" + yield from _add_examples( + parse_type_field( + path=field.path + key, + type_=field.type_, + help_=field.help, + is_list=False, + is_mapping=False, + required=False, + allow_none=False, + default=None, + root_cls=root_cls, + ), + root_cls=root_cls, + parent_help=f"Element of {field.path}", ) - elif isinstance(field.type_, type) and issubclass( - field.type_, BaseModel - ): - echo(req + color(fullname, "green") + ": " + type_name) - explain_type(field.type_, fullname, not field.required) - else: - echo(req + color(fullname, "green") + ": " + type_name + default) + + +def type_fields_with_collection_examples(cls): + yield from _add_examples(iterate_type_fields(cls), root_cls=cls) + + +def explain_type(cls: Type[BaseModel]): + echo( + color("Type ", "") + + color(cls.__module__ + ".", "yellow") + + color(cls.__name__, "green") + ) + if issubclass(cls, MlemABC): + echo(color("MlemABC parent type: ", "") + color(cls.abs_name, "green")) + echo(color("MlemABC type: ", "") + color(cls.__get_alias__(), "green")) + if issubclass(cls, MlemObject): + echo( + color("MlemObject type name: ", "") + 
+ color(cls.object_type, "green") + ) + echo((cls.__doc__ or "Class docstring missing").strip()) + fields = list(type_fields_with_collection_examples(cls)) + if not fields: + echo("No fields") + else: + echo("Fields:") + for field in fields: + echo(field.to_text()) @mlem_command("types", hidden=True) def list_types( abc: Optional[str] = Argument( None, - help="Subtype to list implementations. List subtypes if not provided", + help="Subtype to list implementations. List subtypes if not provided.", ), sub_type: Optional[str] = Argument(None, help="Type of `meta` subtype"), ): - """List MLEM types implementations available in current env. - If subtype is not provided, list ABCs - - Examples: - List ABCs - $ mlem types - - List available server implementations - $ mlem types server + """List different implementations available for a particular MLEM type. If a + subtype is not provided, list all available MLEM types. """ if abc is None: for at in MlemABC.abs_types.values(): echo(EMOJI_BASE + bold(at.abs_name) + ":") echo( - f"\tBase class: {at.__module__}.{at.__name__}\n\t{at.__doc__.strip()}" + f"\tBase class: {at.__module__}.{at.__name__}\n\t{(at.__doc__ or 'Class docstring missing').strip()}" ) elif abc == MlemObject.abs_name: if sub_type is None: - echo(list(MlemObject.non_abstract_subtypes().keys())) + echo("\n".join(MlemObject.non_abstract_subtypes().keys())) else: - echo( - list_implementations( - MlemObject, MlemObject.non_abstract_subtypes()[sub_type] + mlem_object_type = MlemObject.non_abstract_subtypes()[sub_type] + if mlem_object_type.__is_root__: + echo( + "\n".join( + list_implementations( + MlemObject, mlem_object_type, include_hidden=False + ) + ) ) - ) + else: + explain_type(mlem_object_type) else: if sub_type is None: - echo(list_implementations(abc)) + abcs = list_abstractions(include_hidden=False) + if abc not in abcs: + raise MlemError( + f"Unknown abc \"{abc}\". 
Known abcs: {' '.join(abcs)}" + ) + echo("\n".join(list_implementations(abc, include_hidden=False))) else: - cls = load_impl_ext(abc, sub_type, True) + try: + cls = load_impl_ext(abc, sub_type, True) + except ValueError as e: + raise MlemError( + f"Unknown implementation \"{sub_type}\" of abc \"{abc}\". Known implementations: {' '.join(list_implementations(abc, include_hidden=False))}" + ) from e explain_type(cls) diff --git a/mlem/cli/utils.py b/mlem/cli/utils.py new file mode 100644 index 00000000..e01ccce5 --- /dev/null +++ b/mlem/cli/utils.py @@ -0,0 +1,630 @@ +import ast +import contextlib +import copy +import inspect +from dataclasses import dataclass +from enum import Enum, EnumMeta +from functools import lru_cache +from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Type + +import typer +from click import Context, MissingParameter +from pydantic import ( + BaseModel, + MissingError, + ValidationError, + create_model, + parse_obj_as, +) +from pydantic.error_wrappers import ErrorWrapper +from pydantic.fields import ( + MAPPING_LIKE_SHAPES, + SHAPE_LIST, + SHAPE_SEQUENCE, + SHAPE_SET, + SHAPE_TUPLE, + SHAPE_TUPLE_ELLIPSIS, + ModelField, +) +from pydantic.typing import display_as_type, get_args, is_union +from typer.core import TyperOption +from typing_extensions import get_origin +from yaml import safe_load + +from mlem import LOCAL_CONFIG +from mlem.core.base import ( + MlemABC, + build_mlem_object, + load_impl_ext, + smart_split, +) +from mlem.core.errors import ExtensionRequirementError, MlemObjectNotFound +from mlem.core.meta_io import Location +from mlem.core.metadata import load_meta +from mlem.core.objects import MlemObject +from mlem.ui import EMOJI_FAIL, color +from mlem.utils.entrypoints import list_implementations +from mlem.utils.module import lstrip_lines + +LIST_LIKE_SHAPES = ( + SHAPE_LIST, + SHAPE_TUPLE, + SHAPE_SET, + SHAPE_TUPLE_ELLIPSIS, + SHAPE_SEQUENCE, +) + + +class ChoicesMeta(EnumMeta): + def __call__(cls, *names, 
module=None, qualname=None, type=None, start=1): + if len(names) == 1: + return super().__call__(names[0]) + return super().__call__( + "Choice", + names, + module=module, + qualname=qualname, + type=type, + start=start, + ) + + +class Choices(str, Enum, metaclass=ChoicesMeta): + def _generate_next_value_( # pylint: disable=no-self-argument + name, start, count, last_values + ): + return name + + +class CliTypeField(BaseModel): + """A descriptor of model field to build cli option""" + + path: str + """a.dotted.path from schema root""" + required: bool + allow_none: bool + type_: Type + help: str + default: Any + is_list: bool + is_mapping: bool + mapping_key_type: Optional[Type] + + @property + def type_repr(self): + type_name = self.type_.__name__ + if self.is_list: + return f"List[{type_name}]" + if self.is_mapping: + return f"Dict[{self.mapping_key_type.__name__}, {type_name}]" + return type_name + + def to_text(self): + req = ( + color("[required]", "") + if self.required + else color("[not required]", "white") + ) + if not self.required: + default = self.default + if isinstance(default, str): + default = f'"{default}"' + default = f" = {default}" + else: + default = "" + return ( + req + + " " + + color(self.path, "green") + + ": " + + self.type_repr + + default + + "\n\t" + + self.help + ) + + +@lru_cache() +def get_attribute_docstrings(cls) -> Dict[str, str]: + """Parses cls source to find all classfields followed by docstring expr""" + res = {} + tree = ast.parse(lstrip_lines(inspect.getsource(cls))) + class_def = tree.body[0] + assert isinstance(class_def, ast.ClassDef) + field: Optional[str] = None + for statement in class_def.body: + if isinstance(statement, ast.AnnAssign) and isinstance( + statement.target, ast.Name + ): + field = statement.target.id + continue + if ( + isinstance(statement, ast.Assign) + and len(statement.targets) == 1 + and isinstance(statement.targets[0], ast.Name) + ): + field = statement.targets[0].id + continue + if field is not 
None and isinstance(statement, ast.Expr): + if isinstance(statement.value, ast.Constant) and isinstance( + statement.value.value, str + ): + res[field] = statement.value.value + if isinstance(statement.value, ast.Str): + res[field] = statement.value.s + field = None + return res + + +@lru_cache() +def get_field_help(cls: Type, field_name: str): + """Parses all class mro to find classfield docstring""" + for base_cls in cls.mro(): + if base_cls is object: + continue + try: + docsting = get_attribute_docstrings(base_cls).get(field_name) + if docsting: + return docsting + except OSError: + pass + return "Field docstring missing" + + +def _get_type_name_alias(type_): + if not isinstance(type_, type): + type_ = get_origin(type_) + return type_.__name__ if type_ is not None else "any" + + +def anything(type_): + """Creates special type that is named as original type or collection type + It returns original object on creation and is needed for nice typename in cli option help""" + return type( + _get_type_name_alias(type_), (), {"__new__": lambda cls, value: value} + ) + + +def optional(type_): + """Creates special type that is named as original type or collection type + It allows use string `None` to indicate None value""" + return type( + _get_type_name_alias(type_), + (), + { + "__new__": lambda cls, value: None + if value == "None" + else type_(value) + }, + ) + + +def parse_type_field( + path: str, + type_: Type, + help_: str, + is_list: bool, + is_mapping: bool, + required: bool, + allow_none: bool, + default: Any, + root_cls: Type[BaseModel], +) -> Iterator[CliTypeField]: + """Recursively creates CliTypeFields from field description""" + if is_list or is_mapping: + # collection + yield CliTypeField( + required=required, + allow_none=allow_none, + path=path, + type_=type_, + default=default, + help=help_, + is_list=is_list, + is_mapping=is_mapping, + mapping_key_type=str, + ) + return + + if ( + isinstance(type_, type) + and issubclass(type_, MlemABC) + and 
type_.__is_root__ + ): + # mlem abstraction: substitute default and extend help + if isinstance(default, type_): + default = default.__get_alias__() + yield CliTypeField( + required=required, + allow_none=allow_none, + path=path, + type_=type_, + help=f"{help_}. One of {list_implementations(type_, include_hidden=False)}. Run 'mlem types {type_.abs_name} ' for list of nested fields for each subtype", + default=default, + is_list=is_list, + is_mapping=is_mapping, + mapping_key_type=str, + ) + return + if isinstance(type_, type) and issubclass(type_, BaseModel): + # BaseModel (including MlemABC non-root classes): reqursively get nested + yield from iterate_type_fields(type_, path, not required, root_cls) + return + # probably primitive field + yield CliTypeField( + required=required, + allow_none=allow_none, + path=path, + type_=type_, + default=default, + help=help_, + is_list=is_list, + is_mapping=is_mapping, + mapping_key_type=str, + ) + + +def iterate_type_fields( + cls: Type[BaseModel], + path: str = "", + force_not_req: bool = False, + root_cls: Type[BaseModel] = None, +) -> Iterator[CliTypeField]: + """Recursively get CliTypeFields from BaseModel""" + if cls is root_cls: + # avoid infinite recursion + return + root_cls = root_cls or cls + field: ModelField + for name, field in sorted( + cls.__fields__.items(), key=lambda x: not x[1].required + ): + name = field.alias or name + if issubclass(cls, MlemObject) and name in MlemObject.__fields__: + # Skip base MlemObject fields + continue + if ( + issubclass(cls, MlemABC) + and name in cls.__config__.exclude + or field.field_info.exclude + ): + # Skip excluded fields + continue + if name == "__root__": + fullname = path + else: + fullname = name if not path else f"{path}.{name}" + + field_type = field.type_ + # field.type_ is element type for collections/mappings + + if not isinstance(field_type, type): + # Handle generics. 
Probably will break in complex cases + origin = get_origin(field_type) + if is_union(origin): + # get first type for union + generic_args = get_args(field_type) + field_type = generic_args[0] + if origin is list or origin is dict: + # replace with dynamic __root__: Dict/List model + field_type = create_model( + display_as_type(field_type), __root__=(field_type, ...) + ) + if field_type is Any: + field_type = anything(field_type) + + if not isinstance(field_type, type): + # skip too complicated stuff + continue + + yield from parse_type_field( + path=fullname, + type_=field_type, + help_=get_field_help(cls, name), + is_list=field.shape in LIST_LIKE_SHAPES, + is_mapping=field.shape in MAPPING_LIKE_SHAPES, + required=not force_not_req and bool(field.required), + allow_none=field.allow_none, + default=field.default, + root_cls=root_cls, + ) + + +@dataclass +class CallContext: + params: Dict[str, Any] + extra_keys: List[str] + regular_options: List[str] + + +def _options_from_model( + cls: Type[BaseModel], + ctx: CallContext, + path="", + force_not_set: bool = False, +) -> Iterator[TyperOption]: + """Generate additional cli options from model field""" + for field in iterate_type_fields(cls, path=path): + path = field.path + if path in ctx.regular_options: + # add dot if path shadows existing parameter + # it will be ignored on model building + path = f".{path}" + + if field.is_list: + yield from _options_from_list(path, field, ctx) + continue + if field.is_mapping: + yield from _options_from_mapping(path, field, ctx) + continue + if issubclass(field.type_, MlemABC) and field.type_.__is_root__: + yield from _options_from_mlem_abc( + ctx, field, path, force_not_set=force_not_set + ) + continue + + yield _option_from_field(field, path, force_not_set=force_not_set) + + +def _options_from_mlem_abc( + ctx: CallContext, + field: CliTypeField, + path: str, + force_not_set: bool = False, +): + """Generate str option for mlem abc type. 
+ If param is already set, also generate respective implementation fields""" + assert issubclass(field.type_, MlemABC) and field.type_.__is_root__ + if ( + path in ctx.params + and ctx.params[path] != NOT_SET + and ctx.params[path] is not None + ): + yield from _options_from_model( + load_impl_ext(field.type_.abs_name, ctx.params[path]), + ctx, + path, + ) + yield _option_from_field( + field, path, override_type=str, force_not_set=force_not_set + ) + + +def _options_from_mapping(path: str, field: CliTypeField, ctx: CallContext): + """Generate options for mapping and example element. + If some keys are already set, also generate options for them""" + mapping_keys = [ + key[len(path) + 1 :].split(".", maxsplit=1)[0] + for key in ctx.extra_keys + if key.startswith(path + ".") + ] + for key in mapping_keys: + yield from _options_from_collection_element( + f"{path}.{key}", field, ctx + ) + + override_type = Dict[str, field.type_] # type: ignore[name-defined] + yield _option_from_field( + field, path, override_type=override_type, force_not_set=True + ) + yield from _options_from_collection_element( + f"{path}.key", field, ctx, force_not_set=True + ) + + +def _options_from_list(path: str, field: CliTypeField, ctx: CallContext): + """Generate option for list and example element. 
+ If some indexes are already set, also generate options for them""" + index = 0 + next_path = f"{path}.{index}" + while any(p.startswith(next_path) for p in ctx.params) and any( + v != NOT_SET for p, v in ctx.params.items() if p.startswith(next_path) + ): + yield from _options_from_collection_element(next_path, field, ctx) + index += 1 + next_path = f"{path}.{index}" + + override_type = List[field.type_] # type: ignore[name-defined] + yield _option_from_field( + field, path, override_type=override_type, force_not_set=True + ) + yield from _options_from_collection_element( + f"{path}.{index}", field, ctx, force_not_set=True + ) + + +def _options_from_collection_element( + path: str, + field: CliTypeField, + ctx: CallContext, + force_not_set: bool = False, +) -> Iterator[TyperOption]: + """Generate options for collection/mapping values""" + if issubclass(field.type_, MlemABC) and field.type_.__is_root__: + yield from _options_from_mlem_abc( + ctx, field, path, force_not_set=force_not_set + ) + return + if issubclass(field.type_, BaseModel): + yield from _options_from_model( + field.type_, ctx, path, force_not_set=force_not_set + ) + return + yield _option_from_field(field, path, force_not_set=force_not_set) + + +NOT_SET = "__NOT_SET__" +FILE_CONF_PARAM_NAME = "file_conf" +LOAD_PARAM_NAME = "load" + + +class SetViaFileTyperOption(TyperOption): + def process_value(self, ctx: Context, value: Any) -> Any: + try: + return super().process_value(ctx, value) + except MissingParameter: + if ( + LOAD_PARAM_NAME in ctx.params + or FILE_CONF_PARAM_NAME in ctx.params + and any( + smart_split(v, "=", 1)[0] == self.name + for v in ctx.params[FILE_CONF_PARAM_NAME] + ) + ): + return NOT_SET + raise + + +def _option_from_field( + field: CliTypeField, + path: str, + override_type: Type = None, + force_not_set: bool = False, +) -> TyperOption: + """Create cli option from field descriptor""" + type_ = override_type or field.type_ + if force_not_set: + type_ = anything(type_) + elif 
field.allow_none: + type_ = optional(type_) + option = SetViaFileTyperOption( + param_decls=[f"--{path}", path.replace(".", "_")], + type=type_ if not force_not_set else anything(type_), + required=field.required and not force_not_set, + default=field.default + if not field.is_list and not field.is_mapping and not force_not_set + else NOT_SET, + help=field.help, + show_default=not field.required, + ) + option.name = path + return option + + +def abc_fields_parameters(type_name: str, mlem_abc: Type[MlemABC]): + """Create a dynamic options generator that adds implementation fields""" + + def generator(ctx: CallContext): + try: + cls = load_impl_ext(mlem_abc.abs_name, type_name=type_name) + except ImportError: + return + yield from _options_from_model(cls, ctx) + + return generator + + +def get_extra_keys(args): + return [a[2:] for a in args if a.startswith("--")] + + +def lazy_class_docstring(abs_name: str, type_name: str): + def load_docstring(): + try: + return load_impl_ext(abs_name, type_name).__doc__ + except ExtensionRequirementError as e: + return f"Help unavailbale: {e}" + + return load_docstring + + +def for_each_impl(mlem_abc: Type[MlemABC]): + def inner(f): + for type_name in list_implementations(mlem_abc): + f(type_name) + return f + + return inner + + +def make_not_required(option: TyperOption): + option = copy.deepcopy(option) + option.required = False + option.default = None + return option + + +def _iter_errors( + errors: Sequence[Any], model: Type, loc: Optional[Tuple] = None +): + for error in errors: + if isinstance(error, ErrorWrapper): + + if loc: + error_loc = loc + error.loc_tuple() + else: + error_loc = error.loc_tuple() + + if isinstance(error.exc, ValidationError): + yield from _iter_errors( + error.exc.raw_errors, error.exc.model, error_loc + ) + else: + yield error_loc, model, error.exc + + +def _format_validation_error(error: ValidationError) -> List[str]: + res = [] + for loc, model, exc in _iter_errors(error.raw_errors, error.model): + 
path = ".".join(loc_part for loc_part in loc if loc_part != "__root__") + field_name = loc[-1] + if field_name not in model.__fields__: + res.append( + f"Unknown field '{field_name}'. Fields available: {', '.join(model.__fields__)}" + ) + continue + field_type = model.__fields__[field_name].type_ + if ( + isinstance(exc, MissingError) + and isinstance(field_type, type) + and issubclass(field_type, BaseModel) + ): + msgs = [ + str(EMOJI_FAIL + f"field `{path}.{f.name}`: {exc}") + for f in field_type.__fields__.values() + if f.required + ] + if msgs: + res.extend(msgs) + else: + res.append(str(EMOJI_FAIL + f"field `{path}`: {exc}")) + else: + res.append(str(EMOJI_FAIL + f"field `{path}`: {exc}")) + return res + + +@contextlib.contextmanager +def wrap_build_error(subtype, model: Type[MlemABC]): + try: + yield + except ValidationError as e: + if LOCAL_CONFIG.DEBUG: + raise + msgs = "\n".join(_format_validation_error(e)) + raise typer.BadParameter( + f"Error on constructing {subtype} {model.abs_name}:\n{msgs}" + ) from e + + +def config_arg( + model: Type[MlemABC], + load: Optional[str], + subtype: Optional[str], + conf: Optional[List[str]], + file_conf: Optional[List[str]], + **kwargs, +): + if load is not None: + if issubclass(model, MlemObject): + try: + return load_meta(load, force_type=model) + except MlemObjectNotFound: + pass + with Location.resolve(load).open("r", encoding="utf8") as of: + return parse_obj_as(model, safe_load(of)) + if not subtype: + raise typer.BadParameter( + f"Cannot configure {model.abs_name}: either subtype or --load should be provided" + ) + with wrap_build_error(subtype, model): + return build_mlem_object(model, subtype, conf, file_conf, kwargs) diff --git a/mlem/config.py b/mlem/config.py index 51afee87..691c2eff 100644 --- a/mlem/config.py +++ b/mlem/config.py @@ -10,12 +10,10 @@ from pydantic import BaseSettings, Field, parse_obj_as, root_validator from pydantic.env_settings import InitSettingsSource -from mlem.constants import 
MLEM_DIR +from mlem.constants import MLEM_CONFIG_FILE_NAME from mlem.core.errors import UnknownConfigSection from mlem.utils.entrypoints import MLEM_CONFIG_ENTRY_POINT, load_entrypoints -CONFIG_FILE_NAME = "config.yaml" - def _set_location_init_source(init_source: InitSettingsSource): def inner(settings: "MlemConfig"): @@ -41,7 +39,7 @@ def inner(settings: BaseSettings) -> Dict[str, Any]: project = find_project_root(config_path, fs=fs, raise_on_missing=False) if project is None: return {} - config_file = posixpath.join(project, MLEM_DIR, CONFIG_FILE_NAME) + config_file = posixpath.join(project, MLEM_CONFIG_FILE_NAME) if not fs.exists(config_file): return {} with fs.open(config_file, encoding=encoding) as f: @@ -116,9 +114,9 @@ class Config: NO_ANALYTICS: bool = False TESTS: bool = False STORAGE: Dict = {} - INDEX: Dict = {} - EXTERNAL: bool = False EMOJIS: bool = True + STATE: Dict = {} + SERVER: Dict = {} @property def storage(self): @@ -129,14 +127,6 @@ def storage(self): s = parse_obj_as(Storage, self.STORAGE) return s - @property - def index(self): - from mlem.core.index import Index, LinkIndex - - if not self.INDEX: - return LinkIndex() - return parse_obj_as(Index, self.INDEX) - @property def additional_extensions(self) -> List[str]: if self.ADDITIONAL_EXTENSIONS == "": @@ -145,6 +135,22 @@ def additional_extensions(self) -> List[str]: "," ) + @property + def state(self): + if not self.STATE: + return None + from mlem.core.objects import StateManager + + return parse_obj_as(StateManager, self.STATE) + + @property + def server(self): + from mlem.runtime.server import Server + + if not self.SERVER: + return parse_obj_as(Server, {"type": "fastapi"}) + return parse_obj_as(Server, self.SERVER) + LOCAL_CONFIG = MlemConfig() diff --git a/mlem/constants.py b/mlem/constants.py index dfe1af43..26f3f516 100644 --- a/mlem/constants.py +++ b/mlem/constants.py @@ -1,5 +1,8 @@ -MLEM_DIR = ".mlem" +MLEM_STATE_DIR = ".mlem.state" +MLEM_STATE_EXT = ".state" PREDICT_METHOD_NAME 
= "predict" PREDICT_PROBA_METHOD_NAME = "predict_proba" PREDICT_ARG_NAME = "data" + +MLEM_CONFIG_FILE_NAME = ".mlem.yaml" diff --git a/mlem/contrib/bitbucketfs.py b/mlem/contrib/bitbucketfs.py index 3e5ad75f..9575cbe0 100644 --- a/mlem/contrib/bitbucketfs.py +++ b/mlem/contrib/bitbucketfs.py @@ -1,5 +1,10 @@ +"""BitBucket URI support +Extension type: uri + +Implementation of `BitbucketFileSystem` and `BitbucketResolver` +""" import posixpath -from typing import ClassVar, List, Optional +from typing import ClassVar, Dict, Optional from urllib.parse import quote_plus, urljoin, urlparse, urlsplit import requests @@ -11,6 +16,7 @@ from mlem.config import MlemConfigBase from mlem.core.meta_io import CloudGitResolver +from mlem.utils.git import is_long_sha BITBUCKET_ORG = "https://bitbucket.org" @@ -28,6 +34,7 @@ def __init__( self.username = username self.password = password self.url = url + self.refs_cache: Dict[str, Dict[str, str]] = {} @property def auth(self): @@ -36,6 +43,7 @@ def auth(self): return None def tree(self, path: str, repo: str, rev: str): + rev = self.get_rev_sha(repo, rev) r = requests.get( urljoin( self.url, @@ -55,6 +63,7 @@ def get_default_branch(self, repo: str): return r.json()["mainbranch"]["name"] def open(self, path: str, repo: str, rev: str): + rev = self.get_rev_sha(repo, rev) r = requests.get( urljoin( self.url, @@ -65,13 +74,26 @@ def open(self, path: str, repo: str, rev: str): r.raise_for_status() return r.content - def get_refs(self, repo: str) -> List[str]: + def _get_refs(self, repo: str) -> Dict[str, str]: r = requests.get( urljoin(self.url, self.refs_endpoint.format(repo=repo)), auth=self.auth, ) r.raise_for_status() - return [v["name"] for v in r.json()["values"]] + return {v["name"]: v["target"]["hash"] for v in r.json()["values"]} + + def get_refs(self, repo: str) -> Dict[str, str]: + if repo not in self.refs_cache: + self.refs_cache[repo] = self._get_refs(repo) + return self.refs_cache[repo] + + def invalidate_cache(self): + 
self.refs_cache = {} + + def get_rev_sha(self, repo: str, rev: str): + if is_long_sha(rev): + return rev + return self.get_refs(repo).get(rev, rev) def check_rev(self, repo: str, rev: str) -> bool: r = requests.head( @@ -119,6 +141,7 @@ def __init__( def invalidate_cache(self, path=None): super().invalidate_cache(path) self.dircache.clear() + self.bb.invalidate_cache() def ls(self, path, detail=False, sha=None, **kwargs): path = self._strip_protocol(path) @@ -195,7 +218,7 @@ def _open( } -def ls_bb_refs(repo): +def ls_bb_refs(repo) -> Dict[str, str]: conf = BitbucketConfig.local() password = conf.PASSWORD username = conf.USERNAME @@ -222,11 +245,13 @@ def _mathch_path_with_ref(repo, path): class BitBucketResolver(CloudGitResolver): + """Resolve bitbucket URIs""" + type: ClassVar = "bitbucket" FS = BitBucketFileSystem PROTOCOL = "bitbucket" - # TODO: support on-prem gitlab (other hosts) + # TODO: https://github.com/iterative/mlem/issues/388 PREFIXES = [BITBUCKET_ORG, PROTOCOL + "://"] versioning_support = True diff --git a/mlem/contrib/callable.py b/mlem/contrib/callable.py index f5b3fcbe..44513234 100644 --- a/mlem/contrib/callable.py +++ b/mlem/contrib/callable.py @@ -1,3 +1,8 @@ +"""MLEM Models from arbitrary callables +Extension type: model + +ModelType implementation to turn any python callable into MLEM Model +""" import posixpath from collections import defaultdict from importlib import import_module @@ -191,6 +196,8 @@ def persistent_load(self, pid: str) -> Any: class CallableModelType(ModelType, ModelHook): + """ModelType implementation for arbitrary callables""" + type: ClassVar = "callable" priority: ClassVar = LOW_PRIORITY_VALUE diff --git a/mlem/contrib/catboost.py b/mlem/contrib/catboost.py index 655a2bee..ff069b2c 100644 --- a/mlem/contrib/catboost.py +++ b/mlem/contrib/catboost.py @@ -1,3 +1,8 @@ +"""Catboost Models Support +Extension type: model + +Implementations of ModelType and ModelIO for `CatBoostClassifier` and `CatBoostRegressor` +""" import
os import posixpath import tempfile @@ -25,8 +30,11 @@ class CatBoostModelIO(ModelIO): type: ClassVar[str] = "catboost_io" classifier_file_name: ClassVar = "clf.cb" + """Filename for catboost classifier""" regressor_file_name: ClassVar = "rgr.cb" + """Filename for catboost classifier""" model_type: CBType = CBType.regressor + """Type of catboost model""" def dump(self, storage: Storage, path, model) -> Artifacts: with tempfile.TemporaryDirectory() as tmpdir: @@ -74,10 +82,11 @@ class CatBoostModel(ModelType, ModelHook, IsInstanceHookMixin): """ type: ClassVar[str] = "catboost" - io: ModelIO = CatBoostModelIO() model: ClassVar[Optional[CatBoost]] valid_types: ClassVar = (CatBoostClassifier, CatBoostRegressor) + io: ModelIO = CatBoostModelIO() + @classmethod def process( cls, obj: Any, sample_data: Optional[Any] = None, **kwargs diff --git a/mlem/contrib/docker/__init__.py b/mlem/contrib/docker/__init__.py index d4daad67..b8e65dad 100644 --- a/mlem/contrib/docker/__init__.py +++ b/mlem/contrib/docker/__init__.py @@ -1,4 +1,6 @@ -""" +"""Docker builds support +Extension type: deployment + Building docker images from the model or packing all necessary things to do that in a folder """ diff --git a/mlem/contrib/docker/base.py b/mlem/contrib/docker/base.py index 134978ba..1e0f15e0 100644 --- a/mlem/contrib/docker/base.py +++ b/mlem/contrib/docker/base.py @@ -3,6 +3,7 @@ import logging import os import tempfile +import time from time import sleep from typing import ClassVar, Dict, Generator, Iterator, Optional @@ -12,6 +13,7 @@ from docker.errors import NotFound from pydantic import BaseModel +from mlem.config import LOCAL_CONFIG, project_config from mlem.contrib.docker.context import DockerBuildArgs, DockerModelDirectory from mlem.contrib.docker.utils import ( build_image_with_logs, @@ -40,8 +42,12 @@ CONTAINER_STATUS_MAPPING = { + "created": DeployStatus.NOT_DEPLOYED, "running": DeployStatus.RUNNING, + "restarting": DeployStatus.STARTING, + "paused": 
DeployStatus.STOPPED, "exited": DeployStatus.CRASHED, + "dead": DeployStatus.CRASHED, } @@ -127,14 +133,12 @@ def delete_image( class RemoteRegistry(DockerRegistry): - """DockerRegistry implementation for official Docker Registry (as in https://docs.docker.com/registry/) - - :param host: adress of the registry""" + """DockerRegistry implementation for official Docker Registry (as in https://docs.docker.com/registry/)""" type: ClassVar = "remote" - host: Optional[ - str - ] = None # TODO: https://github.com/iterative/mlem/issues/38 credentials + # TODO: https://github.com/iterative/mlem/issues/38 credentials + host: Optional[str] = None + """Address of the registry""" def login(self, client): """ @@ -185,10 +189,10 @@ def push(self, client, tag): if "error" in status: error_msg = status["error"] raise DeploymentError(f"Cannot push docker image: {error_msg}") - echo(EMOJI_OK + f"Pushed image {tag} to {self.host}") + echo(EMOJI_OK + f"Pushed image {tag} to {self.get_host()}") def uri(self, image: str): - return f"{self.host}/{image}" + return f"{self.get_host()}/{image}" def _get_digest(self, name, tag): r = requests.head( @@ -227,11 +231,12 @@ def delete_image( class DockerDaemon(MlemABC): - """Class that represents docker daemon - - :param host: adress of the docker daemon (empty string for local)""" + """Class that represents docker daemon""" - host: str # TODO: https://github.com/iterative/mlem/issues/38 credentials + host: str = ( + "" # TODO: https://github.com/iterative/mlem/issues/38 credentials + ) + """adress of the docker daemon (empty string for local)""" @contextlib.contextmanager def client(self) -> Iterator[docker.DockerClient]: @@ -242,19 +247,18 @@ def client(self) -> Iterator[docker.DockerClient]: class DockerImage(BaseModel): """:class:`.Image.Params` implementation for docker images - full uri for image looks like registry.host/repository/name:tag - - :param name: name of the image - :param tag: tag of the image - :param repository: repository of 
the image - :param registry: :class:`.DockerRegistry` instance with this image - :param image_id: docker internal id of this image""" + full uri for image looks like registry.host/repository/name:tag""" name: str + """name of the image""" tag: str = "latest" + """tag of the image""" repository: Optional[str] = None + """repository of the image""" registry: DockerRegistry = DockerRegistry() + """DockerRegistry instance with this image""" image_id: Optional[str] = None + """internal docker id of this image""" @property def fullname(self): @@ -277,87 +281,170 @@ def delete(self, client: docker.DockerClient, force=False, **kwargs): self.registry.delete_image(client, self, force, **kwargs) +class DockerEnv(MlemEnv): + """MlemEnv implementation for docker environment""" + + type: ClassVar = "docker" + registry: DockerRegistry = DockerRegistry() + """Default registry to push images to""" + daemon: DockerDaemon = DockerDaemon(host="") + """Docker daemon parameters""" + + def delete_image(self, image: DockerImage, force: bool = False, **kwargs): + with self.daemon.client() as client: + return image.delete(client, force=force, **kwargs) + + def image_exists(self, image: DockerImage): + with self.daemon.client() as client: + return image.exists(client) + + class DockerContainerState(DeployState): + """State of docker container deployment""" + type: ClassVar = "docker_container" image: Optional[DockerImage] + """Built image""" + container_name: Optional[str] + """Name of container""" container_id: Optional[str] - - def get_client(self): - raise NotImplementedError + """Started container id""" class _DockerBuildMixin(BaseModel): - server: Server + server: Optional[Server] = None + """Server to use""" args: DockerBuildArgs = DockerBuildArgs() + """Additional docker arguments""" -class DockerContainer(MlemDeployment, _DockerBuildMixin): - """:class:`.MlemDeployment` implementation for docker containers +def generate_docker_container_name(): + return 
f"mlem-deploy-{int(time.time())}" - :param name: name of the container - :param port_mapping: port mapping in this container - :param params: other parameters for docker run cmd - :param container_id: internal docker id for this container""" + +class DockerContainer( + MlemDeployment[DockerContainerState, DockerEnv], _DockerBuildMixin +): + """MlemDeployment implementation for docker containers""" type: ClassVar = "docker_container" + state_type: ClassVar = DockerContainerState + env_type: ClassVar = DockerEnv - container_name: str + container_name: Optional[str] = None + """Name to use for container""" image_name: Optional[str] = None + """Name to use for image""" port_mapping: Dict[int, int] = {} + """Expose ports""" params: Dict[str, str] = {} + """Additional params""" rm: bool = True - state: Optional[DockerContainerState] = None + """Remove container on stop""" @property def ensure_image_name(self): return self.image_name or self.container_name + def _get_client(self, state: DockerContainerState): + raise NotImplementedError -class DockerEnv(MlemEnv[DockerContainer]): - """:class:`.MlemEnv` implementation for docker environment - - :param registry: default registry to push images to - :param daemon: :class:`.DockerDaemon` instance""" - - type: ClassVar = "docker" - deploy_type: ClassVar = DockerContainer - registry: DockerRegistry = DockerRegistry() - daemon: DockerDaemon = DockerDaemon(host="") + def deploy(self, model: MlemModel): + # self.check_type(meta) + redeploy = False + with self.lock_state(): + state = self.get_state() + if state.image is None or self.model_changed(model): + from .helpers import build_model_image + + image_name = ( + self.image_name + or self.container_name + or generate_docker_container_name() + ) + echo(EMOJI_BUILD + f"Creating docker image {image_name}") + with set_offset(2): + state.image = build_model_image( + model, + image_name, + self.server + or project_config( + self.loc.project if self.is_saved else None + ).server, + 
self.get_env(), + force_overwrite=True, + **self.args.dict(), + ) + state.update_model(model) + self.update_state(state) + redeploy = True + if state.container_id is None or redeploy: + self.run_container(state) + + echo(EMOJI_OK + f"Container {state.container_name} is up") + + def remove(self): + with self.lock_state(): + state = self.get_state() + if state.container_id is None: + raise DeploymentError( + f"Container {self.container_name} is not deployed" + ) - def delete_image(self, image: DockerImage, force: bool = False, **kwargs): - with self.daemon.client() as client: - return image.delete(client, force=force, **kwargs) + with self.get_env().daemon.client() as client: + try: + container = client.containers.get(state.container_id) + container.stop() + container.remove() + except docker.errors.NotFound: + pass + state.container_id = None + self.update_state(state) + + def get_status(self, raise_on_error=True) -> DeployStatus: + state = self.get_state() + if state.container_id is None: + return DeployStatus.NOT_DEPLOYED - def image_exists(self, image: DockerImage): - with self.daemon.client() as client: - return image.exists(client) + with self.get_env().daemon.client() as client: + try: + status = container_status(client, state.container_id) + return CONTAINER_STATUS_MAPPING[status] + except NotFound: + return DeployStatus.UNKNOWN - def run_container(self, meta: DockerContainer): - if meta.state is None or meta.state.image is None: + def run_container( + self, + state: Optional[DockerContainerState] = None, + ): + state = state or self.get_state() + if state.image is None: raise DeploymentError( - f"Image {meta.ensure_image_name} is not built" + f"Image {self.ensure_image_name} is not built" ) - with self.daemon.client() as client: - meta.state.image.registry.login(client) + with self.get_env().daemon.client() as client: + state.image.registry.login(client) try: # always detach from container and just stream logs if detach=False + name = self.container_name or 
generate_docker_container_name() container = client.containers.run( - meta.state.image.uri, - name=meta.container_name, - auto_remove=meta.rm, - ports=meta.port_mapping, + state.image.uri, + name=name, + auto_remove=self.rm, + ports=self.port_mapping, detach=True, - **meta.params, + **self.params, ) - meta.state.container_id = container.id - meta.update() + state.container_id = container.id + state.container_name = name + self.update_state(state) sleep(0.5) - if not container_is_running(client, meta.container_name): - if not meta.rm: - for log in self.logs(meta, stdout=False, stderr=True): + if not container_is_running(client, name): + if not self.rm: + for log in self.logs(stdout=False, stderr=True): raise DeploymentError( "The container died unexpectedly.", log ) @@ -372,89 +459,28 @@ def run_container(self, meta: DockerContainer): "Docker container raised an error: " + e.stderr.decode() ) from e - def logs( - self, meta: DockerContainer, **kwargs - ) -> Generator[str, None, None]: - if meta.state is None or meta.state.container_id is None: + def logs(self, **kwargs) -> Generator[str, None, None]: + state = self.get_state() + if state.container_id is None: raise DeploymentError( - f"Container {meta.container_name} is not deployed" + f"Container {self.container_name} is not deployed" ) - with self.daemon.client() as client: - container = client.containers.get(meta.state.container_id) + with self.get_env().daemon.client() as client: + container = client.containers.get(state.container_id) yield from container_logs(container, **kwargs) - def deploy(self, meta: DockerContainer): - self.check_type(meta) - - if meta.state is None: - meta.state = DockerContainerState() - - meta.update() - - redeploy = False - if meta.state.image is None or meta.model_changed(): - from .helpers import build_model_image - - image_name = meta.image_name or meta.container_name - echo(EMOJI_BUILD + f"Creating docker image {image_name}") - with set_offset(2): - meta.state.image = 
build_model_image( - meta.get_model(), - image_name, - meta.server, - self, - force_overwrite=True, - **meta.args.dict(), - ) - meta.update_model_hash() - meta.update() - redeploy = True - if meta.state.container_id is None or redeploy: - self.run_container(meta) - - echo(EMOJI_OK + f"Container {meta.container_name} is up") - - def remove(self, meta: DockerContainer): - self.check_type(meta) - if meta.state is None or meta.state.container_id is None: - raise DeploymentError( - f"Container {meta.container_name} is not deployed" - ) - - with self.daemon.client() as client: - try: - container = client.containers.get(meta.state.container_id) - container.stop() - container.remove() - except docker.errors.NotFound: - pass - meta.state.container_id = None - meta.update() - - def get_status( - self, meta: DockerContainer, raise_on_error=True - ) -> DeployStatus: - self.check_type(meta) - - if meta.state is None or meta.state.container_id is None: - return DeployStatus.NOT_DEPLOYED - - with self.daemon.client() as client: - try: - status = container_status(client, meta.state.container_id) - return CONTAINER_STATUS_MAPPING[status] - except NotFound: - return DeployStatus.UNKNOWN - class DockerDirBuilder(MlemBuilder, _DockerBuildMixin): + """Create a directory with docker context to build docker image""" + type: ClassVar[str] = "docker_dir" target: str + """Path to save result""" def build(self, obj: MlemModel): docker_dir = DockerModelDirectory( model=obj, - server=self.server, + server=self.server or LOCAL_CONFIG.server, path=self.target, docker_args=self.args, debug=True, @@ -464,11 +490,17 @@ def build(self, obj: MlemModel): class DockerImageBuilder(MlemBuilder, _DockerBuildMixin): + """Build docker image from model""" + type: ClassVar[str] = "docker" image: DockerImage + """Image parameters""" env: DockerEnv = DockerEnv() + """Where to build and push image. 
Defaults to local docker daemon""" force_overwrite: bool = False + """Ignore existing image with same name""" push: bool = True + """Push image to registry after it is built""" def build(self, obj: MlemModel) -> DockerImage: with tempfile.TemporaryDirectory(prefix="mlem_build_") as tempdir: diff --git a/mlem/contrib/docker/context.py b/mlem/contrib/docker/context.py index 5546aa51..94767d6f 100644 --- a/mlem/contrib/docker/context.py +++ b/mlem/contrib/docker/context.py @@ -13,6 +13,7 @@ from fsspec import AbstractFileSystem from fsspec.implementations.local import LocalFileSystem from pydantic import BaseModel +from yaml import safe_dump import mlem from mlem.config import MlemConfigBase, project_config @@ -26,6 +27,7 @@ REQUIREMENTS = "requirements.txt" MLEM_REQUIREMENTS = "mlem_requirements.txt" +SERVER = "server.yaml" TEMPLATE_FILE = "dockerfile.j2" MLEM_LOCAL_WHL = f"mlem-{mlem.__version__}-py3-none-any.whl" @@ -93,7 +95,7 @@ def _new_whl(path): with tempfile.TemporaryDirectory() as whl_dir: subprocess.check_output( f"pip wheel . 
--no-deps -w {whl_dir}", - shell=True, + shell=True, # nosec: B602 cwd=mlem_src_path, ) whl_path = glob.glob(os.path.join(whl_dir, "*.whl"))[0] @@ -195,31 +197,34 @@ def get_mlem_requirements(): class DockerBuildArgs(BaseModel): - """ - Container for DockerBuild arguments + """Container for DockerBuild arguments""" - :param base_image: base image for the built image in form of a string or function from python version, - default: python:{python_version} - :param python_version: Python version to use, default: version of running interpreter - :param templates_dir: directory or list of directories for Dockerfile templates, default: ./docker_templates - - `pre_install.j2` - Dockerfile commands to run before pip - - `post_install.j2` - Dockerfile commands to run after pip - - `post_copy.j2` - Dockerfile commands to run after pip and MLEM distribution copy - :param run_cmd: command to run in container, default: sh run.sh - :param package_install_cmd: command to install packages. Default is apt-get, change it for other package manager - :param prebuild_hook: callable to call before build, accepts python version. Used for pre-building server images - :param mlem_whl: a path to mlem .whl file. 
If it is empty, mlem will be installed from pip TODO - :param platform: platform to build docker for, see https://docs.docker.com/desktop/multi-arch/ - """ + class Config: + fields = {"prebuild_hook": {"exclude": True}} base_image: Optional[Union[str, Callable[[str], str]]] = None + """base image for the built image in form of a string or function from python version, + default: python:{python_version}""" python_version: str = get_python_version() + """Python version to use + default: version of running interpreter""" templates_dir: List[str] = [] - run_cmd: Union[bool, str] = "sh run.sh" + """directory or list of directories for Dockerfile templates + - `pre_install.j2` - Dockerfile commands to run before pip + - `post_install.j2` - Dockerfile commands to run after pip + - `post_copy.j2` - Dockerfile commands to run after pip and MLEM distribution copy""" + run_cmd: Optional[str] = "sh run.sh" + """command to run in container""" package_install_cmd: str = "apt-get update && apt-get -y upgrade && apt-get install --no-install-recommends -y" + """command to install packages. Default is apt-get, change it for other package manager""" package_clean_cmd: str = "&& apt-get clean && rm -rf /var/lib/apt/lists/*" + """command to clean after package installation""" prebuild_hook: Optional[Callable[[str], Any]] = None + """callable to call before build, accepts python version. Used for pre-building server images""" + mlem_whl: Optional[str] = None + """a path to mlem .whl file. 
If it is empty, mlem will be installed from pip""" platform: Optional[str] = None + """platform to build docker for, see docs.docker.com/desktop/multi-arch/""" def get_base_image(self): if self.base_image is None: @@ -317,11 +322,11 @@ def write_model(self): with no_echo(): path = os.path.join(self.path, self.model_name) if self.model.is_saved: - self.model.clone(path, external=True) + self.model.clone(path) else: copy = self.model.copy() copy.model_type.bind(self.model.model_type.model) - copy.dump(path, external=True) + copy.dump(path) def write_dockerfile(self, requirements: Requirements): echo(EMOJI_BUILD + "Generating dockerfile...") @@ -338,7 +343,10 @@ def write_dockerfile(self, requirements: Requirements): df.write(dockerfile) def write_configs(self): - pass + with self.fs.open( + posixpath.join(self.path, SERVER), "w", encoding="utf8" + ) as f: + safe_dump(self.server.dict(), f) def write_local_sources(self, requirements: Requirements): echo(EMOJI_PACK + "Adding sources...") @@ -363,7 +371,7 @@ def write_local_sources(self, requirements: Requirements): def write_run_file(self): with self.fs.open(posixpath.join(self.path, "run.sh"), "w") as sh: - sh.write(f"mlem serve {self.model_name} {self.server.type}") + sh.write(f"mlem serve -l {SERVER} -m {self.model_name}") def write_mlem_source(self): source = get_mlem_source() diff --git a/mlem/contrib/docker/copy.j2 b/mlem/contrib/docker/copy.j2 new file mode 100644 index 00000000..916bbf2c --- /dev/null +++ b/mlem/contrib/docker/copy.j2 @@ -0,0 +1 @@ +COPY . ./ diff --git a/mlem/contrib/docker/dockerfile.j2 b/mlem/contrib/docker/dockerfile.j2 index 3abe1d12..a9c62383 100644 --- a/mlem/contrib/docker/dockerfile.j2 +++ b/mlem/contrib/docker/dockerfile.j2 @@ -1,13 +1,10 @@ FROM {{ base_image }} WORKDIR /app {% include "pre_install.j2" ignore missing %} -{% if packages %}RUN {{ package_install_cmd }} {{ packages|join(" ") }} {{ package_clean_cmd }}{% endif %} -COPY requirements.txt . 
-RUN pip install -r requirements.txt -{{ mlem_install }} +{% include "install_req.j2" %} {% include "post_install.j2" ignore missing %} -COPY . ./ +{% include "copy.j2" %} {% for name, value in env.items() %}ENV {{ name }}={{ value }} {% endfor %} {% include "post_copy.j2" ignore missing %} -{% if run_cmd is not false %}CMD {{ run_cmd }}{% endif %} +{% if run_cmd is not none %}CMD {{ run_cmd }}{% endif %} diff --git a/mlem/contrib/docker/install_req.j2 b/mlem/contrib/docker/install_req.j2 new file mode 100644 index 00000000..64f22d04 --- /dev/null +++ b/mlem/contrib/docker/install_req.j2 @@ -0,0 +1,4 @@ +{% if packages %}RUN {{ package_install_cmd }} {{ packages|join(" ") }} {{ package_clean_cmd }}{% endif %} +COPY requirements.txt . +RUN pip install -r requirements.txt +{{ mlem_install }} diff --git a/mlem/contrib/dvc.py b/mlem/contrib/dvc.py index eab8ea92..e19192be 100644 --- a/mlem/contrib/dvc.py +++ b/mlem/contrib/dvc.py @@ -1,3 +1,8 @@ +"""DVC Support +Extension type: storage + +Support for storing artifacts with DVC +""" import contextlib import os.path import posixpath @@ -34,13 +39,14 @@ def find_dvc_repo_root(path: str): class DVCStorage(LocalStorage): - """For now this storage is user-managed dvc storage, which means user should - track corresponding files with dvc manually. 
- TODO: add support for pipeline-tracked files and for single files with .dvc - Also add possibility to automatically add and push every artifact""" + """User-managed dvc storage, which means user should + track corresponding files with dvc manually.""" + + # TODO: https://github.com//issues/47 type: ClassVar = "dvc" uri: str = "" + """Base storage path""" def upload(self, local_path: str, target_path: str) -> "DVCArtifact": return DVCArtifact( @@ -64,8 +70,11 @@ def relative(self, fs: AbstractFileSystem, path: str) -> Storage: class DVCArtifact(LocalArtifact): + """Local artifact that can be also read from DVC cache""" + type: ClassVar = "dvc" uri: str + """Local path to file""" def _download(self, target_path: str) -> LocalArtifact: if os.path.isdir(target_path): diff --git a/mlem/contrib/fastapi.py b/mlem/contrib/fastapi.py index 4d7dd587..807e671f 100644 --- a/mlem/contrib/fastapi.py +++ b/mlem/contrib/fastapi.py @@ -1,3 +1,8 @@ +"""FastAPI serving +Extension type: serving + +FastAPIServer implementation +""" import logging from collections.abc import Callable from types import ModuleType @@ -34,11 +39,15 @@ def _create_schema_route(app: FastAPI, interface: Interface): class FastAPIServer(Server, LibRequirementsMixin): + """Serves model with http""" + libraries: ClassVar[List[ModuleType]] = [uvicorn, fastapi] type: ClassVar[str] = "fastapi" host: str = "0.0.0.0" + """Network interface to use""" port: int = 8080 + """Port to use""" @classmethod def _create_handler( diff --git a/mlem/contrib/github.py b/mlem/contrib/github.py index 85b31f5c..31c7592c 100644 --- a/mlem/contrib/github.py +++ b/mlem/contrib/github.py @@ -1,6 +1,10 @@ +"""Github URI support +Extension type: uri + +Implementation of `GithubResolver` +""" import pathlib import posixpath -import re from typing import ClassVar, Dict, Optional from urllib.parse import quote_plus, urlparse @@ -9,6 +13,7 @@ from mlem.config import LOCAL_CONFIG from mlem.core.meta_io import CloudGitResolver +from 
mlem.utils.git import is_long_sha def ls_branches(repo_url: str) -> Dict[str, str]: @@ -54,21 +59,17 @@ def _ls_github_refs(org: str, repo: str, endpoint: str): return None -def is_long_sha(sha: str): - return re.match(r"^[a-f\d]{40}$", sha) - - class GithubResolver(CloudGitResolver): """Resolve https://github.com URLs""" type: ClassVar = "github" FS: ClassVar = GithubFileSystem - PROTOCOL = "github" - GITHUB_COM = "https://github.com" + PROTOCOL: ClassVar = "github" + GITHUB_COM: ClassVar = "https://github.com" - # TODO: support on-prem github (other hosts) - PREFIXES = [GITHUB_COM, PROTOCOL + "://"] - versioning_support = True + # TODO: https://github.com//issues/388 + PREFIXES: ClassVar = [GITHUB_COM, PROTOCOL + "://"] + versioning_support: ClassVar = True @classmethod def get_envs(cls): diff --git a/mlem/contrib/gitlabfs.py b/mlem/contrib/gitlabfs.py index 14899688..a80e9255 100644 --- a/mlem/contrib/gitlabfs.py +++ b/mlem/contrib/gitlabfs.py @@ -1,3 +1,8 @@ +"""Gitlab URI support +Extension type: uri + +Implementation of `GitlabFileSystem` and `GitlabResolver` +""" import posixpath from typing import ClassVar, Optional from urllib.parse import quote_plus, urlparse, urlsplit @@ -157,14 +162,16 @@ def _mathch_path_with_ref(project_id, path): class GitlabResolver(CloudGitResolver): + """Resolve https://gitlab.com URIs""" + type: ClassVar = "gitlab" - FS = GitlabFileSystem - PROTOCOL = "gitlab" - GITLAB_COM = "https://gitlab.com" + FS: ClassVar = GitlabFileSystem + PROTOCOL: ClassVar = "gitlab" + GITLAB_COM: ClassVar = "https://gitlab.com" - # TODO: support on-prem gitlab (other hosts) - PREFIXES = [GITLAB_COM, PROTOCOL + "://"] - versioning_support = True + # TODO: https://github.com//issues/388 + PREFIXES: ClassVar = [GITLAB_COM, PROTOCOL + "://"] + versioning_support: ClassVar = True @classmethod def get_kwargs(cls, uri): diff --git a/mlem/contrib/heroku/__init__.py b/mlem/contrib/heroku/__init__.py index e69de29b..d8f00fee 100644 --- 
a/mlem/contrib/heroku/__init__.py +++ b/mlem/contrib/heroku/__init__.py @@ -0,0 +1,5 @@ +"""Heroku Deployments support +Extension type: deployment + +Implements MlemEnv, MlemDeployment and DeployState to work with heroku.com +""" diff --git a/mlem/contrib/heroku/build.py b/mlem/contrib/heroku/build.py index 32c1f494..2736cff2 100644 --- a/mlem/contrib/heroku/build.py +++ b/mlem/contrib/heroku/build.py @@ -14,9 +14,13 @@ class HerokuRemoteRegistry(RemoteRegistry): + """Heroku docker registry""" + type: ClassVar = "heroku" api_key: Optional[str] = None - host = DEFAULT_HEROKU_REGISTRY + """HEROKU_API_KEY""" + host: str = DEFAULT_HEROKU_REGISTRY + """Registry host""" def uri(self, image: str): return super().uri(image).split(":")[0] diff --git a/mlem/contrib/heroku/meta.py b/mlem/contrib/heroku/meta.py index a2aee45f..3b2052d2 100644 --- a/mlem/contrib/heroku/meta.py +++ b/mlem/contrib/heroku/meta.py @@ -8,6 +8,7 @@ DeployStatus, MlemDeployment, MlemEnv, + MlemModel, ) from mlem.runtime.client import Client, HTTPClient @@ -28,15 +29,31 @@ class HerokuAppMeta(BaseModel): name: str + """App name""" web_url: str + """App web url""" meta_info: dict + """Additional metadata""" + + +class HerokuEnv(MlemEnv): + """Heroku Account""" + + type: ClassVar = "heroku" + api_key: Optional[str] = None + """HEROKU_API_KEY - advised to set via env variable or `heroku login`""" class HerokuState(DeployState): + """State of heroku deployment""" + type: ClassVar = "heroku" app: Optional[HerokuAppMeta] + """Created heroku app""" image: Optional[DockerImage] + """Built docker image""" release_state: Optional[Union[dict, list]] + """State of the release""" @property def ensured_app(self) -> HerokuAppMeta: @@ -44,85 +61,82 @@ def ensured_app(self) -> HerokuAppMeta: raise ValueError("App is not created yet") return self.app - def get_client(self) -> Client: - return HTTPClient( - host=urlparse(self.ensured_app.web_url).netloc, port=80 - ) +class HerokuDeployment(MlemDeployment[HerokuState, 
HerokuEnv]): + """Heroku App""" -class HerokuDeployment(MlemDeployment): type: ClassVar = "heroku" - state: Optional[HerokuState] + state_type: ClassVar = HerokuState + env_type: ClassVar = HerokuEnv + app_name: str + """Heroku application name""" region: str = "us" + """Heroku region""" stack: str = "container" + """Stack to use""" team: Optional[str] = None + """Heroku team""" + def _get_client(self, state: HerokuState) -> Client: + return HTTPClient( + host=urlparse(state.ensured_app.web_url).netloc, port=80 + ) -class HerokuEnv(MlemEnv[HerokuDeployment]): - type: ClassVar = "heroku" - deploy_type: ClassVar = HerokuDeployment - api_key: Optional[str] = None - - def deploy(self, meta: HerokuDeployment): + def deploy(self, model: MlemModel): from .utils import create_app, release_docker_app - if meta.state is None: - meta.state = HerokuState() + with self.lock_state(): + state: HerokuState = self.get_state() + if state.app is None: + state.app = create_app(self, api_key=self.get_env().api_key) + self.update_state(state) - meta.update() - self.check_type(meta) - - if meta.state.app is None: - meta.state.app = create_app(meta, api_key=self.api_key) - meta.update() + redeploy = False + if state.image is None or self.model_changed(model): + state.image = build_heroku_docker( + model, state.app.name, api_key=self.get_env().api_key + ) + state.update_model(model) + self.update_state(state) + redeploy = True + if state.release_state is None or redeploy: + state.release_state = release_docker_app( + state.app.name, + state.image.image_id, + api_key=self.get_env().api_key, + ) + self.update_state(state) - redeploy = False - if meta.state.image is None or meta.model_changed(): - meta.state.image = build_heroku_docker( - meta.get_model(), meta.state.app.name, api_key=self.api_key - ) - meta.update_model_hash() - meta.update() - redeploy = True - if meta.state.release_state is None or redeploy: - meta.state.release_state = release_docker_app( - meta.state.app.name, - 
meta.state.image.image_id, - api_key=self.api_key, + echo( + EMOJI_OK + + f"Service {self.app_name} is up. You can check it out at {state.app.web_url}" ) - meta.update() - echo( - EMOJI_OK - + f"Service {meta.app_name} is up. You can check it out at {meta.state.app.web_url}" - ) - - def remove(self, meta: HerokuDeployment): + def remove(self): from .utils import delete_app - self.check_type(meta) - if meta.state is None: - return + with self.lock_state(): + state: HerokuState = self.get_state() - delete_app(meta.state.ensured_app.name, self.api_key) - meta.state = None - meta.update() + if state.app is not None: + delete_app(state.ensured_app.name, self.get_env().api_key) + self.purge_state() - def get_status( - self, meta: "HerokuDeployment", raise_on_error=True - ) -> DeployStatus: + def get_status(self, raise_on_error=True) -> DeployStatus: from .utils import list_dynos - self.check_type(meta) - if meta.state is None or meta.state.app is None: + state: HerokuState = self.get_state() + if state.app is None: return DeployStatus.NOT_DEPLOYED - dynos = list_dynos(meta.state.ensured_app.name, "web", self.api_key) + dynos = list_dynos( + state.ensured_app.name, "web", self.get_env().api_key + ) if not dynos: if raise_on_error: raise DeploymentError( f"No heroku web dynos found, check your dashboard " - f"at https://dashboard.heroku.com/apps/{meta.state.ensured_app.name}" + f"at https://dashboard.heroku.com/apps/{state.ensured_app.name}" ) return DeployStatus.NOT_DEPLOYED return HEROKU_STATE_MAPPING[dynos[0]["state"]] diff --git a/mlem/contrib/heroku/server.py b/mlem/contrib/heroku/server.py index c91cda25..f10e7164 100644 --- a/mlem/contrib/heroku/server.py +++ b/mlem/contrib/heroku/server.py @@ -9,7 +9,9 @@ class HerokuServer(FastAPIServer): - type: ClassVar = "heroku" + """Special FastAPI server to pickup port from env PORT""" + + type: ClassVar = "_heroku" def serve(self, interface: Interface): self.port = int(os.environ["PORT"]) diff --git 
a/mlem/contrib/kubernetes/__init__.py b/mlem/contrib/kubernetes/__init__.py new file mode 100644 index 00000000..b6b1b1ae --- /dev/null +++ b/mlem/contrib/kubernetes/__init__.py @@ -0,0 +1,3 @@ +"""Kubernetes Deployments support +Extension type: deployment +""" diff --git a/mlem/contrib/kubernetes/base.py b/mlem/contrib/kubernetes/base.py new file mode 100644 index 00000000..88be95cf --- /dev/null +++ b/mlem/contrib/kubernetes/base.py @@ -0,0 +1,202 @@ +import os +from typing import ClassVar, List, Optional + +from kubernetes import client, config + +from mlem.config import project_config +from mlem.core.errors import DeploymentError, EndpointNotFound, MlemError +from mlem.core.objects import ( + DeployState, + DeployStatus, + MlemDeployment, + MlemEnv, + MlemModel, +) +from mlem.runtime.client import Client, HTTPClient +from mlem.runtime.server import Server +from mlem.ui import EMOJI_OK, echo + +from ..docker.base import ( + DockerDaemon, + DockerImage, + DockerRegistry, + generate_docker_container_name, +) +from .build import build_k8s_docker +from .context import K8sYamlBuildArgs, K8sYamlGenerator +from .utils import create_k8s_resources, namespace_deleted, pod_is_running + +POD_STATE_MAPPING = { + "Pending": DeployStatus.STARTING, + "Running": DeployStatus.RUNNING, + "Succeeded": DeployStatus.STOPPED, + "Failed": DeployStatus.CRASHED, + "Unknown": DeployStatus.UNKNOWN, +} + + +class K8sEnv(MlemEnv): + """MlemEnv implementation for Kubernetes Environments""" + + type: ClassVar = "kubernetes" + """Type of deployment being used for the Kubernetes environment""" + + registry: Optional[DockerRegistry] = None + """Docker registry""" + templates_dir: List[str] = [] + """List of dirs where templates reside""" + + +class K8sDeploymentState(DeployState): + """DeployState implementation for Kubernetes deployments""" + + type: ClassVar = "kubernetes" + + image: Optional[DockerImage] = None + """Docker Image being used for Deployment""" + deployment_name: Optional[str] = 
None + """Name of Deployment""" + + +class K8sDeployment( + MlemDeployment[K8sDeploymentState, K8sEnv], K8sYamlBuildArgs +): + """MlemDeployment implementation for Kubernetes deployments""" + + type: ClassVar = "kubernetes" + state_type: ClassVar = K8sDeploymentState + """Type of state for Kubernetes deployments""" + env_type: ClassVar = K8sEnv + + server: Optional[Server] = None + """Type of Server to use, with options such as FastAPI, RabbitMQ etc.""" + registry: Optional[DockerRegistry] = DockerRegistry() + """Docker registry""" + daemon: Optional[DockerDaemon] = DockerDaemon(host="") + """Docker daemon""" + kube_config_file_path: Optional[str] = None + """Path for kube config file of the cluster""" + templates_dir: List[str] = [] + """List of dirs where templates reside""" + + def load_kube_config(self): + config.load_kube_config( + config_file=self.kube_config_file_path + or os.getenv("KUBECONFIG", default="~/.kube/config") + ) + + def _get_client(self, state: K8sDeploymentState) -> Client: + self.load_kube_config() + service = client.CoreV1Api().list_namespaced_service(self.namespace) + try: + host, port = self.service_type.get_host_and_port( + service, self.namespace + ) + except MlemError as e: + raise EndpointNotFound( + "Couldn't determine host and port from the service deployed" + ) from e + if host is not None and port is not None: + return HTTPClient(host=host, port=port) + raise MlemError( + f"host and port determined are not valid, received host as {host} and port as {port}" + ) + + def get_registry(self): + registry = self.registry or self.get_env().registry + if not registry: + raise MlemError( + "registry to be used by Docker is not set or supplied" + ) + return registry + + def get_image_name(self): + return self.image_name or generate_docker_container_name() + + def get_server(self): + return ( + self.server + or project_config( + self.loc.project if self.is_saved else None + ).server + ) + + def deploy(self, model: MlemModel): + redeploy = 
False + with self.lock_state(): + self.load_kube_config() + state: K8sDeploymentState = self.get_state() + if state.image is None or self.model_changed(model): + image_name = self.get_image_name() + state.image = build_k8s_docker( + meta=model, + image_name=image_name, + registry=self.get_registry(), + daemon=self.daemon, + server=self.get_server(), + ) + state.update_model(model) + redeploy = True + + if ( + state.deployment_name is None or redeploy + ) and state.image is not None: + generator = K8sYamlGenerator( + namespace=self.namespace, + image_name=state.image.name, + image_uri=state.image.uri, + image_pull_policy=self.image_pull_policy, + port=self.port, + service_type=self.service_type, + templates_dir=self.templates_dir + or self.get_env().templates_dir, + ) + create_k8s_resources(generator) + + if pod_is_running(namespace=self.namespace): + deployments_list = ( + client.AppsV1Api().list_namespaced_deployment( + namespace=self.namespace + ) + ) + + if len(deployments_list.items) == 0: + raise DeploymentError( + f"Deployment {image_name} couldn't be found in {self.namespace} namespace" + ) + dpl_name = deployments_list.items[0].metadata.name + state.deployment_name = dpl_name + self.update_state(state) + + echo( + EMOJI_OK + + f"Deployment {state.deployment_name} is up in {self.namespace} namespace" + ) + else: + raise DeploymentError( + f"Deployment {image_name} couldn't be set-up on the Kubernetes cluster" + ) + + def remove(self): + with self.lock_state(): + self.load_kube_config() + state: K8sDeploymentState = self.get_state() + if state.deployment_name is not None: + client.CoreV1Api().delete_namespace(name=self.namespace) + if namespace_deleted(self.namespace): + echo( + EMOJI_OK + + f"Deployment {state.deployment_name} and the corresponding service are removed from {self.namespace} namespace" + ) + state.deployment_name = None + self.update_state(state) + + def get_status(self, raise_on_error=True) -> DeployStatus: + self.load_kube_config() + state: 
K8sDeploymentState = self.get_state() + if state.deployment_name is None: + return DeployStatus.NOT_DEPLOYED + + pods_list = client.CoreV1Api().list_namespaced_pod(self.namespace) + + return POD_STATE_MAPPING[pods_list.items[0].status.phase] diff --git a/mlem/contrib/kubernetes/build.py b/mlem/contrib/kubernetes/build.py new file mode 100644 index 00000000..b5a98c26 --- /dev/null +++ b/mlem/contrib/kubernetes/build.py @@ -0,0 +1,30 @@ +from typing import Optional + +from mlem.core.objects import MlemModel +from mlem.runtime.server import Server +from mlem.ui import EMOJI_BUILD, echo, set_offset + +from ..docker.base import DockerDaemon, DockerEnv, DockerRegistry +from ..docker.helpers import build_model_image + + +def build_k8s_docker( + meta: MlemModel, + image_name: str, + registry: Optional[DockerRegistry], + daemon: Optional[DockerDaemon], + server: Server, + platform: Optional[str] = "linux/amd64", + # runners usually do not support arm64 images built on Mac M1 devices +): + echo(EMOJI_BUILD + f"Creating docker image {image_name}") + with set_offset(2): + return build_model_image( + meta, + image_name, + server, + DockerEnv(registry=registry, daemon=daemon), + tag=meta.meta_hash(), + force_overwrite=True, + platform=platform, + ) diff --git a/mlem/contrib/kubernetes/context.py b/mlem/contrib/kubernetes/context.py new file mode 100644 index 00000000..c6649ced --- /dev/null +++ b/mlem/contrib/kubernetes/context.py @@ -0,0 +1,55 @@ +import logging +import os +from enum import Enum +from typing import ClassVar + +from pydantic import BaseModel + +from mlem.contrib.kubernetes.service import NodePortService, ServiceType +from mlem.utils.templates import TemplateModel + +logger = logging.getLogger(__name__) + + +class ImagePullPolicy(str, Enum): + always = "Always" + never = "Never" + if_not_present = "IfNotPresent" + + +class K8sYamlBuildArgs(BaseModel): + """Class encapsulating parameters for Kubernetes manifests/yamls""" + + class Config: + use_enum_values = True 
+ + namespace: str = "mlem" + """Namespace to create kubernetes resources such as pods, service in""" + image_name: str = "ml" + """Name of the docker image to be deployed""" + image_uri: str = "ml:latest" + """URI of the docker image to be deployed""" + image_pull_policy: ImagePullPolicy = ImagePullPolicy.always + """Image pull policy for the docker image to be deployed""" + port: int = 8080 + """Port where the service should be available""" + service_type: ServiceType = NodePortService() + """Type of service by which endpoints of the model are exposed""" + + +class K8sYamlGenerator(K8sYamlBuildArgs, TemplateModel): + TEMPLATE_FILE: ClassVar = "resources.yaml.j2" + TEMPLATE_DIR: ClassVar = os.path.dirname(__file__) + + def prepare_dict(self): + logger.debug( + 'Generating Resource Yaml via templates from "%s"...', + self.templates_dir, + ) + + logger.debug('Docker image is based on "%s".', self.image_uri) + + k8s_yaml_args = self.dict() + k8s_yaml_args["service_type"] = self.service_type.get_string() + k8s_yaml_args.pop("templates_dir") + return k8s_yaml_args diff --git a/mlem/contrib/kubernetes/resources.yaml.j2 b/mlem/contrib/kubernetes/resources.yaml.j2 new file mode 100644 index 00000000..5cbe9b7b --- /dev/null +++ b/mlem/contrib/kubernetes/resources.yaml.j2 @@ -0,0 +1,47 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: {{ namespace }} + labels: + name: {{ namespace }} + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ image_name }} + namespace: {{ namespace }} +spec: + selector: + matchLabels: + app: {{ image_name }} + template: + metadata: + labels: + app: {{ image_name }} + spec: + containers: + - name: {{ image_name }} + image: {{ image_uri }} + imagePullPolicy: {{ image_pull_policy }} + ports: + - containerPort: {{ port }} + +--- + +apiVersion: v1 +kind: Service +metadata: + name: {{ image_name }} + namespace: {{ namespace }} + labels: + run: {{ image_name }} +spec: + ports: + - port: {{ port }} + protocol: TCP + targetPort: {{ 
port }} + selector: + app: {{ image_name }} + type: {{ service_type }} diff --git a/mlem/contrib/kubernetes/service.py b/mlem/contrib/kubernetes/service.py new file mode 100644 index 00000000..9f347abd --- /dev/null +++ b/mlem/contrib/kubernetes/service.py @@ -0,0 +1,115 @@ +from abc import abstractmethod +from typing import ClassVar, Optional, Tuple + +from kubernetes import client + +from mlem.core.base import MlemABC +from mlem.core.errors import EndpointNotFound, MlemError + + +def find_index(nodes_list, node_name): + for i, each_node in enumerate(nodes_list): + if each_node.metadata.name == node_name: + return i + return -1 + + +class ServiceType(MlemABC): + """Service Type for services inside a Kubernetes Cluster""" + + abs_name: ClassVar = "k8s_service_type" + + class Config: + type_root = True + + @abstractmethod + def get_string(self): + raise NotImplementedError + + @abstractmethod + def get_host_and_port( + self, service, namespace="mlem" # pylint: disable=unused-argument + ) -> Tuple[Optional[str], Optional[int]]: + """Returns host and port for the service in Kubernetes""" + raise NotImplementedError + + +class NodePortService(ServiceType): + """NodePort Service implementation for service inside a Kubernetes Cluster""" + + type: ClassVar = "nodeport" + + def get_string(self): + return "NodePort" + + def get_host_and_port(self, service, namespace="mlem"): + try: + port = service.items[0].spec.ports[0].node_port + except (IndexError, AttributeError) as e: + raise MlemError( + "Couldn't determine node port of the deployed service" + ) from e + try: + node_name = ( + client.CoreV1Api() + .list_namespaced_pod(namespace) + .items[0] + .spec.node_name + ) + except (IndexError, AttributeError) as e: + raise MlemError( + "Couldn't determine name of the node where the pod is deployed" + ) from e + node_list = client.CoreV1Api().list_node().items + node_index = find_index(node_list, node_name) + if node_index == -1: + raise MlemError( + f"Couldn't find the node 
where pods in namespace {namespace} exists" + ) + address_dict = node_list[node_index].status.addresses + for each_address in address_dict: + if each_address.type == "ExternalIP": + host = each_address.address + return host, port + raise EndpointNotFound( + f"Node {node_name} doesn't have an externally reachable IP address" + ) + + +class LoadBalancerService(ServiceType): + """LoadBalancer Service implementation for service inside a Kubernetes Cluster""" + + type: ClassVar = "loadbalancer" + + def get_string(self): + return "LoadBalancer" + + def get_host_and_port(self, service, namespace="mlem"): + try: + port = service.items[0].spec.ports[0].port + except (IndexError, AttributeError) as e: + raise MlemError( + "Couldn't determine port of the deployed service" + ) from e + try: + ingress = service.items[0].status.load_balancer.ingress[0] + host = ingress.hostname or ingress.ip + except (IndexError, AttributeError) as e: + raise MlemError( + "Couldn't determine IP address of the deployed service" + ) from e + return host, port + + +class ClusterIPService(ServiceType): + """ClusterIP Service implementation for service inside a Kubernetes Cluster""" + + type: ClassVar = "clusterip" + + def get_string(self): + return "ClusterIP" + + def get_host_and_port(self, service, namespace="mlem"): + raise MlemError( + "Cannot expose service of type ClusterIP outside the Kubernetes Cluster" + ) diff --git a/mlem/contrib/kubernetes/utils.py b/mlem/contrib/kubernetes/utils.py new file mode 100644 index 00000000..ae11fbe8 --- /dev/null +++ b/mlem/contrib/kubernetes/utils.py @@ -0,0 +1,80 @@ +import json +import os +import tempfile + +from kubernetes import client, utils, watch + +from .context import K8sYamlGenerator + + +def create_k8s_resources(generator: K8sYamlGenerator): + k8s_client = client.ApiClient() + with tempfile.TemporaryDirectory(prefix="mlem_k8s_yaml_build_") as tempdir: + filename = os.path.join(tempdir, "resource.yaml") + generator.write(filename) + try: + 
utils.create_from_yaml(k8s_client, filename, verbose=True) + except utils.FailToCreateError as e: + failures = e.api_exceptions + for each_failure in failures: + error_info = json.loads(each_failure.body) + if error_info["reason"] != "AlreadyExists": + raise e + if error_info["details"]["kind"] == "deployments": + existing_image_uri = ( + client.CoreV1Api() + .list_namespaced_pod(generator.namespace) + .items[0] + .spec.containers[0] + .image + ) + if existing_image_uri != generator.image_uri: + api_instance = client.AppsV1Api() + body = { + "spec": { + "template": { + "spec": { + "containers": [ + { + "name": generator.image_name, + "image": generator.image_uri, + } + ] + } + } + } + } + api_instance.patch_namespaced_deployment( + generator.image_name, + generator.namespace, + body, + pretty=True, + ) + + +def pod_is_running(namespace, timeout=60) -> bool: + w = watch.Watch() + for event in w.stream( + func=client.CoreV1Api().list_namespaced_pod, + namespace=namespace, + timeout_seconds=timeout, + ): + if event["object"].status.phase == "Running": + w.stop() + return True + return False + + +def namespace_deleted(namespace, timeout=60) -> bool: + w = watch.Watch() + for event in w.stream( + func=client.CoreV1Api().list_namespace, + timeout_seconds=timeout, + ): + if ( + namespace == event["object"].metadata.name + and event["type"] == "DELETED" + ): + w.stop() + return True + return False diff --git a/mlem/contrib/lightgbm.py b/mlem/contrib/lightgbm.py index a2bf6f51..1db29b7f 100644 --- a/mlem/contrib/lightgbm.py +++ b/mlem/contrib/lightgbm.py @@ -1,3 +1,9 @@ +"""LightGBM models support +Extension type: model + +ModelType and ModelIO implementations for `lightgbm.Booster` as well as +LightGBMDataType with Reader and Writer for `lightgbm.Dataset` +""" import os import posixpath import tempfile @@ -46,7 +52,9 @@ class LightGBMDataType( type: ClassVar[str] = "lightgbm" valid_types: ClassVar = (lgb.Dataset,) inner: DataType + """DataType of Inner""" labels: 
Optional[DataType] + """DataType of Labels""" def serialize(self, instance: Any) -> dict: self.check_type(instance, lgb.Dataset, SerializationError) @@ -103,6 +111,8 @@ def get_model(self, prefix: str = "") -> Type[BaseModel]: class LightGBMDataWriter(DataWriter): + """Wrapper writer for lightgbm.Dataset objects""" + type: ClassVar[str] = "lightgbm" def write( @@ -152,10 +162,14 @@ def write( class LightGBMDataReader(DataReader): + """Wrapper reader for lightgbm.Dataset objects""" + type: ClassVar[str] = "lightgbm" data_type: LightGBMDataType inner: DataReader + """DataReader of Inner""" labels: Optional[DataReader] + """DataReader of Labels""" def read(self, artifacts: Artifacts) -> DataType: if self.labels is not None: @@ -189,7 +203,8 @@ class LightGBMModelIO(ModelIO): """ type: ClassVar[str] = "lightgbm_io" - model_file_name = "model.lgb" + model_file_name: str = "model.lgb" + """Filename to use""" def dump(self, storage: Storage, path, model) -> Artifacts: with tempfile.TemporaryDirectory(prefix="mlem_lightgbm_dump") as f: @@ -222,6 +237,7 @@ class LightGBMModel(ModelType, ModelHook, IsInstanceHookMixin): type: ClassVar[str] = "lightgbm" valid_types: ClassVar = (lgb.Booster,) io: ModelIO = LightGBMModelIO() + """LightGBMModelIO""" @classmethod def process( diff --git a/mlem/contrib/numpy.py b/mlem/contrib/numpy.py index 6dbdd697..a3206008 100644 --- a/mlem/contrib/numpy.py +++ b/mlem/contrib/numpy.py @@ -1,3 +1,8 @@ +"""Numpy data types support +Extension type: data + +DataType, Reader and Writer implementations for `np.ndarray` and `np.number` primitives +""" from types import ModuleType from typing import Any, ClassVar, Iterator, List, Optional, Tuple, Type, Union @@ -40,19 +45,12 @@ def np_type_from_string(string_repr) -> np.dtype: class NumpyNumberType( LibRequirementsMixin, DataType, DataSerializer, DataHook ): - """ - :class:`.DataType` implementation for `numpy.number` objects which - converts them to built-in Python numbers and vice versa. 
- - :param dtype: `numpy.number` data type as string - """ + """numpy.number DataType""" libraries: ClassVar[List[ModuleType]] = [np] type: ClassVar[str] = "number" dtype: str - - # def get_spec(self) -> ArgList: - # return [Field(None, python_type_from_np_string_repr(self.dtype), False)] + """`numpy.number` type name as string""" def deserialize(self, obj: dict) -> Any: return self.actual_type(obj) # pylint: disable=not-callable @@ -83,19 +81,15 @@ def get_model(self, prefix: str = "") -> Type: class NumpyNdarrayType( LibRequirementsMixin, DataType, DataHook, DataSerializer ): - """ - :class:`.DataType` implementation for `np.ndarray` objects - which converts them to built-in Python lists and vice versa. - - :param shape: shape of `numpy.ndarray` objects in data - :param dtype: data type of `numpy.ndarray` objects in data - """ + """DataType implementation for `np.ndarray`""" type: ClassVar[str] = "ndarray" libraries: ClassVar[List[ModuleType]] = [np] shape: Optional[Tuple[Optional[int], ...]] + """Shape of `numpy.ndarray`""" dtype: str + """Data type of elements""" @staticmethod def _abstract_shape(shape): @@ -179,6 +173,8 @@ def get_writer(self, project: str = None, filename: str = None, **kwargs): class NumpyNumberWriter(DataWriter): + """Write np.number objects""" + type: ClassVar[str] = "numpy_number" def write( @@ -190,8 +186,11 @@ def write( class NumpyNumberReader(DataReader): + """Read np.number objects""" + type: ClassVar[str] = "numpy_number" data_type: NumpyNumberType + """Resulting data type""" def read(self, artifacts: Artifacts) -> DataType: if DataWriter.art_name not in artifacts: diff --git a/mlem/contrib/onnx.py b/mlem/contrib/onnx.py index df3a8526..27156070 100644 --- a/mlem/contrib/onnx.py +++ b/mlem/contrib/onnx.py @@ -1,3 +1,8 @@ +"""ONNX models support +Extension type: model + +ModelType and ModelIO implementations for `onnx.ModelProto` +""" from typing import Any, ClassVar, List, Optional, Union import numpy as np diff --git 
a/mlem/contrib/pandas.py b/mlem/contrib/pandas.py index 8dcbae87..85ce4481 100644 --- a/mlem/contrib/pandas.py +++ b/mlem/contrib/pandas.py @@ -1,3 +1,9 @@ +"""Pandas data types support +Extension type: data + +DataType, Reader and Writer implementations for `pd.DataFrame` and `pd.Series` +ImportHook implementation for files saved with pandas +""" import os.path import posixpath import re @@ -114,16 +120,15 @@ class Config: class _PandasDataType( LibRequirementsMixin, DataType, DataHook, DataSerializer, ABC ): - """Intermidiate class for pandas DataType implementations - - :param columns: list of column names (including index) - :param dtypes: list of string representations of pandas dtypes of columns - :param index_cols: list of column names that are used as index""" + """Intermidiate class for pandas DataType implementations""" libraries: ClassVar = [pd] columns: List[str] + """Column names""" dtypes: List[str] + """Column types""" index_cols: List[str] + """Column names that should be in index""" @classmethod def process(cls, obj: Any, **kwargs) -> "_PandasDataType": @@ -562,6 +567,7 @@ def get_pandas_batch_formats(batch_size: int): class _PandasIO(BaseModel): format: str + """name of pandas-supported format""" @validator("format") def is_valid_format( # pylint: disable=no-self-argument @@ -671,6 +677,8 @@ def write( class PandasImport(ExtImportHook, LoadAndAnalyzeImportHook): + """Import files as pd.DataFrame""" + EXTS: ClassVar = tuple(f".{k}" for k in PANDAS_FORMATS) type: ClassVar = "pandas" force_type: ClassVar = MlemData diff --git a/mlem/contrib/pip/__init__.py b/mlem/contrib/pip/__init__.py index e69de29b..b5d60c10 100644 --- a/mlem/contrib/pip/__init__.py +++ b/mlem/contrib/pip/__init__.py @@ -0,0 +1,6 @@ +"""Python Package builds support +Extension type: build + +Contains two Builder implementations: `pip` to create a directory with +Python Package from model and `whl` to create a wheel file with Python Package +""" diff --git 
a/mlem/contrib/pip/base.py b/mlem/contrib/pip/base.py index 7fcf2110..c75dd10f 100644 --- a/mlem/contrib/pip/base.py +++ b/mlem/contrib/pip/base.py @@ -4,7 +4,7 @@ import posixpath import subprocess import tempfile -from typing import ClassVar, Dict, List, Optional +from typing import Any, ClassVar, Dict, List, Optional from fsspec import AbstractFileSystem from fsspec.implementations.local import LocalFileSystem @@ -26,19 +26,27 @@ class SetupTemplate(TemplateModel): TEMPLATE_DIR: ClassVar = os.path.dirname(__file__) package_name: str + """Name of python package""" python_version: Optional[str] = None + """Required python version""" short_description: str = "" + """short_description""" url: str = "" + """url""" email: str = "" + """author's email""" author: str = "" + """author's name""" version: str = "0.0.0" - additional_setup_kwargs: Dict = {} + """package version""" + additional_setup_kwargs: Dict[str, Any] = {} + """additional parameters for setup()""" @validator("python_version") def validate_python_version( # pylint: disable=no-self-argument cls, value # noqa: B902 ): - return f"=={value}" if value[0] in "0123456789" else value + return f"=={value}" if value and value[0] in "0123456789" else value class SourceTemplate(TemplateModel): @@ -46,6 +54,7 @@ class SourceTemplate(TemplateModel): TEMPLATE_DIR: ClassVar = os.path.dirname(__file__) methods: List[str] + """list of methods""" class PipMixin(SetupTemplate): @@ -65,9 +74,7 @@ def make_distr(self, obj: MlemModel, root: str, fs: AbstractFileSystem): posixpath.join(path, "__init__.py"), fs ) with no_echo(): - obj.clone( - posixpath.join(path, "model"), fs, external=True, index=False - ) + obj.clone(posixpath.join(path, "model"), fs) with fs.open(posixpath.join(root, "requirements.txt"), "w") as f: f.write( "\n".join( @@ -86,8 +93,11 @@ def make_distr(self, obj: MlemModel, root: str, fs: AbstractFileSystem): class PipBuilder(MlemBuilder, PipMixin): + """Create a directory python package""" + type: ClassVar = 
"pip" target: str + """Path to save result""" def build(self, obj: MlemModel): fs, root = get_fs(self.target) @@ -95,15 +105,20 @@ def build(self, obj: MlemModel): class WhlBuilder(MlemBuilder, PipMixin): + """Create a wheel with python package""" + type: ClassVar = "whl" target: str + """Path to save result""" def build_whl(self, path, target, target_fs): target_fs.makedirs(target, exist_ok=True) logger.debug("Building whl from %s...", path) with tempfile.TemporaryDirectory() as whl_dir: subprocess.check_output( - f"pip wheel . --no-deps -w {whl_dir}", shell=True, cwd=path + f"pip wheel . --no-deps -w {whl_dir}", + shell=True, # nosec: B602 + cwd=path, ) whl_path = glob.glob(os.path.join(whl_dir, "*.whl"))[0] whl_name = os.path.basename(whl_path) diff --git a/mlem/contrib/rabbitmq.py b/mlem/contrib/rabbitmq.py index 14bea59f..2a90cda3 100644 --- a/mlem/contrib/rabbitmq.py +++ b/mlem/contrib/rabbitmq.py @@ -1,3 +1,8 @@ +"""RabbitMQ serving +Extension type: serving + +RabbitMQServer implementation +""" import json from time import time from typing import Callable, ClassVar, Optional @@ -24,9 +29,13 @@ class RabbitMQMixin(BaseModel): host: str + """Host of RMQ instance""" port: int + """Port of RMQ instance""" exchange: str = "" + """RMQ exchange to use""" queue_prefix: str = "" + """Queue prefix""" channel_cache: Optional[BlockingChannel] = None class Config: @@ -44,6 +53,8 @@ def channel(self): class RabbitMQServer(Server, RabbitMQMixin): + """RMQ server that consumes requests and produces model predictions from/to RMQ instance""" + type: ClassVar = "rmq" def _create_handler( @@ -96,8 +107,11 @@ def serve(self, interface: Interface): class RabbitMQClient(Client, RabbitMQMixin): + """Access models served with rmq server""" + type: ClassVar = "rmq" timeout: float = 0 + """Time to wait for response. 
0 means indefinite""" def _interface_factory(self) -> InterfaceDescriptor: res, _, payload = self.channel.basic_get( diff --git a/mlem/contrib/requirements.py b/mlem/contrib/requirements.py new file mode 100644 index 00000000..89a37fa1 --- /dev/null +++ b/mlem/contrib/requirements.py @@ -0,0 +1,52 @@ +"""Requirements support +Extension type: build + +MlemBuilder implementation for `Requirements` which includes +installable, conda, unix, custom, file etc. based requirements. +""" +import logging +from typing import ClassVar, Optional + +from pydantic import validator + +from mlem.core.base import load_impl_ext +from mlem.core.objects import MlemBuilder, MlemModel +from mlem.core.requirements import Requirement +from mlem.ui import EMOJI_OK, EMOJI_PACK, echo +from mlem.utils.entrypoints import list_implementations + +REQUIREMENTS = "requirements.txt" + +logger = logging.getLogger(__name__) + + +class RequirementsBuilder(MlemBuilder): + """MlemBuilder implementation for building requirements""" + + type: ClassVar = "requirements" + + target: Optional[str] = None + """Target path for requirements""" + req_type: str = "installable" + """Type of requirements, example: unix""" + + @validator("req_type") + def get_req_type(cls, req_type): # pylint: disable=no-self-argument + if req_type not in list_implementations(Requirement): + raise ValueError( + f"req_type {req_type} is not valid. 
Allowed options are: {list_implementations(Requirement)}" + ) + return req_type + + def build(self, obj: MlemModel): + req_type_cls = load_impl_ext(Requirement.abs_name, self.req_type) + assert issubclass(req_type_cls, Requirement) + reqs = obj.requirements.of_type(req_type_cls) + if self.target is None: + reqs_representation = [r.get_repr() for r in reqs] + requirement_string = " ".join(reqs_representation) + print(requirement_string) + else: + echo(EMOJI_PACK + "Materializing requirements...") + req_type_cls.materialize(reqs, self.target) + echo(EMOJI_OK + f"Materialized to {self.target}!") diff --git a/mlem/contrib/sagemaker/__init__.py b/mlem/contrib/sagemaker/__init__.py new file mode 100644 index 00000000..cdd63793 --- /dev/null +++ b/mlem/contrib/sagemaker/__init__.py @@ -0,0 +1,5 @@ +"""Sagemaker Deployments support +Extension type: deployment + +Implements MlemEnv, MlemDeployment and DeployState to work with AWS SageMaker +""" diff --git a/mlem/contrib/sagemaker/build.py b/mlem/contrib/sagemaker/build.py new file mode 100644 index 00000000..468f46f9 --- /dev/null +++ b/mlem/contrib/sagemaker/build.py @@ -0,0 +1,136 @@ +import base64 +import os +from typing import ClassVar, Optional + +import boto3 +import sagemaker +from pydantic import BaseModel + +from ...core.objects import MlemModel +from ...ui import EMOJI_BUILD, EMOJI_KEY, echo, set_offset +from ..docker.base import DockerEnv, DockerImage, RemoteRegistry +from ..docker.helpers import build_model_image + +IMAGE_NAME = "mlem-sagemaker-runner" + + +class AWSVars(BaseModel): + """AWS Configuration""" + + profile: str + """AWS Profile""" + bucket: str + """S3 Bucket""" + region: str + """AWS Region""" + account: str + """AWS Account name""" + role_name: str + """AWS Role name""" + + @property + def role(self): + return f"arn:aws:iam::{self.account}:role/{self.role_name}" + + def get_sagemaker_session(self): + return sagemaker.Session( + self.get_session(), default_bucket=self.bucket + ) + + def 
get_session(self): + return boto3.Session( + profile_name=self.profile, region_name=self.region + ) + + +def ecr_repo_check(region, repository, session: boto3.Session): + client = session.client("ecr", region_name=region) + + repos = client.describe_repositories()["repositories"] + + if repository not in {r["repositoryName"] for r in repos}: + echo(EMOJI_BUILD + f"Creating ECR repository {repository}") + client.create_repository(repositoryName=repository) + + +class ECRegistry(RemoteRegistry): + """ECR registry""" + + class Config: + exclude = {"aws_vars"} + + type: ClassVar = "ecr" + account: str + """AWS Account""" + region: str + """AWS Region""" + + aws_vars: Optional[AWSVars] = None + """AWS Configuration cache""" + + def login(self, client): + auth_data = self.ecr_client.get_authorization_token() + token = auth_data["authorizationData"][0]["authorizationToken"] + user, token = base64.b64decode(token).decode("utf8").split(":") + self._login(self.get_host(), client, user, token) + echo( + EMOJI_KEY + + f"Logged in to remote registry at host {self.get_host()}" + ) + + def get_host(self) -> Optional[str]: + return f"{self.account}.dkr.ecr.{self.region}.amazonaws.com" + + def image_exists(self, client, image: DockerImage): + images = self.ecr_client.list_images(repositoryName=image.name)[ + "imageIds" + ] + return len(images) > 0 + + def delete_image(self, client, image: DockerImage, force=False, **kwargs): + return self.ecr_client.batch_delete_image( + repositoryName=image.name, + imageIds=[{"imageTag": image.tag}], + ) + + def with_aws_vars(self, aws_vars): + self.aws_vars = aws_vars + return self + + @property + def ecr_client(self): + return ( + self.aws_vars.get_session().client("ecr") + if self.aws_vars + else boto3.client("ecr", region_name=self.region) + ) + + +def build_sagemaker_docker( + meta: MlemModel, + method: str, + account: str, + region: str, + image_name: str, + repository: str, + aws_vars: AWSVars, +): + from .runtime import SageMakerServer # 
circular import + + docker_env = DockerEnv( + registry=ECRegistry(account=account, region=region).with_aws_vars( + aws_vars + ) + ) + ecr_repo_check(region, repository, aws_vars.get_session()) + echo(EMOJI_BUILD + "Creating docker image for sagemaker") + with set_offset(2): + return build_model_image( + meta, + name=repository, + tag=image_name, + server=SageMakerServer(method=method), + env=docker_env, + force_overwrite=True, + templates_dir=[os.path.dirname(__file__)], + ) diff --git a/mlem/contrib/sagemaker/config.py b/mlem/contrib/sagemaker/config.py new file mode 100644 index 00000000..cb5c9195 --- /dev/null +++ b/mlem/contrib/sagemaker/config.py @@ -0,0 +1,12 @@ +from typing import Optional + +from mlem.config import MlemConfigBase + + +class AWSConfig(MlemConfigBase): + ROLE: Optional[str] + PROFILE: Optional[str] + + class Config: + section = "aws" + env_prefix = "AWS_" diff --git a/tests/pack/__init__.py b/mlem/contrib/sagemaker/copy.j2 similarity index 100% rename from tests/pack/__init__.py rename to mlem/contrib/sagemaker/copy.j2 diff --git a/mlem/contrib/sagemaker/env_setup.py b/mlem/contrib/sagemaker/env_setup.py new file mode 100644 index 00000000..ab651f28 --- /dev/null +++ b/mlem/contrib/sagemaker/env_setup.py @@ -0,0 +1,96 @@ +import os +import shutil +import subprocess + +from mlem.ui import echo + +MLEM_TF = "mlem_sagemaker.tf" + + +def _tf_command(tf_dir, command, *flags, **args): + args = " ".join(f"-var='{k}={v}'" for k, v in args.items()) + return " ".join( + [ + "terraform", + f"-chdir={tf_dir}", + command, + *flags, + args, + ] + ) + + +def _tf_get_var(tf_dir, varname): + return ( + subprocess.check_output( + _tf_command(tf_dir, "output", varname), shell=True # nosec: B602 + ) + .decode("utf8") + .strip() + .strip('"') + ) + + +def sagemaker_terraform( + user_name: str = "mlem", + role_name: str = "mlem", + region_name: str = "us-east-1", + profile: str = "default", + plan: bool = False, + work_dir: str = ".", + export_secret: str = None, 
+): + if not os.path.exists(work_dir): + os.makedirs(work_dir, exist_ok=True) + + shutil.copy( + os.path.join(os.path.dirname(__file__), MLEM_TF), + os.path.join(work_dir, MLEM_TF), + ) + subprocess.check_output( + _tf_command(work_dir, "init"), + shell=True, # nosec: B602 + ) + + flags = ["-auto-approve"] if not plan else [] + + echo( + subprocess.check_output( + _tf_command( + work_dir, + "plan" if plan else "apply", + *flags, + role_name=role_name, + user_name=user_name, + region_name=region_name, + profile=profile, + ), + shell=True, # nosec: B602 + ) + ) + + if not plan and export_secret: + if os.path.exists(export_secret): + print( + f"Creds already present at {export_secret}, please backup and remove them" + ) + return + key_id = _tf_get_var(work_dir, "access_key_id") + access_secret = _tf_get_var(work_dir, "secret_access_key") + region = _tf_get_var(work_dir, "region_name") + profile = _tf_get_var(work_dir, "aws_user") + print(profile, region) + if export_secret.endswith(".csv"): + secrets = f"""User Name,Access key ID,Secret access key +{profile},{key_id},{access_secret}""" + print( + f"Import new profile:\naws configure import --csv file://{export_secret}\naws configure set region {region} --profile {profile}" + ) + else: + secrets = f"""export AWS_ACCESS_KEY_ID={key_id} +export AWS_SECRET_ACCESS_KEY={access_secret} +export AWS_REGION={region} +""" + print(f"Source envs:\nsource {export_secret}") + with open(export_secret, "w", encoding="utf8") as f: + f.write(secrets) diff --git a/mlem/contrib/sagemaker/meta.py b/mlem/contrib/sagemaker/meta.py new file mode 100644 index 00000000..437d5762 --- /dev/null +++ b/mlem/contrib/sagemaker/meta.py @@ -0,0 +1,401 @@ +import posixpath +from functools import wraps +from typing import Any, ClassVar, Optional, Tuple + +import sagemaker +from pydantic import validator +from sagemaker.deserializers import JSONDeserializer +from sagemaker.serializers import JSONSerializer +from typing_extensions import Protocol + +from 
mlem.contrib.docker.base import DockerDaemon, DockerImage +from mlem.contrib.sagemaker.build import ( + AWSVars, + ECRegistry, + build_sagemaker_docker, +) +from mlem.contrib.sagemaker.runtime import SagemakerClient +from mlem.contrib.sagemaker.utils import ( + MODEL_TAR_FILENAME, + _create_model_arch_and_upload_to_s3, + delete_model_file_from_s3, + generate_model_file_name, + init_aws_vars, +) +from mlem.core.errors import WrongMethodError +from mlem.core.model import Signature +from mlem.core.objects import ( + DeployState, + DeployStatus, + MlemDeployment, + MlemEnv, + MlemModel, +) +from mlem.ui import EMOJI_BUILD, EMOJI_UPLOAD, echo + +DEFAULT_ECR_REPOSITORY = "mlem" + + +ENDPOINT_STATUS_MAPPING = { + "Creating": DeployStatus.STARTING, + "Failed": DeployStatus.CRASHED, + "InService": DeployStatus.RUNNING, + "OutOfService": DeployStatus.STOPPED, + "Updating": DeployStatus.STARTING, + "SystemUpdating": DeployStatus.STARTING, + "RollingBack": DeployStatus.STARTING, + "Deleting": DeployStatus.STOPPED, +} + + +class SagemakerDeployState(DeployState): + """State of SageMaker deployment""" + + type: ClassVar = "sagemaker" + + image: Optional[DockerImage] = None + """Built image""" + image_tag: Optional[str] = None + """Built image tag""" + model_location: Optional[str] = None + """Location of uploaded model""" + endpoint_name: Optional[str] = None + """Name of SageMaker endpoint""" + endpoint_model_hash: Optional[str] = None + """Hash of deployed model""" + method_signature: Optional[Signature] = None + """Signature of deployed method""" + region: Optional[str] = None + """AWS Region""" + previous: Optional["SagemakerDeployState"] = None + """Previous state""" + + @property + def image_uri(self): + if self.image is None: + if self.image_tag is None: + raise ValueError( + "Cannot get image_uri: image not built or not specified prebuilt image uri" + ) + return self.image_tag + return self.image.uri + + def get_predictor(self, session: sagemaker.Session): + predictor = 
sagemaker.Predictor( + endpoint_name=self.endpoint_name, + sagemaker_session=session, + serializer=JSONSerializer(), + deserializer=JSONDeserializer(), + ) + return predictor + + +class SagemakerEnv(MlemEnv): + """SageMaker environment""" + + type: ClassVar = "sagemaker" + # deploy_type: ClassVar = SagemakerDeployment + + role: Optional[str] = None + """Default role""" + account: Optional[str] = None + """Default account""" + region: Optional[str] = None + """Default region""" + bucket: Optional[str] = None + """Default bucket""" + profile: Optional[str] = None + """Default profile""" + ecr_repository: Optional[str] = None + """Default ECR repository""" + + @property + def role_name(self): + return f"arn:aws:iam::{self.account}:role/{self.role}" + + def get_session(self, region: str = None) -> sagemaker.Session: + return self.get_session_and_aws_vars(region)[0] + + def get_session_and_aws_vars( + self, region: str = None + ) -> Tuple[sagemaker.Session, AWSVars]: + return init_aws_vars( + self.profile, + self.role, + self.bucket, + region or self.region, + self.account, + ) + + +class DeploymentStepMethod(Protocol): + def __call__(self, state: DeployState, *args, **kwargs) -> Any: + ... + + +def updates_state(f) -> DeploymentStepMethod: + @wraps(f) + def inner( + self: MlemDeployment, state: SagemakerDeployState, *args, **kwargs + ): + res = f(self, state, *args, **kwargs) + self.update_state(state) + return res + + return inner # type: ignore[return-value] + + +class SagemakerDeployment(MlemDeployment[SagemakerDeployState, SagemakerEnv]): + """SageMaker Deployment""" + + type: ClassVar = "sagemaker" + state_type: ClassVar = SagemakerDeployState + env_type: ClassVar = SagemakerEnv + method: str = "predict" + """Model method to be deployed""" + image_tag: Optional[str] = None + """Name of the docker image to use""" + use_prebuilt: bool = False + """Use pre-built docker image. 
If True, image_name should be set""" + model_arch_location: Optional[str] = None + """Path on s3 to store model archive (excluding bucket)""" + model_name: Optional[str] + """Name for SageMaker Model""" + endpoint_name: Optional[str] = None + """Name for SageMaker Endpoint""" + initial_instance_count: int = 1 + """Initial instance count for Endpoint""" + instance_type: str = "ml.t2.medium" + """Instance type for Endpoint""" + accelerator_type: Optional[str] = None + "The size of the Elastic Inference (EI) instance to use" + + @validator("use_prebuilt") + def ensure_image_name( # pylint: disable=no-self-argument + cls, value, values # noqa: B902 + ): + if value and "image_name" not in values: + raise ValueError( + "image_name should be set if use_prebuilt is true" + ) + return value + + def _get_client(self, state: "SagemakerDeployState"): + return SagemakerClient( + endpoint_name=state.endpoint_name, + aws_vars=self.get_env().get_session_and_aws_vars( + region=state.region + )[1], + signature=state.method_signature, + ) + + @updates_state + def _upload_model_file( + self, + state: SagemakerDeployState, + model: MlemModel, + aws_vars: AWSVars, + session: sagemaker.Session, + ): + assert state.previous is not None # TODO + echo( + EMOJI_UPLOAD + + f"Uploading model distribution to {aws_vars.bucket}..." 
+ ) + if state.model_location is not None: + state.previous.model_location = state.model_location + state.model_location = _create_model_arch_and_upload_to_s3( + session, + model, + aws_vars.bucket, + self.model_arch_location + or generate_model_file_name(model.meta_hash()), + ) + state.update_model(model) + + @updates_state + def _update_model( + self, + state: SagemakerDeployState, + aws_vars: AWSVars, + session: sagemaker.Session, + ): + assert state.model_location is not None # TODO + sm_model = sagemaker.Model( + image_uri=state.image_uri, + model_data=posixpath.join( + state.model_location, MODEL_TAR_FILENAME + ), + name=self.model_name, + role=aws_vars.role, + sagemaker_session=session, + ) + sm_model.create( + instance_type=self.instance_type, + accelerator_type=self.accelerator_type, + ) + prev_endpoint_conf = session.sagemaker_client.describe_endpoint( + EndpointName=state.endpoint_name + )["EndpointConfigName"] + prev_model_name = session.sagemaker_client.describe_endpoint_config( + EndpointConfigName=prev_endpoint_conf + )["ProductionVariants"][0]["ModelName"] + + predictor = state.get_predictor(session) + predictor.update_endpoint( + model_name=sm_model.name, + initial_instance_count=self.initial_instance_count, + instance_type=self.instance_type, + accelerator_type=self.accelerator_type, + wait=True, + ) + session.sagemaker_client.delete_model(ModelName=prev_model_name) + prev = state.previous + if prev is not None: + if prev.image is not None: + self._delete_image(prev, aws_vars) + if prev.model_location is not None: + delete_model_file_from_s3(session, prev.model_location) + prev.model_location = None + session.sagemaker_client.delete_endpoint_config( + EndpointConfigName=prev_endpoint_conf + ) + state.endpoint_model_hash = state.model_hash + + @updates_state + def _build_image( + self, + state: SagemakerDeployState, + model: MlemModel, + aws_vars: AWSVars, + ecr_repository: str, + ): + assert state.previous is not None # TODO + try: + 
state.method_signature = model.model_type.methods[self.method] + except KeyError as e: + raise WrongMethodError( + f"Wrong method {self.method} for model {model.name}" + ) from e + image_tag = self.image_tag or model.meta_hash() + if state.image_tag is not None: + state.previous.image_tag = state.image_tag + state.previous.image = state.image + state.image = build_sagemaker_docker( + model, + self.method, + aws_vars.account, + aws_vars.region, + image_tag, + ecr_repository or DEFAULT_ECR_REPOSITORY, + aws_vars, + ) + state.image_tag = image_tag + + @updates_state + def _deploy_model( + self, + state: SagemakerDeployState, + aws_vars: AWSVars, + session: sagemaker.Session, + ): + assert state.model_location is not None # TODO + sm_model = sagemaker.Model( + image_uri=state.image_uri, + model_data=posixpath.join( + state.model_location, MODEL_TAR_FILENAME + ), + name=self.model_name, + role=aws_vars.role, + sagemaker_session=session, + ) + echo( + EMOJI_BUILD + + f"Starting up sagemaker {self.initial_instance_count} `{self.instance_type}` instance(s)..." 
+ ) + sm_model.deploy( + initial_instance_count=self.initial_instance_count, + instance_type=self.instance_type, + accelerator_type=self.accelerator_type, + endpoint_name=self.endpoint_name, + wait=False, + ) + state.endpoint_name = sm_model.endpoint_name + state.endpoint_model_hash = state.model_hash + + def deploy(self, model: MlemModel): + with self.lock_state(): + state: SagemakerDeployState = self.get_state() + redeploy = self.model_changed(model) + state.previous = state.previous or SagemakerDeployState() + + session, aws_vars = self.get_env().get_session_and_aws_vars( + state.region + ) + if state.region is None: + state.region = aws_vars.region + self.update_state(state) + + if not self.use_prebuilt and (state.image_tag is None or redeploy): + self._build_image( + state, model, aws_vars, self.get_env().ecr_repository + ) + + if state.model_location is None or redeploy: + self._upload_model_file(state, model, aws_vars, session) + + if ( + state.endpoint_name is None + or redeploy + or state.endpoint_model_hash is not None + and state.endpoint_model_hash != state.model_hash + ): + if state.endpoint_name is None: + self._deploy_model(state, aws_vars, session) + else: + self._update_model(state, aws_vars, session) + + @updates_state + def _delete_image(self, state: SagemakerDeployState, aws_vars: AWSVars): + assert state.image is not None # TODO + with DockerDaemon(host="").client() as client: + if isinstance(state.image.registry, ECRegistry): + state.image.registry.with_aws_vars(aws_vars) + state.image.delete(client) + state.image = None + + def remove(self): + with self.lock_state(): + state: SagemakerDeployState = self.get_state() + session, aws_vars = self.get_env().get_session_and_aws_vars( + state.region + ) + if state.model_location is not None: + delete_model_file_from_s3(session, state.model_location) + if state.endpoint_name is not None: + + client = session.sagemaker_client + endpoint_conf = session.sagemaker_client.describe_endpoint( + 
EndpointName=state.endpoint_name + )["EndpointConfigName"] + + model_name = client.describe_endpoint_config( + EndpointConfigName=endpoint_conf + )["ProductionVariants"][0]["ModelName"] + client.delete_model(ModelName=model_name) + client.delete_endpoint(EndpointName=state.endpoint_name) + client.delete_endpoint_config(EndpointConfigName=endpoint_conf) + if state.image is not None: + self._delete_image(state, aws_vars) + self.purge_state() + + def get_status(self, raise_on_error=True) -> "DeployStatus": + with self.lock_state(): + state: SagemakerDeployState = self.get_state() + session = self.get_env().get_session(state.region) + + endpoint = session.sagemaker_client.describe_endpoint( + EndpointName=state.endpoint_name + ) + status = endpoint["EndpointStatus"] + return ENDPOINT_STATUS_MAPPING.get(status, DeployStatus.UNKNOWN) diff --git a/mlem/contrib/sagemaker/mlem_sagemaker.tf b/mlem/contrib/sagemaker/mlem_sagemaker.tf new file mode 100644 index 00000000..ffbb5a5d --- /dev/null +++ b/mlem/contrib/sagemaker/mlem_sagemaker.tf @@ -0,0 +1,82 @@ +variable "profile" { + description = "AWS Profile to use for API calls" + type = string + default = "default" +} + +variable "role_name" { + description = "AWS role name" + type = string + default = "mlem" +} + +variable "user_name" { + description = "AWS user name" + type = string + default = "mlem" +} + +variable "region_name" { + description = "AWS region name" + type = string + default = "us-east-1" +} + +provider "aws" { + region = var.region_name + profile = var.profile +} + +resource "aws_iam_user" "aws_user" { + name = var.user_name +} + +resource "aws_iam_access_key" "aws_user" { + user = aws_iam_user.aws_user.name +} + +resource "aws_iam_user_policy_attachment" "sagemaker_policy" { + user = aws_iam_user.aws_user.name + policy_arn = "arn:aws:iam::aws:policy/AmazonSageMakerFullAccess" +} + +resource "aws_iam_user_policy_attachment" "ecr_policy" { + user = aws_iam_user.aws_user.name + policy_arn = 
"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess" +} + +resource "aws_iam_role" "aws_role" { + name = var.role_name + description = "MLEM SageMaker Role" + assume_role_policy = < /usr/local/bin/serve && chmod +x /usr/local/bin/serve +ENTRYPOINT ["bash", "-c"] diff --git a/mlem/contrib/sagemaker/runtime.py b/mlem/contrib/sagemaker/runtime.py new file mode 100644 index 00000000..c43baeeb --- /dev/null +++ b/mlem/contrib/sagemaker/runtime.py @@ -0,0 +1,103 @@ +import logging +from types import ModuleType +from typing import ClassVar, Dict, List + +import boto3 +import fastapi +import sagemaker +import uvicorn +from sagemaker.deserializers import JSONDeserializer +from sagemaker.serializers import JSONSerializer + +from mlem.config import MlemConfigBase, project_config +from mlem.contrib.fastapi import FastAPIServer +from mlem.contrib.sagemaker.build import AWSVars +from mlem.core.model import Signature +from mlem.runtime import Interface +from mlem.runtime.client import Client +from mlem.runtime.interface import InterfaceDescriptor + +logger = logging.getLogger(__name__) + + +class SageMakerServerConfig(MlemConfigBase): + HOST: str = "0.0.0.0" + PORT: int = 8080 + METHOD: str = "predict" + + class Config: + section = "sagemaker" + + +local_config = project_config("", section=SageMakerServerConfig) + + +def ping(): + return "OK" + + +class SageMakerServer(FastAPIServer): + """Server to use inside SageMaker containers""" + + type: ClassVar = "_sagemaker" + libraries: ClassVar[List[ModuleType]] = [ + uvicorn, + fastapi, + sagemaker, + boto3, + ] + method: str = local_config.METHOD + """Method to expose""" + port: int = local_config.PORT + """Port to use""" + host: str = local_config.HOST + """Host to use""" + + def app_init(self, interface: Interface): + app = super().app_init(interface) + + handler, response_model = self._create_handler( + "invocations", + interface.get_method_signature(self.method), + interface.get_method_executor(self.method), + ) + 
app.add_api_route( + "/invocations", + handler, + methods=["POST"], + response_model=response_model, + ) + app.add_api_route("/ping", ping, methods=["GET"]) + return app + + def get_env_vars(self) -> Dict[str, str]: + return {"SAGEMAKER_METHOD": self.method} + + +class SagemakerClient(Client): + """Client to make SageMaker requests""" + + type: ClassVar = "sagemaker" + + endpoint_name: str + """Name of SageMaker Endpoint""" + aws_vars: AWSVars + """AWS Configuration""" + signature: Signature + """Signature of deployed method""" + + def _interface_factory(self) -> InterfaceDescriptor: + return InterfaceDescriptor(methods={"predict": self.signature}) + + def get_predictor(self): + sess = self.aws_vars.get_sagemaker_session() + predictor = sagemaker.Predictor( + endpoint_name=self.endpoint_name, + sagemaker_session=sess, + serializer=JSONSerializer(), + deserializer=JSONDeserializer(), + ) + return predictor + + def _call_method(self, name, args): + return self.get_predictor().predict(args) diff --git a/mlem/contrib/sagemaker/utils.py b/mlem/contrib/sagemaker/utils.py new file mode 100644 index 00000000..f6664fd2 --- /dev/null +++ b/mlem/contrib/sagemaker/utils.py @@ -0,0 +1,79 @@ +import os +import posixpath +import tarfile +import tempfile + +import boto3 +import sagemaker + +from mlem.config import project_config +from mlem.contrib.sagemaker.build import AWSVars +from mlem.contrib.sagemaker.config import AWSConfig +from mlem.core.objects import MlemModel + +MODEL_TAR_FILENAME = "model.tar.gz" + + +def delete_model_file_from_s3(session: sagemaker.Session, model_path: str): + s3_client = session.boto_session.client("s3") + if model_path.startswith("s3://"): + model_path = model_path[len("s3://") :] + bucket, *paths = model_path.split("/") + model_path = posixpath.join(*paths, MODEL_TAR_FILENAME) + s3_client.delete_object(Bucket=bucket, Key=model_path) + + +def init_aws_vars( + profile=None, role=None, bucket=None, region=None, account=None +): + boto_session = 
boto3.Session(profile_name=profile, region_name=region) + sess = sagemaker.Session(boto_session, default_bucket=bucket) + + bucket = ( + bucket or sess.default_bucket() + ) # Replace with your own bucket name if needed + region = region or boto_session.region_name + config = project_config(project="", section=AWSConfig) + role = role or config.ROLE or sagemaker.get_execution_role(sess) + account = account or boto_session.client("sts").get_caller_identity().get( + "Account" + ) + return sess, AWSVars( + bucket=bucket, + region=region, + account=account, + role_name=role, + profile=profile or config.PROFILE, + ) + + +def _create_model_arch_and_upload_to_s3( + session: sagemaker.Session, + model: MlemModel, + bucket: str, + model_arch_location: str, +) -> str: + with tempfile.TemporaryDirectory() as dirname: + model.clone(os.path.join(dirname, "model", "model")) + arch_path = os.path.join(dirname, "arch", MODEL_TAR_FILENAME) + os.makedirs(os.path.dirname(arch_path)) + with tarfile.open(arch_path, "w:gz") as tar: + path = os.path.join(dirname, "model") + for file in os.listdir(path): + tar.add(os.path.join(path, file), arcname=file) + + model_location = session.upload_data( + os.path.dirname(arch_path), + bucket=bucket, + key_prefix=posixpath.join(model_arch_location, model.meta_hash()), + ) + + return model_location + + +def generate_image_name(deploy_id): + return f"mlem-sagemaker-image-{deploy_id}" + + +def generate_model_file_name(deploy_id): + return f"mlem-model-{deploy_id}" diff --git a/mlem/contrib/sklearn.py b/mlem/contrib/sklearn.py index e4f29c90..66eae66b 100644 --- a/mlem/contrib/sklearn.py +++ b/mlem/contrib/sklearn.py @@ -1,3 +1,8 @@ +"""Scikit-Learn models support +Extension type: model + +ModelType implementations for any sklearn-compatible classes as well as `Pipeline` +""" from typing import Any, ClassVar, List, Optional, Union import sklearn @@ -22,14 +27,14 @@ class SklearnModel(ModelType, ModelHook, IsInstanceHookMixin): - """ - 
:class:`mlem.core.model.ModelType implementation for `scikit-learn` models - """ + """ModelType implementation for `scikit-learn` models""" type: ClassVar[str] = "sklearn" - io: ModelIO = SimplePickleIO() valid_types: ClassVar = (RegressorMixin, ClassifierMixin) + io: ModelIO = SimplePickleIO() + """IO""" + @classmethod def process( cls, obj: Any, sample_data: Optional[Any] = None, **kwargs @@ -85,6 +90,8 @@ def get_requirements(self) -> Requirements: class SklearnPipelineType(SklearnModel): + """ModelType implementation for `scikit-learn` pipelines""" + valid_types: ClassVar = (Pipeline,) type: ClassVar = "sklearn_pipeline" diff --git a/mlem/contrib/tensorflow.py b/mlem/contrib/tensorflow.py index d8135040..813e1398 100644 --- a/mlem/contrib/tensorflow.py +++ b/mlem/contrib/tensorflow.py @@ -1,3 +1,9 @@ +"""Tensorflow models support +Extension type: model + +ModelType and ModelIO implementations for `tf.keras.Model` +DataType, Reader and Writer implementations for `tf.Tensor` +""" import posixpath import tempfile from typing import Any, ClassVar, Iterator, List, Optional, Tuple @@ -39,17 +45,15 @@ class TFTensorDataType( DataType, DataSerializer, DataHook, IsInstanceHookMixin ): """ - :class:`.DataType` implementation for `tensorflow.Tensor` objects - which converts them to built-in Python lists and vice versa. - - :param shape: shape of `tensorflow.Tensor` objects in data - :param dtype: data type of `tensorflow.Tensor` objects in data + DataType implementation for `tensorflow.Tensor` """ type: ClassVar[str] = "tf_tensor" valid_types: ClassVar = (tf.Tensor,) shape: Tuple[Optional[int], ...] 
+ """Shape of `tensorflow.Tensor` objects in data""" dtype: str + """Data type of `tensorflow.Tensor` objects in data""" @property def tf_type(self): @@ -117,6 +121,8 @@ def process(cls, obj: tf.Tensor, **kwargs) -> DataType: class TFTensorWriter(DataWriter): + """Write tensorflow tensors to np format""" + type: ClassVar[str] = "tf_tensor" def write( @@ -128,6 +134,8 @@ def write( class TFTensorReader(DataReader): + """Read tensorflow tensors from np format""" + type: ClassVar[str] = "tf_tensor" def read(self, artifacts: Artifacts) -> DataType: @@ -157,11 +165,12 @@ def is_custom_net(model): class TFKerasModelIO(BufferModelIO): """ - :class:`.ModelIO` implementation for Tensorflow Keras models (:class:`tensorflow.keras.Model` objects) + IO for Tensorflow Keras models (:class:`tensorflow.keras.Model` objects) """ type: ClassVar[str] = "tf_keras" save_format: Optional[str] = None + """`tf` for custom net classes and `h5` otherwise""" def save_model(self, model: tf.keras.Model, path: str): if self.save_format is None: @@ -198,6 +207,7 @@ class TFKerasModel(ModelType, ModelHook, IsInstanceHookMixin): type: ClassVar[str] = "tf_keras" valid_types: ClassVar = (tf.keras.Model,) io: ModelIO = TFKerasModelIO() + """IO""" @classmethod def process( diff --git a/mlem/contrib/torch.py b/mlem/contrib/torch.py index 57c24993..897cbcb3 100644 --- a/mlem/contrib/torch.py +++ b/mlem/contrib/torch.py @@ -1,3 +1,10 @@ +"""Torch models support +Extension type: model + +ModelType and ModelIO implementations for `torch.nn.Module` +ImportHook for importing files saved with `torch.save` +DataType, Reader and Writer implementations for `torch.Tensor` +""" from typing import Any, ClassVar, Iterator, List, Optional, Tuple import cloudpickle @@ -31,18 +38,14 @@ def python_type_from_torch_string_repr(dtype: str): class TorchTensorDataType( DataType, DataSerializer, DataHook, IsInstanceHookMixin ): - """ - :class:`.DataType` implementation for `torch.Tensor` objects - which converts them to 
built-in Python lists and vice versa. - - :param shape: shape of `torch.Tensor` objects in data - :param dtype: data type of `torch.Tensor` objects in data - """ + """DataType implementation for `torch.Tensor`""" type: ClassVar[str] = "torch" valid_types: ClassVar = (torch.Tensor,) shape: Tuple[Optional[int], ...] + """Shape of `torch.Tensor` object""" dtype: str + """Type name of `torch.Tensor` elements""" def _check_shape(self, tensor, exc_type): if tuple(tensor.shape)[1:] != self.shape[1:]: @@ -103,6 +106,8 @@ def process(cls, obj: torch.Tensor, **kwargs) -> DataType: class TorchTensorWriter(DataWriter): + """Write torch tensors""" + type: ClassVar[str] = "torch" def write( @@ -114,6 +119,8 @@ def write( class TorchTensorReader(DataReader): + """Read torch tensors""" + type: ClassVar[str] = "torch" def read(self, artifacts: Artifacts) -> DataType: @@ -132,12 +139,11 @@ def read_batch( class TorchModelIO(ModelIO): - """ - :class:`.ModelIO` implementation for PyTorch models - """ + """IO for PyTorch models""" type: ClassVar[str] = "torch_io" is_jit: bool = False + """Is model jit compiled""" def dump(self, storage: Storage, path, model) -> Artifacts: self.is_jit = isinstance(model, torch.jit.ScriptModule) @@ -165,6 +171,7 @@ class TorchModel(ModelType, ModelHook, IsInstanceHookMixin): type: ClassVar[str] = "torch" valid_types: ClassVar = (torch.nn.Module,) io: ModelIO = TorchModelIO() + """TorchModelIO""" @classmethod def process( @@ -197,6 +204,8 @@ def get_requirements(self) -> Requirements: class TorchModelImport(LoadAndAnalyzeImportHook): + """Import torch models saved with `torch.save`""" + type: ClassVar = "torch" force_type: ClassVar = MlemModel diff --git a/mlem/contrib/venv.py b/mlem/contrib/venv.py new file mode 100644 index 00000000..05136d34 --- /dev/null +++ b/mlem/contrib/venv.py @@ -0,0 +1,203 @@ +"""Virtual Environments support +Extension type: build + +MlemBuilder implementations for `Environments` which includes +conda based and venv based 
def get_python_exe_in_virtual_env(env_dir: str, use_conda_env: bool = False):
    """Return the path of the Python interpreter inside an environment directory.

    Args:
        env_dir: root directory of the environment.
        use_conda_env: when True, use the conda layout instead of the venv
            layout. Only affects the result on Windows.

    Returns:
        ``<env_dir>/bin/python`` on non-Windows systems. On Windows,
        ``<env_dir>\\python.exe`` for conda environments and
        ``<env_dir>\\Scripts\\python.exe`` otherwise.
    """
    if platform.system() != "Windows":
        return os.path.join(env_dir, "bin", "python")
    # On Windows the interpreter location depends on the environment flavour.
    relative_parts = (
        ("python.exe",) if use_conda_env else ("Scripts", "python.exe")
    )
    return os.path.join(env_dir, *relative_parts)
class VenvBuilder(EnvBuilder):
    """MlemBuilder implementation for building virtual environments"""

    type: ClassVar = "venv"

    no_cache: bool = False
    """Disable cache"""
    current_env: bool = False
    """Whether to install in the current virtual env, must be active"""

    def create_virtual_env(self):
        """Create a virtual environment with pip at the absolute path of ``self.target``."""
        env_dir = os.path.abspath(self.target)
        venv.create(env_dir, with_pip=True)

    def get_installed_packages(self, env_dir):
        """Return the output of ``pip freeze`` run with the interpreter found in ``env_dir``."""
        env_exe = get_python_exe_in_virtual_env(env_dir)
        return run_in_subprocess(
            [env_exe, "-m", "pip", "freeze"],
            error_msg="Error running pip",
            check_output=True,
        )

    def build(self, obj: MlemModel):
        """Install the pip requirements of ``obj`` into a virtual environment.

        With ``current_env`` set, the already active virtual environment
        (``sys.prefix``) is used; otherwise a new one is created at
        ``self.target``.

        Args:
            obj: model whose ``requirements.to_pip()`` are installed.

        Returns:
            Path of the environment the packages were installed into.

        Raises:
            MlemError: if ``current_env`` is set but no virtual environment is
                active, if ``target`` is missing when a new environment has to
                be created, or if pip fails.
        """
        if self.current_env:
            if (
                os.getenv("VIRTUAL_ENV") is None
                or sys.prefix == sys.base_prefix
            ):
                raise MlemError("No virtual environment detected.")
            echo(EMOJI_PACK + f"Detected the virtual env {sys.prefix}")
            env_dir = sys.prefix
        else:
            # Explicit check instead of `assert`, which is stripped under -O.
            if self.target is None:
                raise MlemError(
                    "Target path is required to create a virtual environment."
                )
            echo(EMOJI_PACK + f"Creating virtual env {self.target}...")
            self.create_virtual_env()
            env_dir = os.path.abspath(self.target)
            # NOTE(review): presumably exported so later steps in this process
            # treat the new environment as the active one - confirm.
            os.environ["VIRTUAL_ENV"] = env_dir

        env_exe = get_python_exe_in_virtual_env(env_dir)
        echo(EMOJI_PACK + "Installing the required packages...")
        # Based on recommendation given in https://pip.pypa.io/en/latest/user_guide/#using-pip-from-your-program
        install_cmd = [env_exe, "-m", "pip", "install"]
        if self.no_cache:
            install_cmd.append("--no-cache-dir")
        install_cmd.extend(obj.requirements.to_pip())
        run_in_subprocess(install_cmd, error_msg="Error running pip")

        if self.current_env:
            # Packages went into the active environment: `self.target` was
            # never created, so do not point the user at it.
            echo(EMOJI_OK + f"virtual environment `{env_dir}` is ready")
            return env_dir

        if platform.system() == "Windows":
            activate_cmd = f"`{self.target}\\Scripts\\activate`"
        else:
            activate_cmd = f"`source {self.target}/bin/activate`"
        echo(
            EMOJI_OK
            + f"virtual environment `{self.target}` is ready, activate with {activate_cmd}"
        )
        return env_dir
python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}" + """The python version to use""" + current_env: Optional[bool] = False + """Whether to install in the current conda env""" + conda_reqs: List[CondaPackageRequirement] = [] + """List of conda package requirements""" + + def create_virtual_env(self): + env_dir = os.path.abspath(self.target) + create_cmd = ["--prefix", env_dir, f"python={self.python_version}"] + run_in_subprocess( + ["conda", "create", "-y", *create_cmd], + error_msg="Error running conda", + ) + + def get_installed_packages(self, env_dir): + return run_in_subprocess( + ["conda", "list", "--prefix", env_dir], + error_msg="Error running conda", + check_output=True, + ) + + def build(self, obj: MlemModel): + pip_based_packages = obj.requirements.to_pip() + conda_based_packages = [r.get_repr() for r in self.conda_reqs] + + if self.current_env: + conda_default_env = os.getenv("CONDA_DEFAULT_ENV", None) + if conda_default_env == "base" or conda_default_env is None: + raise MlemError("No conda environment detected.") + echo(EMOJI_PACK + f"Detected the conda env {sys.prefix}") + env_dir = sys.prefix + env_exe = sys.executable + else: + assert self.target is not None + self.create_virtual_env() + env_dir = os.path.abspath(self.target) + env_exe = get_python_exe_in_virtual_env( + env_dir, use_conda_env=True + ) + if conda_based_packages: + run_in_subprocess( + [ + "conda", + "install", + "--prefix", + env_dir, + "-y", + *conda_based_packages, + ], + error_msg="Error running conda", + ) + + # install pip packages in conda env + if pip_based_packages: + run_in_subprocess( + [env_exe, "-m", "pip", "install", *pip_based_packages], + error_msg="Error running pip", + ) + + return env_dir diff --git a/mlem/contrib/xgboost.py b/mlem/contrib/xgboost.py index c7db3fe8..571d9622 100644 --- a/mlem/contrib/xgboost.py +++ b/mlem/contrib/xgboost.py @@ -1,3 +1,9 @@ +"""XGBoost models support +Extension type: model + +ModelType and ModelIO 
implementations for `xgboost.Booster` as well as +DataType, Reader and Writer implementations for `xgboost.DMatrix` +""" import os import posixpath import tempfile @@ -41,19 +47,18 @@ class DMatrixDataType( IsInstanceHookMixin, ): """ - :class:`~.DataType` implementation for xgboost.DMatrix type - - :param is_from_list: whether DMatrix can be constructed from list - :param feature_type_names: string representation of feature types - :param feature_names: list of feature names + DataType implementation for xgboost.DMatrix type """ type: ClassVar[str] = "xgboost_dmatrix" valid_types: ClassVar = (xgboost.DMatrix,) is_from_list: bool + """Whether DMatrix can be constructed from list""" feature_type_names: Optional[List[str]] + """String representation of feature types""" feature_names: Optional[List[str]] = None + """List of feature names""" @property def feature_types(self): @@ -118,7 +123,8 @@ class XGBoostModelIO(ModelIO): """ type: ClassVar[str] = "xgboost_io" - model_file_name = "model.xgb" + model_file_name: str = "model.xgb" + """Filename to use""" def dump( self, storage: Storage, path, model: xgboost.Booster diff --git a/mlem/core/artifacts.py b/mlem/core/artifacts.py index 558da83a..7a6a811e 100644 --- a/mlem/core/artifacts.py +++ b/mlem/core/artifacts.py @@ -38,8 +38,11 @@ class Config: abs_name: ClassVar = "artifact" uri: str + """location""" size: int + """size in bytes""" hash: str + """md5 hash""" @overload def materialize( @@ -101,6 +104,7 @@ class FSSpecArtifact(Artifact): type: ClassVar = "fsspec" uri: str + """Path to file""" def _download(self, target_path: str) -> "LocalArtifact": fs, path = get_fs(self.uri) @@ -135,7 +139,9 @@ class PlaceholderArtifact(Artifact): """On dumping this artifact will be replaced with actual artifact that is relative to project root (if there is a project)""" + type: ClassVar = "_placeholder" location: Location + """Location of artifact""" def relative(self, fs: AbstractFileSystem, path: str) -> "Artifact": raise 
def md5_fileobj(fobj):
    """Return the hex MD5 digest of a file object's remaining content.

    The object is read in pieces of ``CHUNK_SIZE`` until ``read`` returns
    ``b""``, so it has to be opened in binary mode.
    """
    # `nosec` silences bandit's weak-hash warning (B324) for this call.
    digest = hashlib.md5()  # nosec: B324
    while True:
        block = fobj.read(CHUNK_SIZE)
        if block == b"":
            break
        digest.update(block)
    return digest.hexdigest()
def smart_split(value: str, char: str, maxsplit: int = None):
    """Split ``value`` on ``char`` while honouring shell-style quoting.

    The work is delegated to ``shlex.split``, which only splits on
    whitespace. For any other separator, spaces are first swapped for a
    placeholder and the separator for a space; both swaps are undone on each
    resulting part. Separators inside quotes therefore stay in place.

    Args:
        value: string to split.
        char: separator character.
        maxsplit: if given, only the first ``maxsplit`` parts are kept as is
            and the remaining parts are re-joined with ``char`` into one
            final element (an empty string when nothing remains).

    Returns:
        List of parts.
    """
    placeholder = "\0"

    def _restore(part: str) -> str:
        # Undo the substitutions applied before handing the text to shlex.
        return part.replace(" ", char).replace(placeholder, " ")

    if char != " ":
        value = value.replace(" ", placeholder).replace(char, " ")
    parts = [_restore(token) for token in shlex.split(value, posix=True)]

    if maxsplit is not None:
        head, tail = parts[:maxsplit], parts[maxsplit:]
        return head + [char.join(tail)]
    return parts
class SmartSplitDict(dict):
    """Flat ``{key path: value}`` store that can be assembled into nested data.

    Keys are tuples of path components. A string key is split into
    components with ``smart_split`` on ``sep``. Assigned lists and dicts are
    flattened, so every stored value is a leaf. ``build`` turns the flat
    mapping back into nested dicts and lists.

    When both a plain string and a dict end up at the same path, they are
    merged into one dict and the string is stored under ``type_field``.
    """

    def __init__(self, value=None, sep=".", type_field="type"):
        """
        Args:
            value: optional initial mapping (or iterable of pairs).
            sep: separator used to split string keys.
            type_field: key that receives a string merged with a dict.
        """
        self.type_field = type_field
        self.sep = sep
        super().__init__()
        if value:
            # dict.__init__ stores items without calling __setitem__, which
            # would leave initial keys unsplit and nested values unflattened.
            self.update(dict(value))

    def update(self, __m: Dict[Any, Any], **kwargs) -> None:  # type: ignore[override]
        """Assign every item through ``__setitem__`` so keys are split and values flattened."""
        for k, v in __m.items():
            self[k] = v
        for k, v in kwargs.items():
            self[k] = v

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, one entry per leaf of ``value``."""
        if isinstance(key, str):
            key = tuple(smart_split(key, self.sep))

        for keys, val in self._disassemble(value, key):
            super().__setitem__(keys, val)

    def _disassemble(self, value: Any, key_prefix):
        """Yield ``(key path, leaf)`` pairs for ``value``.

        List indices and dict keys are appended to ``key_prefix``; empty
        lists and dicts yield nothing.
        """
        if isinstance(value, list):
            for i, v in enumerate(value):
                yield from self._disassemble(v, key_prefix + (i,))
            return
        if isinstance(value, dict):
            for k, v in value.items():
                yield from self._disassemble(v, key_prefix + (k,))
            return
        yield key_prefix, value

    def build(self) -> Dict[str, Any]:
        """Assemble the flat entries into a nested structure.

        Entries are grouped by key prefix. On every pass the groups with the
        longest prefix are merged into a single value and pushed one level up,
        until only the root group is left.

        Raises:
            RuntimeError: if a pass makes no progress.
        """
        prefix_values: Aggregates = self._aggregate_by_prefix()
        while prefix_values:
            if len(prefix_values) == 1 and () in prefix_values:
                return self._merge_aggregates(prefix_values[()])
            max_len = max(len(k) for k in prefix_values)
            to_aggregate: Dict[Keys, Any] = {}
            postponed: Aggregates = defaultdict(list)
            for prefix, values in prefix_values.items():
                if len(prefix) == max_len:
                    to_aggregate[prefix] = self._merge_aggregates(values)
                    continue
                postponed[prefix] = values
            aggregated: Aggregates = self._aggregate_by_prefix(to_aggregate)
            for prefix in set(postponed).union(aggregated):
                postponed[prefix].extend(aggregated.get(prefix, []))
            if postponed == prefix_values:
                raise RuntimeError("infinite loop on smartdict builing")
            prefix_values = postponed
        # this can only be reached if loop was not entered
        return {}

    def _merge_aggregates(self, values: "List[KeyValue]") -> Any:
        """Merge one group into a list if all its keys are ints, else into a dict."""
        if all(isinstance(k, int) for k, _ in values):
            return self._merge_as_list(values)
        return self._merge_as_dict(values)

    def _merge_as_list(self, values: "List[KeyValue]"):
        """Build a list sized by the highest index; missing indices keep ``_not_set``."""
        assert all(isinstance(k, int) for k, _ in values)
        index_values = defaultdict(list)
        for index, value in values:
            index_values[index].append(value)
        res = [_not_set] * (int(max(k for k, _ in values)) + 1)
        for i, v in index_values.items():
            res[i] = self._merge_values(v)  # type: ignore[index]
        return res

    def _merge_as_dict(self, values: "List[KeyValue]") -> Dict[Any, Any]:
        """Build a dict, merging all values that share a key."""
        key_values = defaultdict(list)
        for key, value in values:
            key_values[key].append(value)
        return {k: self._merge_values(v) for k, v in key_values.items()}

    def _merge_values(self, values: List[Any]) -> Any:
        """Combine the values collected for one key.

        A single value is returned as is. Several values are merged into one
        dict: dicts are merged in order and a string is stored under
        ``type_field``.

        Raises:
            ValueError: if several values are given and one is neither a dict
                nor a string.
        """
        if len(values) == 1:
            return values[0]
        merged = {}
        for value in values:
            if isinstance(value, dict):
                merged.update(value)
            elif isinstance(value, str):
                merged[self.type_field] = value
            else:
                raise ValueError(f"Cannot merge {value.__class__} into dict")
        return merged

    def _aggregate_by_prefix(
        self, values: "Dict[Keys, Any]" = None
    ) -> "Aggregates":
        """Group ``(last component, value)`` pairs by the rest of the key path.

        Operates on ``self`` when ``values`` is not given. String components
        made of decimal digits are converted to int so they act as list
        indices.
        """
        values = values if values is not None else self
        prefix_values: Aggregates = defaultdict(list)

        for keys, value in values.items():
            prefix, key = keys[:-1], keys[-1]
            # isdecimal, not isnumeric: the latter also accepts characters
            # such as superscripts and fractions that int() rejects.
            if isinstance(key, str) and key.isdecimal():
                key = int(key)
            prefix_values[prefix].append((key, value))
        return prefix_values
smart_split(key, "."), c) +) -> TBM: + model_dict = SmartSplitDict() + model_dict.update(kwargs) + model_dict.update(conf or {}) for file in file_conf or []: keys, path = smart_split(make_posix(file), "=") with open(path, "r", encoding="utf8") as f: value = safe_load(f) - set_recursively(model_dict, smart_split(keys, "."), value) + model_dict[keys] = value for c in str_conf or []: keys, value = smart_split(c, "=", 1) if value == "None": value = None - set_recursively(model_dict, smart_split(keys, "."), value) - return parse_obj_as(model, model_dict) + model_dict[keys] = value + return parse_obj_as(model, model_dict.build()) diff --git a/mlem/core/data_type.py b/mlem/core/data_type.py index a96efc09..0f8a82a2 100644 --- a/mlem/core/data_type.py +++ b/mlem/core/data_type.py @@ -133,6 +133,7 @@ class Config: type_root = True data_type: DataType + """Resulting data type""" abs_name: ClassVar[str] = "data_reader" @abstractmethod @@ -172,6 +173,7 @@ class PrimitiveType(DataType, DataHook, DataSerializer): type: ClassVar[str] = "primitive" ptype: str + """Name of builtin type""" @classmethod def is_object_valid(cls, obj: Any) -> bool: @@ -205,6 +207,8 @@ def get_model(self, prefix: str = "") -> Type[BaseModel]: class PrimitiveWriter(DataWriter): + """Writer for primitive types""" + type: ClassVar[str] = "primitive" def write( @@ -216,6 +220,8 @@ def write( class PrimitiveReader(DataReader): + """Reader for primitive types""" + type: ClassVar[str] = "primitive" data_type: PrimitiveType @@ -247,7 +253,9 @@ class ArrayType(DataType, DataSerializer): type: ClassVar[str] = "array" dtype: DataType + """DataType of elements""" size: Optional[int] + """Size of the list""" def get_requirements(self) -> Requirements: return self.dtype.get_requirements() @@ -272,6 +280,8 @@ def get_model(self, prefix: str = "") -> Type[BaseModel]: class ArrayWriter(DataWriter): + """Writer for lists with single element type""" + type: ClassVar[str] = "array" def write( @@ -298,9 +308,12 @@ def 
write( class ArrayReader(DataReader): + """Reader for lists with single element type""" + type: ClassVar[str] = "array" data_type: ArrayType readers: List[DataReader] + """Inner readers""" def read(self, artifacts: Artifacts) -> DataType: artifacts = flatdict.FlatterDict(artifacts, delimiter="/") @@ -321,9 +334,12 @@ class _TupleLikeType(DataType, DataSerializer): DataType for tuple-like collections """ - items: List[DataType] + type: ClassVar = "_tuple_like" actual_type: ClassVar[type] + items: List[DataType] + """DataTypes of elements""" + def deserialize(self, obj): _check_type_and_size( obj, self.actual_type, len(self.items), DeserializationError @@ -377,6 +393,8 @@ def _check_type_and_size(obj, dtype, size, exc_type): class _TupleLikeWriter(DataWriter): + """Writer for tuple-like data""" + type: ClassVar[str] = "tuple_like" def write( @@ -404,9 +422,12 @@ def write( class _TupleLikeReader(DataReader): + """Reader for tuple-like data""" + type: ClassVar[str] = "tuple_like" data_type: _TupleLikeType readers: List[DataReader] + """Inner readers""" def read(self, artifacts: Artifacts) -> DataType: artifacts = flatdict.FlatterDict(artifacts, delimiter="/") @@ -515,6 +536,7 @@ class DictType(DataType, DataSerializer): type: ClassVar[str] = "dict" item_types: Dict[Union[StrictStr, StrictInt], DataType] + """Mapping key -> nested data type""" @classmethod def process(cls, obj, **kwargs): @@ -570,6 +592,8 @@ def get_model(self, prefix="") -> Type[BaseModel]: class DictWriter(DataWriter): + """Writer for dicts""" + type: ClassVar[str] = "dict" def write( @@ -597,9 +621,12 @@ def write( class DictReader(DataReader): + """Reader for dicts""" + type: ClassVar[str] = "dict" data_type: DictType item_readers: Dict[Union[StrictStr, StrictInt], DataReader] + """Nested readers""" def read(self, artifacts: Artifacts) -> DataType: artifacts = flatdict.FlatterDict(artifacts, delimiter="/") @@ -623,7 +650,9 @@ class DynamicDictType(DataType, DataSerializer): type: ClassVar[str] = 
"d_dict" key_type: PrimitiveType + """DataType for key (primitive)""" value_type: DataType + """DataType for value""" @validator("key_type") def is_valid_key_type( # pylint: disable=no-self-argument @@ -720,6 +749,8 @@ def get_model(self, prefix="") -> Type[BaseModel]: class DynamicDictWriter(DataWriter): + """Write dicts without fixed set of keys""" + type: ClassVar[str] = "d_dict" def write( @@ -739,6 +770,8 @@ def write( class DynamicDictReader(DataReader): + """Read dicts without fixed set of keys""" + type: ClassVar[str] = "d_dict" data_type: DynamicDictType @@ -757,32 +790,3 @@ def read_batch( self, artifacts: Artifacts, batch_size: int ) -> Iterator[DataType]: raise NotImplementedError - - -# -# -# class BytesDataType(DataType): -# """ -# DataType for bytes objects -# """ -# type = 'bytes' -# real_type = None -# -# def __init__(self): -# pass -# -# def get_spec(self) -> ArgList: -# return [Field('file', bytes, False)] -# -# def deserialize(self, obj) -> object: -# return obj -# -# def serialize(self, instance: object) -> dict: -# return instance -# -# @property -# def requirements(self) -> Requirements: -# return Requirements() -# -# def get_writer(self): -# return PickleWriter() diff --git a/mlem/core/errors.py b/mlem/core/errors.py index 7b2a5aaf..95a03652 100644 --- a/mlem/core/errors.py +++ b/mlem/core/errors.py @@ -1,7 +1,7 @@ """Exceptions raised by the MLEM.""" from typing import List, Optional -from mlem.constants import MLEM_DIR +from mlem.constants import MLEM_CONFIG_FILE_NAME class MlemError(Exception): @@ -22,7 +22,7 @@ class SerializationError(MlemError): class MlemProjectNotFound(MlemError): - _message = "{MLEM_DIR} folder wasn't found when searching through the path. Search has started from here: path={path}, fs={fs}, rev={rev}" + _message = "{MLEM_CONFIG_FILE_NAME} folder wasn't found when searching through the path. 
class WrongMetaSubType(TypeError, MlemError):
    """Error for a loaded meta whose object type / type differs from the forced one.

    The message names the meta's ``object_type`` and ``type``, its uri when
    the meta is saved, and the ``object_type`` and ``type`` of ``force_type``.
    """

    def __init__(self, meta, force_type):
        if meta.is_saved:
            loc = f"from {meta.loc.uri} "
        else:
            # unsaved meta has no location to report
            loc = ""
        super().__init__(
            f"Wrong type of meta loaded, got {meta.object_type} {meta.type} {loc}instead of {force_type.object_type} {force_type.type}"
        )
Union - -from pydantic import ValidationError, parse_obj_as -from yaml import safe_dump, safe_load - -from mlem.constants import MLEM_DIR -from mlem.core.base import MlemABC -from mlem.core.errors import MlemProjectNotFound -from mlem.core.meta_io import MLEM_EXT, Location -from mlem.core.metadata import load_meta -from mlem.core.objects import MlemLink, MlemObject -from mlem.ui import no_echo - -TypeFilter = Union[Type[MlemObject], Iterable[Type[MlemObject]], None] - - -class Index(MlemABC): - """Base class for mlem object indexing logic""" - - class Config: - type_root = True - - abs_name: ClassVar = "index" - - @abstractmethod - def index(self, obj: MlemObject, location: Location): - raise NotImplementedError - - @abstractmethod - def list( - self, - location: Location, - type_filter: TypeFilter, - include_links: bool = True, - ) -> Dict[Type[MlemObject], List[MlemObject]]: - raise NotImplementedError - - @staticmethod - def parse_type_filter(type_filter: TypeFilter) -> Set[Type[MlemObject]]: - if type_filter is None: - type_filter = set(MlemObject.non_abstract_subtypes().values()) - if isinstance(type_filter, type) and issubclass( - type_filter, MlemObject - ): - type_filter = {type_filter} - tf = set(type_filter) - if not tf: - return set() - tf.add(MlemLink) - return tf - - -class LinkIndex(Index): - """Indexing base on contents of MLEM_DIR - either objects or links to them - should be there""" - - type: ClassVar = "link" - - def index(self, obj: MlemObject, location: Location): - if ( - location.path - == posixpath.join(MLEM_DIR, obj.object_type, obj.name) + MLEM_EXT - ): - return - with no_echo(): - obj.make_link( - obj.name, location.fs, project=location.project, external=False - ) - - def list( - self, - location: Location, - type_filter: TypeFilter, - include_links: bool = True, - ignore_errors: bool = False, - ) -> Dict[Type[MlemObject], List[MlemObject]]: - _type_filter = self.parse_type_filter(type_filter) - if len(_type_filter) == 0: - return {} - - 
res = defaultdict(list) - root_path = posixpath.join(location.project or "", MLEM_DIR) - files = location.fs.glob( - posixpath.join(root_path, f"**{MLEM_EXT}"), - ) - for cls in _type_filter: - type_path = posixpath.join(root_path, cls.object_type) - for file in files: - if not file.startswith(type_path): - continue - try: - with no_echo(): - meta = load_meta( - posixpath.relpath(file, location.project), - project=location.project, - rev=location.rev, - follow_links=False, - fs=location.fs, - load_value=False, - ) - obj_type = cls - if isinstance(meta, MlemLink): - link_name = posixpath.relpath(file, type_path)[ - : -len(MLEM_EXT) - ] - is_auto_link = meta.path == link_name + MLEM_EXT - - obj_type = MlemObject.__type_map__[meta.link_type] - if obj_type not in _type_filter: - continue - if is_auto_link: - with no_echo(): - meta = meta.load_link() - elif not include_links: - continue - res[obj_type].append(meta) - except ValidationError: - if not ignore_errors: - raise - return res - - -FileIndexSchema = Dict[str, List[str]] - - -class FileIndex(Index): - """Index as a single file""" - - type: ClassVar = "file" - filename = "index.yaml" - - def _read_index(self, location: Location): - if location.project is None: - raise MlemProjectNotFound(location.path, location.fs, location.rev) - path = posixpath.join(location.project, MLEM_DIR, self.filename) - if not location.fs.exists(path): - return {} - - with location.fs.open(path) as f: - return parse_obj_as(FileIndexSchema, safe_load(f)) - - def _write_index(self, location: Location, data: FileIndexSchema): - if location.project is None: - raise MlemProjectNotFound(location.path, location.fs, location.rev) - path = posixpath.join(location.project, MLEM_DIR, self.filename) - - with location.fs.open(path, "w") as f: - safe_dump(data, f) - - def index(self, obj: MlemObject, location: Location): - data = self._read_index(location) - type_data = data.get(obj.object_type, []) - if obj.name not in type_data: - 
type_data.append(obj.name) - data[obj.object_type] = type_data - self._write_index(location, data) - - def list( - self, - location: Location, - type_filter: TypeFilter, - include_links: bool = True, - ) -> Dict[Type[MlemObject], List[MlemObject]]: - _type_filter = self.parse_type_filter(type_filter) - if not _type_filter: - return {} - - data = self._read_index(location) - - res = defaultdict(list) - - with no_echo(): - for type_ in _type_filter: - if type_ is MlemLink and not include_links: - continue - - res[type_].extend( - [ - load_meta( - path, - location.project, - location.rev, - load_value=False, - fs=location.fs, - ) - for path in data.get(type_.object_type, []) - ] - ) - return res diff --git a/mlem/core/meta_io.py b/mlem/core/meta_io.py index 06da3905..80445a57 100644 --- a/mlem/core/meta_io.py +++ b/mlem/core/meta_io.py @@ -2,6 +2,7 @@ Utils functions that parse and process supplied URI, serialize/derialize MLEM objects """ import contextlib +import os import posixpath from abc import ABC, abstractmethod from inspect import isabstract @@ -19,7 +20,7 @@ MlemObjectNotFound, RevisionNotFound, ) -from mlem.utils.root import MLEM_DIR, find_project_root +from mlem.utils.root import find_project_root MLEM_EXT = ".mlem" @@ -43,8 +44,18 @@ def fullpath(self): def path_in_project(self): return posixpath.relpath(self.fullpath, self.project) + @property + def dirname(self): + return posixpath.dirname(self.fullpath) + + @property + def basename(self): + return posixpath.basename(self.path) + @contextlib.contextmanager - def open(self, mode="r", **kwargs): + def open(self, mode="r", make_dir: bool = False, **kwargs): + if make_dir: + self.fs.makedirs(posixpath.dirname(self.fullpath), exist_ok=True) with self.fs.open(self.fullpath, mode, **kwargs) as f: yield f @@ -57,12 +68,17 @@ def abs(cls, path: str, fs: AbstractFileSystem): def update_path(self, path): if not self.uri.endswith(self.path): raise ValueError("cannot automatically update uri") + if 
os.path.isabs(self.path) and not os.path.isabs(path): + path = posixpath.join(posixpath.dirname(self.path), path) self.uri = self.uri[: -len(self.path)] + path self.path = path def exists(self): return self.fs.exists(self.fullpath) + def delete(self): + self.fs.delete(self.fullpath) + def is_same_project(self, other: "Location"): return other.fs == self.fs and other.project == self.project @@ -75,6 +91,23 @@ def uri_repr(self): return posixpath.relpath(self.fullpath, "") return self.uri + @classmethod + def resolve( + cls, + path: str, + project: str = None, + rev: str = None, + fs: AbstractFileSystem = None, + find_project: bool = False, + ): + return UriResolver.resolve( + path=path, + project=project, + rev=rev, + fs=fs, + find_project=find_project, + ) + class UriResolver(MlemABC): """Base class for resolving location. Turns (path, project, rev, fs) tuple @@ -260,9 +293,7 @@ def get_fs( except FileNotFoundError as e: # TODO catch HTTPError for wrong orgrepo if options["sha"] is not None and not cls.check_rev(options): raise RevisionNotFound(options["sha"], uri) from e - raise LocationNotFound( - f"Could not resolve github location {uri}" - ) from e + raise LocationNotFound(f"Could not resolve location {uri}") from e return fs, path @classmethod @@ -299,6 +330,7 @@ def pre_process( class FSSpecResolver(UriResolver): """Resolve different fsspec URIs""" + type: ClassVar = "fsspec" low_priority: ClassVar = True @classmethod @@ -338,7 +370,7 @@ def get_uri( def get_fs(uri: str) -> Tuple[AbstractFileSystem, str]: - location = UriResolver.resolve(path=uri, project=None, rev=None, fs=None) + location = Location.resolve(path=uri, project=None, rev=None, fs=None) return location.fs, location.fullpath @@ -353,7 +385,7 @@ def get_path_by_fs_path(fs: AbstractFileSystem, path: str): def get_uri(fs: AbstractFileSystem, path: str, repr: bool = False): - loc = UriResolver.resolve(path, None, None, fs=fs) + loc = Location.resolve(path, None, None, fs=fs) if repr: return 
loc.uri_repr return loc.uri @@ -373,15 +405,10 @@ def get_meta_path(uri: str, fs: AbstractFileSystem) -> str: if uri.endswith(MLEM_EXT) and fs.isfile(uri): # .../. return uri - # if fs.isdir(uri) and fs.isfile(posixpath.join(uri, META_FILE_NAME)): - # # .../path and .../path/ exists - # return posixpath.join(uri, META_FILE_NAME) + if fs.isfile(uri + MLEM_EXT): # .../name without return uri + MLEM_EXT - if MLEM_DIR in uri and fs.isfile(uri): - # ...//.../file - return uri if fs.exists(uri): raise MlemObjectNotFound( f"{uri} is not a valid MLEM metafile or a folder with a MLEM model or data" diff --git a/mlem/core/metadata.py b/mlem/core/metadata.py index 62e0f7ca..07c35d48 100644 --- a/mlem/core/metadata.py +++ b/mlem/core/metadata.py @@ -16,7 +16,7 @@ MlemProjectNotFound, WrongMetaType, ) -from mlem.core.meta_io import Location, UriResolver, get_meta_path +from mlem.core.meta_io import Location, get_meta_path from mlem.core.objects import MlemData, MlemModel, MlemObject, find_object from mlem.utils.path import make_posix @@ -48,8 +48,6 @@ def save( project: Optional[str] = None, sample_data=None, fs: Optional[AbstractFileSystem] = None, - index: bool = None, - external: Optional[bool] = None, params: Dict[str, str] = None, ) -> MlemObject: """Saves given object to a given path @@ -63,8 +61,6 @@ def save( provide input data sample, so MLEM will include it's schema in the model's metadata fs: FileSystem for the `path` argument - index: Whether to add object to mlem project index - external: if obj is saved to project, whether to put it outside of .mlem dir params: arbitrary params for object Returns: @@ -76,7 +72,7 @@ def save( params=params, ) path = os.fspath(path) - meta.dump(path, fs=fs, project=project, index=index, external=external) + meta.dump(path, fs=fs, project=project) return meta @@ -90,10 +86,10 @@ def load( """Load python object saved by MLEM Args: - path (str): Path to the object. Could be local path or path inside a git repo. 
- project (Optional[str], optional): URL to project if object is located there. - rev (Optional[str], optional): revision, could be git commit SHA, branch name or tag. - follow_links (bool, optional): If object we read is a MLEM link, whether to load the + path: Path to the object. Could be local path or path inside a git repo. + project: URL to project if object is located there. + rev: revision, could be git commit SHA, branch name or tag. + follow_links: If object we read is a MLEM link, whether to load the actual object link points to. Defaults to True. Returns: @@ -156,19 +152,19 @@ def load_meta( """Load MlemObject Args: - path (str): Path to the object. Could be local path or path inside a git repo. - project (Optional[str], optional): URL to project if object is located there. - rev (Optional[str], optional): revision, could be git commit SHA, branch name or tag. - follow_links (bool, optional): If object we read is a MLEM link, whether to load the + path: Path to the object. Could be local path or path inside a git repo. + project: URL to project if object is located there. + rev: revision, could be git commit SHA, branch name or tag. + follow_links: If object we read is a MLEM link, whether to load the actual object link points to. Defaults to True. - load_value (bool, optional): Load actual python object incorporated in MlemObject. Defaults to False. + load_value: Load actual python object incorporated in MlemObject. Defaults to False. fs: filesystem to load from. If not provided, will be inferred from path force_type: type of meta to be loaded. 
Defaults to MlemObject (any mlem meta) Returns: MlemObject: Saved MlemObject """ path = os.fspath(path) - location = UriResolver.resolve( + location = Location.resolve( path=make_posix(path), project=make_posix(project), rev=rev, diff --git a/mlem/core/model.py b/mlem/core/model.py index 5952e690..c3ed0b2b 100644 --- a/mlem/core/model.py +++ b/mlem/core/model.py @@ -102,10 +102,15 @@ class Argument(BaseModel): """Function argument descriptor""" name: str + """argument name""" type_: DataType + """argument data type""" required: bool = True + """is required""" default: Any = None + """default value""" kw_only: bool = False + """is keyword only""" @classmethod def from_argspec( @@ -177,10 +182,15 @@ class Signature(BaseModel, WithRequirements): """Function signature descriptor""" name: str + """function name""" args: List[Argument] + """list of arguments""" returns: DataType + """returning data type""" varargs: Optional[str] = None + """name of var arg""" varkw: Optional[str] = None + """name of varkw arg""" @classmethod def from_method( @@ -230,9 +240,7 @@ def get_requirements(self): class ModelType(ABC, MlemABC, WithRequirements): - """ - Base class for model metadata. 
- """ + """Base class for model metadata.""" class Config: type_root = True @@ -243,7 +251,9 @@ class Config: model: Any = None io: ModelIO + """Model IO""" methods: Dict[str, Signature] + """Model method signatures""" def load(self, artifacts: Artifacts): self.model = self.io.load(artifacts) diff --git a/mlem/core/objects.py b/mlem/core/objects.py index 59020cc2..1915d40b 100644 --- a/mlem/core/objects.py +++ b/mlem/core/objects.py @@ -1,7 +1,9 @@ """ Base classes for meta objects in MLEM """ +import contextlib import hashlib +import itertools import os import posixpath import time @@ -9,8 +11,10 @@ from enum import Enum from functools import partial from typing import ( + TYPE_CHECKING, Any, ClassVar, + ContextManager, Dict, Generic, Iterable, @@ -23,13 +27,15 @@ overload, ) +import fsspec from fsspec import AbstractFileSystem from fsspec.implementations.local import LocalFileSystem from pydantic import ValidationError, parse_obj_as, validator -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from yaml import safe_dump, safe_load from mlem.config import project_config +from mlem.constants import MLEM_STATE_DIR, MLEM_STATE_EXT from mlem.core.artifacts import ( Artifacts, FSSpecStorage, @@ -40,25 +46,31 @@ from mlem.core.data_type import DataReader, DataType from mlem.core.errors import ( DeploymentError, + MlemError, MlemObjectNotFound, MlemObjectNotSavedError, - MlemProjectNotFound, + WrongABCType, + WrongMetaSubType, WrongMetaType, ) -from mlem.core.meta_io import ( - MLEM_DIR, - MLEM_EXT, - Location, - UriResolver, - get_path_by_fs_path, -) +from mlem.core.meta_io import MLEM_EXT, Location, get_path_by_fs_path from mlem.core.model import ModelAnalyzer, ModelType from mlem.core.requirements import Requirements from mlem.polydantic.lazy import lazy_field from mlem.ui import EMOJI_LINK, EMOJI_LOAD, EMOJI_SAVE, echo, no_echo +from mlem.utils.fslock import FSLock from mlem.utils.path import make_posix from mlem.utils.root 
import find_project_root +if TYPE_CHECKING: + from pydantic.typing import ( + AbstractSetIntStr, + MappingIntStrAny, + TupleGenerator, + ) + + from mlem.runtime.client import Client + T = TypeVar("T", bound="MlemObject") @@ -77,7 +89,9 @@ class Config: __abstract__: ClassVar[bool] = True object_type: ClassVar[str] location: Optional[Location] = None + """MlemObject location [transient]""" params: Dict[str, str] = {} + """Arbitrary map of additional parameters""" @property def loc(self) -> Location: @@ -88,11 +102,7 @@ def loc(self) -> Location: @property def name(self): """Name of the object in the project""" - project_path = self.loc.path_in_project[: -len(MLEM_EXT)] - prefix = posixpath.join(MLEM_DIR, self.object_type) - if project_path.startswith(prefix): - project_path = project_path[len(prefix) + 1 :] - return project_path + return self.loc.path_in_project[: -len(MLEM_EXT)] @property def is_saved(self): @@ -119,14 +129,12 @@ def _get_location( path: str, project: Optional[str], fs: Optional[AbstractFileSystem], - external: bool, - ensure_mlem_root: bool, metafile_path: bool = True, ) -> Location: """Create location from arguments""" if metafile_path: path = cls.get_metafile_path(path) - loc = UriResolver.resolve( + loc = Location.resolve( path, project, rev=None, fs=fs, find_project=True ) if loc.project is not None: @@ -134,24 +142,6 @@ def _get_location( find_project_root( loc.project, loc.fs, raise_on_missing=True, recursive=False ) - if ensure_mlem_root and loc.project is None: - raise MlemProjectNotFound(loc.fullpath, loc.fs) - if ( - loc.project is None - or external - or loc.fullpath.startswith( - posixpath.join(loc.project, MLEM_DIR, cls.object_type) - ) - ): - # orphan or external or inside .mlem - return loc - - internal_path = posixpath.join( - MLEM_DIR, - cls.object_type, - loc.path_in_project, - ) - loc.update_path(internal_path) return loc @classmethod @@ -204,8 +194,6 @@ def dump( path: str, fs: Optional[AbstractFileSystem] = None, project: 
Optional[str] = None, - index: Optional[bool] = None, - external: Optional[bool] = None, ): """Dumps metafile and possible artifacts to path. @@ -213,23 +201,14 @@ def dump( path: name of the object. Relative to project, if it is provided. fs: filesystem to save to. if not provided, inferred from project and path project: path to mlem project - index: whether add to index if object is external. - If set to True, checks existanse of mlem project - defaults to True if mlem project exists and external is true - external: whether to save object inside mlem dir or not. - Defaults to false if project is provided - Forced to false if path points inside mlem dir """ - location, index = self._parse_dump_args( - path, project, fs, index, external - ) - self._write_meta(location, index) + location = self._parse_dump_args(path, project, fs) + self._write_meta(location) return self def _write_meta( self, location: Location, - index: bool, ): """Write metadata to path in fs and possibly create link in mlem dir""" echo(EMOJI_SAVE + f"Saving {self.object_type} to {location.uri_repr}") @@ -238,50 +217,27 @@ def _write_meta( ) with location.open("w") as f: safe_dump(self.dict(), f) - if index and location.project: - project_config(location.project, location.fs).index.index( - self, location - ) def _parse_dump_args( self, path: str, project: Optional[str], fs: Optional[AbstractFileSystem], - index: Optional[bool], - external: Optional[bool], - ) -> Tuple[Location, bool]: + ) -> Location: """Parse arguments for .dump and bind meta""" - if external is None: - external = project_config(project, fs=fs).EXTERNAL - # by default we index only external non-orphan objects - if index is None: - index = True - ensure_mlem_root = False - else: - # if index manually set to True, there should be mlem project - ensure_mlem_root = index location = self._get_location( make_posix(path), make_posix(project), fs, - external, - ensure_mlem_root, ) self.bind(location) - if location.project is not None: - 
# force external=False if fullpath inside MLEM_DIR - external = posixpath.join(MLEM_DIR, "") not in posixpath.dirname( - location.fullpath - ) - return location, index + return location def make_link( self, path: str = None, fs: Optional[AbstractFileSystem] = None, project: Optional[str] = None, - external: Optional[bool] = None, absolute: bool = False, ) -> "MlemLink": if self.location is None: @@ -295,11 +251,10 @@ def make_link( link_type=self.resolved_type, ) if path is not None: - ( - location, - _, - ) = link._parse_dump_args( # pylint: disable=protected-access - path, project, fs, False, external=external + location = ( + link._parse_dump_args( # pylint: disable=protected-access + path, project, fs + ) ) if ( not absolute @@ -309,9 +264,7 @@ def make_link( link.path = self.get_metafile_path(self.name) link.link_type = self.resolved_type link.project = None - link._write_meta( # pylint: disable=protected-access - location, True - ) + link._write_meta(location) # pylint: disable=protected-access return link def clone( @@ -319,8 +272,6 @@ def clone( path: str, fs: Optional[AbstractFileSystem] = None, project: Optional[str] = None, - index: Optional[bool] = None, - external: Optional[bool] = None, ): """ Clone existing object to `path`. 
@@ -331,7 +282,7 @@ def clone( raise MlemObjectNotSavedError("Cannot clone not saved object") new: "MlemObject" = self.deepcopy() new.dump( - path, fs, project, index, external + path, fs, project ) # only dump meta TODO: https://github.com/iterative/mlem/issues/37 return new @@ -348,22 +299,32 @@ def update(self): + f"Updating {self.object_type} at {self.location.uri_repr}" ) with no_echo(): - self._write_meta(self.location, False) + self._write_meta(self.location) def meta_hash(self): - return hashlib.md5(safe_dump(self.dict()).encode("utf8")).hexdigest() + return hashlib.md5( # nosec: B324 + safe_dump(self.dict()).encode("utf8") + ).hexdigest() + + +TL = TypeVar("TL", bound="MlemLink") class MlemLink(MlemObject): """Link is a special MlemObject that represents a MlemObject in a different location""" + object_type: ClassVar = "link" + __link_type_map__: ClassVar[Dict[str, Type["TypedLink"]]] = {} + path: str + """Path to object""" project: Optional[str] = None + """Project URI""" rev: Optional[str] = None + """Revision to use""" link_type: str - - object_type: ClassVar = "link" + """Type of underlying object""" @property def link_cls(self) -> Type[MlemObject]: @@ -406,7 +367,7 @@ def parse_link(self) -> Location: if self.project is None and self.rev is None: # is it possible to have rev without project? 
- location = UriResolver.resolve( + location = Location.resolve( path=self.path, project=None, rev=None, fs=None ) if ( @@ -424,7 +385,7 @@ def parse_link(self) -> Location: return find_meta_location(location) # link is absolute return find_meta_location( - UriResolver.resolve( + Location.resolve( path=self.path, project=self.project, rev=self.rev, fs=None ) ) @@ -442,13 +403,73 @@ def from_location( else link_type, ) + @classmethod + def typed_link( + cls: Type["MlemLink"], type_: Union[str, Type[MlemObject]] + ) -> Type["MlemLink"]: + type_name = type_ if isinstance(type_, str) else type_.object_type + + class TypedMlemLink(TypedLink): + object_type: ClassVar = f"link_{type_name}" + _link_type: ClassVar = type_name + link_type = type_name + + def _iter( + self, + to_dict: bool = False, + by_alias: bool = False, + include: Union["AbstractSetIntStr", "MappingIntStrAny"] = None, + exclude: Union["AbstractSetIntStr", "MappingIntStrAny"] = None, + exclude_unset: bool = False, + exclude_defaults: bool = False, + exclude_none: bool = False, + ) -> "TupleGenerator": + exclude = exclude or set() + if isinstance(exclude, set): + exclude.update(("type", "object_type", "link_type")) + elif isinstance(exclude, dict): + exclude.update( + {"type": True, "object_type": True, "link_type": True} + ) + return super()._iter( + to_dict, + by_alias, + include, + exclude, + exclude_unset, + exclude_defaults, + exclude_none, + ) + + TypedMlemLink.__doc__ = f"""Link to {type_name} MLEM object""" + return TypedMlemLink + + @property + def typed(self) -> "TypedLink": + type_ = MlemLink.__link_type_map__[self.link_type] + return type_(**self.dict()) + + +class TypedLink(MlemLink, ABC): + """Base class for specific type link classes""" + + __abstract__: ClassVar = True + object_type: ClassVar = "_typed_link" + _link_type: ClassVar + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + MlemLink.__link_type_map__[cls._link_type] = cls + class _WithArtifacts(ABC, 
MlemObject): """Special subtype of MlemObject that can have files (artifacts) attached""" __abstract__: ClassVar[bool] = True artifacts: Optional[Artifacts] = None + """dict with artifacts""" requirements: Requirements = Requirements.new() + """list of requirements""" @classmethod def get_metafile_path(cls, fullpath: str): @@ -460,9 +481,6 @@ def get_metafile_path(cls, fullpath: str): @property def name(self): project_path = self.location.path_in_project - prefix = posixpath.join(MLEM_DIR, self.object_type) - if project_path.startswith(prefix): - project_path = project_path[len(prefix) + 1 :] if project_path.endswith(MLEM_EXT): project_path = project_path[: -len(MLEM_EXT)] return project_path @@ -486,12 +504,8 @@ def dump( path: str, fs: Optional[AbstractFileSystem] = None, project: Optional[str] = None, - index: Optional[bool] = None, - external: Optional[bool] = None, ): - location, index = self._parse_dump_args( - path, project, fs, index, external - ) + location = self._parse_dump_args(path, project, fs) try: if location.exists(): with no_echo(): @@ -502,35 +516,27 @@ def dump( except (MlemObjectNotFound, FileNotFoundError, ValidationError): pass self.artifacts = self.get_artifacts() - self._write_meta(location, index) + self._write_meta(location) return self @abstractmethod def write_value(self) -> Artifacts: raise NotImplementedError - # def ensure_saved(self): - # if self.fs is None: - # raise ValueError(f"Can't load {self}: it's not saved") - def clone( self, path: str, fs: Optional[AbstractFileSystem] = None, project: Optional[str] = None, - index: Optional[bool] = None, - external: Optional[bool] = None, ): if self.location is None: raise MlemObjectNotSavedError("Cannot clone not saved object") # clone is just dump with copying artifacts new: "_WithArtifacts" = self.deepcopy() new.artifacts = {} - ( - location, - index, - ) = new._parse_dump_args( # pylint: disable=protected-access - path, project, fs, index, external + + location = new._parse_dump_args( 
# pylint: disable=protected-access + path, project, fs ) for art_name, art in (self.artifacts or {}).items(): @@ -544,7 +550,7 @@ def clone( new.artifacts[art_name] = LocalArtifact( uri=posixpath.relpath(art_path, new.dirname), **download.info ) - new._write_meta(location, index) # pylint: disable=protected-access + new._write_meta(location) # pylint: disable=protected-access return new @property @@ -592,6 +598,7 @@ class MlemModel(_WithArtifacts): object_type: ClassVar = "model" model_type_cache: Any model_type: ModelType + """Framework-specific metadata""" model_type, model_type_raw, model_type_cache = lazy_field( ModelType, "model_type", "model_type_cache" ) @@ -644,8 +651,9 @@ class Config: exclude = {"data_type"} object_type: ClassVar = "data" - reader_cache: Optional[Dict] + reader_cache: Any reader: Optional[DataReader] + """How to read this data""" reader, reader_raw, reader_cache = lazy_field( DataReader, "reader", @@ -711,6 +719,7 @@ class Config: type_root = True type_field = "type" + type: ClassVar[str] object_type: ClassVar = "builder" abs_name: ClassVar[str] = "builder" @@ -726,12 +735,33 @@ class Config: type_root = True abs_name: ClassVar[str] = "deploy_state" + type: ClassVar[str] + allow_default: ClassVar[bool] = False model_hash: Optional[str] = None + """Hash of deployed model meta""" + model_link: Optional["ModelLink"] + """Link to deployed model""" + declaration: "MlemDeployment" + """Deployment declaration used""" - @abstractmethod - def get_client(self): - raise NotImplementedError + def update_model( + self, + model: MlemModel, + ): + self.model_hash = model.meta_hash() + if model.is_saved: + self.model_link = model.make_link().typed + else: + self.model_link = None + + @validator("declaration") + def validate_declaration( # pylint: disable=no-self-argument + cls, value: "MlemDeployment" + ): + copy = value.copy() + copy.env = value.get_env() + return copy DT = TypeVar("DT", bound="MlemDeployment") @@ -749,18 +779,6 @@ class Config: type: 
ClassVar = ... deploy_type: ClassVar[Type[DT]] - @abstractmethod - def deploy(self, meta: DT): - raise NotImplementedError - - @abstractmethod - def remove(self, meta: DT): - raise NotImplementedError - - @abstractmethod - def get_status(self, meta: DT, raise_on_error=True) -> "DeployStatus": - raise NotImplementedError - def check_type(self, deploy: "MlemDeployment"): if not isinstance(deploy, self.deploy_type): raise ValueError( @@ -779,7 +797,174 @@ class DeployStatus(str, Enum): RUNNING = "running" -class MlemDeployment(MlemObject): +ST = TypeVar("ST", bound=DeployState) + + +@contextlib.contextmanager +def _no_lock(): + yield + + +class StateManager(MlemABC): + abs_name: ClassVar = "state" + type: ClassVar[str] + + class Config: + type_root = True + default_type = "fsspec" + + @abstractmethod + def _get_state( + self, deployment: "MlemDeployment" + ) -> Optional[DeployState]: + pass + + def get_state( + self, deployment: "MlemDeployment", state_type: Type[ST] + ) -> Optional[ST]: + state = self._get_state(deployment) + if state is not None and not isinstance(state, state_type): + raise DeploymentError( + f"State for {deployment.name} is {state.type}, but should be {state_type.type}" + ) + return state + + @abstractmethod + def update_state(self, deployment: "MlemDeployment", state: DeployState): + raise NotImplementedError + + @abstractmethod + def purge_state(self, deployment: "MlemDeployment"): + raise NotImplementedError + + @abstractmethod + def lock_state(self, deployment: "MlemDeployment") -> ContextManager: + raise NotImplementedError + + +class LocalFileStateManager(StateManager): + """StateManager that stores state as yaml file locally""" + + type: ClassVar = "local" + + locking: bool = True + """Enable state locking""" + lock_timeout: float = 10 * 60 + """Lock timeout""" + + @staticmethod + def location(deployment: "MlemDeployment") -> Location: + loc = deployment.loc.copy() + loc.update_path(loc.path + MLEM_STATE_EXT) + return loc + + def 
_get_state( + self, deployment: "MlemDeployment" + ) -> Optional[DeployState]: + try: + with self.location(deployment).open("r") as f: + return parse_obj_as(DeployState, safe_load(f)) + except FileNotFoundError: + return None + + def update_state(self, deployment: "MlemDeployment", state: DeployState): + with self.location(deployment).open("w", make_dir=True) as f: + safe_dump(state.dict(), f) + + def purge_state(self, deployment: "MlemDeployment"): + loc = self.location(deployment) + if loc.exists(): + loc.delete() + + def lock_state(self, deployment: "MlemDeployment"): + if self.locking: + loc = self.location(deployment) + dirname, filename = posixpath.split(loc.fullpath) + return FSLock( + loc.fs, + dirname, + filename, + timeout=self.lock_timeout, + ) + return super().lock_state(deployment) + + +class FSSpecStateManager(StateManager): + """StateManager that stores state as yaml file in fsspec-supported filesystem""" + + type: ClassVar = "fsspec" + + class Config: + exclude = {"fs", "path"} + arbitrary_types_allowed = True + + uri: str + """URI of directory to store state files""" + storage_options: Dict = {} + """Additional options""" + locking: bool = True + """Enable state locking""" + lock_timeout: float = 10 * 60 + """Lock timeout""" + + fs: Optional[AbstractFileSystem] = None + """Filesystem cache""" + path: str = "" + """Path inside filesystem cache""" + + def get_fs(self) -> AbstractFileSystem: + if self.fs is None: + self.fs, _, (self.path,) = fsspec.get_fs_token_paths( + self.uri, storage_options=self.storage_options + ) + return self.fs + + def _get_path(self, deployment: "MlemDeployment"): + self.get_fs() + return posixpath.join(self.path, MLEM_STATE_DIR, deployment.name) + + def _get_state( + self, deployment: "MlemDeployment" + ) -> Optional[DeployState]: + try: + with self.get_fs().open(self._get_path(deployment)) as f: + return parse_obj_as(DeployState, safe_load(f)) + except FileNotFoundError: + return None + + def update_state(self, deployment: 
"MlemDeployment", state: DeployState): + path = self._get_path(deployment) + fs = self.get_fs() + fs.makedirs(posixpath.dirname(path), exist_ok=True) + with fs.open(path, "w") as f: + safe_dump(state.dict(), f) + + def purge_state(self, deployment: "MlemDeployment"): + path = self._get_path(deployment) + fs = self.get_fs() + if fs.exists(path): + fs.delete(path) + + def lock_state(self, deployment: "MlemDeployment"): + if self.locking: + fullpath = self._get_path(deployment) + dirname, filename = posixpath.split(fullpath) + return FSLock( + self.get_fs(), + dirname, + filename, + timeout=self.lock_timeout, + ) + return super().lock_state(deployment) + + +EnvLink: TypeAlias = MlemLink.typed_link(MlemEnv) +ModelLink: TypeAlias = MlemLink.typed_link(MlemModel) + +ET = TypeVar("ET", bound=MlemEnv) + + +class MlemDeployment(MlemObject, Generic[ST, ET]): """Base class for deployment metadata""" object_type: ClassVar = "deployment" @@ -787,40 +972,126 @@ class MlemDeployment(MlemObject): class Config: type_root = True type_field = "type" - exclude = {"model", "env"} + exclude = {"model_cache", "env_cache"} use_enum_values = True abs_name: ClassVar = "deployment" type: ClassVar[str] + state_type: ClassVar[Type[ST]] + env_type: ClassVar[Type[ET]] + + env: Union[str, MlemEnv, EnvLink, None] = None + """Enironment to use""" + env_cache: Optional[MlemEnv] = None + state_manager: Optional[StateManager] + """State manager used""" + + def __init_subclass__(cls): + if hasattr(cls, "env_type"): + cls.env_type.deploy_type = cls + super().__init_subclass__() + + @validator("state_manager", always=True) + def default_state_manager( # pylint: disable=no-self-argument + cls, value # noqa: B902 + ): + if value is None: + value = project_config("").state + return value - env_link: MlemLink - env: Optional[MlemEnv] - model_link: MlemLink - model: Optional[MlemModel] - state: Optional[DeployState] + @property + def _state_manager(self) -> StateManager: + if self.state_manager is None: + 
return LocalFileStateManager() + return self.state_manager - def get_env(self): - if self.env is None: - self.env = self.env_link.bind(self.loc).load_link( - force_type=MlemEnv - ) - return self.env + def get_state(self) -> ST: + return self._state_manager.get_state( + self, self.state_type + ) or self.state_type(declaration=self) - def get_model(self): - if self.model is None: - self.model = self.model_link.bind(self.loc).load_link( - force_type=MlemModel - ) - return self.model + def lock_state(self): + return self._state_manager.lock_state(self) + + def update_state(self, state: ST): + self._state_manager.update_state(self, state) + + def purge_state(self): + self._state_manager.purge_state(self) + + def get_client(self, state: DeployState = None) -> "Client": + if state is not None and not isinstance(state, self.state_type): + raise WrongABCType(state, self.state_type) + return self._get_client(state or self.get_state()) + + @abstractmethod + def _get_client(self, state: ST) -> "Client": + raise NotImplementedError + + @validator("env") + def validate_env(cls, value): # pylint: disable=no-self-argument + if isinstance(value, MlemLink): + if value.project is None: + return value.path + if not isinstance(value, EnvLink): + return EnvLink(**value.dict()) + if isinstance(value, str): + return make_posix(value) + return value + + def get_env(self) -> ET: + if self.env_cache is None: + if isinstance(self.env, str): + link = MlemLink( + path=self.env, + project=self.loc.project + if not os.path.isabs(self.env) + else None, + rev=self.loc.rev if not os.path.isabs(self.env) else None, + link_type=MlemEnv.object_type, + ) + self.env_cache = link.load_link(force_type=MlemEnv) + elif isinstance(self.env, MlemEnv): + self.env_cache = self.env + elif isinstance(self.env, MlemLink): + self.env_cache = self.env.load_link(force_type=MlemEnv) + elif self.env is None: + try: + self.env_cache = self.env_type() + except ValidationError as e: + raise MlemError( + f"{self.env_type} 
env does not have default value, please set `env` field" + ) from e + else: + raise ValueError( + "env should be one of [str, MlemLink, MlemEnv]" + ) + if not isinstance(self.env_cache, self.env_type): + raise WrongMetaSubType(self.env_cache, self.env_type) + return self.env_cache - def run(self): - return self.get_env().deploy(self) + @abstractmethod + def deploy(self, model: MlemModel): + raise NotImplementedError + @abstractmethod def remove(self): - self.get_env().remove(self) + raise NotImplementedError + + @abstractmethod + def get_status(self, raise_on_error=True) -> "DeployStatus": + raise NotImplementedError - def get_status(self, raise_on_error: bool = True) -> DeployStatus: - return self.get_env().get_status(self, raise_on_error=raise_on_error) + def check_unchanged(self): + declaration = self.get_state().declaration + copy = declaration.copy() + copy.env = None + self_copy = self.copy() + self_copy.env = None + if copy != self_copy or declaration.env != self.get_env(): + raise DeploymentError( + "Deployment parameters changed, this is not supported yet. 
Please re-create deployment with new parameters" + ) def wait_for_status( self, @@ -831,7 +1102,7 @@ def wait_for_status( DeployStatus, Iterable[DeployStatus] ] = None, raise_on_timeout: bool = True, - ): + ) -> object: if isinstance(status, DeployStatus): statuses = {status} else: @@ -843,7 +1114,12 @@ def wait_for_status( allowed = set(allowed_intermediate) current = DeployStatus.UNKNOWN - for _ in range(times): + iterator: Iterable + if times == 0: + iterator = itertools.count() + else: + iterator = range(times) + for _ in iterator: current = self.get_status(raise_on_error=False) if current in statuses: return True @@ -855,25 +1131,17 @@ def wait_for_status( return False time.sleep(timeout) if raise_on_timeout: + # TODO: count actual time passed raise DeploymentError( f"Deployment status is still {current} after {times * timeout} seconds" ) return False - def model_changed(self): - if self.state is None or self.state.model_hash is None: + def model_changed(self, model: MlemModel, state: Optional[ST] = None): + state = state or self.get_state() + if state.model_hash is None: return True - return self.get_model().meta_hash() != self.state.model_hash - - def update_model_hash(self, model: Optional[MlemModel] = None): - model = model or self.get_model() - if self.state is None: - return - self.state.model_hash = model.meta_hash() - - def replace_model(self, model: MlemModel): - self.model = model - self.model_link = self.model.make_link() + return model.meta_hash() != state.model_hash def find_object( @@ -890,8 +1158,6 @@ def find_object( tp, posixpath.join( project or "", - MLEM_DIR, - cls.object_type, cls.get_metafile_path(path), ), ) @@ -906,3 +1172,6 @@ def find_object( raise ValueError(f"Ambiguous object {path}: {source_paths}") type_, source_path = source_paths[0] return type_, source_path + + +DeployState.update_forward_refs() diff --git a/mlem/core/requirements.py b/mlem/core/requirements.py index 26e7d6ee..df802a36 100644 --- a/mlem/core/requirements.py +++ 
b/mlem/core/requirements.py @@ -2,6 +2,7 @@ Base classes to work with requirements which come with ML models and data """ import base64 +import collections import contextlib import glob import itertools @@ -54,25 +55,42 @@ class Config: abs_name: ClassVar[str] = "requirement" type: ClassVar = ... + @abstractmethod + def get_repr(self): + raise NotImplementedError + + @classmethod + @abstractmethod + def materialize(cls, reqs, target: str): + raise NotImplementedError + class PythonRequirement(Requirement, ABC): + type: ClassVar = "_python" module: str + """Python module name""" + + def get_repr(self): + raise NotImplementedError + + @classmethod + def materialize(cls, reqs, target: str): + raise NotImplementedError class InstallableRequirement(PythonRequirement): """ - This class represents pip-installable python library - - :param module: name of python module - :param version: version of python package - :param package_name: Optional. pip package name for this module, if it is different from module name + pip-installable python library """ type: ClassVar[str] = "installable" module: str + """Name of python module""" version: Optional[str] = None + """Version of python package""" package_name: Optional[str] = None + """Pip package name for this module, if it is different from module name""" @property def package(self): @@ -83,7 +101,7 @@ def package(self): self.module, self.module ) - def to_str(self): + def get_repr(self): """ pip installable representation of this module """ @@ -91,6 +109,13 @@ def to_str(self): return f"{self.package}=={self.version}" return self.package + @classmethod + def materialize(cls, reqs, target: str): + reqs = [r.get_repr() for r in reqs] + requirement_string = "\n".join(reqs) + with open(os.path.join(target), "w", encoding="utf8") as fp: + fp.write(requirement_string + "\n") + @classmethod def from_module( cls, mod: ModuleType, package_name: str = None @@ -135,17 +160,28 @@ def from_str(cls, name): class 
CustomRequirement(PythonRequirement): """ - This class represents local python code that you need as a requirement for your code - - :param name: filename of this code - :param source64zip: zipped and base64-encoded source - :param is_package: whether this code should be in %name%/__init__.py + local python code that you need as a requirement for your code """ type: ClassVar[str] = "custom" name: str + """Filename of this code""" source64zip: str + """Zipped and base64-encoded source""" is_package: bool + """Whether this code should be in %name%/__init__.py""" + + def get_repr(self): + raise NotImplementedError + + @classmethod + def materialize(cls, reqs, target: str): + for cr in reqs: + for part, src in cr.to_sources_dict().items(): + p = os.path.join(target, part) + os.makedirs(os.path.dirname(p), exist_ok=True) + with open(p, "wb") as f: + f.write(src) @staticmethod def from_module(mod: ModuleType) -> "CustomRequirement": @@ -264,11 +300,16 @@ def to_sources_dict(self) -> Dict[str, bytes]: class FileRequirement(CustomRequirement): - """Represents an additional file""" + """Additional file""" type: ClassVar[str] = "file" is_package: bool = False + """Ignored""" module: str = "" + """Ignored""" + + def get_repr(self): + raise NotImplementedError def to_sources_dict(self): """ @@ -287,10 +328,18 @@ def from_path(cls, path: str): class UnixPackageRequirement(Requirement): - """Represents a unix package that needs to be installed""" + """Unix package that needs to be installed""" type: ClassVar[str] = "unix" package_name: str + """Name of the package""" + + def get_repr(self): + return self.package_name + + @classmethod + def materialize(cls, reqs, target: str): + raise NotImplementedError T = TypeVar("T", bound=Requirement) @@ -299,11 +348,10 @@ class UnixPackageRequirement(Requirement): class Requirements(BaseModel): """ A collection of requirements - - :param requirements: list of :class:`Requirement` instances """ __root__: List[Requirement] = [] + """List of 
:class:`Requirement` instances""" @property def installable(self) -> List[InstallableRequirement]: @@ -396,11 +444,17 @@ def add(self, requirement: Requirement): if requirement not in self.__root__: self.__root__.append(requirement) + def to_unix(self) -> List[str]: + """ + :return: list of unix based packages + """ + return [r.get_repr() for r in self.of_type(UnixPackageRequirement)] + def to_pip(self) -> List[str]: """ :return: list of pip installable packages """ - return [r.to_str() for r in self.installable] + return [r.get_repr() for r in self.installable] def __add__(self, other: "AnyRequirements"): other = resolve_requirements(other) @@ -423,12 +477,7 @@ def new(cls, requirements: "AnyRequirements" = None): return resolve_requirements(requirements) def materialize_custom(self, path: str): - for cr in self.custom: - for part, src in cr.to_sources_dict().items(): - p = os.path.join(path, part) - os.makedirs(os.path.dirname(p), exist_ok=True) - with open(p, "wb") as f: - f.write(src) + CustomRequirement.materialize(self.custom, path) @contextlib.contextmanager def import_custom(self): @@ -490,7 +539,8 @@ def resolve_requirements(other: "AnyRequirements") -> Requirements: if isinstance(other[0], str): return Requirements( __root__=[ - InstallableRequirement.from_str(r) for r in set(other) + InstallableRequirement.from_str(r) + for r in collections.OrderedDict.fromkeys(other) ] ) @@ -522,7 +572,7 @@ def resolve_requirements(other: "AnyRequirements") -> Requirements: class WithRequirements: - """A mixing for objects that should provide their requirements""" + """A mixin for objects that should provide their requirements""" def get_requirements(self) -> Requirements: from mlem.utils.module import get_object_requirements diff --git a/mlem/ext.py b/mlem/ext.py index 1aecf256..4b6d8849 100644 --- a/mlem/ext.py +++ b/mlem/ext.py @@ -4,6 +4,7 @@ """ import importlib import logging +import re import sys from types import ModuleType from typing import Callable, Dict, 
List, Optional, Union @@ -108,6 +109,15 @@ class ExtensionLoader: Extension("mlem.contrib.github", [], True), Extension("mlem.contrib.gitlabfs", [], True), Extension("mlem.contrib.bitbucketfs", [], True), + Extension("mlem.contrib.sagemaker", ["sagemaker", "boto3"], False), + Extension("mlem.contrib.dvc", ["dvc"], False), + Extension( + "mlem.contrib.heroku", ["fastapi", "uvicorn", "docker"], False + ), + Extension("mlem.contrib.pip", [], False), + Extension("mlem.contrib.kubernetes", ["kubernetes", "docker"], False), + Extension("mlem.contrib.requirements", [], False), + Extension("mlem.contrib.venv", [], False), ) _loaded_extensions: Dict[Extension, ModuleType] = {} @@ -257,6 +267,19 @@ def load_extensions(*exts: str): ExtensionLoader.load(ext) +def get_ext_type(ext: Union[str, Extension]): + if isinstance(ext, Extension): + ext_module = ext.module + else: + ext_module = ext + + doc = import_module(ext_module).__doc__ or "" + search = re.search(r"Extension type: (\w*)", doc) + if search is None: + raise ValueError(f"{ext_module} extension doesnt define it's type") + return search.group(1) + + # Copyright 2019 Zyfra # Copyright 2021 Iterative # diff --git a/mlem/polydantic/core.py b/mlem/polydantic/core.py index 6f2b8910..c5b86ec9 100644 --- a/mlem/polydantic/core.py +++ b/mlem/polydantic/core.py @@ -71,6 +71,8 @@ def validate(cls, value): return super().validate(value) if isinstance(value, str): value = {cls.__config__.type_field: value} + if not isinstance(value, dict): + raise ValueError(f"{value} is neither dict nor {cls}") value = value.copy() type_name = value.pop( cls.__config__.type_field, cls.__config__.default_type @@ -108,15 +110,21 @@ def _iter( exclude_defaults=exclude_defaults, exclude_none=exclude_none, ) + exclude = exclude or set() if self.__is_root__: alias = self.__get_alias__(self.__config__.type_field) - if not exclude_defaults or alias != self.__config__.default_type: + if ( + not exclude_defaults or alias != self.__config__.default_type + ) 
and self.__config__.type_field not in exclude: yield self.__config__.type_field, alias for parent in self.__iter_parents__(include_top=False): alias = parent.__get_alias__() - if not exclude_defaults or alias != parent.__config__.default_type: - yield parent.__type_field__(), alias + parent_type_field = parent.__type_field__() + if ( + not exclude_defaults or alias != parent.__config__.default_type + ) and parent_type_field not in exclude: + yield parent_type_field, alias def __iter__(self): """Add alias field""" diff --git a/mlem/runtime/client.py b/mlem/runtime/client.py index 8332cd7b..5e0bc9f0 100644 --- a/mlem/runtime/client.py +++ b/mlem/runtime/client.py @@ -91,9 +91,13 @@ def __call__(self, *args, **kwargs): class HTTPClient(Client): + """Access models served with http-based servers""" + type: ClassVar[str] = "http" host: str = "0.0.0.0" + """Server host""" port: Optional[int] = 8080 + """Server port""" @property def base_url(self): diff --git a/mlem/runtime/interface.py b/mlem/runtime/interface.py index e526b517..6219428f 100644 --- a/mlem/runtime/interface.py +++ b/mlem/runtime/interface.py @@ -20,7 +20,9 @@ class ExecutionError(MlemError): class InterfaceDescriptor(BaseModel): version: str = mlem.version.__version__ + """mlem version""" methods: Dict[str, Signature] = {} + """interface methods""" class Interface(ABC, MlemABC): @@ -137,6 +139,7 @@ class SimpleInterface(Interface): type: ClassVar[str] = "simple" methods: InterfaceDescriptor = InterfaceDescriptor() + """Interface version and methods""" def __init__(self, **data: Any): methods = {} @@ -175,6 +178,7 @@ class Config: type: ClassVar[str] = "model" model_type: ModelType + """Model metadata""" def load(self, uri: str): meta = load_meta(uri) diff --git a/mlem/ui.py b/mlem/ui.py index 894df08d..d85c0376 100644 --- a/mlem/ui.py +++ b/mlem/ui.py @@ -107,3 +107,4 @@ def bold(text): EMOJI_BUILD = emoji("🛠") EMOJI_UPLOAD = emoji("🔼") EMOJI_STOP = emoji("🔻") +EMOJI_KEY = emoji("🗝") diff --git 
a/mlem/utils/entrypoints.py b/mlem/utils/entrypoints.py index 4809cc64..1ad1080e 100644 --- a/mlem/utils/entrypoints.py +++ b/mlem/utils/entrypoints.py @@ -52,33 +52,57 @@ def load_entrypoints(domain: str = MLEM_ENTRY_POINT) -> Dict[str, Entrypoint]: def list_implementations( base_class: Union[str, Type[MlemABC]], - meta_subtype: Type["MlemObject"] = None, + meta_subtype: Union[str, Type["MlemObject"]] = None, + include_hidden: bool = True, ) -> List[str]: + from mlem.core.objects import MlemObject + if isinstance(base_class, type) and issubclass(base_class, MlemABC): abs_name = base_class.abs_name - if base_class == "meta" and meta_subtype is not None: - base_class = meta_subtype.object_type + + if (base_class in ("meta", MlemObject)) and meta_subtype is not None: + if isinstance(meta_subtype, str): + base_class = meta_subtype + else: + base_class = meta_subtype.object_type abs_name = "meta" + resolved_base_class: Optional[Type[MlemABC]] = None if isinstance(base_class, str): abs_name = base_class try: - base_class = MlemABC.abs_types[abs_name] + resolved_base_class = MlemABC.abs_types[abs_name] except KeyError: - base_class = load_impl_ext(abs_name, None) + try: + resolved_base_class = load_impl_ext(abs_name, None) + except ValueError: + pass + else: + resolved_base_class = base_class eps = { e.name for e in load_entrypoints().values() if e.abs_name == abs_name and e.name is not None } - eps.update(base_class.non_abstract_subtypes()) - return list(eps) + if resolved_base_class is not None: + eps.update(resolved_base_class.non_abstract_subtypes()) + return sorted(e for e in eps if include_hidden or not e.startswith("_")) + + +def list_abstractions( + include_hidden: bool = True, +) -> List[str]: + eps = {e.abs_name for e in load_entrypoints().values()} + eps.update(MlemABC.abs_types) + return [e for e in eps if include_hidden or not e.startswith("_")] IT = TypeVar("IT") def find_implementations( - base: Type[IT], root_module_name: str = MLEM_ENTRY_POINT + base: 
Type[IT], + root_module_name: str = MLEM_ENTRY_POINT, + raise_on_error: bool = False, ) -> Dict[Type[IT], str]: """Generates dict with MLEM entrypoints which should appear in setup.py. Can be used by plugin developers to check if they populated all existing @@ -103,6 +127,8 @@ def find_implementations( print( f"Cannot import module {module_name}: {e.__class__} {e.args}" ) + if raise_on_error: + raise continue for obj in module.__dict__.values(): @@ -118,12 +144,16 @@ def find_implementations( return impls -def find_abc_implementations(root_module_name: str = MLEM_ENTRY_POINT): - impls = find_implementations(MlemABC, root_module_name) +def find_abc_implementations( + root_module_name: str = MLEM_ENTRY_POINT, raise_on_error: bool = False +): + impls = find_implementations( + MlemABC, root_module_name, raise_on_error=raise_on_error + ) return { MLEM_ENTRY_POINT: [ f"{obj.abs_name}.{obj.__get_alias__()} = {name}" - if not obj.__is_root__ + if not obj.__is_root__ or hasattr(obj, obj.__type_field__()) else f"{obj.abs_name} = {name}" for obj, name in impls.items() if hasattr(obj, "abs_name") diff --git a/mlem/utils/fslock.py b/mlem/utils/fslock.py new file mode 100644 index 00000000..396d3c8f --- /dev/null +++ b/mlem/utils/fslock.py @@ -0,0 +1,113 @@ +import posixpath +import random +import re +import time +from typing import List, Tuple + +from fsspec import AbstractFileSystem + +from mlem.utils.path import make_posix + +LOCK_EXT = "lock" + + +class LockTimeoutError(Exception): + pass + + +class FSLock: + def __init__( + self, + fs: AbstractFileSystem, + dirpath: str, + name: str, + timeout: float = None, + retry_timeout: float = 0.1, + *, + salt=None, + ): + self.fs = fs + self.dirpath = make_posix(str(dirpath)) + self.name = name + self.timeout = timeout + self.retry_timeout = retry_timeout + self._salt = salt + self._timestamp = None + + @property + def salt(self): + if self._salt is None: + self._salt = random.randint(10**3, 10**4) + return self._salt + + @property + 
def timestamp(self): + if self._timestamp is None: + self._timestamp = time.time_ns() + return self._timestamp + + @property + def lock_filename(self): + return f"{self.name}.{self.timestamp}.{self.salt}.{LOCK_EXT}" + + @property + def lock_path(self): + return posixpath.join(self.dirpath, self.lock_filename) + + def _list_locks(self) -> List[Tuple[int, int]]: + locks = [ + posixpath.basename(make_posix(f)) + for f in self.fs.listdir(self.dirpath, detail=False) + ] + locks = [ + f[len(self.name) :] + for f in locks + if f.startswith(self.name) and f.endswith(LOCK_EXT) + ] + pat = re.compile(rf"\.(\d+)\.(\d+)\.{LOCK_EXT}") + locks_re = [pat.match(lock) for lock in locks] + return [ + (int(m.group(1)), int(m.group(2))) + for m in locks_re + if m is not None + ] + + def _double_check(self): + locks = self._list_locks() + if not locks: + return False + minlock = min(locks) + c = minlock == (self._timestamp, self._salt) + return c + + def _write_lockfile(self): + self.fs.touch(self.lock_path) + + def _clear(self): + self._timestamp = None + self._salt = None + + def _delete_lockfile(self): + try: + self.fs.delete(self.lock_path) + except FileNotFoundError: + pass + + def __enter__(self): + start = time.time() + + self._write_lockfile() + time.sleep(self.retry_timeout) + + while not self._double_check(): + if self.timeout is not None and time.time() - start > self.timeout: + self._delete_lockfile() + self._clear() + raise LockTimeoutError( + f"Lock aquiring timeouted after {self.timeout}" + ) + time.sleep(self.retry_timeout) + + def __exit__(self, exc_type, exc_val, exc_tb): + self._delete_lockfile() + self._clear() diff --git a/mlem/utils/git.py b/mlem/utils/git.py new file mode 100644 index 00000000..d288a84b --- /dev/null +++ b/mlem/utils/git.py @@ -0,0 +1,5 @@ +import re + + +def is_long_sha(sha: str): + return re.match(r"^[a-f\d]{40}$", sha) diff --git a/mlem/utils/module.py b/mlem/utils/module.py index 142aa0e1..a0090bdd 100644 --- a/mlem/utils/module.py +++ 
b/mlem/utils/module.py @@ -307,7 +307,11 @@ def get_module_as_requirement( def get_local_module_reqs(mod) -> List[ModuleType]: """Parses module AST to find all import statements""" - tree = ast.parse(inspect.getsource(mod)) + try: + tree = ast.parse(inspect.getsource(mod)) + except OSError: + logger.debug("Failed to get source of %s", str(mod)) + return [] imports: List[Tuple[str, Optional[str]]] = [] for statement in tree.body: if isinstance(statement, ast.Import): diff --git a/mlem/utils/root.py b/mlem/utils/root.py index 3dc96e76..858dd5ef 100644 --- a/mlem/utils/root.py +++ b/mlem/utils/root.py @@ -6,7 +6,7 @@ from fsspec.implementations.local import LocalFileSystem from typing_extensions import Literal -from mlem.constants import MLEM_DIR +from mlem.constants import MLEM_CONFIG_FILE_NAME from mlem.core.errors import MlemProjectNotFound @@ -15,7 +15,7 @@ def mlem_project_exists( ): """Check is mlem project exists at path""" try: - exists = fs.exists(posixpath.join(path, MLEM_DIR)) + exists = fs.exists(posixpath.join(path, MLEM_CONFIG_FILE_NAME)) except ValueError: # some fsspec implementations throw ValueError because of # wrong bucket/container names containing "." 
diff --git a/mlem/utils/templates.py b/mlem/utils/templates.py index 2ffe4caa..db7b8dca 100644 --- a/mlem/utils/templates.py +++ b/mlem/utils/templates.py @@ -2,7 +2,12 @@ from fsspec import AbstractFileSystem from fsspec.implementations.local import LocalFileSystem -from jinja2 import Environment, FileSystemLoader, StrictUndefined +from jinja2 import ( + Environment, + FileSystemLoader, + StrictUndefined, + select_autoescape, +) from pydantic import BaseModel @@ -13,14 +18,16 @@ class TemplateModel(BaseModel): TEMPLATE_DIR: ClassVar[str] templates_dir: List[str] = [] + """list of directories to look for jinja templates""" def prepare_dict(self): return self.dict() def generate(self, **additional): j2 = Environment( - loader=FileSystemLoader([self.TEMPLATE_DIR] + self.templates_dir), + loader=FileSystemLoader(self.templates_dir + [self.TEMPLATE_DIR]), undefined=StrictUndefined, + autoescape=select_autoescape(), ) template = j2.get_template(self.TEMPLATE_FILE) args = self.prepare_dict() diff --git a/setup.cfg b/setup.cfg index 41416eef..de26369f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,6 +25,8 @@ log_level = debug markers = long: Marks long-running tests docker: Marks tests that needs Docker + kubernetes: Marks tests that needs Kubernetes + conda: Marks tests that need conda testpaths = tests addopts = -rav --durations=0 --cov=mlem --cov-report=term-missing --cov-report=xml diff --git a/setup.py b/setup.py index 408b5504..1966e64d 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,7 @@ "xgboost": ["xgboost"], "lightgbm": ["lightgbm"], "fastapi": ["uvicorn", "fastapi"], - # "sagemaker": ["boto3==1.19.12", "sagemaker"], + "sagemaker": ["boto3", "sagemaker"], "torch": ["torch"], "tensorflow": ["tensorflow"], "azure": ["adlfs>=2021.10.0", "azure-identity>=1.4.0", "knack"], @@ -82,6 +82,7 @@ "rmq": ["pika"], "docker": ["docker"], "heroku": ["docker", "fastapi", "uvicorn"], + "kubernetes": ["docker", "kubernetes"], "dvc": ["dvc~=2.0"], } @@ -153,7 +154,7 @@ 
"env.docker = mlem.contrib.docker.base:DockerEnv", "docker_registry.docker_io = mlem.contrib.docker.base:DockerIORegistry", "builder.docker = mlem.contrib.docker.base:DockerImageBuilder", - "docker_registry = mlem.contrib.docker.base:DockerRegistry", + "docker_registry.local = mlem.contrib.docker.base:DockerRegistry", "docker_registry.remote = mlem.contrib.docker.base:RemoteRegistry", "artifact.dvc = mlem.contrib.dvc:DVCArtifact", "storage.dvc = mlem.contrib.dvc:DVCStorage", @@ -164,7 +165,13 @@ "deployment.heroku = mlem.contrib.heroku.meta:HerokuDeployment", "env.heroku = mlem.contrib.heroku.meta:HerokuEnv", "deploy_state.heroku = mlem.contrib.heroku.meta:HerokuState", - "server.heroku = mlem.contrib.heroku.server:HerokuServer", + "server._heroku = mlem.contrib.heroku.server:HerokuServer", + "deployment.kubernetes = mlem.contrib.kubernetes.base:K8sDeployment", + "deploy_state.kubernetes = mlem.contrib.kubernetes.base:K8sDeploymentState", + "env.kubernetes = mlem.contrib.kubernetes.base:K8sEnv", + "k8s_service_type.clusterip = mlem.contrib.kubernetes.service:ClusterIPService", + "k8s_service_type.loadbalancer = mlem.contrib.kubernetes.service:LoadBalancerService", + "k8s_service_type.nodeport = mlem.contrib.kubernetes.service:NodePortService", "data_reader.lightgbm = mlem.contrib.lightgbm:LightGBMDataReader", "data_type.lightgbm = mlem.contrib.lightgbm:LightGBMDataType", "data_writer.lightgbm = mlem.contrib.lightgbm:LightGBMDataWriter", @@ -180,7 +187,6 @@ "model_type.onnx = mlem.contrib.onnx:ONNXModel", "data_type.dataframe = mlem.contrib.pandas:DataFrameType", "import.pandas = mlem.contrib.pandas:PandasImport", - "import.torch = mlem.contrib.torch:TorchModelImport", "data_reader.pandas = mlem.contrib.pandas:PandasReader", "data_reader.pandas_series = mlem.contrib.pandas:PandasSeriesReader", "data_writer.pandas_series = mlem.contrib.pandas:PandasSeriesWriter", @@ -188,8 +194,18 @@ "data_type.series = mlem.contrib.pandas:SeriesType", "builder.pip = 
mlem.contrib.pip.base:PipBuilder", "builder.whl = mlem.contrib.pip.base:WhlBuilder", + "builder.requirements = mlem.contrib.requirements:RequirementsBuilder", + "builder.venv = mlem.contrib.venv:VenvBuilder", + "builder.conda = mlem.contrib.venv:CondaBuilder", + "requirement.conda = mlem.contrib.venv:CondaPackageRequirement", "client.rmq = mlem.contrib.rabbitmq:RabbitMQClient", "server.rmq = mlem.contrib.rabbitmq:RabbitMQServer", + "docker_registry.ecr = mlem.contrib.sagemaker.build:ECRegistry", + "deploy_state.sagemaker = mlem.contrib.sagemaker.meta:SagemakerDeployState", + "deployment.sagemaker = mlem.contrib.sagemaker.meta:SagemakerDeployment", + "env.sagemaker = mlem.contrib.sagemaker.meta:SagemakerEnv", + "server._sagemaker = mlem.contrib.sagemaker.runtime:SageMakerServer", + "client.sagemaker = mlem.contrib.sagemaker.runtime:SagemakerClient", "model_type.sklearn = mlem.contrib.sklearn:SklearnModel", "model_type.sklearn_pipeline = mlem.contrib.sklearn:SklearnPipelineType", "model_type.tf_keras = mlem.contrib.tensorflow:TFKerasModel", @@ -199,6 +215,7 @@ "data_writer.tf_tensor = mlem.contrib.tensorflow:TFTensorWriter", "model_type.torch = mlem.contrib.torch:TorchModel", "model_io.torch_io = mlem.contrib.torch:TorchModelIO", + "import.torch = mlem.contrib.torch:TorchModelImport", "data_type.torch = mlem.contrib.torch:TorchTensorDataType", "data_reader.torch = mlem.contrib.torch:TorchTensorReader", "data_writer.torch = mlem.contrib.torch:TorchTensorWriter", @@ -212,6 +229,8 @@ "docker = mlem.contrib.docker.context:DockerConfig", "heroku = mlem.contrib.heroku.config:HerokuConfig", "pandas = mlem.contrib.pandas:PandasConfig", + "aws = mlem.contrib.sagemaker.config:AWSConfig", + "sagemaker = mlem.contrib.sagemaker.runtime:SageMakerServerConfig", ], }, zip_safe=False, diff --git a/tests/api/test_commands.py b/tests/api/test_commands.py index 3f089916..ef2aa03e 100644 --- a/tests/api/test_commands.py +++ b/tests/api/test_commands.py @@ -8,19 +8,15 @@ from 
pytest_lazyfixture import lazy_fixture from mlem.api import apply, apply_remote, link, load_meta -from mlem.api.commands import build, import_object, init, ls -from mlem.config import CONFIG_FILE_NAME -from mlem.constants import PREDICT_METHOD_NAME -from mlem.contrib.heroku.meta import HerokuEnv +from mlem.api.commands import build, import_object, init +from mlem.constants import MLEM_CONFIG_FILE_NAME, PREDICT_METHOD_NAME from mlem.core.artifacts import LocalArtifact -from mlem.core.errors import MlemProjectNotFound -from mlem.core.meta_io import MLEM_DIR, MLEM_EXT +from mlem.core.meta_io import MLEM_EXT from mlem.core.metadata import load from mlem.core.model import ModelIO -from mlem.core.objects import MlemData, MlemEnv, MlemLink, MlemModel +from mlem.core.objects import MlemLink, MlemModel from mlem.runtime.client import HTTPClient -from mlem.utils.path import make_posix -from tests.conftest import MLEM_TEST_REPO, long, need_test_repo_auth +from tests.conftest import MLEM_TEST_REPO, long IMPORT_MODEL_FILENAME = "mymodel" @@ -62,7 +58,7 @@ def test_apply_remote(mlem_client, train): def test_link_as_separate_file(model_path_mlem_project): model_path, mlem_project = model_path_mlem_project link_path = os.path.join(mlem_project, "latest.mlem") - link(model_path, target=link_path, external=True) + link(model_path, target=link_path) assert os.path.exists(link_path) link_object = load_meta(link_path, follow_links=False) assert isinstance(link_object, MlemLink) @@ -77,12 +73,9 @@ def test_link_in_mlem_dir(model_path_mlem_project): model_path, target=link_name, target_project=mlem_project, - external=False, ) assert isinstance(link_obj, MlemLink) - link_dumped_to = os.path.join( - mlem_project, MLEM_DIR, "link", link_name + MLEM_EXT - ) + link_dumped_to = os.path.join(mlem_project, link_name + MLEM_EXT) assert os.path.exists(link_dumped_to) loaded_link_object = load_meta(link_dumped_to, follow_links=False) assert isinstance(loaded_link_object, MlemLink) @@ -116,77 
+109,16 @@ def test_link_from_remote_to_local(current_test_branch, mlem_project): assert isinstance(model, MlemModel) -def test_ls_local(filled_mlem_project): - objects = ls(filled_mlem_project) - assert len(objects) == 1 - assert MlemModel in objects - models = objects[MlemModel] - assert len(models) == 2 - model, lnk = models - if isinstance(model, MlemLink): - model, lnk = lnk, model - - assert isinstance(model, MlemModel) - assert isinstance(lnk, MlemLink) - assert ( - posixpath.join(make_posix(filled_mlem_project), lnk.path) - == model.loc.fullpath - ) - - -def test_ls_no_project(tmpdir): - with pytest.raises(MlemProjectNotFound): - ls(str(tmpdir)) - - -@long -@need_test_repo_auth -def test_ls_remote(current_test_branch): - objects = ls( - os.path.join(MLEM_TEST_REPO, f"tree/{current_test_branch}/simple") - ) - assert len(objects) == 2 - assert MlemModel in objects - models = objects[MlemModel] - assert len(models) == 2 - model, lnk = models - if isinstance(model, MlemLink): - model, lnk = lnk, model - - assert isinstance(model, MlemModel) - assert isinstance(lnk, MlemLink) - - assert MlemData in objects - assert len(objects[MlemData]) == 4 - - -@long -def test_ls_remote_s3(s3_tmp_path): - path = s3_tmp_path("ls_remote_s3") - init(path) - meta = HerokuEnv() - meta.dump(posixpath.join(path, "env")) - meta.dump(posixpath.join(path, "subdir", "env")) - meta.dump(posixpath.join(path, "subdir", "subsubdir", "env")) - objects = ls(path) - assert MlemEnv in objects - envs = objects[MlemEnv] - assert len(envs) == 3 - assert all(o == meta for o in envs) - - def test_init(tmpdir): init(str(tmpdir)) - assert os.path.isdir(tmpdir / MLEM_DIR) - assert os.path.isfile(tmpdir / MLEM_DIR / CONFIG_FILE_NAME) + assert os.path.isfile(tmpdir / MLEM_CONFIG_FILE_NAME) @long def test_init_remote(s3_tmp_path, s3_storage_fs): path = s3_tmp_path("init") init(path) - assert s3_storage_fs.isdir(f"{path}/{MLEM_DIR}") - assert s3_storage_fs.isfile(f"{path}/{MLEM_DIR}/{CONFIG_FILE_NAME}") + 
assert s3_storage_fs.isfile(f"{path}/{MLEM_CONFIG_FILE_NAME}") def _check_meta(meta, out_path, fs=None): @@ -262,9 +194,7 @@ def test_import_model_pickle__no_copy_in_mlem_project( write_model_pickle(path) out_path = os.path.join(mlem_project, "mlem_model") - meta = import_object( - path, target=out_path, type_=type_, copy_data=False, external=True - ) + meta = import_object(path, target=out_path, type_=type_, copy_data=False) _check_meta(meta, out_path) _check_load_artifact(meta, out_path, False, train, filename) @@ -297,7 +227,7 @@ def test_import_model_pickle_remote_in_project( write_model_pickle(path, s3_storage_fs) out_path = posixpath.join(project_path, "mlem_model") meta = import_object( - path, target=out_path, copy_data=False, type_="pickle", external=True + path, target=out_path, copy_data=False, type_="pickle" ) _check_meta(meta, out_path, s3_storage_fs) _check_load_artifact(meta, out_path, False, train) diff --git a/tests/cli/conftest.py b/tests/cli/conftest.py index 73c2e0b9..a161069e 100644 --- a/tests/cli/conftest.py +++ b/tests/cli/conftest.py @@ -2,17 +2,35 @@ from click.testing import Result from typer.testing import CliRunner +from mlem import LOCAL_CONFIG from mlem.cli import app +app.pretty_exceptions_short = False + class Runner: def __init__(self): self._runner = CliRunner(mix_stderr=False) - def invoke(self, *args, **kwargs) -> Result: - return self._runner.invoke(app, *args, **kwargs) + def invoke(self, *args, raise_on_error: bool = False, **kwargs) -> Result: + result = self._runner.invoke(app, *args, **kwargs) + if raise_on_error and result.exit_code != 0: + if result.exit_code == 1: + raise result.exception + raise RuntimeError(result.stderr) + return result @pytest.fixture def runner() -> Runner: return Runner() + + +@pytest.fixture +def no_debug(): + tmp = LOCAL_CONFIG.DEBUG + try: + LOCAL_CONFIG.DEBUG = False + yield + finally: + LOCAL_CONFIG.DEBUG = tmp diff --git a/tests/cli/test_apply.py b/tests/cli/test_apply.py index 
df93a58b..7212cce9 100644 --- a/tests/cli/test_apply.py +++ b/tests/cli/test_apply.py @@ -10,10 +10,11 @@ from mlem.api import load, save from mlem.core.data_type import ArrayType -from mlem.core.errors import MlemProjectNotFound +from mlem.core.errors import UnsupportedDataBatchLoading from mlem.core.metadata import load_meta from mlem.core.objects import MlemData from mlem.runtime.client import HTTPClient +from tests.cli.conftest import Runner from tests.conftest import MLEM_TEST_REPO, long, need_test_repo_auth @@ -35,7 +36,6 @@ def test_apply(runner, model_path, data_path): "predict", "-o", path, - "--no-index", ], ) assert result.exit_code == 0, ( @@ -60,14 +60,14 @@ def model_train_batch(): def model_path_batch(model_train_batch, tmp_path_factory): path = os.path.join(tmp_path_factory.getbasetemp(), "saved-model") model, train = model_train_batch - save(model, path, sample_data=train, index=False) + save(model, path, sample_data=train) yield path @pytest.fixture def data_path_batch(model_train_batch, tmpdir_factory): temp_dir = str(tmpdir_factory.mktemp("saved-data") / "data") - save(model_train_batch[1], temp_dir, index=False) + save(model_train_batch[1], temp_dir) yield temp_dir @@ -84,7 +84,6 @@ def test_apply_batch(runner, model_path_batch, data_path_batch): "predict", "-o", path, - "--no-index", "-b", "5", ], @@ -118,7 +117,6 @@ def test_apply_with_import(runner, model_meta_saved_single, tmp_path_factory): "predict", "-o", path, - "--no-index", "--import", "--it", "pandas[csv]", @@ -134,44 +132,39 @@ def test_apply_with_import(runner, model_meta_saved_single, tmp_path_factory): def test_apply_batch_with_import( - runner, model_meta_saved_single, tmp_path_factory + runner: Runner, model_meta_saved_single, tmp_path_factory ): data_path = os.path.join(tmp_path_factory.getbasetemp(), "import_data") load_iris(return_X_y=True, as_frame=True)[0].to_csv(data_path, index=False) with tempfile.TemporaryDirectory() as dir: path = posixpath.join(dir, "data") - result 
= runner.invoke( - [ - "apply", - model_meta_saved_single.loc.uri, - data_path, - "-m", - "predict", - "-o", - path, - "--no-index", - "--import", - "--it", - "pandas[csv]", - "-b", - "2", - ], - ) - assert result.exit_code == 1, ( - result.stdout, - result.stderr, - result.exception, - ) - assert ( - "Batch data loading is currently not supported for loading data on-the-fly" - in result.stderr - ) + with pytest.raises( + UnsupportedDataBatchLoading, + match="Batch data loading is currently not supported for loading data on-the-fly", + ): + runner.invoke( + [ + "apply", + model_meta_saved_single.loc.uri, + data_path, + "-m", + "predict", + "-o", + path, + "--import", + "--it", + "pandas[csv]", + "-b", + "2", + ], + raise_on_error=True, + ) def test_apply_no_output(runner, model_path, data_path): result = runner.invoke( - ["apply", model_path, data_path, "-m", "predict", "--no-index"], + ["apply", model_path, data_path, "-m", "predict"], ) assert result.exit_code == 0, ( result.stdout, @@ -181,29 +174,6 @@ def test_apply_no_output(runner, model_path, data_path): assert len(result.stdout) > 0 -def test_apply_fails_without_mlem_dir(runner, model_path, data_path): - with tempfile.TemporaryDirectory() as dir: - result = runner.invoke( - [ - "--tb", - "apply", - model_path, - data_path, - "-m", - "predict", - "-o", - dir, - "--index", - ], - ) - assert result.exit_code == 1, ( - result.stdout, - result.stderr, - result.exception, - ) - assert isinstance(result.exception, MlemProjectNotFound) - - @long @need_test_repo_auth def test_apply_from_remote(runner, current_test_branch, s3_tmp_path): @@ -227,7 +197,6 @@ def test_apply_from_remote(runner, current_test_branch, s3_tmp_path): current_test_branch, "-o", out, - "--no-index", ], ) assert result.exit_code == 0, ( @@ -246,14 +215,16 @@ def test_apply_remote(mlem_client, runner, data_path): [ "apply-remote", "http", + "-d", data_path, - "-c", - "host=''", - "-c", - "port=None", + "--host", + "", + "--port", + "None", "-o", 
path, ], + raise_on_error=True, ) assert result.exit_code == 0, ( result.stdout, diff --git a/tests/cli/test_build.py b/tests/cli/test_build.py index 2308ebbe..5e088992 100644 --- a/tests/cli/test_build.py +++ b/tests/cli/test_build.py @@ -1,24 +1,40 @@ +import json import os.path from typing import ClassVar +from pydantic import parse_obj_as +from yaml import safe_dump + +from mlem.cli.build import create_build_command +from mlem.contrib.fastapi import FastAPIServer from mlem.core.objects import MlemBuilder, MlemModel +from mlem.runtime.server import Server from mlem.utils.path import make_posix from tests.cli.conftest import Runner class BuilderMock(MlemBuilder): + """mock""" + type: ClassVar = "mock" target: str + """target""" + server: Server + """server""" def build(self, obj: MlemModel): with open(self.target, "w", encoding="utf8") as f: - f.write(obj.loc.path) + f.write(obj.loc.path + "\n") + json.dump(self.server.dict(), f) + + +create_build_command(BuilderMock.type) def test_build(runner: Runner, model_meta_saved_single, tmp_path): path = os.path.join(tmp_path, "packed") result = runner.invoke( - f"build {make_posix(model_meta_saved_single.loc.uri)} -c target={make_posix(path)} mock" + f"build mock -m {make_posix(model_meta_saved_single.loc.uri)} --target {make_posix(path)} --server fastapi --server.port 1000" ) assert result.exit_code == 0, ( @@ -28,4 +44,59 @@ def test_build(runner: Runner, model_meta_saved_single, tmp_path): ) with open(path, encoding="utf8") as f: - assert f.read().strip() == model_meta_saved_single.loc.path + lines = f.read().splitlines() + assert len(lines) == 2 + path, serv = lines + assert path == model_meta_saved_single.loc.path + assert parse_obj_as(Server, json.loads(serv)) == FastAPIServer( + port=1000 + ) + + +def test_build_with_file_conf( + runner: Runner, model_meta_saved_single, tmp_path +): + path = os.path.join(tmp_path, "packed") + server_path = os.path.join(tmp_path, "server.yaml") + with open(server_path, "w", 
encoding="utf8") as f: + safe_dump(FastAPIServer(port=9999).dict(), f) + + result = runner.invoke( + f"build mock -m {make_posix(model_meta_saved_single.loc.uri)} --target {make_posix(path)} --file_conf server={make_posix(server_path)}" + ) + + assert result.exit_code == 0, (result.exception, result.output) + + with open(path, encoding="utf8") as f: + lines = f.read().splitlines() + assert len(lines) == 2 + path, serv = lines + assert path == model_meta_saved_single.loc.path + assert parse_obj_as(Server, json.loads(serv)) == FastAPIServer( + port=9999 + ) + + +def test_build_with_load(runner: Runner, model_meta_saved_single, tmp_path): + path = os.path.join(tmp_path, "packed") + load_path = os.path.join(tmp_path, "builder.yaml") + builder = BuilderMock( + server=FastAPIServer(port=9999), target=make_posix(path) + ) + with open(load_path, "w", encoding="utf8") as f: + safe_dump(builder.dict(), f) + + result = runner.invoke( + f"build -m {make_posix(model_meta_saved_single.loc.uri)} --load {make_posix(load_path)}" + ) + + assert result.exit_code == 0, (result.exception, result.output) + + with open(path, encoding="utf8") as f: + lines = f.read().splitlines() + assert len(lines) == 2 + path, serv = lines + assert path == model_meta_saved_single.loc.path + assert parse_obj_as(Server, json.loads(serv)) == FastAPIServer( + port=9999 + ) diff --git a/tests/cli/test_clone.py b/tests/cli/test_clone.py index 161997f8..8cb20183 100644 --- a/tests/cli/test_clone.py +++ b/tests/cli/test_clone.py @@ -8,7 +8,7 @@ def test_model_cloning(runner: Runner, model_path): with tempfile.TemporaryDirectory() as path: path = posixpath.join(path, "cloned") - result = runner.invoke(["clone", model_path, path, "--no-index"]) + result = runner.invoke(["clone", model_path, path]) assert result.exit_code == 0, ( result.stdout, result.stderr, diff --git a/tests/cli/test_declare.py b/tests/cli/test_declare.py index 6686bf94..26fcfb77 100644 --- a/tests/cli/test_declare.py +++ 
b/tests/cli/test_declare.py @@ -1,14 +1,504 @@ +from functools import lru_cache +from typing import Any, Dict, List, Optional + +import pytest +from pydantic import BaseModel + +from mlem.cli.declare import create_declare_mlem_object_subcommand, declare +from mlem.contrib.docker import DockerDirBuilder +from mlem.contrib.docker.context import DockerBuildArgs +from mlem.contrib.fastapi import FastAPIServer from mlem.contrib.heroku.meta import HerokuEnv +from mlem.contrib.pip.base import PipBuilder +from mlem.core.base import build_mlem_object from mlem.core.metadata import load_meta +from mlem.core.objects import EnvLink, MlemBuilder, MlemModel +from mlem.runtime.server import Server from mlem.utils.path import make_posix from tests.cli.conftest import Runner +from tests.cli.test_deployment import MlemDeploymentMock, MlemEnvMock + +builder_typer = [ + g.typer_instance + for g in declare.registered_groups + if g.typer_instance.info.name == "builder" +][0] +builder_typer.pretty_exceptions_short = False + +all_test_params = [] + + +class SimpleValue(BaseModel): + value: str + + +class ComplexValue(BaseModel): + field: str + field_list: List[str] = [] + field_dict: Dict[str, str] = {} + + +class ListValue(BaseModel): + f: List[str] = [] + + +class _MockBuilder(MlemBuilder): + """mock""" + + def build(self, obj: MlemModel): + pass + + def __init_subclass__(cls): + cls.__doc__ = "mock" + super().__init_subclass__() def test_declare(runner: Runner, tmp_path): result = runner.invoke( - f"declare env heroku {make_posix(str(tmp_path))} -c api_key=aaa" + f"declare env heroku {make_posix(str(tmp_path))} --api_key aaa" ) assert result.exit_code == 0, result.exception env = load_meta(str(tmp_path)) assert isinstance(env, HerokuEnv) assert env.api_key == "aaa" + + +@pytest.mark.parametrize( + "args, res", + [ + ("", []), + ( + "--args.templates_dir.0 kek --args.templates_dir.1 kek2", + ["kek", "kek2"], + ), + ], +) +def test_declare_list(runner: Runner, tmp_path, args, res): + 
result = runner.invoke( + f"declare builder docker_dir {make_posix(str(tmp_path))} --server fastapi --target lol " + + args, + raise_on_error=True, + ) + assert result.exit_code == 0, (result.exception, result.output) + builder = load_meta(str(tmp_path)) + assert isinstance(builder, DockerDirBuilder) + assert isinstance(builder.server, FastAPIServer) + assert builder.target == "lol" + assert isinstance(builder.args, DockerBuildArgs) + assert builder.args.templates_dir == res + + +@pytest.mark.parametrize( + "args, res", + [ + ("", {}), + ( + "--additional_setup_kwargs.key value --additional_setup_kwargs.key2 value2", + {"key": "value", "key2": "value2"}, + ), + ], +) +def test_declare_dict(runner: Runner, tmp_path, args, res): + result = runner.invoke( + f"declare builder pip {make_posix(str(tmp_path))} --package_name lol --target lol " + + args + ) + assert result.exit_code == 0, (result.exception, result.output) + builder = load_meta(str(tmp_path)) + assert isinstance(builder, PipBuilder) + assert builder.package_name == "lol" + assert builder.target == "lol" + assert builder.additional_setup_kwargs == res + + +class MockListComplexValue(_MockBuilder): + """mock""" + + field: List[ComplexValue] = [] + + +all_test_params.append( + pytest.param( + MockListComplexValue(), "", id=f"{MockListComplexValue.type}_empty" + ) +) +all_test_params.append( + pytest.param( + MockListComplexValue( + field=[ + ComplexValue( + field="a", + field_list=["a", "a"], + field_dict={"a": "a", "b": "b"}, + ), + ComplexValue( + field="a", + field_list=["a", "a"], + field_dict={"a": "a", "b": "b"}, + ), + ] + ), + "--field.0.field a --field.0.field_list.0 a --field.0.field_list.1 a --field.0.field_dict.a a --field.0.field_dict.b b " + "--field.1.field a --field.1.field_list.0 a --field.1.field_list.1 a --field.1.field_dict.a a --field.1.field_dict.b b", + id=f"{MockListComplexValue.type}_full", + ) +) + + +class MockListListValue(_MockBuilder): + """mock""" + + f: List[ListValue] = [] + + 
+all_test_params.append( + pytest.param(MockListListValue(), "", id="list_list_value_empty") +) +all_test_params.append( + pytest.param( + MockListListValue( + f=[ListValue(f=["a", "b"]), ListValue(f=["a", "b"])] + ), + "--f.0.f.0 a --f.0.f.1 b --f.1.f.0 a --f.1.f.1 b", + id="list_list_value_full", + ) +) + + +class MockModelListBuilder(_MockBuilder): + """mock""" + + field: List[SimpleValue] = [] + + +all_test_params.append( + pytest.param(MockModelListBuilder(), "", id="model_list_empty") +) +all_test_params.append( + pytest.param( + MockModelListBuilder( + field=[SimpleValue(value="kek"), SimpleValue(value="kek2")] + ), + "--field.0.value kek --field.1.value kek2", + id="model_list_full", + ) +) + + +class MockModelDictBuilder(_MockBuilder): + """mock""" + + field: Dict[str, SimpleValue] = {} + + +all_test_params.append( + pytest.param(MockModelDictBuilder(), "", id="model_dict_empty") +) +all_test_params.append( + pytest.param( + MockModelDictBuilder( + field={ + "k1": SimpleValue(value="kek"), + "k2": SimpleValue(value="kek2"), + } + ), + "--field.k1.value kek --field.k2.value kek2", + id="model_dict_empty", + ) +) + + +class MockFlatList(_MockBuilder): + """mock""" + + f: List[List[str]] = [] + + +all_test_params.append( + pytest.param(MockFlatList(f=[]), "", id="flat_list_empty") +) +all_test_params.append( + pytest.param( + MockFlatList(f=[["a", "a"], ["a", "a"]]), + "--f.0.0 a --f.0.1 a --f.1.0 a --f.1.1 a", + id="flat_list_full", + ) +) + + +class MockFlatListDict(_MockBuilder): + """mock""" + + f: List[Dict[str, str]] = [] + + +all_test_params.append( + pytest.param(MockFlatListDict(), "", id="flat_list_dict_empty") +) +all_test_params.append( + pytest.param( + MockFlatListDict(f=[{"k1": "a"}, {"k2": "b"}]), + "--f.0.k1 a --f.1.k2 b", + id="flat_list_dict_full", + ) +) + + +class MockFlatDictList(_MockBuilder): + """mock""" + + f: Dict[str, List[str]] = {} + + +all_test_params.append( + pytest.param(MockFlatDictList(), "", id="flat_dict_list_empty") +) 
+all_test_params.append( + pytest.param( + MockFlatDictList(f={"k1": ["a"], "k2": ["b"]}), + "--f.k1.0 a --f.k2.0 b", + id="flat_dict_list_full", + ) +) + + +class MockFlatDict(_MockBuilder): + """mock""" + + f: Dict[str, Dict[str, str]] = {} + + +all_test_params.append(pytest.param(MockFlatDict(), "", id="flat_dict_empty")) +all_test_params.append( + pytest.param( + MockFlatDict(f={"k1": {"k1": "a"}, "k2": {"k2": "b"}}), + "--f.k1.k1 a --f.k2.k2 b", + id="flat_dict_full", + ) +) + + +class MaskedField(_MockBuilder): + """mock""" + + field: ListValue + project: str + + +all_test_params.append( + pytest.param( + MaskedField(project="a", field=ListValue(f=["a"])), + "--.project a --field.f.0 a", + id="masked", + ) +) + + +class BooleanField(_MockBuilder): + field: bool + + +all_test_params.extend( + ( + pytest.param( + BooleanField(field=True), + "--field 1", + id="bool_true_1", + ), + pytest.param( + BooleanField(field=False), + "--field 0", + id="bool_false_0", + ), + pytest.param( + BooleanField(field=True), + "--field True", + id="bool_true", + ), + pytest.param( + BooleanField(field=False), + "--field False", + id="bool_false", + ), + ) +) + + +class AllowNoneField(_MockBuilder): + field: Optional[int] = 0 + + +all_test_params.extend( + ( + pytest.param( + AllowNoneField(field=10), "--field 10", id="allow_none_value" + ), + pytest.param( + AllowNoneField(field=None), "--field None", id="allow_none_none" + ), + pytest.param(AllowNoneField(), "", id="allow_none_default"), + ) +) + + +@lru_cache() +def _declare_builder_command(type_: str): + create_declare_mlem_object_subcommand( + builder_typer, + type_, + MlemBuilder.object_type, + MlemBuilder, + ) + + +@pytest.mark.parametrize("expected, args", all_test_params) +def test_declare_models( + runner: Runner, tmp_path, args: str, expected: MlemBuilder +): + _declare_builder_command(expected.__get_alias__()) + result = runner.invoke( + f"declare builder {expected.__get_alias__()} {make_posix(str(tmp_path))} " + + 
args, + raise_on_error=True, + ) + assert result.exit_code == 0, (result.exception, result.output) + builder = load_meta(str(tmp_path)) + assert isinstance(builder, type(expected)) + assert builder == expected + + +class RootValue(BaseModel): + __root__: List[str] = [] + + +class MockComplexBuilder(_MockBuilder): + """mock""" + + string: str + str_list: List[str] = [] + str_dict: Dict[str, str] = {} + str_list_dict: List[Dict[str, str]] = [] + str_dict_list: Dict[str, List[str]] = {} + value: ComplexValue + + value_list: List[ComplexValue] = [] + value_dict: Dict[str, ComplexValue] = {} + root_value: RootValue + root_list: List[RootValue] = [] + root_dict: Dict[str, RootValue] = {} + server: Server + server_list: List[Server] = [] + server_dict: Dict[str, Server] = {} + + +create_declare_mlem_object_subcommand( + builder_typer, + MockComplexBuilder.type, + MlemBuilder.object_type, + MlemBuilder, +) + + +def test_declare_all_together(runner: Runner, tmp_path): + args = [ + "string", + "str_list.0", + "str_list.1", + "str_dict.k1", + "str_dict.k2", + "str_list_dict.0.k1", + "str_list_dict.0.k2", + "str_list_dict.1.k1", + "str_list_dict.1.k2", + "str_dict_list.k1.0", + "str_dict_list.k1.1", + "str_dict_list.k2.0", + "str_dict_list.k2.1", + "value.field", + "value.field_list.0", + "value.field_list.1", + "value.field_dict.k1", + "value.field_dict.k2", + "value_list.0.field", + "value_list.0.field_list.0", + "value_list.0.field_list.1", + "value_list.0.field_dict.k1", + "value_list.0.field_dict.k2", + "value_list.1.field", + "value_list.1.field_list.0", + "value_list.1.field_list.1", + "value_list.1.field_dict.k1", + "value_list.1.field_dict.k2", + "value_dict.k1.field", + "value_dict.k1.field_list.0", + "value_dict.k1.field_list.1", + "value_dict.k1.field_dict.k1", + "value_dict.k1.field_dict.k2", + "value_dict.k2.field", + "value_dict.k2.field_list.0", + "value_dict.k2.field_list.1", + "value_dict.k2.field_dict.k1", + "value_dict.k2.field_dict.k2", + "root_value.0", + 
"root_value.1", + "root_list.0.0", + "root_list.0.1", + "root_list.1.0", + "root_list.1.1", + "root_dict.k1.0", + "root_dict.k1.1", + "root_dict.k2.0", + "root_dict.k2.1", + ] + server_args: Dict[str, Any] = { + "server": "fastapi", + "server.port": 0, + "server_list.0": "fastapi", + "server_list.0.port": 0, + "server_list.1": "fastapi", + "server_list.1.port": 0, + "server_dict.k1": "fastapi", + "server_dict.k1.port": 0, + "server_dict.k2": "fastapi", + "server_dict.k2.port": 0, + } + args_str = " ".join(f"--{k} lol" for k in args) + args_str += " " + " ".join(f"--{k} {v}" for k, v in server_args.items()) + result = runner.invoke( + f"declare builder {MockComplexBuilder.type} {make_posix(str(tmp_path))} {args_str}", + raise_on_error=True, + ) + assert result.exit_code == 0, (result.exception, result.output) + builder = load_meta(str(tmp_path)) + assert isinstance(builder, MockComplexBuilder) + assert builder == build_mlem_object( + MlemBuilder, + MockComplexBuilder.type, + str_conf=[f"{k}=lol" for k in args], + conf=server_args, + ) + + +@pytest.mark.parametrize( + "args,env_value", + [ + ("", None), + ("--env path", "path"), + ( + "--env.path path --env.project project", + EnvLink(path="path", project="project"), + ), + ("--env.env_param val", MlemEnvMock(env_param="val")), + ], +) +def test_declare_deployment_env( + runner: Runner, tmp_path, args: str, env_value +): + path = make_posix(str(tmp_path)) + runner.invoke( + f"declare deployment {MlemDeploymentMock.type} {path} " + args, + raise_on_error=True, + ) + meta = load_meta(path, force_type=MlemDeploymentMock) + + assert meta.env == env_value diff --git a/tests/cli/test_deployment.py b/tests/cli/test_deployment.py index 384d7349..1c697b42 100644 --- a/tests/cli/test_deployment.py +++ b/tests/cli/test_deployment.py @@ -1,10 +1,15 @@ import os -from typing import ClassVar +from typing import Any, ClassVar, Optional, Type import pytest from numpy import ndarray +from yaml import safe_load from mlem.api import 
load +from mlem.cli.declare import create_declare_mlem_object_subcommand, declare +from mlem.cli.deployment import create_deploy_run_command +from mlem.contrib.heroku.meta import HerokuEnv +from mlem.core.errors import DeploymentError, WrongMetaSubType from mlem.core.meta_io import MLEM_EXT from mlem.core.metadata import load_meta from mlem.core.objects import ( @@ -13,50 +18,70 @@ MlemDeployment, MlemEnv, MlemLink, + MlemModel, + MlemObject, ) from mlem.runtime.client import Client, HTTPClient +from mlem.utils.path import make_posix from tests.cli.conftest import Runner -@pytest.fixture -def mock_deploy_get_client(mocker, request_get_mock, request_post_mock): - return mocker.patch( - "tests.cli.test_deployment.DeployStateMock.get_client", - return_value=HTTPClient(host="", port=None), - ) - - class DeployStateMock(DeployState): - def get_client(self) -> Client: - pass + """mock""" - -class MlemDeploymentMock(MlemDeployment): class Config: use_enum_values = True - type: ClassVar = "mock" + allow_default: ClassVar = True + + deployment: Optional[MlemDeployment] = None + env: Optional[MlemEnv] = None status: DeployStatus = DeployStatus.NOT_DEPLOYED - param: str = "" - state: DeployState = DeployStateMock() class MlemEnvMock(MlemEnv): + """mock""" + type: ClassVar = "mock" - deploy_type: ClassVar = MlemDeploymentMock - def deploy(self, meta: MlemDeploymentMock): - meta.status = DeployStatus.RUNNING - meta.update() + env_param: Optional[str] = None + + +class MlemDeploymentMock(MlemDeployment[DeployStateMock, MlemEnvMock]): + """mock""" + + type: ClassVar = "mock" + state_type: ClassVar = DeployStateMock + env_type: ClassVar = MlemEnvMock + + """status""" + param: str = "" + """param""" - def remove(self, meta: MlemDeploymentMock): - meta.status = DeployStatus.STOPPED - meta.update() + def _get_client(self, state) -> Client: + return HTTPClient(host="", port=None) - def get_status( - self, meta: MlemDeploymentMock, raise_on_error=True - ) -> "DeployStatus": - return 
meta.status + def deploy(self, model: MlemModel): + with self.lock_state(): + state = self.get_state() + state.status = DeployStatus.RUNNING + state.deployment = self + state.env = self.get_env() + state.update_model(model) + self.update_state(state) + + def remove(self): + with self.lock_state(): + state = self.get_state() + state.status = DeployStatus.STOPPED + state.deployment = None + state.env = None + state.model_hash = None + self.update_state(state) + + def get_status(self, raise_on_error=True) -> "DeployStatus": + with self.lock_state(): + return self.get_state().status @pytest.fixture @@ -67,22 +92,105 @@ def mock_env_path(tmp_path_factory): @pytest.fixture() -def mock_deploy_path(tmp_path, mock_env_path, model_meta_saved_single): +def mock_deploy_path(tmp_path, mock_env_path): path = os.path.join(tmp_path, "deployname") MlemDeploymentMock( param="bbb", - model_link=model_meta_saved_single.make_link(), - env_link=MlemLink(path=mock_env_path, link_type="env"), + env=mock_env_path, ).dump(path) return path +def _check_deployment_meta( + deployment: MlemDeployment, + mlem_project: Optional[str], + env_path: str, + path: str = "deployment", + env: Any = None, +): + deployment.dump(path, project=mlem_project) + + with deployment.loc.open("r") as f: + data = safe_load(f) + assert data == { + "object_type": "deployment", + "type": "mock", + "env": env or make_posix(env_path), + } + + deployment2 = load_meta( + path, project=mlem_project, force_type=MlemDeployment + ) + assert deployment2 == deployment + assert deployment2.get_env() == load_meta(env_path) + + +def test_deploy_meta_str_env(mlem_project, mock_env_path): + deployment = MlemDeploymentMock(env=mock_env_path) + _check_deployment_meta(deployment, mlem_project, mock_env_path) + + +def test_deploy_meta_link_env(mlem_project, mock_env_path): + deployment = MlemDeploymentMock( + env=MlemLink(path=mock_env_path, link_type="env"), + ) + _check_deployment_meta(deployment, mlem_project, mock_env_path) + + +def 
test_deploy_meta_link_env_project(mlem_project, mock_env_path): + load_meta(mock_env_path).clone("project_env", project=mlem_project) + + deployment = MlemDeploymentMock( + env=MlemLink( + path="project_env", project=mlem_project, link_type="env" + ), + ) + _check_deployment_meta( + deployment, + mlem_project, + mock_env_path, + env={ + "path": "project_env", + "project": make_posix(mlem_project), + }, + ) + + +def test_deploy_meta_link_env_no_project(tmpdir, mock_env_path): + + deployment = MlemDeploymentMock( + env=MlemLink(path=mock_env_path, link_type="env"), + ) + deployment_path = os.path.join(tmpdir, "deployment") + + _check_deployment_meta( + deployment, None, mock_env_path, path=deployment_path + ) + + +def test_read_relative_model_from_remote_deploy_meta(): + """TODO + path = "s3://..." + model.dump(path / "model"); + deployment = MlemDeploymentMock( + model=model, + env=MlemLink( + path=mock_env_path, link_type="env" + ), + ) + deployment.dump(path / deployment) + + deployment2 = load_meta(...) 
+ deployment2.get_model() + """ + + def test_deploy_create_new( runner: Runner, model_meta_saved_single, mock_env_path, tmp_path ): path = os.path.join(tmp_path, "deployname") result = runner.invoke( - f"deploy run {path} -m {model_meta_saved_single.loc.uri} -t {mock_env_path} -c param=aaa".split() + f"deploy run {MlemDeploymentMock.type} {path} -m {model_meta_saved_single.loc.uri} --env {mock_env_path} --param aaa".split() ) assert result.exit_code == 0, ( result.stdout, @@ -93,11 +201,16 @@ def test_deploy_create_new( meta = load_meta(path) assert isinstance(meta, MlemDeploymentMock) assert meta.param == "aaa" - assert meta.status == DeployStatus.RUNNING + assert meta.get_status() == DeployStatus.RUNNING -def test_deploy_create_existing(runner: Runner, mock_deploy_path): - result = runner.invoke(f"deploy run {mock_deploy_path}".split()) +def test_deploy_create_existing( + runner: Runner, mock_deploy_path, model_meta_saved_single +): + result = runner.invoke( + f"deploy run --load {mock_deploy_path} -m {model_meta_saved_single.loc.fullpath}".split(), + raise_on_error=True, + ) assert result.exit_code == 0, ( result.stdout, result.stderr, @@ -106,7 +219,7 @@ def test_deploy_create_existing(runner: Runner, mock_deploy_path): meta = load_meta(mock_deploy_path) assert isinstance(meta, MlemDeploymentMock) assert meta.param == "bbb" - assert meta.status == DeployStatus.RUNNING + assert meta.get_status() == DeployStatus.RUNNING def test_deploy_status(runner: Runner, mock_deploy_path): @@ -128,19 +241,21 @@ def test_deploy_remove(runner: Runner, mock_deploy_path): ) meta = load_meta(mock_deploy_path) assert isinstance(meta, MlemDeploymentMock) - assert meta.status == DeployStatus.STOPPED + assert meta.get_status() == DeployStatus.STOPPED def test_deploy_apply( runner: Runner, mock_deploy_path, data_path, - mock_deploy_get_client, tmp_path, + request_get_mock, + request_post_mock, ): path = os.path.join(tmp_path, "output") result = runner.invoke( - f"deploy apply 
{mock_deploy_path} {data_path} -o {path}".split() + f"deploy apply {mock_deploy_path} {data_path} -o {path}".split(), + raise_on_error=True, ) assert result.exit_code == 0, ( result.stdout, @@ -149,6 +264,274 @@ def test_deploy_apply( ) meta = load_meta(mock_deploy_path) assert isinstance(meta, MlemDeploymentMock) - assert meta.status == DeployStatus.NOT_DEPLOYED + assert meta.get_status() == DeployStatus.NOT_DEPLOYED predictions = load(path) assert isinstance(predictions, ndarray) + + +def add_mock_declare(type_: Type[MlemObject]): + + typer = [ + g.typer_instance + for g in declare.registered_groups + if g.typer_instance.info.name == type_.object_type + ][0] + + create_declare_mlem_object_subcommand( + typer, + type_.__get_alias__(), + type_.object_type, + type_, + ) + + +add_mock_declare(MlemDeploymentMock) +add_mock_declare(MlemEnvMock) + +create_deploy_run_command(MlemDeploymentMock.type) + + +def _deploy_and_check( + runner: Runner, + deploy_path: str, + model_single_path: str, + load_deploy=True, + add_args="", + env_param_value: Optional[str] = "env_val", +): + + if load_deploy: + status_res = runner.invoke( + f"deploy status {deploy_path}", raise_on_error=True + ) + assert status_res.exit_code == 0, ( + status_res.output, + status_res.exception, + status_res.stderr, + ) + assert status_res.output.strip() == DeployStatus.NOT_DEPLOYED.value + + deploy_res = runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) + else: + deploy_res = runner.invoke( + f"deploy run {MlemDeploymentMock.type} {deploy_path} --model {model_single_path} --param val {add_args}", + raise_on_error=True, + ) + + assert deploy_res.exit_code == 0, ( + deploy_res.output, + deploy_res.exception, + deploy_res.stderr, + ) + + status_res = runner.invoke( + f"deploy status {deploy_path}", raise_on_error=True + ) + assert status_res.exit_code == 0, ( + status_res.output, + status_res.exception, + status_res.stderr, + ) + assert 
status_res.output.strip() == DeployStatus.RUNNING.value + + deploy_meta = load_meta(deploy_path, force_type=MlemDeploymentMock) + state = deploy_meta.get_state() + assert isinstance(state.deployment, MlemDeploymentMock) + assert state.deployment.param == "val" + assert isinstance(state.env, MlemEnvMock) + assert state.env.env_param == env_param_value + + remove_res = runner.invoke( + f"deploy remove {deploy_path}", raise_on_error=True + ) + assert remove_res.exit_code == 0, ( + remove_res.output, + remove_res.exception, + remove_res.stderr, + ) + + status_res = runner.invoke( + f"deploy status {deploy_path}", raise_on_error=True + ) + assert status_res.exit_code == 0, ( + status_res.output, + status_res.exception, + status_res.stderr, + ) + assert status_res.output.strip() == DeployStatus.STOPPED.value + + +def test_all_declared(runner: Runner, tmp_path, model_single_path): + """ + mlem declare env heroku --api_key lol prod.mlem + mlem declare deployment heroku --env prod.mlem --app_name myapp service.mlem + # error on depl/env type mismatch TODO + mlem deployment run --load service.mlem --model mdoel + """ + env_path = make_posix(str(tmp_path / "env")) + runner.invoke( + f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", + raise_on_error=True, + ) + deploy_path = make_posix(str(tmp_path / "deploy")) + runner.invoke( + f"declare deployment {MlemDeploymentMock.type} --param val --env {env_path} {deploy_path}", + raise_on_error=True, + ) + + _deploy_and_check(runner, deploy_path, model_single_path) + + +def test_declare_type_mismatch(runner: Runner, tmp_path, model_single_path): + """ + mlem declare env heroku --api_key lol prod.mlem + mlem declare deployment sagemaker --env prod.mlem --app_name myapp service.mlem + # error on depl/env type mismatch TODO + mlem deployment run --load service.mlem --model mdoel + """ + env_path = make_posix(str(tmp_path / "env")) + runner.invoke( + f"declare env {HerokuEnv.type} {env_path}", raise_on_error=True + ) + 
deploy_path = make_posix(str(tmp_path / "deploy")) + runner.invoke( + f"declare deployment {MlemDeploymentMock.type} --param a --env {env_path} {deploy_path}", + raise_on_error=True, + ) + + with pytest.raises(WrongMetaSubType): + runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) + + +def test_deploy_declared(runner: Runner, tmp_path, model_single_path): + """ + mlem declare deployment heroku --env.api_key prod.mlem --app_name myapp service.mlem + mlem deployment run --load service.mlem --model mdoel + """ + deploy_path = make_posix(str(tmp_path / "deploy")) + declare_res = runner.invoke( + f"declare deployment {MlemDeploymentMock.type} {deploy_path} --param val --env.env_param env_val ", + raise_on_error=True, + ) + assert declare_res.exit_code == 0, ( + declare_res.output, + declare_res.exception, + declare_res.stderr, + ) + + _deploy_and_check(runner, deploy_path, model_single_path) + + +def test_env_declared(runner: Runner, tmp_path, model_single_path): + """ + mlem declare env heroku --api_key lol prod.mlem + mlem deployment run heroku service.mlem --model model --app_name myapp --env prod.mlem + # error on type mismatch + """ + env_path = make_posix(str(tmp_path / "env")) + declare_res = runner.invoke( + f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", + raise_on_error=True, + ) + assert declare_res.exit_code == 0, ( + declare_res.output, + declare_res.exception, + declare_res.stderr, + ) + deploy_path = make_posix(str(tmp_path / "deploy")) + _deploy_and_check( + runner, + deploy_path, + model_single_path, + load_deploy=False, + add_args=f"--env {env_path}", + ) + + +def test_none_declared(runner: Runner, tmp_path, model_single_path): + """ + mlem deployment run heroku service.mlem --model model --app_name myapp --env.api_key lol + # error on args mismatch + """ + deploy_path = make_posix(str(tmp_path / "deploy")) + _deploy_and_check( + runner, + deploy_path, + model_single_path, + 
load_deploy=False, + add_args="--env.env_param env_val", + ) + + +def test_no_env_params(runner: Runner, tmp_path, model_single_path): + deploy_path = make_posix(str(tmp_path / "deploy")) + _deploy_and_check( + runner, + deploy_path, + model_single_path, + load_deploy=False, + env_param_value=None, + ) + + +def test_redeploy_changed(runner: Runner, tmp_path, model_single_path): + env_path = make_posix(str(tmp_path / "env")) + runner.invoke( + f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", + raise_on_error=True, + ) + deploy_path = make_posix(str(tmp_path / "deploy")) + runner.invoke( + f"declare deployment {MlemDeploymentMock.type} --param val --env {env_path} {deploy_path}", + raise_on_error=True, + ) + + runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) + + runner.invoke( + f"declare deployment {MlemDeploymentMock.type} --param val1 --env {env_path} {deploy_path}", + raise_on_error=True, + ) + with pytest.raises(DeploymentError): + runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) + + +def test_redeploy_env_changed(runner: Runner, tmp_path, model_single_path): + env_path = make_posix(str(tmp_path / "env")) + runner.invoke( + f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", + raise_on_error=True, + ) + deploy_path = make_posix(str(tmp_path / "deploy")) + runner.invoke( + f"declare deployment {MlemDeploymentMock.type} --param val --env {env_path} {deploy_path}", + raise_on_error=True, + ) + + runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) + + runner.invoke( + f"declare env {MlemEnvMock.type} --env_param env_val1 {env_path}", + raise_on_error=True, + ) + + with pytest.raises(DeploymentError): + runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) diff --git a/tests/cli/test_info.py 
b/tests/cli/test_info.py index c2a3c415..a74e9f76 100644 --- a/tests/cli/test_info.py +++ b/tests/cli/test_info.py @@ -1,80 +1,12 @@ import json import os -import pytest from pydantic import parse_obj_as from mlem.core.meta_io import MLEM_EXT -from mlem.core.objects import MlemLink, MlemModel, MlemObject +from mlem.core.objects import MlemModel, MlemObject from tests.conftest import MLEM_TEST_REPO, long -LOCAL_LS_EXPECTED_RESULT = """Models: - - latest -> model1 - - model1 -""" - - -@pytest.mark.parametrize("obj_type", [None, "all", "model"]) -def test_ls(runner, filled_mlem_project, obj_type): - os.chdir(filled_mlem_project) - result = runner.invoke( - ["list", "-t", obj_type] if obj_type else ["list"], - ) - assert result.exit_code == 0, ( - result.stdout, - result.stderr, - result.exception, - ) - assert len(result.stdout) > 0, "Output is empty, but should not be" - assert result.stdout == LOCAL_LS_EXPECTED_RESULT - - result = runner.invoke( - (["list", "-t", obj_type] if obj_type else ["list"]) + ["--json"], - ) - assert result.exit_code == 0, ( - result.stdout, - result.stderr, - result.exception, - ) - assert len(result.stdout) > 0, "Output is empty, but should not be" - data = json.loads(result.stdout) - assert "model" in data - models = data["model"] - assert len(models) == 2 - model, link = [parse_obj_as(MlemObject, m) for m in models] - if isinstance(model, MlemLink): - model, link = link, model - assert isinstance(model, MlemModel) - assert isinstance(link, MlemLink) - - -REMOTE_LS_EXPECTED_RESULT = """Models: - - data/model - - latest -> data/model -Data: - - data/pred - - data/test_x - - data/test_y - - data/train -""" - - -@pytest.mark.long -def test_ls_remote(runner, current_test_branch): - result = runner.invoke( - [ - "list", - f"{MLEM_TEST_REPO}/tree/{current_test_branch}/simple", - ], - ) - assert result.exit_code == 0, ( - result.stdout, - result.stderr, - result.exception, - ) - assert len(result.stdout) > 0, "Output is empty, but should not 
be" - assert result.stdout == REMOTE_LS_EXPECTED_RESULT - def test_pretty_print(runner, model_path_mlem_project): model_path, _ = model_path_mlem_project diff --git a/tests/cli/test_init.py b/tests/cli/test_init.py index 65d08efb..1cd77053 100644 --- a/tests/cli/test_init.py +++ b/tests/cli/test_init.py @@ -1,7 +1,6 @@ import os -from mlem.config import CONFIG_FILE_NAME -from mlem.constants import MLEM_DIR +from mlem.constants import MLEM_CONFIG_FILE_NAME from mlem.utils.path import make_posix from tests.cli.conftest import Runner @@ -9,5 +8,4 @@ def test_init(runner: Runner, tmpdir): result = runner.invoke(f"init {make_posix(str(tmpdir))}") assert result.exit_code == 0, result.exception - assert os.path.isdir(tmpdir / MLEM_DIR) - assert os.path.isfile(tmpdir / MLEM_DIR / CONFIG_FILE_NAME) + assert os.path.isfile(tmpdir / MLEM_CONFIG_FILE_NAME) diff --git a/tests/cli/test_link.py b/tests/cli/test_link.py index 831f3979..4549d964 100644 --- a/tests/cli/test_link.py +++ b/tests/cli/test_link.py @@ -2,7 +2,7 @@ import tempfile from mlem.api import load_meta -from mlem.core.meta_io import MLEM_DIR, MLEM_EXT +from mlem.core.meta_io import MLEM_EXT from mlem.core.objects import MlemLink, MlemModel @@ -10,7 +10,7 @@ def test_link(runner, model_path): with tempfile.TemporaryDirectory() as dir: link_path = os.path.join(dir, "latest.mlem") result = runner.invoke( - ["link", model_path, link_path, "-e", "--abs"], + ["link", model_path, link_path, "--abs"], ) assert result.exit_code == 0, ( result.stdout, @@ -33,9 +33,7 @@ def test_link_mlem_project(runner, model_path_mlem_project): result.stderr, result.exception, ) - link_path = os.path.join( - project, MLEM_DIR, MlemLink.object_type, link_name - ) + link_path = os.path.join(project, link_name) assert os.path.exists(link_path) link_object = load_meta(link_path, follow_links=False) assert isinstance(link_object, MlemLink) diff --git a/tests/cli/test_main.py b/tests/cli/test_main.py index e4e13c61..90010244 100644 --- 
a/tests/cli/test_main.py +++ b/tests/cli/test_main.py @@ -1,9 +1,11 @@ import pytest -from click import Group -from typer.main import get_command_from_info, get_group_from_info +import requests +from click import Context, Group +from typer.main import get_command_from_info, get_group, get_group_from_info from mlem.cli import app from tests.cli.conftest import Runner +from tests.conftest import long def iter_group(group: Group, prefix=()): @@ -11,7 +13,7 @@ def iter_group(group: Group, prefix=()): yield prefix, group for name, c in group.commands.items(): if isinstance(c, Group): - yield from iter_group(c, prefix + (name,)) + yield from iter_group(c, prefix) else: yield prefix + (name,), c @@ -37,29 +39,48 @@ def app_cli_cmd(): def test_commands_help(app_cli_cmd): no_help = [] - for name, cli_cmd in app_cli_cmd: - if cli_cmd.help is None: - no_help.append(name) - assert len(no_help) == 0, f"{no_help} cli commnads do not have help!" + group = get_group(app) + ctx = Context(group, info_name="mlem", help_option_names=["-h", "--help"]) + + with ctx: + for name, cli_cmd in app_cli_cmd: + if cli_cmd.help is None: + no_help.append(name) + assert len(no_help) == 0, f"{no_help} cli commands do not have help!" def test_commands_args_help(app_cli_cmd): no_help = [] for name, cmd in app_cli_cmd: + dynamic_metavar = getattr(cmd, "dynamic_metavar", None) for arg in cmd.params: + if arg.name == dynamic_metavar: + continue if arg.help is None: no_help.append(f"{name}:{arg.name}") - assert len(no_help) == 0, f"{no_help} cli commnad args do not have help!" - - -def test_commands_examples(app_cli_cmd): - no_examples = [] - for name, cmd in app_cli_cmd: - if cmd.examples is None and not isinstance(cmd, Group): - no_examples.append(name) + assert len(no_help) == 0, f"{no_help} cli commands args do not have help!" 
+ + +@pytest.mark.xfail +@long +def test_commands_docs_links(app_cli_cmd): + no_link = [] + link_broken = [] + for name, _cmd in app_cli_cmd: + result = Runner().invoke(name.split() + ["--help"]) + if result.output is None or "Documentation: <" not in result.output: + no_link.append(name) + else: + link = result.output.split("Documentation: <")[1].split(">")[0] + response = requests.get(link, timeout=5) + if response.status_code != 200: + link_broken.append(name) + assert ( + len(no_link) == 0 + ), f"{no_link} cli commands do not have documentation link!" assert ( - len(no_examples) == 0 - ), f"{no_examples} cli commnads do not have examples!" + len(link_broken) == 0 + ), f"{link_broken} cli commands have broken documentation links!" @pytest.mark.parametrize("cmd", ["--help", "-h"]) @@ -74,12 +95,7 @@ def test_help(runner: Runner, cmd): def test_cli_commands_help(runner: Runner, app_cli_cmd): for name, _ in app_cli_cmd: - result = runner.invoke(name + " --help") - assert result.exit_code == 0, ( - result.stdout, - result.stderr, - result.exception, - ) + runner.invoke(name + " --help", raise_on_error=True) def test_version(runner: Runner): diff --git a/tests/cli/test_serve.py b/tests/cli/test_serve.py index 5ae4758a..7010e524 100644 --- a/tests/cli/test_serve.py +++ b/tests/cli/test_serve.py @@ -1,5 +1,6 @@ from typing import ClassVar +from mlem.cli.serve import create_serve_command from mlem.runtime import Interface from mlem.runtime.server import Server from mlem.ui import echo @@ -7,15 +8,21 @@ class MockServer(Server): + """mock""" + type: ClassVar = "mock" param: str = "wrong" + """param""" def serve(self, interface: Interface): echo(self.param) +create_serve_command(MockServer.type) + + def test_serve(runner: Runner, model_single_path): - result = runner.invoke(f"serve {model_single_path} mock -c param=aaa") + result = runner.invoke(f"serve mock -m {model_single_path} --param aaa") assert result.exit_code == 0, ( result.stdout, result.stderr, diff --git 
a/tests/cli/test_stderr.py b/tests/cli/test_stderr.py index 87ae56ae..6caf80dc 100644 --- a/tests/cli/test_stderr.py +++ b/tests/cli/test_stderr.py @@ -1,19 +1,22 @@ from io import StringIO from unittest import mock +import pytest + from mlem.core.errors import MlemError from mlem.ui import echo, stderr_echo EXCEPTION_MESSAGE = "Test Exception Message" +@pytest.mark.usefixtures("no_debug") def test_stderr_exception(runner): # patch the ls command and ensure it throws an expection. with mock.patch( - "mlem.api.commands.ls", side_effect=Exception(EXCEPTION_MESSAGE) + "mlem.api.commands.init", side_effect=Exception(EXCEPTION_MESSAGE) ): result = runner.invoke( - ["list"], + ["init"], ) assert result.exit_code == 1, ( result.stdout, @@ -27,13 +30,14 @@ def test_stderr_exception(runner): MLEM_ERROR_MESSAGE = "Test Mlem Error Message" +@pytest.mark.usefixtures("no_debug") def test_stderr_mlem_error(runner): # patch the ls command and ensure it throws a mlem error. with mock.patch( - "mlem.api.commands.ls", side_effect=MlemError(MLEM_ERROR_MESSAGE) + "mlem.api.commands.init", side_effect=MlemError(MLEM_ERROR_MESSAGE) ): result = runner.invoke( - ["list"], + ["init"], ) assert result.exit_code == 1, ( result.stdout, diff --git a/tests/cli/test_types.py b/tests/cli/test_types.py new file mode 100644 index 00000000..397285f6 --- /dev/null +++ b/tests/cli/test_types.py @@ -0,0 +1,108 @@ +from typing import Optional + +import pytest +from pydantic import BaseModel + +from mlem.cli.types import iterate_type_fields +from mlem.cli.utils import get_attribute_docstrings, get_field_help +from mlem.core.base import MlemABC, load_impl_ext +from mlem.utils.entrypoints import list_implementations +from tests.cli.conftest import Runner + + +def test_types(runner: Runner): + result = runner.invoke("types") + assert result.exit_code == 0, (result.exception, result.output) + assert all(typename in result.output for typename in MlemABC.abs_types) + + +@pytest.mark.parametrize("abs_name", 
MlemABC.abs_types.keys()) +def test_types_abs_name(runner: Runner, abs_name): + result = runner.invoke(f"types {abs_name}") + assert result.exit_code == 0, result.exception + assert set(result.output.splitlines()) == set( + list_implementations(abs_name, include_hidden=False) + ) + + +@pytest.mark.parametrize( + "abs_name,subtype", + [ + (abs_name, subtype) + for abs_name, root_type in MlemABC.abs_types.items() + for subtype in list_implementations(root_type, include_hidden=False) + if not subtype.startswith("tests.") and "mock" not in subtype + ], +) +def test_types_abs_name_subtype(runner: Runner, abs_name, subtype): + result = runner.invoke(f"types {abs_name} {subtype}") + assert result.exit_code == 0, result.exception + if not subtype.startswith("tests."): + assert "docstring missing" not in result.output + + +@pytest.mark.parametrize( + "abs_name,subtype", + [ + (abs_name, subtype) + for abs_name, root_type in MlemABC.abs_types.items() + for subtype in list_implementations(root_type, include_hidden=False) + if not subtype.startswith("tests.") and "mock" not in subtype + ], +) +def test_fields_capitalized(abs_name, subtype): + impl = load_impl_ext(abs_name, subtype) + ad = get_attribute_docstrings(impl) + allowed_lowercase = ["md5"] + capitalized = { + k: v[0] == v[0].capitalize() + if all(not v.startswith(prefix) for prefix in allowed_lowercase) + else True + for k, v in ad.items() + } + assert capitalized == {k: True for k in ad} + + +def test_iter_type_fields_subclass(): + class Parent(BaseModel): + parent: str + """parent""" + + class Child(Parent): + child: str + """child""" + excluded: Optional[str] = None + + class Config: + fields = {"excluded": {"exclude": True}} + + fields = list(iterate_type_fields(Child)) + + assert len(fields) == 2 + assert {get_field_help(Child, f.path) for f in fields} == { + "parent", + "child", + } + + +def test_iter_type_fields_subclass_multiinheritance(): + class Parent(BaseModel): + parent: str + """parent""" + + class 
Parent2(BaseModel): + parent2 = "" + """parent2""" + + class Child(Parent, Parent2): + child: str + """child""" + + fields = list(iterate_type_fields(Child)) + + assert len(fields) == 3 + assert {get_field_help(Child, f.path) for f in fields} == { + "parent", + "child", + "parent2", + } diff --git a/tests/conftest.py b/tests/conftest.py index 1aa44793..0e42fda6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,7 +2,7 @@ import posixpath import tempfile from pathlib import Path -from typing import Any, Callable, Type +from typing import Any, Callable, Set, Type import git import numpy as np @@ -43,6 +43,10 @@ MLEM_S3_TEST_BUCKET = "mlem-tests" +def _cut_empty_lines(string): + return "\n".join(line for line in string.splitlines() if line) + + def _check_github_test_repo_ssh_auth(): try: git.cmd.Git().ls_remote(MLEM_TEST_REPO) @@ -72,8 +76,7 @@ def _check_github_test_repo_auth(): ) -@pytest.fixture() -def current_test_branch(): +def get_current_test_branch(branch_list: Set[str]): try: branch = Repo(str(Path(__file__).parent.parent)).active_branch.name except TypeError: @@ -82,20 +85,30 @@ def current_test_branch(): branch = os.environ.get("GITHUB_HEAD_REF", os.environ["GITHUB_REF"]) if branch.startswith("refs/heads/"): branch = branch[len("refs/heads/") :] - remote_refs = set( - ls_github_branches(MLEM_TEST_REPO_ORG, MLEM_TEST_REPO_NAME).keys() - ) - if branch in remote_refs: + if branch in branch_list: return branch return "main" +@pytest.fixture() +def current_test_branch(): + return get_current_test_branch( + set(ls_github_branches(MLEM_TEST_REPO_ORG, MLEM_TEST_REPO_NAME).keys()) + ) + + @pytest.fixture(scope="session", autouse=True) def add_test_env(): os.environ["MLEM_TESTS"] = "true" LOCAL_CONFIG.TESTS = True +@pytest.fixture(scope="session", autouse=True) +def add_debug_env(): + os.environ["MLEM_DEBUG"] = "true" + LOCAL_CONFIG.DEBUG = True + + def resource_path(test_file, *paths): resources_dir = os.path.join(os.path.dirname(test_file), RESOURCES) 
return os.path.join(resources_dir, *paths) @@ -201,14 +214,14 @@ def model_path(model_train_target, tmp_path_factory): model, train, _ = model_train_target # because of index=False we test reading by path here # reading by link name is not tested - save(model, path, sample_data=train, index=False) + save(model, path, sample_data=train) yield path @pytest.fixture def data_path(train, tmpdir_factory): temp_dir = str(tmpdir_factory.mktemp("saved-data") / "data") - save(train, temp_dir, index=False) + save(train, temp_dir) yield temp_dir @@ -285,7 +298,7 @@ def filled_mlem_project(mlem_project): requirements=Requirements.new("sklearn"), model_type=SklearnModel(methods={}, model=""), ) - model.dump("model1", project=mlem_project, external=True) + model.dump("model1", project=mlem_project) model.make_link("latest", project=mlem_project) yield mlem_project @@ -297,7 +310,7 @@ def model_path_mlem_project(model_train_target, tmpdir_factory): dir = str(tmpdir_factory.mktemp("mlem-root-with-model")) init(dir) model_dir = os.path.join(dir, "generated-model") - save(model, model_dir, sample_data=train, index=True, external=True) + save(model, model_dir, sample_data=train) yield model_dir, dir diff --git a/tests/contrib/conftest.py b/tests/contrib/conftest.py index 4932aab0..2bb07b15 100644 --- a/tests/contrib/conftest.py +++ b/tests/contrib/conftest.py @@ -1,9 +1,26 @@ +import subprocess + import pytest from mlem.contrib.docker.context import use_mlem_source +from tests.conftest import long @pytest.fixture() def uses_docker_build(): with use_mlem_source("whl"): yield + + +def has_conda(): + try: + ret = subprocess.run(["conda"], check=True) + return ret.returncode == 0 + except FileNotFoundError: + return False + + +def conda_test(f): + mark = pytest.mark.conda + skip = pytest.mark.skipif(not has_conda(), reason="conda is unavailable") + return long(mark(skip(f))) diff --git a/tests/contrib/resources/pandas/.mlem/config.yaml b/tests/contrib/resources/pandas/.mlem.yaml 
similarity index 100% rename from tests/contrib/resources/pandas/.mlem/config.yaml rename to tests/contrib/resources/pandas/.mlem.yaml diff --git a/tests/contrib/test_bitbucket.py b/tests/contrib/test_bitbucket.py index 1f9bd916..8c5f522c 100644 --- a/tests/contrib/test_bitbucket.py +++ b/tests/contrib/test_bitbucket.py @@ -3,12 +3,12 @@ import pytest from pytest_lazyfixture import lazy_fixture -from mlem.contrib.bitbucketfs import BitBucketFileSystem +from mlem.contrib.bitbucketfs import BitBucketFileSystem, ls_bb_refs from mlem.core.errors import RevisionNotFound -from mlem.core.meta_io import UriResolver, get_fs +from mlem.core.meta_io import Location, get_fs from mlem.core.metadata import load_meta from mlem.core.objects import MlemModel -from tests.conftest import long +from tests.conftest import get_current_test_branch, long MLEM_TEST_REPO_PROJECT = "iterative-ai/mlem-test" @@ -31,6 +31,11 @@ def fs_auth(): return BitBucketFileSystem(MLEM_TEST_REPO_PROJECT) +@pytest.fixture() +def current_test_branch_bb(): + return get_current_test_branch(set(ls_bb_refs(MLEM_TEST_REPO_PROJECT))) + + @long @pytest.mark.parametrize( "fs", @@ -71,7 +76,7 @@ def test_uri_resolver(uri): ["main", "branch", "tag", "3897d2ab"], ) def test_uri_resolver_rev(rev): - location = UriResolver.resolve(MLEM_TEST_REPO_URI, None, rev=rev, fs=None) + location = Location.resolve(MLEM_TEST_REPO_URI, None, rev=rev, fs=None) assert isinstance(location.fs, BitBucketFileSystem) assert location.fs.root == rev assert "README.md" in location.fs.ls("") @@ -80,12 +85,16 @@ def test_uri_resolver_rev(rev): @long def test_uri_resolver_wrong_rev(): with pytest.raises(RevisionNotFound): - UriResolver.resolve( + Location.resolve( MLEM_TEST_REPO_URI, None, rev="__not_exists__", fs=None ) @long -def test_loading_object(): - meta = load_meta("latest", project=MLEM_TEST_REPO_URI + "/src/main/simple") +def test_loading_object(current_test_branch_bb): + meta = load_meta( + "latest", + project=MLEM_TEST_REPO_URI + 
"/src/main/simple", + rev=current_test_branch_bb, + ) assert isinstance(meta, MlemModel) diff --git a/tests/contrib/test_docker/resources/dockerfile.j2 b/tests/contrib/test_docker/resources/dockerfile.j2 new file mode 100644 index 00000000..e6a7521f --- /dev/null +++ b/tests/contrib/test_docker/resources/dockerfile.j2 @@ -0,0 +1,3 @@ +FROM alpine + +CMD sleep infinity diff --git a/tests/contrib/test_docker/test_context.py b/tests/contrib/test_docker/test_context.py index 217cf049..d8e378f8 100644 --- a/tests/contrib/test_docker/test_context.py +++ b/tests/contrib/test_docker/test_context.py @@ -11,6 +11,7 @@ use_mlem_source, ) from mlem.core.requirements import UnixPackageRequirement +from tests.conftest import _cut_empty_lines from tests.contrib.test_docker.conftest import docker_test REGISTRY_PORT = 5000 @@ -97,6 +98,12 @@ def test_dockerfile_generator_super_custom(): assert _generate_dockerfile(**kwargs) == dockerfile +def test_dockerfile_generator_no_cmd(): + kwargs = {"run_cmd": None} + with use_mlem_source("pip"): + assert "CMD" not in _generate_dockerfile(**kwargs) + + def test_use_wheel_installation(tmpdir): distr = tmpdir.mkdir("distr").join("somewhatwheel.txt") distr.write("wheel goes brrr") @@ -106,10 +113,6 @@ def test_use_wheel_installation(tmpdir): assert f"RUN pip install {MLEM_LOCAL_WHL}" in dockerfile -def _cut_empty_lines(string): - return "\n".join(line for line in string.splitlines() if line) - - def _generate_dockerfile(unix_packages=None, **kwargs): return _cut_empty_lines( DockerfileGenerator(**kwargs).generate( diff --git a/tests/contrib/test_docker/test_deploy.py b/tests/contrib/test_docker/test_deploy.py index 1ebc37fb..2ae58ac7 100644 --- a/tests/contrib/test_docker/test_deploy.py +++ b/tests/contrib/test_docker/test_deploy.py @@ -6,15 +6,18 @@ import pytest from requests.exceptions import HTTPError +from mlem.api import deploy from mlem.contrib.docker.base import ( DockerContainer, DockerContainerState, DockerEnv, DockerImage, ) +from 
mlem.contrib.docker.context import DockerBuildArgs from mlem.contrib.fastapi import FastAPIServer from mlem.core.errors import DeploymentError from mlem.core.objects import DeployStatus +from tests.conftest import resource_path from tests.contrib.test_docker.conftest import docker_test IMAGE_NAME = "mike0sv/ebaklya" @@ -26,7 +29,15 @@ @pytest.fixture(scope="session") -def _test_images(tmpdir_factory, dockerenv_local, dockerenv_remote): +def _test_images(dockerenv_local): + with dockerenv_local.daemon.client() as client: + client.images.pull(IMAGE_NAME, "latest") + + +@pytest.fixture(scope="session") +def _test_images_remote( + tmpdir_factory, dockerenv_local, dockerenv_remote, _test_images +): with dockerenv_local.daemon.client() as client: tag_name = f"{dockerenv_remote.registry.get_host()}/{REPOSITORY_NAME}/{IMAGE_NAME}" client.images.pull(IMAGE_NAME, "latest").tag(tag_name) @@ -57,7 +68,7 @@ def test_run_default_registry( @docker_test def test_run_remote_registry( - dockerenv_remote, _test_images, model_meta_saved_single + dockerenv_remote, _test_images_remote, model_meta_saved_single ): _check_runner(IMAGE_NAME, dockerenv_remote, model_meta_saved_single) @@ -76,7 +87,7 @@ def test_run_local_image_name_that_will_never_exist( @docker_test def test_run_local_fail_inside_container( - dockerenv_remote, _test_images, model_meta_saved_single + dockerenv_remote, _test_images_remote, model_meta_saved_single ): with pytest.raises(DeploymentError): _check_runner( @@ -86,31 +97,60 @@ def test_run_local_fail_inside_container( ) +@docker_test +def test_deploy_full( + tmp_path_factory, dockerenv_local, model_meta_saved_single +): + meta_path = tmp_path_factory.mktemp("deploy-meta") + meta = deploy( + str(meta_path), + model_meta_saved_single, + dockerenv_local, + args=DockerBuildArgs(templates_dir=[resource_path(__file__)]), + server="fastapi", + container_name="test_full_deploy", + ) + + meta.wait_for_status( + DeployStatus.RUNNING, + allowed_intermediate=[ + 
DeployStatus.NOT_DEPLOYED, + DeployStatus.STARTING, + ], + times=50, + ) + assert meta.get_status() == DeployStatus.RUNNING + + def _check_runner(img, env: DockerEnv, model): with tempfile.TemporaryDirectory() as tmpdir: instance = DockerContainer( container_name=CONTAINER_NAME, port_mapping={80: 8008}, - state=DockerContainerState(image=DockerImage(name=img)), server=FastAPIServer(), - model_link=model.make_link(), - env_link=env.make_link(), + env=env, rm=False, ) - instance.update_model_hash(model) instance.dump(os.path.join(tmpdir, "deploy")) - assert env.get_status(instance) == DeployStatus.NOT_DEPLOYED + instance.update_state( + DockerContainerState( + image=DockerImage(name=img), + model_hash=model.meta_hash(), + declaration=instance, + ) + ) + assert instance.get_status() == DeployStatus.NOT_DEPLOYED - env.deploy(instance) + instance.deploy(model) instance.wait_for_status( DeployStatus.RUNNING, allowed_intermediate=[DeployStatus.STARTING] ) time.sleep(0.1) - assert env.get_status(instance) == DeployStatus.RUNNING + assert instance.get_status() == DeployStatus.RUNNING - env.remove(instance) + instance.remove() time.sleep(0.1) - assert env.get_status(instance) == DeployStatus.NOT_DEPLOYED + assert instance.get_status() == DeployStatus.NOT_DEPLOYED diff --git a/tests/contrib/test_gitlab.py b/tests/contrib/test_gitlab.py index a889d59a..5ee4822b 100644 --- a/tests/contrib/test_gitlab.py +++ b/tests/contrib/test_gitlab.py @@ -1,17 +1,22 @@ import pytest -from mlem.contrib.gitlabfs import GitlabFileSystem +from mlem.contrib.gitlabfs import GitlabFileSystem, ls_gitlab_refs from mlem.core.errors import RevisionNotFound -from mlem.core.meta_io import UriResolver, get_fs +from mlem.core.meta_io import Location, get_fs from mlem.core.metadata import load_meta from mlem.core.objects import MlemModel -from tests.conftest import long +from tests.conftest import get_current_test_branch, long MLEM_TEST_REPO_PROJECT = "iterative.ai/mlem-test" MLEM_TEST_REPO_URI = 
f"https://gitlab.com/{MLEM_TEST_REPO_PROJECT}" +@pytest.fixture() +def current_test_branch_gl(): + return get_current_test_branch(set(ls_gitlab_refs(MLEM_TEST_REPO_PROJECT))) + + @long def test_ls(): fs = GitlabFileSystem(MLEM_TEST_REPO_PROJECT) @@ -46,7 +51,7 @@ def test_uri_resolver(uri): ["main", "branch", "tag", "3897d2ab"], ) def test_uri_resolver_rev(rev): - location = UriResolver.resolve(MLEM_TEST_REPO_URI, None, rev=rev, fs=None) + location = Location.resolve(MLEM_TEST_REPO_URI, None, rev=rev, fs=None) assert isinstance(location.fs, GitlabFileSystem) assert location.fs.root == rev assert "README.md" in location.fs.ls("") @@ -55,14 +60,16 @@ def test_uri_resolver_rev(rev): @long def test_uri_resolver_wrong_rev(): with pytest.raises(RevisionNotFound): - UriResolver.resolve( + Location.resolve( MLEM_TEST_REPO_URI, None, rev="__not_exists__", fs=None ) @long -def test_loading_object(): +def test_loading_object(current_test_branch_gl): meta = load_meta( - "latest", project=MLEM_TEST_REPO_URI + "/-/blob/main/simple" + "latest", + project=MLEM_TEST_REPO_URI + "/-/blob/main/simple", + rev=current_test_branch_gl, ) assert isinstance(meta, MlemModel) diff --git a/tests/contrib/test_heroku.py b/tests/contrib/test_heroku.py index 8558b0fc..8cd80ba6 100644 --- a/tests/contrib/test_heroku.py +++ b/tests/contrib/test_heroku.py @@ -90,8 +90,8 @@ def test_create_app(heroku_app_name, heroku_env, model): name = heroku_app_name("create-app") heroku_deploy = HerokuDeployment( app_name=name, - env_link=heroku_env.make_link(), - model_link=model.make_link(), + env=heroku_env, + model=model.make_link(), team=HEROKU_TEAM, ) create_app(heroku_deploy) @@ -109,7 +109,7 @@ def test_build_heroku_docker(model: MlemModel, uses_docker_build): def test_state_ensured_app(): - state = HerokuState() + state = HerokuState(declaration=HerokuDeployment(app_name="")) with pytest.raises(ValueError): assert state.ensured_app is not None @@ -120,7 +120,8 @@ def test_state_ensured_app(): def 
_check_heroku_deployment(meta): assert isinstance(meta, HerokuDeployment) - assert heroku_api_request("GET", f"/apps/{meta.state.ensured_app.name}") + state = meta.get_state() + assert heroku_api_request("GET", f"/apps/{state.ensured_app.name}") meta.wait_for_status( DeployStatus.RUNNING, allowed_intermediate=[ @@ -132,7 +133,7 @@ def _check_heroku_deployment(meta): assert meta.get_status() == DeployStatus.RUNNING time.sleep(10) docs_page = requests.post( - meta.state.ensured_app.web_url + "predict", + state.ensured_app.web_url + "predict", json={ "data": { "values": [ @@ -159,7 +160,7 @@ def is_not_crash(err, *args): # pylint: disable=unused-argument return not needs_another_try -@flaky(rerun_filter=is_not_crash, max_runs=2) +@flaky(rerun_filter=is_not_crash, max_runs=1) @heroku @long @heroku_matrix @@ -186,7 +187,7 @@ def test_env_deploy_full( if CLEAR_APPS: meta.remove() - assert meta.state is None + assert meta.get_state() == HerokuState(declaration=meta) meta.wait_for_status( DeployStatus.NOT_DEPLOYED, allowed_intermediate=DeployStatus.RUNNING, diff --git a/tests/resources/empty/.mlem/config.yaml b/tests/contrib/test_kubernetes/__init__.py similarity index 100% rename from tests/resources/empty/.mlem/config.yaml rename to tests/contrib/test_kubernetes/__init__.py diff --git a/tests/contrib/test_kubernetes/conftest.py b/tests/contrib/test_kubernetes/conftest.py new file mode 100644 index 00000000..9df2824e --- /dev/null +++ b/tests/contrib/test_kubernetes/conftest.py @@ -0,0 +1,46 @@ +import os + +import pytest +from kubernetes import client, config + +from tests.conftest import long + +from .utils import Command + + +def is_minikube_running() -> bool: + try: + cmd = Command("minikube status") + returncode = cmd.run(timeout=3, shell=True) + if returncode == 0: + config.load_kube_config( + config_file=os.getenv("KUBECONFIG", default="~/.kube/config") + ) + client.CoreV1Api().list_namespaced_pod("default") + return True + return False + except 
(config.config_exception.ConfigException, ConnectionRefusedError): + return False + + +def has_k8s(): + if os.environ.get("SKIP_K8S_TESTS", None) == "true": + return False + current_os = os.environ.get("GITHUB_MATRIX_OS") + current_python = os.environ.get("GITHUB_MATRIX_PYTHON") + if ( + current_os is not None + and current_os != "ubuntu-latest" + or current_python is not None + and current_python != "3.9" + ): + return False + return is_minikube_running() + + +def k8s_test(f): + mark = pytest.mark.kubernetes + skip = pytest.mark.skipif( + not has_k8s(), reason="kubernetes is unavailable or skipped" + ) + return long(mark(skip(f))) diff --git a/tests/contrib/test_kubernetes/test_base.py b/tests/contrib/test_kubernetes/test_base.py new file mode 100644 index 00000000..df8c13ec --- /dev/null +++ b/tests/contrib/test_kubernetes/test_base.py @@ -0,0 +1,126 @@ +import os +import re +import subprocess +import tempfile + +import numpy as np +import pytest +from kubernetes import config +from sklearn.datasets import load_iris +from sklearn.tree import DecisionTreeClassifier + +from mlem.api import save +from mlem.config import project_config +from mlem.contrib.docker.base import DockerDaemon, DockerRegistry +from mlem.contrib.kubernetes.base import K8sDeployment, K8sDeploymentState +from mlem.contrib.kubernetes.build import build_k8s_docker +from mlem.contrib.kubernetes.context import ImagePullPolicy +from mlem.contrib.kubernetes.service import LoadBalancerService +from mlem.core.objects import DeployStatus, MlemModel +from tests.contrib.test_kubernetes.conftest import k8s_test +from tests.contrib.test_kubernetes.utils import Command + + +@pytest.fixture(scope="session") +def minikube_env_variables(): + old_environ = dict(os.environ) + output = subprocess.check_output( + ["minikube", "-p", "minikube", "docker-env"] + ) + export_re = re.compile('export ([A-Z_]+)="(.*)"\\n') + export_pairs = export_re.findall(output.decode("UTF-8")) + for k, v in export_pairs: + 
os.environ[k] = v + + yield + + os.environ.clear() + os.environ.update(old_environ) + + +@pytest.fixture +def load_kube_config(): + config.load_kube_config(os.getenv("KUBECONFIG", default="~/.kube/config")) + + +@pytest.fixture(scope="session") +def model_meta(tmp_path_factory): + path = os.path.join(tmp_path_factory.getbasetemp(), "saved-model-single") + train, target = load_iris(return_X_y=True) + model = DecisionTreeClassifier().fit(train, target) + return save(model, path, sample_data=train) + + +@pytest.fixture(scope="session") +def k8s_deployment(minikube_env_variables): + return K8sDeployment( + namespace="ml", + image_pull_policy=ImagePullPolicy.never, + service_type=LoadBalancerService(), + daemon=DockerDaemon(host=os.getenv("DOCKER_HOST", default="")), + ) + + +@pytest.fixture(scope="session") +def docker_image(k8s_deployment, model_meta): + tmpdir = tempfile.mkdtemp() + k8s_deployment.dump(os.path.join(tmpdir, "deploy")) + return build_k8s_docker( + model_meta, + k8s_deployment.image_name, + DockerRegistry(), + DockerDaemon(host=os.getenv("DOCKER_HOST", default="")), + k8s_deployment.server or project_config(None).server, + platform=None, + ) + + +@pytest.fixture +def k8s_deployment_state(docker_image, model_meta, k8s_deployment): + return K8sDeploymentState( + image=docker_image, + model_hash=model_meta.meta_hash(), + declaration=k8s_deployment, + ) + + +@k8s_test +@pytest.mark.usefixtures("load_kube_config") +def test_deploy( + k8s_deployment: K8sDeployment, + k8s_deployment_state: K8sDeploymentState, + model_meta: MlemModel, +): + k8s_deployment.update_state(k8s_deployment_state) + assert ( + k8s_deployment.get_status(k8s_deployment) == DeployStatus.NOT_DEPLOYED + ) + k8s_deployment.deploy(model_meta) + k8s_deployment.wait_for_status( + DeployStatus.RUNNING, + allowed_intermediate=[DeployStatus.STARTING], + timeout=10, + times=5, + ) + assert k8s_deployment.get_status(k8s_deployment) == DeployStatus.RUNNING + k8s_deployment.remove() + assert ( + 
k8s_deployment.get_status(k8s_deployment) == DeployStatus.NOT_DEPLOYED + ) + + +@k8s_test +@pytest.mark.usefixtures("load_kube_config") +def test_deployed_service( + k8s_deployment: K8sDeployment, + k8s_deployment_state: K8sDeploymentState, + model_meta: MlemModel, +): + k8s_deployment.update_state(k8s_deployment_state) + k8s_deployment.deploy(model_meta) + cmd = Command("minikube tunnel") + cmd.run(timeout=20, shell=True) + client = k8s_deployment.get_client() + train, _ = load_iris(return_X_y=True) + response = client.predict(data=train) + assert np.array_equal(response, np.array([0] * 50 + [1] * 50 + [2] * 50)) diff --git a/tests/contrib/test_kubernetes/test_context.py b/tests/contrib/test_kubernetes/test_context.py new file mode 100644 index 00000000..076a3323 --- /dev/null +++ b/tests/contrib/test_kubernetes/test_context.py @@ -0,0 +1,150 @@ +import pytest + +from mlem.contrib.kubernetes.context import ( + ImagePullPolicy, + K8sYamlBuildArgs, + K8sYamlGenerator, +) +from mlem.contrib.kubernetes.service import LoadBalancerService +from tests.conftest import _cut_empty_lines + + +@pytest.fixture +def k8s_default_manifest(): + return _cut_empty_lines( + """apiVersion: v1 +kind: Namespace +metadata: + name: mlem + labels: + name: mlem + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ml + namespace: mlem +spec: + selector: + matchLabels: + app: ml + template: + metadata: + labels: + app: ml + spec: + containers: + - name: ml + image: ml:latest + imagePullPolicy: Always + ports: + - containerPort: 8080 + +--- + +apiVersion: v1 +kind: Service +metadata: + name: ml + namespace: mlem + labels: + run: ml +spec: + ports: + - port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: ml + type: NodePort +""" + ) + + +@pytest.fixture +def k8s_manifest(): + return _cut_empty_lines( + """apiVersion: v1 +kind: Namespace +metadata: + name: hello + labels: + name: hello + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test + namespace: 
hello +spec: + selector: + matchLabels: + app: test + template: + metadata: + labels: + app: test + spec: + containers: + - name: test + image: test:latest + imagePullPolicy: Never + ports: + - containerPort: 8080 + +--- + +apiVersion: v1 +kind: Service +metadata: + name: test + namespace: hello + labels: + run: test +spec: + ports: + - port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: test + type: LoadBalancer +""" + ) + + +def test_k8s_yaml_build_args_default(k8s_default_manifest): + build_args = K8sYamlBuildArgs() + assert _generate_k8s_manifest(**build_args.dict()) == k8s_default_manifest + + +def test_k8s_yaml_build_args(k8s_manifest): + build_args = K8sYamlBuildArgs( + namespace="hello", + image_name="test", + image_uri="test:latest", + image_pull_policy=ImagePullPolicy.never, + port=8080, + service_type=LoadBalancerService(), + ) + assert _generate_k8s_manifest(**build_args.dict()) == k8s_manifest + + +def test_k8s_yaml_generator(k8s_manifest): + kwargs = { + "namespace": "hello", + "image_name": "test", + "image_uri": "test:latest", + "image_pull_policy": "Never", + "port": 8080, + "service_type": LoadBalancerService(), + } + assert _generate_k8s_manifest(**kwargs) == k8s_manifest + + +def _generate_k8s_manifest(**kwargs): + return _cut_empty_lines(K8sYamlGenerator(**kwargs).generate()) diff --git a/tests/contrib/test_kubernetes/utils.py b/tests/contrib/test_kubernetes/utils.py new file mode 100644 index 00000000..5568e038 --- /dev/null +++ b/tests/contrib/test_kubernetes/utils.py @@ -0,0 +1,34 @@ +import subprocess +import threading + + +class Command: + """ + Enables to run subprocess commands in a different thread + with TIMEOUT option! 
+ Based on jcollado's solution: + http://stackoverflow.com/questions/1191374/subprocess-with-timeout/4825933#4825933 + """ + + def __init__(self, cmd): + self.cmd = cmd + self.process = None + + def run(self, timeout=0, **kwargs): + def target(**kwargs): + self.process = ( + subprocess.Popen( # pylint: disable=consider-using-with + self.cmd, **kwargs + ) + ) + self.process.communicate() + + thread = threading.Thread(target=target, kwargs=kwargs) + thread.start() + + thread.join(timeout) + if thread.is_alive(): + self.process.terminate() + thread.join() + + return self.process.returncode diff --git a/tests/contrib/test_pandas.py b/tests/contrib/test_pandas.py index d8c258cd..d7832008 100644 --- a/tests/contrib/test_pandas.py +++ b/tests/contrib/test_pandas.py @@ -14,8 +14,7 @@ from sklearn.model_selection import train_test_split from mlem.api.commands import import_object -from mlem.config import CONFIG_FILE_NAME -from mlem.constants import MLEM_DIR +from mlem.constants import MLEM_CONFIG_FILE_NAME from mlem.contrib.pandas import ( PANDAS_FORMATS, PANDAS_SERIES_FORMATS, @@ -466,7 +465,7 @@ def iris_data(): def test_save_load(iris_data, tmpdir): tmpdir = str(tmpdir / "data") - save(iris_data, tmpdir, index=False) + save(iris_data, tmpdir) data2 = load(tmpdir) pandas_assert(data2, iris_data) @@ -596,7 +595,7 @@ def test_series(series_data2: pd.Series, series_df_type2, df_type2): def test_change_format(mlem_project, data): with open( - os.path.join(mlem_project, MLEM_DIR, CONFIG_FILE_NAME), + os.path.join(mlem_project, MLEM_CONFIG_FILE_NAME), "w", encoding="utf8", ) as f: diff --git a/tests/contrib/test_requirements.py b/tests/contrib/test_requirements.py new file mode 100644 index 00000000..ab571bce --- /dev/null +++ b/tests/contrib/test_requirements.py @@ -0,0 +1,46 @@ +import lightgbm as lgb +import numpy as np +import pytest +from pydantic.error_wrappers import ValidationError + +from mlem.contrib.requirements import RequirementsBuilder +from mlem.core.objects 
import MlemModel + + +def test_build_reqs(tmp_path, model_meta): + path = str(tmp_path / "reqs.txt") + builder = RequirementsBuilder(target=path) + builder.build(model_meta) + with open(path, "r", encoding="utf-8") as f: + assert model_meta.requirements.to_pip() == f.read().splitlines() + + +def test_build_reqs_with_invalid_req_type(): + with pytest.raises( + ValidationError, match="req_type invalid is not valid." + ): + RequirementsBuilder(req_type="invalid") + + +def test_build_requirements_should_print_with_no_path(capsys, model_meta): + builder = RequirementsBuilder() + builder.build(model_meta) + captured = capsys.readouterr() + assert captured.out == " ".join(model_meta.requirements.to_pip()) + "\n" + + +def test_unix_requirement(capsys): + np_payload = np.linspace(0, 2, 5).reshape((-1, 1)) + data_np = lgb.Dataset( + np_payload, + label=np_payload.reshape((-1,)).tolist(), + free_raw_data=False, + ) + booster = lgb.train({}, data_np, 1) + model = MlemModel.from_obj(booster, sample_data=data_np) + builder = RequirementsBuilder(req_type="unix") + builder.build(model) + captured = capsys.readouterr() + assert str(captured.out).endswith( + "\n".join(model.requirements.to_unix()) + "\n" + ) diff --git a/tests/contrib/test_tensorflow.py b/tests/contrib/test_tensorflow.py index 22d811b2..d8596ade 100644 --- a/tests/contrib/test_tensorflow.py +++ b/tests/contrib/test_tensorflow.py @@ -194,7 +194,7 @@ def test_model_wrapper(net, input_data, tmpdir, request): prediction = tmw.call_method("predict", input_data) - np.testing.assert_array_equal(orig_pred, prediction) + np.testing.assert_allclose(orig_pred, prediction) model_name = str(tmpdir / "tensorflow-model") artifacts = tmw.dump(LOCAL_STORAGE, model_name) @@ -214,7 +214,7 @@ def test_model_wrapper(net, input_data, tmpdir, request): prediction2 = tmw.call_method("predict", input_data) - np.testing.assert_array_equal(prediction, prediction2) + np.testing.assert_allclose(prediction, prediction2) assert 
set(tmw.get_requirements().modules) == expected_requirements diff --git a/tests/contrib/test_venv.py b/tests/contrib/test_venv.py new file mode 100644 index 00000000..80a0c3d3 --- /dev/null +++ b/tests/contrib/test_venv.py @@ -0,0 +1,90 @@ +import os +import re +import sys + +import pytest + +from mlem.contrib.venv import ( + CondaBuilder, + CondaPackageRequirement, + VenvBuilder, +) +from mlem.core.errors import MlemError +from mlem.core.requirements import InstallableRequirement +from tests.contrib.conftest import conda_test + + +@pytest.fixture +def sys_prefix_path(tmp_path): + old_sys_prefix = sys.prefix + path = str(tmp_path / "venv-act") + sys.prefix = os.path.abspath(path) + + yield path + + sys.prefix = old_sys_prefix + + +def process_conda_list_output(installed_pkgs): + def get_words(line): + return re.findall(r"[^\s]+", line) + + words = [get_words(x) for x in installed_pkgs.splitlines()[3:]] + keys = [] + vals = [] + for w in words: + if len(w) >= 4: + keys.append(w[0]) + vals.append(w[3]) + result = dict(zip(keys, vals)) + return result + + +@conda_test +def test_build_conda(tmp_path, model_meta): + path = str(tmp_path / "conda-env") + builder = CondaBuilder( + target=path, + conda_reqs=[CondaPackageRequirement(package_name="xtensor")], + ) + env_dir = builder.build(model_meta) + installed_pkgs = builder.get_installed_packages(env_dir).decode() + pkgs_info = process_conda_list_output(installed_pkgs) + for each_req in model_meta.requirements: + if isinstance(each_req, InstallableRequirement): + assert pkgs_info[each_req.package] == "pypi" + elif isinstance(each_req, CondaPackageRequirement): + assert pkgs_info[each_req.package_name] == each_req.channel_name + + +def test_build_venv(tmp_path, model_meta): + path = str(tmp_path / "venv") + builder = VenvBuilder(target=path) + env_dir = builder.build(model_meta) + installed_pkgs = set( + builder.get_installed_packages(env_dir).decode().splitlines() + ) + required_pkgs = set(model_meta.requirements.to_pip()) 
+ assert required_pkgs.issubset(installed_pkgs) + + +def test_install_in_current_venv_not_active(tmp_path, model_meta): + path = str(tmp_path / "venv") + builder = VenvBuilder(target=path, current_env=True) + with pytest.raises(MlemError, match="No virtual environment detected"): + builder.build(model_meta) + + +def test_install_in_current_active_venv(sys_prefix_path, model_meta): + builder = VenvBuilder(target=sys_prefix_path) + env_dir = os.path.abspath(sys_prefix_path) + builder.create_virtual_env() + assert builder.get_installed_packages(env_dir).decode() == "" + os.environ["VIRTUAL_ENV"] = env_dir + builder.current_env = True + builder.build(model_meta) + installed_pkgs = ( + builder.get_installed_packages(env_dir).decode().splitlines() + ) + for each_req in model_meta.requirements.to_pip(): + assert each_req in installed_pkgs diff --git a/tests/core/test_base.py b/tests/core/test_base.py index d5624825..dccc75da 100644 --- a/tests/core/test_base.py +++ b/tests/core/test_base.py @@ -1,8 +1,16 @@ -from typing import ClassVar, Optional +from typing import ClassVar, List, Optional + +from pydantic import BaseModel from mlem.contrib.docker import DockerImageBuilder from mlem.contrib.fastapi import FastAPIServer -from mlem.core.base import MlemABC, build_mlem_object, parse_links, smart_split +from mlem.core.base import ( + MlemABC, + SmartSplitDict, + build_mlem_object, + parse_links, + smart_split, +) from mlem.core.objects import MlemBuilder, MlemLink, MlemModel, MlemObject from mlem.runtime.server import Server from tests.conftest import resource_path @@ -51,11 +59,12 @@ def test_build_with_replace(): res = build_mlem_object( MockMlemABC, "mock", - ["server=fastapi", "server.port=8081"], + ["server=fastapi", "server.port=8081", "server.host=localhost"], ) assert isinstance(res, MockMlemABC) assert isinstance(res.server, FastAPIServer) assert res.server.port == 8081 + assert res.server.host == "localhost" res = build_mlem_object( MockMlemABC, @@ -64,3 +73,157 @@ 
def test_build_with_replace(): ) assert isinstance(res, MockMlemABC) assert isinstance(res.server, FastAPIServer) + + res = build_mlem_object( + MockMlemABC, + "mock", + conf={ + "server": "fastapi", + "server.port": 8081, + "server.host": "localhost", + }, + ) + assert isinstance(res, MockMlemABC) + assert isinstance(res.server, FastAPIServer) + assert res.server.port == 8081 + assert res.server.host == "localhost" + + +def test_build_with_list(): + class MockMlemABCList(MlemABC): + abs_name: ClassVar = "mock_list" + values: List[str] + + res = build_mlem_object( + MockMlemABCList, + "mock_list", + ["values.0=a", "values.1=b"], + ) + assert isinstance(res, MockMlemABCList) + assert isinstance(res.values, list) + assert res.values == ["a", "b"] + + +def test_build_with_list_complex(): + class Value(BaseModel): + field: str + + class MockMlemABCListComplex(MlemABC): + abs_name: ClassVar = "mock_list_complex" + values: List[Value] + + res = build_mlem_object( + MockMlemABCListComplex, + "mock_list_complex", + ["values.0.field=a", "values.1.field=b"], + ) + assert isinstance(res, MockMlemABCListComplex) + assert isinstance(res.values, list) + assert res.values == [Value(field="a"), Value(field="b")] + + +def test_build_with_list_nested(): + class MockMlemABCListNested(MlemABC): + abs_name: ClassVar = "mock_list_complex" + values: List[List[str]] + + res = build_mlem_object( + MockMlemABCListNested, + MockMlemABCListNested.abs_name, + ["values.0.0=a", "values.0.1=b"], + ) + assert isinstance(res, MockMlemABCListNested) + assert isinstance(res.values, list) + assert res.values == [["a", "b"]] + + +def test_smart_split_dict(): + d = SmartSplitDict(sep=".") + d["a.b.c"] = 1 + d["a.b.d"] = 2 + d["a.e"] = 3 + d["a.f"] = 4 + d["g"] = 5 + + assert d.build() == {"g": 5, "a": {"f": 4, "e": 3, "b": {"d": 2, "c": 1}}} + + +def test_smart_split_dict_with_list(): + d = SmartSplitDict(sep=".") + d["a.0"] = 1 + d["a.1"] = 2 + d["b"] = 3 + + assert d.build() == {"a": [1, 2], "b": 3} + 
+ +def test_smart_split_dict_with_nested(): + d = SmartSplitDict(sep=".") + d["ll.0.0"] = 1 + d["ll.0.1"] = 2 + d["ll.1.0"] = 3 + d["ll.1.1"] = 4 + d["ld.0.a"] = 5 + d["ld.0.b"] = 6 + d["ld.1.a"] = 7 + d["ld.1.b"] = 8 + d["dl.a.0"] = 9 + d["dl.a.1"] = 10 + d["dl.b.0"] = 11 + d["dl.b.1"] = 12 + d["dd.a.a"] = 13 + d["dd.a.b"] = 14 + d["dd.b.a"] = 15 + d["dd.b.b"] = 16 + + assert d.build() == { + "ll": [[1, 2], [3, 4]], + "ld": [{"a": 5, "b": 6}, {"a": 7, "b": 8}], + "dl": {"a": [9, 10], "b": [11, 12]}, + "dd": {"a": {"a": 13, "b": 14}, "b": {"a": 15, "b": 16}}, + } + + +def test_smart_split_dict_nested_list(): + d = SmartSplitDict() + d["r.k1.0"] = "lol" + d["r.k1.1"] = "lol" + d["r.k2.0"] = "lol" + d["r.k2.1"] = "lol" + + assert d.build() == {"r": {"k1": ["lol", "lol"], "k2": ["lol", "lol"]}} + + +def test_smart_split_dict_with_type(): + d = SmartSplitDict(sep=".") + d["server"] = "fastapi" + d["server.port"] = 8080 + assert d.build() == {"server": {"type": "fastapi", "port": 8080}} + + +def test_smart_split_dict_prebuilt(): + d = SmartSplitDict(sep=".") + d["a.b.c"] = 1 + d["a"] = {"b": {"d": 2}} + assert d.build() == {"a": {"b": {"c": 1, "d": 2}}} + + +def test_smart_split_dict_list_with_type(): + d = SmartSplitDict(sep=".") + d["server.0"] = "fastapi" + d["server.0.port"] = 8080 + assert d.build() == {"server": [{"type": "fastapi", "port": 8080}]} + + +def test_smart_split_dict_dict_with_type(): + d = SmartSplitDict(sep=".") + d["server.a"] = "fastapi" + d["server.a.port"] = 8080 + d["server.b"] = "fastapi" + d["server.b.port"] = 8080 + assert d.build() == { + "server": { + "a": {"type": "fastapi", "port": 8080}, + "b": {"type": "fastapi", "port": 8080}, + } + } diff --git a/tests/core/test_meta_io.py b/tests/core/test_meta_io.py index 0990f238..37fddf71 100644 --- a/tests/core/test_meta_io.py +++ b/tests/core/test_meta_io.py @@ -11,7 +11,7 @@ from mlem import LOCAL_CONFIG from mlem.core.errors import RevisionNotFound -from mlem.core.meta_io import UriResolver, 
get_fs, get_path_by_fs_path, read +from mlem.core.meta_io import Location, get_fs, get_path_by_fs_path, read from tests.conftest import ( MLEM_TEST_REPO, MLEM_TEST_REPO_NAME, @@ -84,7 +84,7 @@ def test_get_fs_github(uri, rev): @long def test_github_wrong_rev(): with pytest.raises(RevisionNotFound): - UriResolver.resolve( + Location.resolve( MLEM_TEST_REPO, project=None, rev="__not_exists__kek", fs=None ) diff --git a/tests/core/test_metadata.py b/tests/core/test_metadata.py index 426b3241..c8572220 100644 --- a/tests/core/test_metadata.py +++ b/tests/core/test_metadata.py @@ -12,10 +12,9 @@ from sklearn.tree import DecisionTreeClassifier from mlem.api import init -from mlem.constants import MLEM_DIR from mlem.core.meta_io import MLEM_EXT from mlem.core.metadata import load, load_meta, save -from mlem.core.objects import MlemLink, MlemModel +from mlem.core.objects import MlemModel from tests.conftest import ( MLEM_TEST_REPO, MLEM_TEST_REPO_NAME, @@ -59,7 +58,7 @@ def test_model_saving_without_sample_data(model, tmpdir_factory): tmpdir_factory.mktemp("saving-models-without-sample-data") / "model" ) # index=True would require having .mlem folder somewhere - save(model, path, index=False) + save(model, path) def test_model_saving_in_mlem_project_root(model_train_target, tmpdir_factory): @@ -67,7 +66,7 @@ def test_model_saving_in_mlem_project_root(model_train_target, tmpdir_factory): init(project) model_dir = os.path.join(project, "generated-model") model, train, _ = model_train_target - save(model, model_dir, sample_data=train, index=True) + save(model, model_dir, sample_data=train) def test_model_saving(model_path): @@ -108,8 +107,7 @@ def test_meta_loading(model_path): [ f"github://{MLEM_TEST_REPO_ORG}:{MLEM_TEST_REPO_NAME}@{{branch}}/simple/data/model", f"github://{MLEM_TEST_REPO_ORG}:{MLEM_TEST_REPO_NAME}@{{branch}}/simple/data/model.mlem", - f"github://{MLEM_TEST_REPO_ORG}:{MLEM_TEST_REPO_NAME}@{{branch}}/simple/.mlem/link/data/model.mlem", - 
f"github://{MLEM_TEST_REPO_ORG}:{MLEM_TEST_REPO_NAME}@{{branch}}/simple/.mlem/link/latest.mlem", + f"github://{MLEM_TEST_REPO_ORG}:{MLEM_TEST_REPO_NAME}@{{branch}}/simple/latest.mlem", f"{MLEM_TEST_REPO}tree/{{branch}}/simple/data/model/", ], ) @@ -127,8 +125,7 @@ def test_model_loading_from_github_with_fsspec(url, current_test_branch): [ "data/model", "data/model.mlem", - ".mlem/link/data/model.mlem", - ".mlem/link/latest.mlem", + "latest.mlem", ], ) def test_model_loading_from_github(path, current_test_branch): @@ -164,11 +161,9 @@ def test_saving_to_s3(model, s3_storage_fs, s3_tmp_path): path = s3_tmp_path("model_save") init(path) model_path = posixpath.join(path, "model") - save(model, model_path, fs=s3_storage_fs, external=True) + save(model, model_path, fs=s3_storage_fs) model_path = model_path[len("s3:/") :] - assert s3_storage_fs.isfile( - posixpath.join(path, MLEM_DIR, MlemLink.object_type, "model.mlem") - ) + assert s3_storage_fs.isfile(posixpath.join(path, "model.mlem")) assert s3_storage_fs.isfile(model_path + MLEM_EXT) assert s3_storage_fs.isfile(model_path) diff --git a/tests/core/test_objects.py b/tests/core/test_objects.py index c24b35cd..d6775aa3 100644 --- a/tests/core/test_objects.py +++ b/tests/core/test_objects.py @@ -6,20 +6,22 @@ import pytest from fsspec.implementations.local import LocalFileSystem -from pydantic import ValidationError, parse_obj_as +from pydantic import parse_obj_as from sklearn.datasets import load_iris from mlem.core.artifacts import Artifacts, LocalArtifact, Storage from mlem.core.errors import MlemProjectNotFound, WrongRequirementsError -from mlem.core.meta_io import MLEM_DIR, MLEM_EXT +from mlem.core.meta_io import MLEM_EXT from mlem.core.metadata import load, load_meta -from mlem.core.model import ModelIO +from mlem.core.model import ModelIO, ModelType from mlem.core.objects import ( DeployState, + DeployStatus, MlemDeployment, MlemLink, MlemModel, MlemObject, + ModelLink, ) from mlem.core.requirements import 
InstallableRequirement, Requirements from tests.conftest import ( @@ -43,17 +45,24 @@ def get_status(self): def destroy(self): pass - def get_client(self): + +class MyMlemDeployment(MlemDeployment): + def deploy(self, model: MlemModel): + pass + + def remove(self): + pass + + def get_status(self, raise_on_error=True) -> DeployStatus: + pass + + def _get_client(self, state): pass @pytest.fixture() def meta(): - return MlemDeployment( - env_link=MlemLink(path="", link_type="env"), - model_link=MlemLink(path="", link_type="model"), - state=MyDeployState(), - ) + return MyMlemDeployment(env="") @pytest.fixture(params=["fullpath", "with_root"]) @@ -66,12 +75,9 @@ def get(name): return get -@pytest.mark.parametrize("external", [True, False]) -def test_meta_dump_curdir(meta, mlem_curdir_project, external): - meta.dump(DEPLOY_NAME, external=external) +def test_meta_dump_curdir(meta, mlem_curdir_project): + meta.dump(DEPLOY_NAME) path = DEPLOY_NAME + MLEM_EXT - if not external: - path = os.path.join(MLEM_DIR, meta.object_type, path) assert os.path.isfile(path) assert isinstance(load(DEPLOY_NAME), MlemDeployment) @@ -81,92 +87,32 @@ def test_meta_dump__no_root(meta, tmpdir): meta.dump(DEPLOY_NAME, project=str(tmpdir)) -def test_meta_dump_fullpath_in_project_no_link(mlem_project, meta): - meta.dump( - os.path.join(mlem_project, MLEM_DIR, meta.object_type, DEPLOY_NAME), - index=True, - external=True, - ) - link_path = os.path.join( - mlem_project, MLEM_DIR, MlemLink.object_type, DEPLOY_NAME + MLEM_EXT - ) - assert not os.path.exists(link_path) - - -def test_meta_dump_internal(mlem_project, meta, path_and_root): - path, root = path_and_root(DEPLOY_NAME) - meta.dump(path, project=root, external=False) - assert meta.name == DEPLOY_NAME - meta_path = os.path.join( - mlem_project, - MLEM_DIR, - MlemDeployment.object_type, - DEPLOY_NAME + MLEM_EXT, - ) - assert os.path.isfile(meta_path) - load_path = load_meta(meta_path) - assert isinstance(load_path, MlemDeployment) - assert 
load_path.name == meta.name - load_root = load_meta(path, project=root) - assert isinstance(load_root, MlemDeployment) - assert load_root.name == meta.name - - def test_meta_dump_external(mlem_project, meta, path_and_root): path, root = path_and_root(DEPLOY_NAME) - meta.dump(path, project=root, external=True) + meta.dump(path, project=root) assert meta.name == DEPLOY_NAME meta_path = os.path.join(mlem_project, DEPLOY_NAME + MLEM_EXT) assert os.path.isfile(meta_path) loaded = load_meta(meta_path) assert isinstance(loaded, MlemDeployment) assert loaded.name == meta.name - link_path = os.path.join( - mlem_project, MLEM_DIR, MlemLink.object_type, DEPLOY_NAME + MLEM_EXT - ) - assert os.path.isfile(link_path) - assert isinstance(load_meta(link_path, follow_links=False), MlemLink) -@pytest.mark.parametrize("external", [False, True]) -def test_model_dump_curdir(model_meta, mlem_curdir_project, external): - model_meta.dump(MODEL_NAME, external=external) +def test_model_dump_curdir(model_meta, mlem_curdir_project): + model_meta.dump(MODEL_NAME) assert model_meta.name == MODEL_NAME - if not external: - prefix = Path(os.path.join(MLEM_DIR, model_meta.object_type)) - else: - prefix = Path("") - assert os.path.isfile(prefix / MODEL_NAME) - assert os.path.isfile(prefix / (MODEL_NAME + MLEM_EXT)) + assert os.path.isfile(MODEL_NAME) + assert os.path.isfile(MODEL_NAME + MLEM_EXT) assert isinstance(load_meta(MODEL_NAME), MlemModel) -def test_model_dump_internal(mlem_project, model_meta, path_and_root): - path, root = path_and_root(MODEL_NAME) - model_meta.dump(path, project=root, external=False) - assert model_meta.name == MODEL_NAME - model_path = os.path.join( - mlem_project, MLEM_DIR, MlemModel.object_type, MODEL_NAME - ) - assert os.path.isfile(model_path + MLEM_EXT) - assert os.path.isfile(model_path) - - def test_model_dump_external(mlem_project, model_meta, path_and_root): path, root = path_and_root(MODEL_NAME) - model_meta.dump(path, project=root, external=True) + 
model_meta.dump(path, project=root) assert model_meta.name == MODEL_NAME model_path = os.path.join(mlem_project, MODEL_NAME) assert os.path.isfile(model_path + MLEM_EXT) assert os.path.isfile(model_path) - link_path = os.path.join( - mlem_project, MLEM_DIR, MlemLink.object_type, MODEL_NAME + MLEM_EXT - ) - assert os.path.isfile(link_path) - link = load_meta(link_path, follow_links=False) - assert isinstance(link, MlemLink) - model = link.load_link() - assert model.dict() == model_meta.dict() def _check_cloned_model(cloned_model_meta: MlemObject, path, fs=None): @@ -215,7 +161,7 @@ def test_model_cloning(model_single_path): model = load_meta(model_single_path) with tempfile.TemporaryDirectory() as path: path = posixpath.join(path, "cloned") - model.clone(path, index=False) + model.clone(path) cloned_model_meta = load_meta(path, load_value=False) _check_cloned_model(cloned_model_meta, path) @@ -225,24 +171,18 @@ def test_complex_model_cloning(complex_model_single_path): model = load_meta(complex_model_single_path) with tempfile.TemporaryDirectory() as path: path = posixpath.join(path, "cloned") - model.clone(path, index=False) + model.clone(path) cloned_model_meta = load_meta(path, load_value=False) _check_complex_cloned_model(cloned_model_meta, path) -@pytest.mark.parametrize("external", [True, False]) -def test_model_cloning_to_project(model_single_path, mlem_project, external): +def test_model_cloning_to_project(model_single_path, mlem_project): model = load_meta(model_single_path) - model.clone("model", project=mlem_project, index=False, external=external) + model.clone("model", project=mlem_project) cloned_model_meta = load_meta( "model", project=mlem_project, load_value=False ) - if external: - path = os.path.join(mlem_project, "model") - else: - path = os.path.join( - mlem_project, MLEM_DIR, MlemModel.object_type, "model" - ) + path = os.path.join(mlem_project, "model") _check_cloned_model(cloned_model_meta, path) @@ -250,7 +190,7 @@ def 
test_model_cloning_to_project(model_single_path, mlem_project, external): def test_model_cloning_to_remote(model_path, s3_tmp_path, s3_storage_fs): model = load_meta(model_path) path = s3_tmp_path("model_cloning_to_remote") - model.clone(path, index=False) + model.clone(path) s3path = path[len("s3:/") :] assert s3_storage_fs.isfile(s3path + MLEM_EXT) assert s3_storage_fs.isfile(s3path) @@ -278,7 +218,7 @@ def get(project="simple"): def test_remote_model_cloning(remote_model_meta, project): with tempfile.TemporaryDirectory() as path: path = os.path.join(path, "model") - remote_model_meta(project).clone(path, index=False) + remote_model_meta(project).clone(path) cloned_model_meta = load_meta(path, load_value=False) _check_cloned_model(cloned_model_meta, path) @@ -293,7 +233,7 @@ def test_remote_model_cloning_to_remote( remote_model_meta, project, s3_tmp_path, s3_storage_fs ): path = s3_tmp_path("remote_model_cloning_to_remote") - remote_model_meta(project).clone(path, index=False) + remote_model_meta(project).clone(path) s3path = path[len("s3:/") :] assert s3_storage_fs.isfile(s3path + MLEM_EXT) assert s3_storage_fs.isfile(s3path) @@ -327,9 +267,7 @@ def test_double_link_load(filled_mlem_project): latest = load_meta( "latest", project=filled_mlem_project, follow_links=False ) - link = latest.make_link( - "external", project=filled_mlem_project, external=True - ) + link = latest.make_link("external", project=filled_mlem_project) assert link.link_type == "model" model = load_meta( "external", project=filled_mlem_project, follow_links=True @@ -337,6 +275,13 @@ def test_double_link_load(filled_mlem_project): assert isinstance(model, MlemModel) +def test_typed_link(): + link = ModelLink(path="aaa") + assert link.dict() == {"path": "aaa"} + + assert parse_obj_as(ModelLink, {"path": "aaa"}) == link + + @long @need_test_repo_auth def test_load_link_from_rev(): @@ -363,34 +308,31 @@ def test_link_dump_in_mlem(model_path_mlem_project): link_type="model", ) link_name = "latest" 
- link.dump(link_name, project=mlem_project, external=True, index=False) + link.dump(link_name, project=mlem_project) model = load_meta(os.path.join(mlem_project, link_name), follow_links=True) assert isinstance(model, MlemModel) def test_model_model_type_laziness(): payload = { - "model_type": {"type": "doesnotexist"}, + "model_type": {"type": "sklearn", "methods": {}}, "object_type": "model", "requirements": [], } model = parse_obj_as(MlemModel, payload) - assert model.model_type_raw == {"type": "doesnotexist"} - with pytest.raises(ValidationError): - print(model.model_type) + assert model.model_type_cache == {"type": "sklearn", "methods": {}} + assert isinstance(model.model_type_cache, dict) + assert isinstance(model.model_type, ModelType) + assert isinstance(model.model_type_cache, ModelType) def test_mlem_project_root(filled_mlem_project): path = Path(filled_mlem_project) assert os.path.exists(path) assert os.path.isdir(path) - mlem_dir = path / MLEM_DIR - assert os.path.isdir(mlem_dir) - assert os.path.isfile(mlem_dir / "link" / ("model1" + MLEM_EXT)) - assert os.path.isfile(mlem_dir / "link" / ("latest" + MLEM_EXT)) - model_dir = path / "model1" - assert os.path.isfile(str(model_dir) + MLEM_EXT) - assert os.path.isfile(str(model_dir)) + assert os.path.isfile(path / ("model1" + MLEM_EXT)) + assert os.path.isfile(path / ("latest" + MLEM_EXT)) + assert os.path.isfile(path / "model1") class MockModelIO(ModelIO): @@ -426,11 +368,16 @@ def test_remove_old_artifacts(model, tmpdir, train): load(path).predict(train) +class MockModelType(ModelType): + io: ModelIO = MockModelIO(filename="") + + def test_checkenv(): model = MlemModel( requirements=Requirements.new( InstallableRequirement(module="pytest", version=pytest.__version__) - ) + ), + model_type=MockModelType(methods={}), ) model.checkenv() diff --git a/tests/core/test_requirements.py b/tests/core/test_requirements.py index 15c5a094..06e060b1 100644 --- a/tests/core/test_requirements.py +++ 
b/tests/core/test_requirements.py @@ -46,39 +46,41 @@ def test_resolve_requirement_list_arg(): def test_resolve_str_arg(): req = "dumb==0.4.1" actual_reqs = resolve_requirements(req) - assert actual_reqs.installable[0].to_str() == req + assert actual_reqs.installable[0].get_repr() == req def test_resolve_str_list_arg(): req = ["dumb==0.4.1", "art==4.0"] actual_reqs = resolve_requirements(req) assert len(actual_reqs.installable) == 2 - assert sorted(req) == sorted([r.to_str() for r in actual_reqs.installable]) + assert sorted(req) == sorted( + [r.get_repr() for r in actual_reqs.installable] + ) def test_installable_requirement__from_module(): import pandas as pd assert ( - InstallableRequirement.from_module(pd).to_str() + InstallableRequirement.from_module(pd).get_repr() == f"pandas=={pd.__version__}" ) import numpy as np assert ( - InstallableRequirement.from_module(np).to_str() + InstallableRequirement.from_module(np).get_repr() == f"numpy=={np.__version__}" ) import sklearn as sk assert ( - InstallableRequirement.from_module(sk).to_str() + InstallableRequirement.from_module(sk).get_repr() == f"scikit-learn=={sk.__version__}" ) assert ( - InstallableRequirement.from_module(sk, "xyz").to_str() + InstallableRequirement.from_module(sk, "xyz").get_repr() == f"xyz=={sk.__version__}" ) @@ -138,11 +140,17 @@ def test_req_collection_main(tmpdir, postfix): assert res == 0 meta = load_meta(model_path, force_type=MlemModel) assert set(meta.requirements.to_pip()) == { - InstallableRequirement.from_module(emoji).to_str(), - InstallableRequirement.from_module(numpy).to_str(), + InstallableRequirement.from_module(emoji).get_repr(), + InstallableRequirement.from_module(numpy).get_repr(), } +def test_consistent_resolve_order(): + reqs = ["a", "b", "c"] + for _ in range(10): + assert resolve_requirements(reqs).modules == reqs + + # Copyright 2019 Zyfra # Copyright 2021 Iterative # diff --git a/tests/resources/empty/.mlem.yaml b/tests/resources/empty/.mlem.yaml new file mode 100644 
index 00000000..e69de29b diff --git a/tests/resources/storage/.mlem/config.yaml b/tests/resources/storage/.mlem.yaml similarity index 100% rename from tests/resources/storage/.mlem/config.yaml rename to tests/resources/storage/.mlem.yaml diff --git a/tests/test_config.py b/tests/test_config.py index 12d386b6..1b105f99 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,7 +1,8 @@ import posixpath -from mlem.config import CONFIG_FILE_NAME, MlemConfig, project_config -from mlem.constants import MLEM_DIR +from mlem.config import MlemConfig, project_config +from mlem.constants import MLEM_CONFIG_FILE_NAME +from mlem.contrib.fastapi import FastAPIServer from mlem.core.artifacts import FSSpecStorage, LocalStorage from mlem.core.meta_io import get_fs from tests.conftest import long @@ -24,7 +25,11 @@ def test_loading_empty(set_mlem_project_root): def test_loading_remote(s3_tmp_path, s3_storage_fs): project = s3_tmp_path("remote_conf") fs, path = get_fs(project) - path = posixpath.join(path, MLEM_DIR, CONFIG_FILE_NAME) + path = posixpath.join(path, MLEM_CONFIG_FILE_NAME) with fs.open(path, "w") as f: f.write("core:\n ADDITIONAL_EXTENSIONS: ext1\n") assert project_config(path, fs=fs).additional_extensions == ["ext1"] + + +def test_default_server(): + assert project_config("").server == FastAPIServer() diff --git a/tests/test_ext.py b/tests/test_ext.py index 690f68ef..7dddff31 100644 --- a/tests/test_ext.py +++ b/tests/test_ext.py @@ -1,5 +1,12 @@ -from mlem import ExtensionLoader +import os +import re +from importlib import import_module +from pathlib import Path + +import pytest + from mlem.config import MlemConfig, MlemConfigBase +from mlem.ext import ExtensionLoader, get_ext_type from mlem.utils.entrypoints import ( MLEM_CONFIG_ENTRY_POINT, MLEM_ENTRY_POINT, @@ -24,24 +31,49 @@ def test_find_implementations(): assert not i.startswith("None") +def _write_entrypoints(impls_sorted, section: str): + setup_path = Path(__file__).parent.parent / "setup.py" + with 
open(setup_path, encoding="utf8") as f: + setup_py = f.read() + impls_string = ",\n".join(f' "{i}"' for i in impls_sorted) + new_entrypoints = f'"{section}": [\n{impls_string},\n ]' + setup_py = re.subn(rf'"{section}": \[\n[^]]*]', new_entrypoints, setup_py)[ + 0 + ] + with open(setup_path, "w", encoding="utf8") as f: + f.write(setup_py) + + def test_all_impls_in_entrypoints(): # if this test fails, add new entrypoints (take the result of find_implementations()) to setup.py and # reinstall your dev copy of mlem to re-populate them exts = load_entrypoints() exts = {e.entry for e in exts.values()} - impls = find_abc_implementations()[MLEM_ENTRY_POINT] + impls = find_abc_implementations(raise_on_error=True)[MLEM_ENTRY_POINT] impls_sorted = sorted( impls, key=lambda x: tuple(x.split(" = ")[1].split(":")) ) - assert exts == set(impls), str(impls_sorted) + impls_set = set(impls) + if exts != impls_set: + _write_entrypoints(impls_sorted, "mlem.contrib") + assert ( + exts == impls_set + ), "New enrtypoints written to setup.py, please reinstall" def test_all_configs_in_entrypoints(): - impls = find_implementations(MlemConfigBase) + impls = find_implementations(MlemConfigBase, raise_on_error=True) impls[MlemConfig] = f"{MlemConfig.__module__}:{MlemConfig.__name__}" - assert { + impls_sorted = sorted( + {f"{i.__config__.section} = {k}" for i, k in impls.items()}, + key=lambda x: tuple(x.split(" = ")[1].split(":")), + ) + exts = { e.entry for e in load_entrypoints(MLEM_CONFIG_ENTRY_POINT).values() - } == {f"{i.__config__.section} = {k}" for i, k in impls.items()} + } + if exts != set(impls_sorted): + _write_entrypoints(impls_sorted, "mlem.config") + assert exts == impls_sorted def test_all_ext_has_pip_extra(): @@ -55,4 +87,34 @@ def test_all_ext_has_pip_extra(): for name, reqs in exts_reqs.items(): assert name in extras - assert set(reqs) == set(extras[name]) + ext_extras = extras[name] + assert set(reqs) == {re.split("[~=]", r)[0] for r in ext_extras} + + +def 
test_all_ext_registered(): + from mlem import contrib + + files = os.listdir(os.path.dirname(contrib.__file__)) + ext_sources = { + name[: -len(".py")] if name.endswith(".py") else name + for name in files + if not name.startswith("__") + } + assert set(ExtensionLoader.builtin_extensions) == { + f"mlem.contrib.{name}" for name in ext_sources + } + + +@pytest.mark.parametrize("mod", ExtensionLoader.builtin_extensions.keys()) +def test_all_ext_docstring(mod): + module = import_module(mod) + assert module.__doc__ is not None + assert get_ext_type(mod) in { + "model", + "deployment", + "data", + "serving", + "build", + "uri", + "storage", + } diff --git a/tests/test_setup.py b/tests/test_setup.py index 20b2f989..fb2787d3 100644 --- a/tests/test_setup.py +++ b/tests/test_setup.py @@ -15,7 +15,7 @@ def test_dvc_extras(): for e in importlib_metadata.metadata("dvc").get_all( "Provides-Extra" ) - if e not in {"all", "dev", "terraform", "testing", "tests"} + if e not in {"all", "dev", "terraform", "tests", "testing"} } specified_extras = { e: l for e, l in extras.items() if e[: len("dvc-")] == "dvc-" diff --git a/tests/utils/test_entrypoints.py b/tests/utils/test_entrypoints.py new file mode 100644 index 00000000..231dd72a --- /dev/null +++ b/tests/utils/test_entrypoints.py @@ -0,0 +1,39 @@ +from abc import abstractmethod + +from mlem.core.base import MlemABC +from mlem.core.objects import MlemEnv, MlemObject +from mlem.utils.entrypoints import list_implementations + + +class MockABC(MlemABC): + abs_name = "mock" + + class Config: + type_root = True + + @abstractmethod + def something(self): + pass + + +class MockImpl(MockABC): + type = "impl" + + def something(self): + pass + + +def test_list_implementations(): + assert list_implementations(MockABC) == ["impl"] + assert list_implementations("mock") == ["impl"] + + +def test_list_implementations_meta(): + assert "model" in list_implementations("meta") + assert "model" in list_implementations(MlemObject) + + assert "docker" 
in list_implementations("meta", MlemEnv) + assert "docker" in list_implementations(MlemObject, MlemEnv) + + assert "docker" in list_implementations("meta", "env") + assert "docker" in list_implementations(MlemObject, "env") diff --git a/tests/utils/test_fslock.py b/tests/utils/test_fslock.py new file mode 100644 index 00000000..3f93bec9 --- /dev/null +++ b/tests/utils/test_fslock.py @@ -0,0 +1,62 @@ +import os +import time +from threading import Thread + +from fsspec.implementations.local import LocalFileSystem + +from mlem.utils.fslock import LOCK_EXT, FSLock +from mlem.utils.path import make_posix + +NAME = "testlock" + + +# pylint: disable=protected-access +def test_fslock(tmpdir): + fs = LocalFileSystem() + lock = FSLock(fs, tmpdir, NAME) + + with lock: + assert lock._timestamp is not None + assert lock._salt is not None + lock_path = make_posix( + os.path.join( + tmpdir, f"{NAME}.{lock._timestamp}.{lock._salt}.{LOCK_EXT}" + ) + ) + assert lock.lock_path == lock_path + assert fs.exists(lock_path) + + assert lock._timestamp is None + assert lock._salt is None + assert not fs.exists(lock_path) + + +def _work(dirname, num): + time.sleep(0.3 + num / 5) + with FSLock(LocalFileSystem(), dirname, NAME, salt=num): + path = os.path.join(dirname, NAME) + if os.path.exists(path): + with open(path, "r+", encoding="utf8") as f: + data = f.read() + else: + data = "" + time.sleep(0.05) + with open(path, "w", encoding="utf8") as f: + f.write(data + f"{num}\n") + + +def test_fslock_concurrent(tmpdir): + start = 0 + end = 10 + threads = [ + Thread(target=_work, args=(tmpdir, n)) for n in range(start, end) + ] + for t in threads: + t.start() + for t in threads: + t.join() + with open(os.path.join(tmpdir, NAME), encoding="utf8") as f: + data = f.read() + + assert data.splitlines() == [str(i) for i in range(start, end)] + assert os.listdir(tmpdir) == [NAME]