From b1e8ca33df49e1a9cd8ca2340f9872f034b7a803 Mon Sep 17 00:00:00 2001 From: mike0sv Date: Tue, 20 Sep 2022 01:13:11 +0300 Subject: [PATCH 01/26] refactoring of deployments and env - move logic to deployments, remove model field --- mlem/api/commands.py | 57 ++- mlem/cli/deployment.py | 2 +- mlem/contrib/docker/base.py | 201 +++++----- mlem/contrib/heroku/meta.py | 67 ++-- mlem/contrib/kubernetes/base.py | 133 +++---- mlem/contrib/sagemaker/config.py | 12 + mlem/contrib/sagemaker/meta.py | 477 ++++++++++------------- mlem/contrib/sagemaker/runtime.py | 35 ++ mlem/contrib/sagemaker/utils.py | 79 ++++ mlem/core/objects.py | 119 ++---- setup.py | 10 +- tests/cli/conftest.py | 2 +- tests/cli/test_deployment.py | 138 +++---- tests/contrib/test_docker/test_deploy.py | 11 +- tests/core/test_objects.py | 15 +- 15 files changed, 637 insertions(+), 721 deletions(-) create mode 100644 mlem/contrib/sagemaker/config.py create mode 100644 mlem/contrib/sagemaker/utils.py diff --git a/mlem/api/commands.py b/mlem/api/commands.py index 9d230365..197fd1e3 100644 --- a/mlem/api/commands.py +++ b/mlem/api/commands.py @@ -414,7 +414,7 @@ def import_object( def deploy( deploy_meta_or_path: Union[MlemDeployment, str], - model: Union[MlemModel, str] = None, + model: Union[MlemModel, str], env: Union[MlemEnv, str] = None, project: Optional[str] = None, fs: Optional[AbstractFileSystem] = None, @@ -423,54 +423,45 @@ def deploy( env_kwargs: Dict[str, Any] = None, **deploy_kwargs, ) -> MlemDeployment: - deploy_path = None + deploy_meta: MlemDeployment update = False if isinstance(deploy_meta_or_path, str): - deploy_path = deploy_meta_or_path try: deploy_meta = load_meta( - path=deploy_path, + path=deploy_meta_or_path, project=project, fs=fs, force_type=MlemDeployment, ) update = True - except MlemObjectNotFound: - deploy_meta = None - + except MlemObjectNotFound as e: + if env is None: + raise MlemError( + "Please provide model and env args for new deployment" + ) from e + if not deploy_meta_or_path: + raise MlemError("deploy_path cannot be empty") from e + + env_meta = ensure_meta(MlemEnv, env, allow_typename=True) + if isinstance(env_meta, type): + env = None + if env_kwargs: + env = env_meta(**env_kwargs) + deploy_type = env_meta.deploy_type + deploy_meta = deploy_type( + env=env, + **deploy_kwargs, + ) + deploy_meta.dump(deploy_meta_or_path, fs, project, index, external) else: deploy_meta = deploy_meta_or_path update = True - if deploy_meta is None: - if model is None or env is None: - raise MlemError( - "Please provide model and env args for new deployment" - ) - if not deploy_path: - raise MlemError("deploy_path cannot be empty") - model_meta = get_model_meta(model) - env_meta = ensure_meta(MlemEnv, env, allow_typename=True) - if isinstance(env_meta, type): - env = None - if env_kwargs: - env = env_meta(**env_kwargs) - deploy_type = env_meta.deploy_type - deploy_meta = deploy_type( - model_cache=model_meta, - model=model_meta.make_link(), - env=env, - **deploy_kwargs, - ) - deploy_meta.dump(deploy_path, fs, project, index, external) - else: - if model is not None: - deploy_meta.replace_model(get_model_meta(model, load_value=False)) if update: pass # todo update from deploy_args and env_args # ensuring links are working deploy_meta.get_env() - deploy_meta.get_model() + model_meta = get_model_meta(model) - deploy_meta.run() + deploy_meta.deploy(model_meta) return deploy_meta diff --git a/mlem/cli/deployment.py b/mlem/cli/deployment.py index 1dc901a5..06308142 100644 --- a/mlem/cli/deployment.py +++ 
b/mlem/cli/deployment.py @@ -40,7 +40,7 @@ def deploy_run( ..., help="Path to deployment meta (will be created if it does not exist)", ), - model: Optional[str] = Option(None, "-m", "--model", help="Path to model"), + model: str = Option(..., "-m", "--model", help="Path to model"), env: Optional[str] = Option( None, "-t", "--env", help="Path to target environment" ), diff --git a/mlem/contrib/docker/base.py b/mlem/contrib/docker/base.py index 0067b687..78c74173 100644 --- a/mlem/contrib/docker/base.py +++ b/mlem/contrib/docker/base.py @@ -42,8 +42,12 @@ CONTAINER_STATUS_MAPPING = { + "created": DeployStatus.NOT_DEPLOYED, "running": DeployStatus.RUNNING, + "restarting": DeployStatus.STARTING, + "paused": DeployStatus.STOPPED, "exited": DeployStatus.CRASHED, + "dead": DeployStatus.CRASHED, } @@ -275,6 +279,24 @@ def delete(self, client: docker.DockerClient, force=False, **kwargs): self.registry.delete_image(client, self, force, **kwargs) +class DockerEnv(MlemEnv): + """MlemEnv implementation for docker environment""" + + type: ClassVar = "docker" + registry: DockerRegistry = DockerRegistry() + """Default registry to push images to""" + daemon: DockerDaemon = DockerDaemon(host="") + """Docker daemon parameters""" + + def delete_image(self, image: DockerImage, force: bool = False, **kwargs): + with self.daemon.client() as client: + return image.delete(client, force=force, **kwargs) + + def image_exists(self, image: DockerImage): + with self.daemon.client() as client: + return image.exists(client) + + class DockerContainerState(DeployState): """State of docker container deployment""" @@ -299,11 +321,14 @@ def generate_docker_container_name(): return f"mlem-deploy-{int(time.time())}" -class DockerContainer(MlemDeployment, _DockerBuildMixin): +class DockerContainer( + MlemDeployment[DockerContainerState, DockerEnv], _DockerBuildMixin +): """MlemDeployment implementation for docker containers""" type: ClassVar = "docker_container" state_type: ClassVar = DockerContainerState + env_type: ClassVar = DockerEnv container_name: Optional[str] = None """Name to use for container""" @@ -323,57 +348,103 @@ def ensure_image_name(self): def _get_client(self, state: DockerContainerState): raise NotImplementedError + def deploy(self, model: MlemModel): + # self.check_type(meta) + redeploy = False + with self.lock_state(): + state = self.get_state() + if state.image is None or self.model_changed(model): + from .helpers import build_model_image -class DockerEnv(MlemEnv[DockerContainer]): - """MlemEnv implementation for docker environment""" + image_name = ( + self.image_name + or self.container_name + or generate_docker_container_name() + ) + echo(EMOJI_BUILD + f"Creating docker image {image_name}") + with set_offset(2): + state.image = build_model_image( + model, + image_name, + self.server + or project_config( + self.loc.project if self.is_saved else None + ).server, + self.get_env(), + force_overwrite=True, + **self.args.dict(), + ) + state.update_model_hash(model) + self.update_state(state) + redeploy = True + if state.container_id is None or redeploy: + self.run_container(state) - type: ClassVar = "docker" - deploy_type: ClassVar = DockerContainer - registry: DockerRegistry = DockerRegistry() - """Default registry to push images to""" - daemon: DockerDaemon = DockerDaemon(host="") - """Docker daemon parameters""" + echo(EMOJI_OK + f"Container {state.container_name} is up") - def delete_image(self, image: DockerImage, force: bool = False, **kwargs): - with self.daemon.client() as client: - return 
image.delete(client, force=force, **kwargs) + def remove(self): + # self.check_type(meta) + with self.lock_state(): + state = self.get_state() + if state.container_id is None: + raise DeploymentError( + f"Container {self.container_name} is not deployed" + ) - def image_exists(self, image: DockerImage): - with self.daemon.client() as client: - return image.exists(client) + with self.get_env().daemon.client() as client: + try: + container = client.containers.get(state.container_id) + container.stop() + container.remove() + except docker.errors.NotFound: + pass + state.container_id = None + self.update_state(state) + + def get_status(self, raise_on_error=True) -> DeployStatus: + # self.check_type(meta) + state = self.get_state() + if state.container_id is None: + return DeployStatus.NOT_DEPLOYED + + with self.get_env().daemon.client() as client: + try: + status = container_status(client, state.container_id) + return CONTAINER_STATUS_MAPPING[status] + except NotFound: + return DeployStatus.UNKNOWN def run_container( self, - meta: DockerContainer, state: Optional[DockerContainerState] = None, ): - state = state or meta.get_state() + state = state or self.get_state() if state.image is None: raise DeploymentError( - f"Image {meta.ensure_image_name} is not built" + f"Image {self.ensure_image_name} is not built" ) - with self.daemon.client() as client: + with self.get_env().daemon.client() as client: state.image.registry.login(client) try: # always detach from container and just stream logs if detach=False - name = meta.container_name or generate_docker_container_name() + name = self.container_name or generate_docker_container_name() container = client.containers.run( state.image.uri, name=name, - auto_remove=meta.rm, - ports=meta.port_mapping, + auto_remove=self.rm, + ports=self.port_mapping, detach=True, - **meta.params, + **self.params, ) state.container_id = container.id state.container_name = name - meta.update_state(state) + self.update_state(state) sleep(0.5) if not container_is_running(client, name): - if not meta.rm: - for log in self.logs(meta, stdout=False, stderr=True): + if not self.rm: + for log in self.logs(stdout=False, stderr=True): raise DeploymentError( "The container died unexpectedly.", log ) @@ -388,86 +459,16 @@ def run_container( "Docker container raised an error: " + e.stderr.decode() ) from e - def logs( - self, meta: DockerContainer, **kwargs - ) -> Generator[str, None, None]: - state = meta.get_state() + def logs(self, **kwargs) -> Generator[str, None, None]: + state = self.get_state() if state.container_id is None: raise DeploymentError( - f"Container {meta.container_name} is not deployed" + f"Container {self.container_name} is not deployed" ) - with self.daemon.client() as client: + with self.get_env().daemon.client() as client: container = client.containers.get(state.container_id) yield from container_logs(container, **kwargs) - def deploy(self, meta: DockerContainer): - self.check_type(meta) - redeploy = False - with meta.lock_state(): - state = meta.get_state() - if state.image is None or meta.model_changed(): - from .helpers import build_model_image - - image_name = ( - meta.image_name - or meta.container_name - or generate_docker_container_name() - ) - echo(EMOJI_BUILD + f"Creating docker image {image_name}") - with set_offset(2): - state.image = build_model_image( - meta.get_model(), - image_name, - meta.server - or project_config( - meta.loc.project if meta.is_saved else None - ).server, - self, - force_overwrite=True, - **meta.args.dict(), - ) - 
meta.update_model_hash(state=state) - meta.update_state(state) - redeploy = True - if state.container_id is None or redeploy: - self.run_container(meta, state) - - echo(EMOJI_OK + f"Container {state.container_name} is up") - - def remove(self, meta: DockerContainer): - self.check_type(meta) - with meta.lock_state(): - state = meta.get_state() - if state.container_id is None: - raise DeploymentError( - f"Container {meta.container_name} is not deployed" - ) - - with self.daemon.client() as client: - try: - container = client.containers.get(state.container_id) - container.stop() - container.remove() - except docker.errors.NotFound: - pass - state.container_id = None - meta.update_state(state) - - def get_status( - self, meta: DockerContainer, raise_on_error=True - ) -> DeployStatus: - self.check_type(meta) - state = meta.get_state() - if state.container_id is None: - return DeployStatus.NOT_DEPLOYED - - with self.daemon.client() as client: - try: - status = container_status(client, state.container_id) - return CONTAINER_STATUS_MAPPING[status] - except NotFound: - return DeployStatus.UNKNOWN - class DockerDirBuilder(MlemBuilder, _DockerBuildMixin): """Create a directory with docker context to build docker image""" diff --git a/mlem/contrib/heroku/meta.py b/mlem/contrib/heroku/meta.py index 72f36282..cac69c8b 100644 --- a/mlem/contrib/heroku/meta.py +++ b/mlem/contrib/heroku/meta.py @@ -8,6 +8,7 @@ DeployStatus, MlemDeployment, MlemEnv, + MlemModel, ) from mlem.runtime.client import Client, HTTPClient @@ -35,6 +36,14 @@ class HerokuAppMeta(BaseModel): """Additional metadata""" +class HerokuEnv(MlemEnv): + """Heroku Account""" + + type: ClassVar = "heroku" + api_key: Optional[str] = None + """HEROKU_API_KEY - advised to set via env variable or `heroku login`""" + + class HerokuState(DeployState): """State of heroku deployment""" @@ -53,11 +62,12 @@ def ensured_app(self) -> HerokuAppMeta: return self.app -class HerokuDeployment(MlemDeployment): +class HerokuDeployment(MlemDeployment[HerokuState, HerokuEnv]): """Heroku App""" type: ClassVar = "heroku" state_type: ClassVar = HerokuState + env_type: ClassVar = HerokuEnv app_name: str """Heroku application name""" @@ -73,66 +83,55 @@ def _get_client(self, state: HerokuState) -> Client: host=urlparse(state.ensured_app.web_url).netloc, port=80 ) - -class HerokuEnv(MlemEnv[HerokuDeployment]): - """Heroku Account""" - - type: ClassVar = "heroku" - deploy_type: ClassVar = HerokuDeployment - api_key: Optional[str] = None - """HEROKU_API_KEY - advised to set via env variable or `heroku login`""" - - def deploy(self, meta: HerokuDeployment): + def deploy(self, model: MlemModel): from .utils import create_app, release_docker_app - self.check_type(meta) - with meta.lock_state(): - state: HerokuState = meta.get_state() + with self.lock_state(): + state: HerokuState = self.get_state() if state.app is None: - state.app = create_app(meta, api_key=self.api_key) - meta.update_state(state) + state.app = create_app(self, api_key=self.get_env().api_key) + self.update_state(state) redeploy = False - if state.image is None or meta.model_changed(): + if state.image is None or self.model_changed(model): state.image = build_heroku_docker( - meta.get_model(), state.app.name, api_key=self.api_key + model, state.app.name, api_key=self.get_env().api_key ) - meta.update_model_hash(state=state) + state.update_model_hash(model) + self.update_state(state) redeploy = True if state.release_state is None or redeploy: state.release_state = release_docker_app( state.app.name, 
state.image.image_id, - api_key=self.api_key, + api_key=self.get_env().api_key, ) - meta.update_state(state) + self.update_state(state) echo( EMOJI_OK - + f"Service {meta.app_name} is up. You can check it out at {state.app.web_url}" + + f"Service {self.app_name} is up. You can check it out at {state.app.web_url}" ) - def remove(self, meta: HerokuDeployment): + def remove(self): from .utils import delete_app - self.check_type(meta) - with meta.lock_state(): - state: HerokuState = meta.get_state() + with self.lock_state(): + state: HerokuState = self.get_state() if state.app is not None: - delete_app(state.ensured_app.name, self.api_key) - meta.purge_state() + delete_app(state.ensured_app.name, self.get_env().api_key) + self.purge_state() - def get_status( - self, meta: "HerokuDeployment", raise_on_error=True - ) -> DeployStatus: + def get_status(self, raise_on_error=True) -> DeployStatus: from .utils import list_dynos - self.check_type(meta) - state: HerokuState = meta.get_state() + state: HerokuState = self.get_state() if state.app is None: return DeployStatus.NOT_DEPLOYED - dynos = list_dynos(state.ensured_app.name, "web", self.api_key) + dynos = list_dynos( + state.ensured_app.name, "web", self.get_env().api_key + ) if not dynos: if raise_on_error: raise DeploymentError( diff --git a/mlem/contrib/kubernetes/base.py b/mlem/contrib/kubernetes/base.py index af5c3279..3b7da29d 100644 --- a/mlem/contrib/kubernetes/base.py +++ b/mlem/contrib/kubernetes/base.py @@ -8,7 +8,6 @@ from mlem.core.objects import ( DeployState, DeployStatus, - MlemBuilder, MlemDeployment, MlemEnv, MlemModel, @@ -36,6 +35,18 @@ } +class K8sEnv(MlemEnv): + """MlemEnv implementation for Kubernetes Environments""" + + type: ClassVar = "kubernetes" + """Type of deployment being used for the Kubernetes environment""" + + registry: Optional[DockerRegistry] = None + """Docker registry""" + templates_dir: List[str] = [] + """List of dirs where templates reside""" + + class K8sDeploymentState(DeployState): """DeployState implementation for Kubernetes deployments""" @@ -47,12 +58,15 @@ class K8sDeploymentState(DeployState): """Name of Deployment""" -class K8sDeployment(MlemDeployment, K8sYamlBuildArgs): +class K8sDeployment( + MlemDeployment[K8sDeploymentState, K8sEnv], K8sYamlBuildArgs +): """MlemDeployment implementation for Kubernetes deployments""" type: ClassVar = "kubernetes" state_type: ClassVar = K8sDeploymentState """Type of state for Kubernetes deployments""" + env_type: ClassVar = K8sEnv server: Optional[Server] = None """Type of Server to use, with options such as FastAPI, RabbitMQ etc.""" @@ -72,7 +86,6 @@ def load_kube_config(self): ) def _get_client(self, state: K8sDeploymentState) -> Client: - host, port = None, None self.load_kube_config() service = client.CoreV1Api().list_namespaced_service(self.namespace) try: @@ -89,131 +102,101 @@ def _get_client(self, state: K8sDeploymentState) -> Client: f"host and port determined are not valid, received host as {host} and port as {port}" ) - -class K8sEnv(MlemEnv[K8sDeployment]): - """MlemEnv implementation for Kubernetes Environments""" - - type: ClassVar = "kubernetes" - deploy_type: ClassVar = K8sDeployment - """Type of deployment being used for the Kubernetes environment""" - - registry: Optional[DockerRegistry] = None - """Docker registry""" - templates_dir: List[str] = [] - """List of dirs where templates reside""" - - def get_registry(self, meta: K8sDeployment): - registry = meta.registry or self.registry + def get_registry(self): + registry = self.registry or 
self.get_env().registry if not registry: raise MlemError( "registry to be used by Docker is not set or supplied" ) return registry - def get_image_name(self, meta: K8sDeployment): - return meta.image_name or generate_docker_container_name() + def get_image_name(self): + return self.image_name or generate_docker_container_name() - def get_server(self, meta: K8sDeployment): + def get_server(self): return ( - meta.server + self.server or project_config( - meta.loc.project if meta.is_saved else None + self.loc.project if self.is_saved else None ).server ) - def deploy(self, meta: K8sDeployment): - self.check_type(meta) + def deploy(self, model: MlemModel): redeploy = False - with meta.lock_state(): - meta.load_kube_config() - state: K8sDeploymentState = meta.get_state() - if state.image is None or meta.model_changed(): - image_name = self.get_image_name(meta) + with self.lock_state(): + self.load_kube_config() + state: K8sDeploymentState = self.get_state() + if state.image is None or self.model_changed(model): + image_name = self.get_image_name() state.image = build_k8s_docker( - meta=meta.get_model(), + meta=model, image_name=image_name, - registry=self.get_registry(meta), - daemon=meta.daemon, - server=self.get_server(meta), + registry=self.get_registry(), + daemon=self.daemon, + server=self.get_server(), ) - meta.update_model_hash(state=state) + state.update_model_hash(model) redeploy = True if ( state.deployment_name is None or redeploy ) and state.image is not None: generator = K8sYamlGenerator( - namespace=meta.namespace, + namespace=self.namespace, image_name=state.image.name, image_uri=state.image.uri, - image_pull_policy=meta.image_pull_policy, - port=meta.port, - service_type=meta.service_type, - templates_dir=meta.templates_dir or self.templates_dir, + image_pull_policy=self.image_pull_policy, + port=self.port, + service_type=self.service_type, + templates_dir=self.templates_dir + or self.get_env().templates_dir, ) create_k8s_resources(generator) - if pod_is_running(namespace=meta.namespace): + if pod_is_running(namespace=self.namespace): deployments_list = ( client.AppsV1Api().list_namespaced_deployment( - namespace=meta.namespace + namespace=self.namespace ) ) if len(deployments_list.items) == 0: raise DeploymentError( - f"Deployment {image_name} couldn't be found in {meta.namespace} namespace" + f"Deployment {image_name} couldn't be found in {self.namespace} namespace" ) dpl_name = deployments_list.items[0].metadata.name state.deployment_name = dpl_name - meta.update_state(state) + self.update_state(state) echo( EMOJI_OK - + f"Deployment {state.deployment_name} is up in {meta.namespace} namespace" + + f"Deployment {state.deployment_name} is up in {self.namespace} namespace" ) else: raise DeploymentError( f"Deployment {image_name} couldn't be set-up on the Kubernetes cluster" ) - def remove(self, meta: K8sDeployment): - self.check_type(meta) - with meta.lock_state(): - meta.load_kube_config() - state: K8sDeploymentState = meta.get_state() + def remove(self): + with self.lock_state(): + self.load_kube_config() + state: K8sDeploymentState = self.get_state() if state.deployment_name is not None: - client.CoreV1Api().delete_namespace(name=meta.namespace) - if namespace_deleted(meta.namespace): + client.CoreV1Api().delete_namespace(name=self.namespace) + if namespace_deleted(self.namespace): echo( EMOJI_OK - + f"Deployment {state.deployment_name} and the corresponding service are removed from {meta.namespace} namespace" + + f"Deployment {state.deployment_name} and the corresponding 
service are removed from {self.namespace} namespace" ) state.deployment_name = None - meta.update_state(state) - - def get_status( - self, meta: K8sDeployment, raise_on_error=True - ) -> DeployStatus: - self.check_type(meta) - meta.load_kube_config() - state: K8sDeploymentState = meta.get_state() + self.update_state(state) + + def get_status(self, raise_on_error=True) -> DeployStatus: + self.load_kube_config() + state: K8sDeploymentState = self.get_state() if state.deployment_name is None: return DeployStatus.NOT_DEPLOYED - pods_list = client.CoreV1Api().list_namespaced_pod(meta.namespace) + pods_list = client.CoreV1Api().list_namespaced_pod(self.namespace) return POD_STATE_MAPPING[pods_list.items[0].status.phase] - - -class K8sYamlBuilder(MlemBuilder, K8sYamlGenerator): - """MlemBuilder implementation for building Kubernetes manifests/yamls""" - - type: ClassVar = "kubernetes" - - target: str - """Target path for the manifest/yaml""" - - def build(self, obj: MlemModel): - self.write(self.target) - echo(EMOJI_OK + f"{self.target} generated for {obj.basename}") diff --git a/mlem/contrib/sagemaker/config.py b/mlem/contrib/sagemaker/config.py new file mode 100644 index 00000000..cb5c9195 --- /dev/null +++ b/mlem/contrib/sagemaker/config.py @@ -0,0 +1,12 @@ +from typing import Optional + +from mlem.config import MlemConfigBase + + +class AWSConfig(MlemConfigBase): + ROLE: Optional[str] + PROFILE: Optional[str] + + class Config: + section = "aws" + env_prefix = "AWS_" diff --git a/mlem/contrib/sagemaker/meta.py b/mlem/contrib/sagemaker/meta.py index 385cb1bd..89012c59 100644 --- a/mlem/contrib/sagemaker/meta.py +++ b/mlem/contrib/sagemaker/meta.py @@ -1,22 +1,27 @@ -import os import posixpath -import tarfile -import tempfile -from typing import ClassVar, Optional, Tuple +from functools import wraps +from typing import Any, ClassVar, Optional, Tuple -import boto3 import sagemaker from pydantic import validator from sagemaker.deserializers import JSONDeserializer from sagemaker.serializers import JSONSerializer +from typing_extensions import Protocol -from mlem.config import MlemConfigBase, project_config from mlem.contrib.docker.base import DockerDaemon, DockerImage from mlem.contrib.sagemaker.build import ( AWSVars, ECRegistry, build_sagemaker_docker, ) +from mlem.contrib.sagemaker.runtime import SagemakerClient +from mlem.contrib.sagemaker.utils import ( + MODEL_TAR_FILENAME, + _create_model_arch_and_upload_to_s3, + delete_model_file_from_s3, + generate_model_file_name, + init_aws_vars, +) from mlem.core.errors import WrongMethodError from mlem.core.model import Signature from mlem.core.objects import ( @@ -26,58 +31,21 @@ MlemEnv, MlemModel, ) -from mlem.runtime.client import Client -from mlem.runtime.interface import InterfaceDescriptor from mlem.ui import EMOJI_BUILD, EMOJI_UPLOAD, echo -MODEL_TAR_FILENAME = "model.tar.gz" DEFAULT_ECR_REPOSITORY = "mlem" -class AWSConfig(MlemConfigBase): - ROLE: Optional[str] - PROFILE: Optional[str] - - class Config: - section = "aws" - env_prefix = "AWS_" - - -def generate_model_file_name(deploy_id): - return f"mlem-model-{deploy_id}" - - -def generate_image_name(deploy_id): - return f"mlem-sagemaker-image-{deploy_id}" - - -class SagemakerClient(Client): - """Client to make SageMaker requests""" - - type: ClassVar = "sagemaker" - - endpoint_name: str - """Name of SageMaker Endpoint""" - aws_vars: AWSVars - """AWS Configuration""" - signature: Signature - """Signature of deployed method""" - - def _interface_factory(self) -> InterfaceDescriptor: - return 
InterfaceDescriptor(methods={"predict": self.signature}) - - def get_predictor(self): - sess = self.aws_vars.get_sagemaker_session() - predictor = sagemaker.Predictor( - endpoint_name=self.endpoint_name, - sagemaker_session=sess, - serializer=JSONSerializer(), - deserializer=JSONDeserializer(), - ) - return predictor - - def _call_method(self, name, args): - return self.get_predictor().predict(args) +ENDPOINT_STATUS_MAPPING = { + "Creating": DeployStatus.STARTING, + "Failed": DeployStatus.CRASHED, + "InService": DeployStatus.RUNNING, + "OutOfService": DeployStatus.STOPPED, + "Updating": DeployStatus.STARTING, + "SystemUpdating": DeployStatus.STARTING, + "RollingBack": DeployStatus.STARTING, + "Deleting": DeployStatus.STOPPED, +} class SagemakerDeployState(DeployState): @@ -122,12 +90,67 @@ def get_predictor(self, session: sagemaker.Session): return predictor -class SagemakerDeployment(MlemDeployment): +class SagemakerEnv(MlemEnv): + """SageMaker environment""" + + type: ClassVar = "sagemaker" + # deploy_type: ClassVar = SagemakerDeployment + + role: Optional[str] = None + """Default role""" + account: Optional[str] = None + """Default account""" + region: Optional[str] = None + """Default region""" + bucket: Optional[str] = None + """Default bucket""" + profile: Optional[str] = None + """Default profile""" + ecr_repository: Optional[str] = None + """Default ECR repository""" + + @property + def role_name(self): + return f"arn:aws:iam::{self.account}:role/{self.role}" + + def get_session(self, region: str = None) -> sagemaker.Session: + return self.get_session_and_aws_vars(region)[0] + + def get_session_and_aws_vars( + self, region: str = None + ) -> Tuple[sagemaker.Session, AWSVars]: + return init_aws_vars( + self.profile, + self.role, + self.bucket, + region or self.region, + self.account, + ) + + +class DeploymentStepMethod(Protocol): + def __call__(self, state: DeployState, *args, **kwargs) -> Any: + ... 
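
For readers skimming this hunk: the `DeploymentStepMethod` protocol above and the `updates_state` decorator defined just below introduce a small persist-after-step pattern. Each private deployment step takes the mutable `SagemakerDeployState` as its first argument, and the decorator calls `self.update_state(state)` as soon as the step returns, replacing the `meta.update_state(state)` calls that were previously repeated at the end of every step in `SagemakerEnv`. A minimal sketch of a step written against this pattern (the step name and argument are hypothetical, not part of the patch):

    from mlem.contrib.sagemaker.meta import (
        SagemakerDeployment,
        SagemakerDeployState,
        updates_state,
    )

    class ExampleDeployment(SagemakerDeployment):
        @updates_state
        def _retag_image(self, state: SagemakerDeployState, tag: str) -> None:
            # Mutate the state in place; the decorator persists it by calling
            # self.update_state(state) right after this method returns, so no
            # explicit persistence call is needed inside the step.
            state.image_tag = tag
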
+ + +def updates_state(f) -> DeploymentStepMethod: + @wraps(f) + def inner( + self: MlemDeployment, state: SagemakerDeployState, *args, **kwargs + ): + res = f(self, state, *args, **kwargs) + self.update_state(state) + return res + + return inner # type: ignore[return-value] + + +class SagemakerDeployment(MlemDeployment[SagemakerDeployState, SagemakerEnv]): """SageMaker Deployment""" type: ClassVar = "sagemaker" state_type: ClassVar = SagemakerDeployState - + env_type: ClassVar = SagemakerEnv method: str = "predict" """Model method to be deployed""" image_tag: Optional[str] = None @@ -166,108 +189,33 @@ def _get_client(self, state: "SagemakerDeployState"): signature=state.method_signature, ) - -ENDPOINT_STATUS_MAPPING = { - "Creating": DeployStatus.STARTING, - "Failed": DeployStatus.CRASHED, - "InService": DeployStatus.RUNNING, - "OutOfService": DeployStatus.STOPPED, - "Updating": DeployStatus.STARTING, - "SystemUpdating": DeployStatus.STARTING, - "RollingBack": DeployStatus.STARTING, - "Deleting": DeployStatus.STOPPED, -} - - -class SagemakerEnv(MlemEnv): - """SageMaker environment""" - - type: ClassVar = "sagemaker" - deploy_type: ClassVar = SagemakerDeployment - - role: Optional[str] = None - """Default role""" - account: Optional[str] = None - """Default account""" - region: Optional[str] = None - """Default region""" - bucket: Optional[str] = None - """Default bucket""" - profile: Optional[str] = None - """Default profile""" - ecr_repository: Optional[str] = None - """Default ECR repository""" - - @property - def role_name(self): - return f"arn:aws:iam::{self.account}:role/{self.role}" - - @staticmethod - def _create_and_upload_model_arch( - session: sagemaker.Session, + @updates_state + def _upload_model_file( + self, + state: SagemakerDeployState, model: MlemModel, - bucket: str, - model_arch_location: str, - ) -> str: - with tempfile.TemporaryDirectory() as dirname: - model.clone(os.path.join(dirname, "model", "model")) - arch_path = os.path.join(dirname, "arch", MODEL_TAR_FILENAME) - os.makedirs(os.path.dirname(arch_path)) - with tarfile.open(arch_path, "w:gz") as tar: - path = os.path.join(dirname, "model") - for file in os.listdir(path): - tar.add(os.path.join(path, file), arcname=file) - - model_location = session.upload_data( - os.path.dirname(arch_path), - bucket=bucket, - key_prefix=posixpath.join( - model_arch_location, model.meta_hash() - ), - ) - - return model_location - - @staticmethod - def _delete_model_file(session: sagemaker.Session, model_path: str): - s3_client = session.boto_session.client("s3") - if model_path.startswith("s3://"): - model_path = model_path[len("s3://") :] - bucket, *paths = model_path.split("/") - model_path = posixpath.join(*paths, MODEL_TAR_FILENAME) - s3_client.delete_object(Bucket=bucket, Key=model_path) - - def deploy(self, meta: SagemakerDeployment): - with meta.lock_state(): - state: SagemakerDeployState = meta.get_state() - redeploy = meta.model_changed() - state.previous = state.previous or SagemakerDeployState() - - session, aws_vars = self.get_session_and_aws_vars(state.region) - if state.region is None: - state.region = aws_vars.region - meta.update_state(state) - - if not meta.use_prebuilt and (state.image_tag is None or redeploy): - self._build_image(meta, state, aws_vars) - - if state.model_location is None or redeploy: - self._upload_model(meta, state, aws_vars, session) - - if ( - state.endpoint_name is None - or redeploy - or state.endpoint_model_hash is not None - and state.endpoint_model_hash != state.model_hash - ): - if 
state.endpoint_name is None: - self._deploy_model(meta, state, aws_vars, session) - else: - self._update_model(meta, state, aws_vars, session) + aws_vars: AWSVars, + session: sagemaker.Session, + ): + assert state.previous is not None # TODO + echo( + EMOJI_UPLOAD + + f"Uploading model distribution to {aws_vars.bucket}..." + ) + if state.model_location is not None: + state.previous.model_location = state.model_location + state.model_location = _create_model_arch_and_upload_to_s3( + session, + model, + aws_vars.bucket, + self.model_arch_location + or generate_model_file_name(model.meta_hash()), + ) + state.update_model_hash(model) + @updates_state def _update_model( self, - meta: SagemakerDeployment, state: SagemakerDeployState, aws_vars: AWSVars, session: sagemaker.Session, @@ -278,13 +226,13 @@ def _update_model( model_data=posixpath.join( state.model_location, MODEL_TAR_FILENAME ), - name=meta.model_name, + name=self.model_name, role=aws_vars.role, sagemaker_session=session, ) sm_model.create( - instance_type=meta.instance_type, - accelerator_type=meta.accelerator_type, + instance_type=self.instance_type, + accelerator_type=self.accelerator_type, ) prev_endpoint_conf = session.sagemaker_client.describe_endpoint( EndpointName=state.endpoint_name @@ -296,36 +244,57 @@ def _update_model( predictor = state.get_predictor(session) predictor.update_endpoint( model_name=sm_model.name, - initial_instance_count=meta.initial_instance_count, - instance_type=meta.instance_type, - accelerator_type=meta.accelerator_type, + initial_instance_count=self.initial_instance_count, + instance_type=self.instance_type, + accelerator_type=self.accelerator_type, wait=True, ) session.sagemaker_client.delete_model(ModelName=prev_model_name) prev = state.previous if prev is not None: if prev.image is not None: - self._delete_image(meta, prev, aws_vars) + self._delete_image(prev, aws_vars) if prev.model_location is not None: - self._delete_model_file(session, prev.model_location) + delete_model_file_from_s3(session, prev.model_location) prev.model_location = None session.sagemaker_client.delete_endpoint_config( EndpointConfigName=prev_endpoint_conf ) state.endpoint_model_hash = state.model_hash - meta.update_state(state) - def _delete_image(self, meta, state, aws_vars): - with DockerDaemon(host="").client() as client: - if isinstance(state.image.registry, ECRegistry): - state.image.registry.with_aws_vars(aws_vars) - state.image.delete(client) - state.image = None - meta.update_state(state) + @updates_state + def _build_image( + self, + state: SagemakerDeployState, + model: MlemModel, + aws_vars: AWSVars, + ecr_repository: str, + ): + assert state.previous is not None # TODO + try: + state.method_signature = model.model_type.methods[self.method] + except KeyError as e: + raise WrongMethodError( + f"Wrong method {self.method} for model {model.name}" + ) from e + image_tag = self.image_tag or model.meta_hash() + if state.image_tag is not None: + state.previous.image_tag = state.image_tag + state.previous.image = state.image + state.image = build_sagemaker_docker( + model, + self.method, + aws_vars.account, + aws_vars.region, + image_tag, + ecr_repository or DEFAULT_ECR_REPOSITORY, + aws_vars, + ) + state.image_tag = image_tag + @updates_state def _deploy_model( self, - meta: SagemakerDeployment, state: SagemakerDeployState, aws_vars: AWSVars, session: sagemaker.Session, @@ -336,85 +305,73 @@ def _deploy_model( model_data=posixpath.join( state.model_location, MODEL_TAR_FILENAME ), - name=meta.model_name, + 
name=self.model_name, role=aws_vars.role, sagemaker_session=session, ) echo( EMOJI_BUILD - + f"Starting up sagemaker {meta.initial_instance_count} `{meta.instance_type}` instance(s)..." + + f"Starting up sagemaker {self.initial_instance_count} `{self.instance_type}` instance(s)..." ) sm_model.deploy( - initial_instance_count=meta.initial_instance_count, - instance_type=meta.instance_type, - accelerator_type=meta.accelerator_type, - endpoint_name=meta.endpoint_name, + initial_instance_count=self.initial_instance_count, + instance_type=self.instance_type, + accelerator_type=self.accelerator_type, + endpoint_name=self.endpoint_name, wait=False, ) state.endpoint_name = sm_model.endpoint_name state.endpoint_model_hash = state.model_hash - meta.update_state(state) - def _upload_model( - self, - meta: SagemakerDeployment, - state: SagemakerDeployState, - aws_vars: AWSVars, - session: sagemaker.Session, - ): - assert state.previous is not None # TODO - echo( - EMOJI_UPLOAD - + f"Uploading model distribution to {aws_vars.bucket}..." - ) - if state.model_location is not None: - state.previous.model_location = state.model_location - state.model_location = self._create_and_upload_model_arch( - session, - meta.get_model(), - aws_vars.bucket, - meta.model_arch_location - or generate_model_file_name(meta.get_model().meta_hash()), - ) - meta.update_model_hash(state=state) - meta.update_state(state) + def deploy(self, model: MlemModel): + with self.lock_state(): + state: SagemakerDeployState = self.get_state() + redeploy = self.model_changed(model) + state.previous = state.previous or SagemakerDeployState() - def _build_image( - self, - meta: SagemakerDeployment, - state: SagemakerDeployState, - aws_vars: AWSVars, - ): - assert state.previous is not None # TODO - model = meta.get_model() - try: - state.method_signature = model.model_type.methods[meta.method] - except KeyError as e: - raise WrongMethodError( - f"Wrong method {meta.method} for model {model.name}" - ) from e - image_tag = meta.image_tag or model.meta_hash() - if state.image_tag is not None: - state.previous.image_tag = state.image_tag - state.previous.image = state.image - state.image = build_sagemaker_docker( - model, - meta.method, - aws_vars.account, - aws_vars.region, - image_tag, - self.ecr_repository or DEFAULT_ECR_REPOSITORY, - aws_vars, - ) - state.image_tag = image_tag - meta.update_state(state) + session, aws_vars = self.get_env().get_session_and_aws_vars( + state.region + ) + if state.region is None: + state.region = aws_vars.region + self.update_state(state) + + if not self.use_prebuilt and (state.image_tag is None or redeploy): + self._build_image( + state, model, aws_vars, self.get_env().ecr_repository + ) + + if state.model_location is None or redeploy: + self._upload_model_file(state, model, aws_vars, session) + + if ( + state.endpoint_name is None + or redeploy + or state.endpoint_model_hash is not None + and state.endpoint_model_hash != state.model_hash + ): + if state.endpoint_name is None: + self._deploy_model(state, aws_vars, session) + else: + self._update_model(state, aws_vars, session) - def remove(self, meta: SagemakerDeployment): - with meta.lock_state(): - state: SagemakerDeployState = meta.get_state() - session, aws_vars = self.get_session_and_aws_vars(state.region) + @updates_state + def _delete_image(self, state: SagemakerDeployState, aws_vars: AWSVars): + assert state.image is not None # TODO + with DockerDaemon(host="").client() as client: + if isinstance(state.image.registry, ECRegistry): + 
state.image.registry.with_aws_vars(aws_vars) + state.image.delete(client) + state.image = None + + def remove(self): + with self.lock_state(): + state: SagemakerDeployState = self.get_state() + session, aws_vars = self.get_env().get_session_and_aws_vars( + state.region + ) if state.model_location is not None: - self._delete_model_file(session, state.model_location) + delete_model_file_from_s3(session, state.model_location) if state.endpoint_name is not None: client = session.sagemaker_client @@ -429,56 +386,16 @@ def remove(self, meta: SagemakerDeployment): client.delete_endpoint(EndpointName=state.endpoint_name) client.delete_endpoint_config(EndpointConfigName=endpoint_conf) if state.image is not None: - self._delete_image(meta, state, aws_vars) - meta.purge_state() + self._delete_image(state, aws_vars) + self.purge_state() - def get_status( - self, meta: SagemakerDeployment, raise_on_error=True - ) -> "DeployStatus": - with meta.lock_state(): - state: SagemakerDeployState = meta.get_state() - session = self.get_session(state.region) + def get_status(self, raise_on_error=True) -> "DeployStatus": + with self.lock_state(): + state: SagemakerDeployState = self.get_state() + session = self.get_env().get_session(state.region) endpoint = session.sagemaker_client.describe_endpoint( EndpointName=state.endpoint_name ) status = endpoint["EndpointStatus"] return ENDPOINT_STATUS_MAPPING.get(status, DeployStatus.UNKNOWN) - - def get_session(self, region: str = None) -> sagemaker.Session: - return self.get_session_and_aws_vars(region)[0] - - def get_session_and_aws_vars( - self, region: str = None - ) -> Tuple[sagemaker.Session, AWSVars]: - return init_aws_vars( - self.profile, - self.role, - self.bucket, - region or self.region, - self.account, - ) - - -def init_aws_vars( - profile=None, role=None, bucket=None, region=None, account=None -): - boto_session = boto3.Session(profile_name=profile, region_name=region) - sess = sagemaker.Session(boto_session, default_bucket=bucket) - - bucket = ( - bucket or sess.default_bucket() - ) # Replace with your own bucket name if needed - region = region or boto_session.region_name - config = project_config(project="", section=AWSConfig) - role = role or config.ROLE or sagemaker.get_execution_role(sess) - account = account or boto_session.client("sts").get_caller_identity().get( - "Account" - ) - return sess, AWSVars( - bucket=bucket, - region=region, - account=account, - role_name=role, - profile=profile or config.PROFILE, - ) diff --git a/mlem/contrib/sagemaker/runtime.py b/mlem/contrib/sagemaker/runtime.py index a7c67171..c43baeeb 100644 --- a/mlem/contrib/sagemaker/runtime.py +++ b/mlem/contrib/sagemaker/runtime.py @@ -6,10 +6,16 @@ import fastapi import sagemaker import uvicorn +from sagemaker.deserializers import JSONDeserializer +from sagemaker.serializers import JSONSerializer from mlem.config import MlemConfigBase, project_config from mlem.contrib.fastapi import FastAPIServer +from mlem.contrib.sagemaker.build import AWSVars +from mlem.core.model import Signature from mlem.runtime import Interface +from mlem.runtime.client import Client +from mlem.runtime.interface import InterfaceDescriptor logger = logging.getLogger(__name__) @@ -66,3 +72,32 @@ def app_init(self, interface: Interface): def get_env_vars(self) -> Dict[str, str]: return {"SAGEMAKER_METHOD": self.method} + + +class SagemakerClient(Client): + """Client to make SageMaker requests""" + + type: ClassVar = "sagemaker" + + endpoint_name: str + """Name of SageMaker Endpoint""" + aws_vars: AWSVars + 
"""AWS Configuration""" + signature: Signature + """Signature of deployed method""" + + def _interface_factory(self) -> InterfaceDescriptor: + return InterfaceDescriptor(methods={"predict": self.signature}) + + def get_predictor(self): + sess = self.aws_vars.get_sagemaker_session() + predictor = sagemaker.Predictor( + endpoint_name=self.endpoint_name, + sagemaker_session=sess, + serializer=JSONSerializer(), + deserializer=JSONDeserializer(), + ) + return predictor + + def _call_method(self, name, args): + return self.get_predictor().predict(args) diff --git a/mlem/contrib/sagemaker/utils.py b/mlem/contrib/sagemaker/utils.py new file mode 100644 index 00000000..f6664fd2 --- /dev/null +++ b/mlem/contrib/sagemaker/utils.py @@ -0,0 +1,79 @@ +import os +import posixpath +import tarfile +import tempfile + +import boto3 +import sagemaker + +from mlem.config import project_config +from mlem.contrib.sagemaker.build import AWSVars +from mlem.contrib.sagemaker.config import AWSConfig +from mlem.core.objects import MlemModel + +MODEL_TAR_FILENAME = "model.tar.gz" + + +def delete_model_file_from_s3(session: sagemaker.Session, model_path: str): + s3_client = session.boto_session.client("s3") + if model_path.startswith("s3://"): + model_path = model_path[len("s3://") :] + bucket, *paths = model_path.split("/") + model_path = posixpath.join(*paths, MODEL_TAR_FILENAME) + s3_client.delete_object(Bucket=bucket, Key=model_path) + + +def init_aws_vars( + profile=None, role=None, bucket=None, region=None, account=None +): + boto_session = boto3.Session(profile_name=profile, region_name=region) + sess = sagemaker.Session(boto_session, default_bucket=bucket) + + bucket = ( + bucket or sess.default_bucket() + ) # Replace with your own bucket name if needed + region = region or boto_session.region_name + config = project_config(project="", section=AWSConfig) + role = role or config.ROLE or sagemaker.get_execution_role(sess) + account = account or boto_session.client("sts").get_caller_identity().get( + "Account" + ) + return sess, AWSVars( + bucket=bucket, + region=region, + account=account, + role_name=role, + profile=profile or config.PROFILE, + ) + + +def _create_model_arch_and_upload_to_s3( + session: sagemaker.Session, + model: MlemModel, + bucket: str, + model_arch_location: str, +) -> str: + with tempfile.TemporaryDirectory() as dirname: + model.clone(os.path.join(dirname, "model", "model")) + arch_path = os.path.join(dirname, "arch", MODEL_TAR_FILENAME) + os.makedirs(os.path.dirname(arch_path)) + with tarfile.open(arch_path, "w:gz") as tar: + path = os.path.join(dirname, "model") + for file in os.listdir(path): + tar.add(os.path.join(path, file), arcname=file) + + model_location = session.upload_data( + os.path.dirname(arch_path), + bucket=bucket, + key_prefix=posixpath.join(model_arch_location, model.meta_hash()), + ) + + return model_location + + +def generate_image_name(deploy_id): + return f"mlem-sagemaker-image-{deploy_id}" + + +def generate_model_file_name(deploy_id): + return f"mlem-model-{deploy_id}" diff --git a/mlem/core/objects.py b/mlem/core/objects.py index 4b4221b0..fc4c571e 100644 --- a/mlem/core/objects.py +++ b/mlem/core/objects.py @@ -818,6 +818,12 @@ class Config: model_hash: Optional[str] = None """Hash of deployed model meta""" + def update_model_hash( + self, + model: MlemModel, + ): + self.model_hash = model.meta_hash() + DT = TypeVar("DT", bound="MlemDeployment") @@ -834,29 +840,12 @@ class Config: type: ClassVar = ... 
deploy_type: ClassVar[Type[DT]] - @abstractmethod - def deploy(self, meta: DT): - raise NotImplementedError - - @abstractmethod - def remove(self, meta: DT): - raise NotImplementedError - - @abstractmethod - def get_status(self, meta: DT, raise_on_error=True) -> "DeployStatus": - raise NotImplementedError - def check_type(self, deploy: "MlemDeployment"): if not isinstance(deploy, self.deploy_type): raise ValueError( f"Meta of the {self.type} deployment should be {self.deploy_type}, not {deploy.__class__}" ) - def __init_subclass__(cls): - if hasattr(cls, "deploy_type"): - cls.deploy_type.env_type = cls - super().__init_subclass__() - class DeployStatus(str, Enum): """Enum with deployment statuses""" @@ -903,15 +892,15 @@ def get_state( @abstractmethod def update_state(self, deployment: "MlemDeployment", state: DeployState): - pass + raise NotImplementedError @abstractmethod def purge_state(self, deployment: "MlemDeployment"): - pass + raise NotImplementedError @abstractmethod - def lock(self, deployment: "MlemDeployment") -> ContextManager: - return _no_lock() + def lock_state(self, deployment: "MlemDeployment") -> ContextManager: + raise NotImplementedError class LocalFileStateManager(StateManager): @@ -948,7 +937,7 @@ def purge_state(self, deployment: "MlemDeployment"): if loc.exists(): loc.delete() - def lock(self, deployment: "MlemDeployment"): + def lock_state(self, deployment: "MlemDeployment"): if self.locking: loc = self.location(deployment) dirname, filename = posixpath.split(loc.fullpath) @@ -958,7 +947,7 @@ def lock(self, deployment: "MlemDeployment"): filename, timeout=self.lock_timeout, ) - return super().lock(deployment) + return super().lock_state(deployment) class FSSpecStateManager(StateManager): @@ -1017,7 +1006,7 @@ def purge_state(self, deployment: "MlemDeployment"): if fs.exists(path): fs.delete(path) - def lock(self, deployment: "MlemDeployment"): + def lock_state(self, deployment: "MlemDeployment"): if self.locking: fullpath = self._get_path(deployment) dirname, filename = posixpath.split(fullpath) @@ -1027,7 +1016,7 @@ def lock(self, deployment: "MlemDeployment"): filename, timeout=self.lock_timeout, ) - return super().lock(deployment) + return super().lock_state(deployment) EnvLink: TypeAlias = MlemLink.typed_link(MlemEnv) @@ -1055,12 +1044,14 @@ class Config: env: Union[str, MlemEnv, EnvLink, None] = None """Enironment to use""" env_cache: Optional[MlemEnv] = None - model: Union[ModelLink, str] - """Model to use""" - model_cache: Optional[MlemModel] = None state_manager: Optional[StateManager] """State manager used""" + def __init_subclass__(cls): + if hasattr(cls, "env_type"): + cls.env_type.deploy_type = cls + super().__init_subclass__() + @validator("state_manager", always=True) def default_state_manager( # pylint: disable=no-self-argument cls, value # noqa: B902 @@ -1082,7 +1073,7 @@ def get_state(self) -> ST: ) def lock_state(self): - return self._state_manager.lock(self) + return self._state_manager.lock_state(self) def update_state(self, state: ST): self._state_manager.update_state(self, state) @@ -1141,51 +1132,17 @@ def get_env(self) -> ET: raise WrongMetaSubType(self.env_cache, self.env_type) return self.env_cache - @validator("model") - def validate_model(cls, value): # pylint: disable=no-self-argument - if isinstance(value, MlemLink): - if value.project is None: - return value.path - if not isinstance(value, ModelLink): - return ModelLink(**value.dict()) - if isinstance(value, str): - return make_posix(value) - return value - - def get_model(self) -> 
MlemModel: - if self.model_cache is None: - if isinstance(self.model, str): - link = MlemLink( - path=self.model, - project=self.loc.project - if not os.path.isabs(self.model) - else None, - rev=self.loc.rev - if not os.path.isabs(self.model) - else None, - link_type=MlemModel.object_type, - ) - if self.is_saved: - link.bind(self.loc) - self.model_cache = link.load_link(force_type=MlemModel) - elif isinstance(self.model, MlemLink): - if self.is_saved: - self.model.bind(self.loc) - self.model_cache = self.model.load_link(force_type=MlemModel) - else: - raise ValueError( - f"model field should be either str or MlemLink instance, got {self.model.__class__}" - ) - return self.model_cache - - def run(self): - return self.get_env().deploy(self) + @abstractmethod + def deploy(self, model: MlemModel): + raise NotImplementedError + @abstractmethod def remove(self): - self.get_env().remove(self) + raise NotImplementedError - def get_status(self, raise_on_error: bool = True) -> DeployStatus: - return self.get_env().get_status(self, raise_on_error=raise_on_error) + @abstractmethod + def get_status(self, raise_on_error=True) -> "DeployStatus": + raise NotImplementedError def wait_for_status( self, @@ -1231,27 +1188,11 @@ def wait_for_status( ) return False - def model_changed(self, state: Optional[ST] = None): + def model_changed(self, model: MlemModel, state: Optional[ST] = None): state = state or self.get_state() if state.model_hash is None: return True - return self.get_model().meta_hash() != state.model_hash - - def update_model_hash( - self, - model: Optional[MlemModel] = None, - state: Optional[ST] = None, - update_state: bool = True, - ): - model = model or self.get_model() - state = state or self.get_state() - state.model_hash = model.meta_hash() - if update_state: - self.update_state(state) - - def replace_model(self, model: MlemModel): - self.model = model.make_link().typed - self.model_cache = model + return model.meta_hash() != state.model_hash def find_object( diff --git a/setup.py b/setup.py index e9a98a47..e949b929 100644 --- a/setup.py +++ b/setup.py @@ -168,7 +168,6 @@ "deployment.kubernetes = mlem.contrib.kubernetes.base:K8sDeployment", "deploy_state.kubernetes = mlem.contrib.kubernetes.base:K8sDeploymentState", "env.kubernetes = mlem.contrib.kubernetes.base:K8sEnv", - "builder.kubernetes = mlem.contrib.kubernetes.base:K8sYamlBuilder", "k8s_service_type.clusterip = mlem.contrib.kubernetes.service:ClusterIPService", "k8s_service_type.loadbalancer = mlem.contrib.kubernetes.service:LoadBalancerService", "k8s_service_type.nodeport = mlem.contrib.kubernetes.service:NodePortService", @@ -196,12 +195,6 @@ "builder.whl = mlem.contrib.pip.base:WhlBuilder", "client.rmq = mlem.contrib.rabbitmq:RabbitMQClient", "server.rmq = mlem.contrib.rabbitmq:RabbitMQServer", - "docker_registry.ecr = mlem.contrib.sagemaker.build:ECRegistry", - "client.sagemaker = mlem.contrib.sagemaker.meta:SagemakerClient", - "deploy_state.sagemaker = mlem.contrib.sagemaker.meta:SagemakerDeployState", - "deployment.sagemaker = mlem.contrib.sagemaker.meta:SagemakerDeployment", - "env.sagemaker = mlem.contrib.sagemaker.meta:SagemakerEnv", - "server._sagemaker = mlem.contrib.sagemaker.runtime:SageMakerServer", "model_type.sklearn = mlem.contrib.sklearn:SklearnModel", "model_type.sklearn_pipeline = mlem.contrib.sklearn:SklearnPipelineType", "model_type.tf_keras = mlem.contrib.tensorflow:TFKerasModel", @@ -225,8 +218,7 @@ "docker = mlem.contrib.docker.context:DockerConfig", "heroku = mlem.contrib.heroku.config:HerokuConfig", 
"pandas = mlem.contrib.pandas:PandasConfig", - "aws = mlem.contrib.sagemaker.meta:AWSConfig", - "sagemaker = mlem.contrib.sagemaker.runtime:SageMakerServerConfig", + "aws = mlem.contrib.sagemaker.config:AWSConfig", ], }, zip_safe=False, diff --git a/tests/cli/conftest.py b/tests/cli/conftest.py index ace15c38..e39bb17a 100644 --- a/tests/cli/conftest.py +++ b/tests/cli/conftest.py @@ -16,7 +16,7 @@ def invoke(self, *args, raise_on_error: bool = False, **kwargs) -> Result: if raise_on_error and result.exit_code != 0: if result.exit_code == 1: raise result.exception - raise RuntimeError(result.output) + raise RuntimeError(result.stderr) return result diff --git a/tests/cli/test_deployment.py b/tests/cli/test_deployment.py index afb49e00..96cde172 100644 --- a/tests/cli/test_deployment.py +++ b/tests/cli/test_deployment.py @@ -1,5 +1,5 @@ import os -from typing import ClassVar +from typing import Any, ClassVar, Optional import pytest from numpy import ndarray @@ -14,6 +14,7 @@ MlemDeployment, MlemEnv, MlemLink, + MlemModel, ) from mlem.runtime.client import Client, HTTPClient from mlem.utils.path import make_posix @@ -43,6 +44,17 @@ class Config: def _get_client(self, state) -> Client: return HTTPClient(host="", port=None) + def deploy(self, model: MlemModel): + self.status = DeployStatus.RUNNING + self.update() + + def remove(self): + self.status = DeployStatus.STOPPED + self.update() + + def get_status(self, raise_on_error=True) -> "DeployStatus": + return self.status + class MlemEnvMock(MlemEnv): """mock""" @@ -50,19 +62,6 @@ class MlemEnvMock(MlemEnv): type: ClassVar = "mock" deploy_type: ClassVar = MlemDeploymentMock - def deploy(self, meta: MlemDeploymentMock): - meta.status = DeployStatus.RUNNING - meta.update() - - def remove(self, meta: MlemDeploymentMock): - meta.status = DeployStatus.STOPPED - meta.update() - - def get_status( - self, meta: MlemDeploymentMock, raise_on_error=True - ) -> "DeployStatus": - return meta.status - @pytest.fixture def mock_env_path(tmp_path_factory): @@ -72,122 +71,80 @@ def mock_env_path(tmp_path_factory): @pytest.fixture() -def mock_deploy_path(tmp_path, mock_env_path, model_meta_saved_single): +def mock_deploy_path(tmp_path, mock_env_path): path = os.path.join(tmp_path, "deployname") MlemDeploymentMock( param="bbb", - model=model_meta_saved_single.make_link(), - model_cache=model_meta_saved_single, env=mock_env_path, ).dump(path) return path -def test_deploy_meta_str_model(mlem_project, model_meta, mock_env_path): - model_meta.dump("model", project=mlem_project) - - deployment = MlemDeploymentMock(model="model", env=mock_env_path) - deployment.dump("deployment", project=mlem_project) +def _check_deployment_meta( + deployment: MlemDeployment, + mlem_project: Optional[str], + env_path: str, + path: str = "deployment", + env: Any = None, +): + deployment.dump(path, project=mlem_project) with deployment.loc.open("r") as f: data = safe_load(f) assert data == { - "model": "model", "object_type": "deployment", "type": "mock", - "env": make_posix(mock_env_path), + "env": env or make_posix(env_path), } deployment2 = load_meta( - "deployment", project=mlem_project, force_type=MlemDeployment + path, project=mlem_project, force_type=MlemDeployment ) assert deployment2 == deployment - assert deployment2.get_model() == model_meta - assert deployment2.get_env() == load_meta(mock_env_path) + assert deployment2.get_env() == load_meta(env_path) -def test_deploy_meta_link_str_model(mlem_project, model_meta, mock_env_path): - model_meta.dump("model", project=mlem_project) 
+def test_deploy_meta_str_env(mlem_project, mock_env_path): + deployment = MlemDeploymentMock(env=mock_env_path) + _check_deployment_meta(deployment, mlem_project, mock_env_path) + +def test_deploy_meta_link_env(mlem_project, mock_env_path): deployment = MlemDeploymentMock( - model=MlemLink(path="model", link_type="model"), env=MlemLink(path=mock_env_path, link_type="env"), ) - deployment.dump("deployment", project=mlem_project) - - with deployment.loc.open("r") as f: - data = safe_load(f) - assert data == { - "model": "model", - "object_type": "deployment", - "type": "mock", - "env": make_posix(mock_env_path), - } - - deployment2 = load_meta( - "deployment", project=mlem_project, force_type=MlemDeployment - ) - assert deployment2 == deployment - assert deployment2.get_model() == model_meta - assert deployment2.get_env() == load_meta(mock_env_path) + _check_deployment_meta(deployment, mlem_project, mock_env_path) -def test_deploy_meta_link_model(mlem_project, model_meta, mock_env_path): - model_meta.dump("model", project=mlem_project) +def test_deploy_meta_link_env_project(mlem_project, mock_env_path): load_meta(mock_env_path).clone("project_env", project=mlem_project) deployment = MlemDeploymentMock( - model=MlemLink(path="model", project=mlem_project, link_type="model"), env=MlemLink( path="project_env", project=mlem_project, link_type="env" ), ) - deployment.dump("deployment", project=mlem_project) - - with deployment.loc.open("r") as f: - data = safe_load(f) - assert data == { - "model": {"path": "model", "project": make_posix(mlem_project)}, - "object_type": "deployment", - "type": "mock", - "env": { - "path": "project_env", - "project": make_posix(mlem_project), - }, - } - - deployment2 = load_meta( - "deployment", project=mlem_project, force_type=MlemDeployment + _check_deployment_meta( + deployment, + mlem_project, + mock_env_path, + env={ + "path": "project_env", + "project": make_posix(mlem_project), + }, ) - assert deployment2 == deployment - assert deployment2.get_model() == model_meta - assert deployment2.get_env() == load_meta(mock_env_path) -def test_deploy_meta_link_model_no_project(tmpdir, model_meta, mock_env_path): - model_path = os.path.join(tmpdir, "model") - model_meta.dump(model_path) +def test_deploy_meta_link_env_no_project(tmpdir, mock_env_path): deployment = MlemDeploymentMock( - model=MlemLink(path="model", link_type="model"), env=MlemLink(path=mock_env_path, link_type="env"), ) deployment_path = os.path.join(tmpdir, "deployment") - deployment.dump(deployment_path) - with deployment.loc.open("r") as f: - data = safe_load(f) - assert data == { - "model": "model", - "object_type": "deployment", - "type": "mock", - "env": make_posix(mock_env_path), - } - - deployment2 = load_meta(deployment_path, force_type=MlemDeployment) - assert deployment2 == deployment - assert deployment2.get_model() == model_meta - assert deployment2.get_env() == load_meta(mock_env_path) + _check_deployment_meta( + deployment, None, mock_env_path, path=deployment_path + ) def test_read_relative_model_from_remote_deploy_meta(): @@ -226,9 +183,12 @@ def test_deploy_create_new( assert meta.status == DeployStatus.RUNNING -def test_deploy_create_existing(runner: Runner, mock_deploy_path): +def test_deploy_create_existing( + runner: Runner, mock_deploy_path, model_meta_saved_single +): result = runner.invoke( - f"deploy run {mock_deploy_path}".split(), raise_on_error=True + f"deploy run {mock_deploy_path} -m {model_meta_saved_single.loc.fullpath}".split(), + raise_on_error=True, ) assert 
result.exit_code == 0, ( result.stdout, diff --git a/tests/contrib/test_docker/test_deploy.py b/tests/contrib/test_docker/test_deploy.py index d36a3bf1..9035f49f 100644 --- a/tests/contrib/test_docker/test_deploy.py +++ b/tests/contrib/test_docker/test_deploy.py @@ -128,7 +128,6 @@ def _check_runner(img, env: DockerEnv, model): container_name=CONTAINER_NAME, port_mapping={80: 8008}, server=FastAPIServer(), - model=model.make_link(), env=env, rm=False, ) @@ -138,18 +137,18 @@ def _check_runner(img, env: DockerEnv, model): image=DockerImage(name=img), model_hash=model.meta_hash() ) ) - assert env.get_status(instance) == DeployStatus.NOT_DEPLOYED + assert instance.get_status() == DeployStatus.NOT_DEPLOYED - env.deploy(instance) + instance.deploy(model) instance.wait_for_status( DeployStatus.RUNNING, allowed_intermediate=[DeployStatus.STARTING] ) time.sleep(0.1) - assert env.get_status(instance) == DeployStatus.RUNNING + assert instance.get_status() == DeployStatus.RUNNING - env.remove(instance) + instance.remove() time.sleep(0.1) - assert env.get_status(instance) == DeployStatus.NOT_DEPLOYED + assert instance.get_status() == DeployStatus.NOT_DEPLOYED diff --git a/tests/core/test_objects.py b/tests/core/test_objects.py index e96f6176..290b293d 100644 --- a/tests/core/test_objects.py +++ b/tests/core/test_objects.py @@ -16,6 +16,7 @@ from mlem.core.model import ModelIO, ModelType from mlem.core.objects import ( DeployState, + DeployStatus, MlemDeployment, MlemLink, MlemModel, @@ -46,16 +47,22 @@ def destroy(self): class MyMlemDeployment(MlemDeployment): + def deploy(self, model: MlemModel): + pass + + def remove(self): + pass + + def get_status(self, raise_on_error=True) -> DeployStatus: + pass + def _get_client(self, state): pass @pytest.fixture() def meta(): - return MyMlemDeployment( - env="", - model=MlemLink(path="", link_type="model"), - ) + return MyMlemDeployment(env="") @pytest.fixture(params=["fullpath", "with_root"]) From eac7ceaaa5a9a85b51e3e8a614e1ad3c4784fbcf Mon Sep 17 00:00:00 2001 From: mike0sv Date: Fri, 23 Sep 2022 17:54:02 +0300 Subject: [PATCH 02/26] adjust deployments cli --- mlem/cli/declare.py | 38 ++++- mlem/cli/deployment.py | 125 +++++++++++++---- mlem/cli/main.py | 32 +++-- mlem/cli/utils.py | 6 +- mlem/core/base.py | 14 +- tests/cli/test_deployment.py | 265 ++++++++++++++++++++++++++++++++--- tests/cli/test_types.py | 1 + 7 files changed, 415 insertions(+), 66 deletions(-) diff --git a/mlem/cli/declare.py b/mlem/cli/declare.py index 87c82d16..d59b3714 100644 --- a/mlem/cli/declare.py +++ b/mlem/cli/declare.py @@ -1,3 +1,4 @@ +import itertools from typing import Type from typer import Argument, Typer @@ -5,7 +6,7 @@ from ..core.base import MlemABC, build_mlem_object, load_impl_ext from ..core.meta_io import Location -from ..core.objects import MlemObject +from ..core.objects import MlemDeployment, MlemObject from ..utils.entrypoints import list_abstractions, list_implementations from .main import ( app, @@ -16,6 +17,9 @@ option_project, ) from .utils import ( + CliTypeField, + _option_from_field, + _options_from_model, abc_fields_parameters, lazy_class_docstring, wrap_build_error, @@ -48,6 +52,34 @@ def create_declare_mlem_object(type_name, cls: Type[MlemObject]): ) +def add_env_params_deployment(subtype, parent_cls: Type[MlemDeployment]): + impl = load_impl_ext(parent_cls.object_type, subtype) + assert issubclass(impl, MlemDeployment) # just to help mypy + env_impl = impl.env_type + return lambda ctx: itertools.chain( + abc_fields_parameters(subtype, 
parent_cls)(ctx), + _options_from_model(env_impl, ctx, path="env", force_not_set=True), + ( + _option_from_field( + CliTypeField( + path="env", + required=False, + allow_none=False, + type_=str, + help="", + default=env_impl.type, + is_list=False, + is_mapping=False, + ), + "env", + ), + ), + ) + + +_add_fields = {"deployment": add_env_params_deployment} + + def create_declare_mlem_object_subcommand( parent: Typer, subtype: str, type_name: str, parent_cls ): @@ -56,7 +88,9 @@ def create_declare_mlem_object_subcommand( section="MLEM Objects", parent=parent, dynamic_metavar="__kwargs__", - dynamic_options_generator=abc_fields_parameters(subtype, parent_cls), + dynamic_options_generator=_add_fields.get( + parent_cls.object_type, abc_fields_parameters + )(subtype, parent_cls), hidden=subtype.startswith("_"), lazy_help=lazy_class_docstring(type_name, subtype), ) diff --git a/mlem/cli/deployment.py b/mlem/cli/deployment.py index 06308142..56eb1744 100644 --- a/mlem/cli/deployment.py +++ b/mlem/cli/deployment.py @@ -4,26 +4,43 @@ from typer import Argument, Option, Typer from mlem.cli.apply import run_apply_remote +from mlem.cli.declare import add_env_params_deployment from mlem.cli.main import ( app, mlem_command, mlem_group, - option_conf, + mlem_group_callback, option_data_project, option_data_rev, option_external, + option_file_conf, option_index, option_json, + option_load, option_method, + option_model, + option_model_project, + option_model_rev, option_project, option_rev, option_target_project, ) -from mlem.core.base import parse_string_conf +from mlem.cli.utils import ( + for_each_impl, + lazy_class_docstring, + make_not_required, + wrap_build_error, +) +from mlem.core.base import build_mlem_object from mlem.core.data_type import DataAnalyzer -from mlem.core.errors import DeploymentError +from mlem.core.errors import DeploymentError, MlemObjectNotFound from mlem.core.metadata import load_meta -from mlem.core.objects import DeployState, DeployStatus, MlemDeployment +from mlem.core.objects import ( + DeployState, + DeployStatus, + MlemDeployment, + MlemModel, +) from mlem.ui import echo, no_echo, set_echo deployment = Typer( @@ -33,21 +50,27 @@ ) app.add_typer(deployment) +deploy_run = Typer( + name="run", + help="""Deploy a model to a target environment. Can use an existing deployment + declaration or create a new one on-the-fly. + """, + cls=mlem_group("other"), + subcommand_metavar="deployment", +) +deployment.add_typer(deploy_run) + -@mlem_command("run", parent=deployment) -def deploy_run( - path: str = Argument( - ..., - help="Path to deployment meta (will be created if it does not exist)", - ), - model: str = Option(..., "-m", "--model", help="Path to model"), - env: Optional[str] = Option( - None, "-t", "--env", help="Path to target environment" - ), +@mlem_group_callback(deploy_run, required=["model", "load"]) +def deploy_run_callback( + load: str = option_load("deployment"), + model: str = make_not_required(option_model), + model_project: Optional[str] = option_model_project, + model_rev: Optional[str] = option_model_rev, project: Optional[str] = option_project, + rev: Optional[str] = option_rev, external: bool = option_external, index: bool = option_index, - conf: Optional[List[str]] = option_conf(), ): """Deploy a model to a target environment. Can use an existing deployment declaration or create a new one on-the-fly. 
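A rough usage sketch of the flow this reworked command enables, mirroring the examples used in the test docstrings later in this series; the API key and model path are placeholders:

    mlem declare env heroku --api_key <key> prod.mlem
    mlem declare deployment heroku --env prod.mlem --app_name myapp service.mlem
    mlem deployment run --load service.mlem --model <path-to-model>
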
@@ -64,21 +87,75 @@ def deploy_run( """ from mlem.api.commands import deploy - conf = conf or [] - env_conf = [c[len("env.") :] for c in conf if c.startswith("env.")] - conf = [c for c in conf if not c.startswith("env.")] deploy( - path, - model, - env, - project, + load, + load_meta( + model, project=model_project, rev=model_rev, force_type=MlemModel + ), + project=project, + rev=rev, external=external, index=index, - env_kwargs=parse_string_conf(env_conf), - **parse_string_conf(conf or []), ) +@for_each_impl(MlemDeployment) +def create_deploy_run_command(type_name): + @mlem_command( + type_name, + section="deployments", + parent=deploy_run, + dynamic_metavar="__kwargs__", + dynamic_options_generator=add_env_params_deployment( + type_name, MlemDeployment + ), + hidden=type_name.startswith("_"), + lazy_help=lazy_class_docstring(MlemDeployment.object_type, type_name), + no_pass_from_parent=["file_conf"], + ) + def deploy_run_command( + path: str = Argument( + ..., help="Where to save the object (.mlem file)" + ), + model: str = make_not_required(option_model), + model_project: Optional[str] = option_model_project, + model_rev: Optional[str] = option_model_rev, + project: Optional[str] = option_project, + external: bool = option_external, + index: bool = option_index, + file_conf: List[str] = option_file_conf("deployment"), + **__kwargs__, + ): + from mlem.api.commands import deploy + + try: + meta = load_meta(path, project=project, force_type=MlemDeployment) + raise DeploymentError( + f"Deployment meta already exists at {meta.loc}. Please use mlem deployment run --load ..." + ) + except MlemObjectNotFound: + with wrap_build_error(type_name, MlemDeployment): + meta = build_mlem_object( + MlemDeployment, + type_name, + str_conf=None, + file_conf=file_conf, + **__kwargs__, + ).dump(path, project=project) + deploy( + meta, + load_meta( + model, + project=model_project, + rev=model_rev, + force_type=MlemModel, + ), + project=project, + external=external, + index=index, + ) + + @mlem_command("remove", parent=deployment) def deploy_remove( path: str = Argument(..., help="Path to deployment meta"), diff --git a/mlem/cli/main.py b/mlem/cli/main.py index d69cb200..01e40de8 100644 --- a/mlem/cli/main.py +++ b/mlem/cli/main.py @@ -504,12 +504,30 @@ def inner(*iargs, **ikwargs): option_data_project = Option( None, "--data-project", - "--dr", + "--dp", metavar=PATH_METAVAR, help="Project with data", ) option_data_rev = Option( - None, "--data-rev", help="Revision of data", metavar=COMMITISH_METAVAR + None, + "--data-rev", + "--dr", + help="Revision of data", + metavar=COMMITISH_METAVAR, +) +option_model_project = Option( + None, + "--model-project", + "--mp", + metavar=PATH_METAVAR, + help="Project with model", +) +option_model_rev = Option( + None, + "--model-rev", + "--mr", + help="Revision of model", + metavar=COMMITISH_METAVAR, ) option_model = Option( ..., @@ -534,16 +552,6 @@ def option_load(type_: str = None): ) -def option_conf(type_: str = None): - type_ = f"for {type_} " if type_ is not None else "" - return Option( - None, - "-c", - "--conf", - help=f"Options {type_}in format `field.name=value`", - ) - - def option_file_conf(type_: str = None): type_ = f"for {type_} " if type_ is not None else "" return Option( diff --git a/mlem/cli/utils.py b/mlem/cli/utils.py index 6701c945..646a0413 100644 --- a/mlem/cli/utils.py +++ b/mlem/cli/utils.py @@ -375,7 +375,11 @@ def _options_from_mlem_abc( """Generate str option for mlem abc type. 
If param is already set, also generate respective implementation fields""" assert issubclass(field.type_, MlemABC) and field.type_.__is_root__ - if path in ctx.params and ctx.params[path] != NOT_SET: + if ( + path in ctx.params + and ctx.params[path] != NOT_SET + and ctx.params[path] is not None + ): yield from _options_from_model( load_impl_ext(field.type_.abs_name, ctx.params[path]), ctx, diff --git a/mlem/core/base.py b/mlem/core/base.py index d052c4c2..b180f39f 100644 --- a/mlem/core/base.py +++ b/mlem/core/base.py @@ -170,14 +170,17 @@ def smart_split(value: str, char: str, maxsplit: int = None): return res[:maxsplit] + [char.join(res[maxsplit:])] +TMO = TypeVar("TMO", bound=MlemABC) + + def build_mlem_object( - model: Type[MlemABC], + model: Type[TMO], subtype: str, str_conf: List[str] = None, file_conf: List[str] = None, conf: Dict[str, Any] = None, **kwargs, -): +) -> TMO: not_links, links = parse_links(model, str_conf or []) if model.__is_root__: kwargs[model.__config__.type_field] = subtype @@ -335,13 +338,16 @@ def parse_string_conf(conf: List[str]) -> Dict[str, Any]: return res.build() +TBM = TypeVar("TBM", bound=BaseModel) + + def build_model( - model: Type[BaseModel], + model: Type[TBM], str_conf: List[str] = None, file_conf: List[str] = None, conf: Dict[str, Any] = None, **kwargs, -): +) -> TBM: model_dict = SmartSplitDict() model_dict.update(kwargs) model_dict.update(conf or {}) diff --git a/tests/cli/test_deployment.py b/tests/cli/test_deployment.py index 96cde172..e85053dc 100644 --- a/tests/cli/test_deployment.py +++ b/tests/cli/test_deployment.py @@ -1,11 +1,15 @@ import os -from typing import Any, ClassVar, Optional +from typing import Any, ClassVar, Optional, Type import pytest from numpy import ndarray from yaml import safe_load from mlem.api import load +from mlem.cli.declare import create_declare_mlem_object_subcommand, declare +from mlem.cli.deployment import create_deploy_run_command +from mlem.contrib.heroku.meta import HerokuEnv +from mlem.core.errors import WrongMetaSubType from mlem.core.meta_io import MLEM_EXT from mlem.core.metadata import load_meta from mlem.core.objects import ( @@ -15,6 +19,7 @@ MlemEnv, MlemLink, MlemModel, + MlemObject, ) from mlem.runtime.client import Client, HTTPClient from mlem.utils.path import make_posix @@ -24,19 +29,31 @@ class DeployStateMock(DeployState): """mock""" + class Config: + use_enum_values = True + allow_default: ClassVar = True + deployment: Optional[MlemDeployment] = None + env: Optional[MlemEnv] = None + status: DeployStatus = DeployStatus.NOT_DEPLOYED + -class MlemDeploymentMock(MlemDeployment): +class MlemEnvMock(MlemEnv): """mock""" - class Config: - use_enum_values = True + type: ClassVar = "mock" + + env_param: Optional[str] = None + + +class MlemDeploymentMock(MlemDeployment[DeployStateMock, MlemEnvMock]): + """mock""" type: ClassVar = "mock" state_type: ClassVar = DeployStateMock + env_type: ClassVar = MlemEnvMock - status: DeployStatus = DeployStatus.NOT_DEPLOYED """status""" param: str = "" """param""" @@ -45,22 +62,26 @@ def _get_client(self, state) -> Client: return HTTPClient(host="", port=None) def deploy(self, model: MlemModel): - self.status = DeployStatus.RUNNING - self.update() + with self.lock_state(): + state = self.get_state() + state.status = DeployStatus.RUNNING + state.deployment = self + state.env = self.get_env() + state.update_model_hash(model) + self.update_state(state) def remove(self): - self.status = DeployStatus.STOPPED - self.update() + with self.lock_state(): + state = 
self.get_state() + state.status = DeployStatus.STOPPED + state.deployment = None + state.env = None + state.model_hash = None + self.update_state(state) def get_status(self, raise_on_error=True) -> "DeployStatus": - return self.status - - -class MlemEnvMock(MlemEnv): - """mock""" - - type: ClassVar = "mock" - deploy_type: ClassVar = MlemDeploymentMock + with self.lock_state(): + return self.get_state().status @pytest.fixture @@ -169,7 +190,7 @@ def test_deploy_create_new( ): path = os.path.join(tmp_path, "deployname") result = runner.invoke( - f"deploy run {path} -m {model_meta_saved_single.loc.uri} -t {mock_env_path} -c param=aaa".split() + f"deploy run {MlemDeploymentMock.type} {path} -m {model_meta_saved_single.loc.uri} --env {mock_env_path} --param aaa".split() ) assert result.exit_code == 0, ( result.stdout, @@ -180,14 +201,14 @@ def test_deploy_create_new( meta = load_meta(path) assert isinstance(meta, MlemDeploymentMock) assert meta.param == "aaa" - assert meta.status == DeployStatus.RUNNING + assert meta.get_status() == DeployStatus.RUNNING def test_deploy_create_existing( runner: Runner, mock_deploy_path, model_meta_saved_single ): result = runner.invoke( - f"deploy run {mock_deploy_path} -m {model_meta_saved_single.loc.fullpath}".split(), + f"deploy run --load {mock_deploy_path} -m {model_meta_saved_single.loc.fullpath}".split(), raise_on_error=True, ) assert result.exit_code == 0, ( @@ -198,7 +219,7 @@ def test_deploy_create_existing( meta = load_meta(mock_deploy_path) assert isinstance(meta, MlemDeploymentMock) assert meta.param == "bbb" - assert meta.status == DeployStatus.RUNNING + assert meta.get_status() == DeployStatus.RUNNING def test_deploy_status(runner: Runner, mock_deploy_path): @@ -220,7 +241,7 @@ def test_deploy_remove(runner: Runner, mock_deploy_path): ) meta = load_meta(mock_deploy_path) assert isinstance(meta, MlemDeploymentMock) - assert meta.status == DeployStatus.STOPPED + assert meta.get_status() == DeployStatus.STOPPED def test_deploy_apply( @@ -242,6 +263,204 @@ def test_deploy_apply( ) meta = load_meta(mock_deploy_path) assert isinstance(meta, MlemDeploymentMock) - assert meta.status == DeployStatus.NOT_DEPLOYED + assert meta.get_status() == DeployStatus.NOT_DEPLOYED predictions = load(path) assert isinstance(predictions, ndarray) + + +def add_mock_declare(type_: Type[MlemObject]): + + typer = [ + g.typer_instance + for g in declare.registered_groups + if g.typer_instance.info.name == type_.object_type + ][0] + + create_declare_mlem_object_subcommand( + typer, + type_.__get_alias__(), + type_.object_type, + type_, + ) + + +add_mock_declare(MlemDeploymentMock) +add_mock_declare(MlemEnvMock) + +create_deploy_run_command(MlemDeploymentMock.type) + + +def _deploy_and_check( + runner: Runner, + deploy_path: str, + model_single_path: str, + load_deploy=True, + add_args="", +): + + if load_deploy: + status_res = runner.invoke( + f"deploy status {deploy_path}", raise_on_error=True + ) + assert status_res.exit_code == 0, ( + status_res.output, + status_res.exception, + status_res.stderr, + ) + assert status_res.output.strip() == DeployStatus.NOT_DEPLOYED.value + + deploy_res = runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) + else: + deploy_res = runner.invoke( + f"deploy run {MlemDeploymentMock.type} {deploy_path} --model {model_single_path} --param val {add_args}", + raise_on_error=True, + ) + + assert deploy_res.exit_code == 0, ( + deploy_res.output, + deploy_res.exception, + deploy_res.stderr, + ) + + 
status_res = runner.invoke( + f"deploy status {deploy_path}", raise_on_error=True + ) + assert status_res.exit_code == 0, ( + status_res.output, + status_res.exception, + status_res.stderr, + ) + assert status_res.output.strip() == DeployStatus.RUNNING.value + + deploy_meta = load_meta(deploy_path, force_type=MlemDeploymentMock) + state = deploy_meta.get_state() + assert isinstance(state.deployment, MlemDeploymentMock) + assert state.deployment.param == "val" + assert isinstance(state.env, MlemEnvMock) + assert state.env.env_param == "env_val" + + remove_res = runner.invoke( + f"deploy remove {deploy_path}", raise_on_error=True + ) + assert remove_res.exit_code == 0, ( + remove_res.output, + remove_res.exception, + remove_res.stderr, + ) + + status_res = runner.invoke( + f"deploy status {deploy_path}", raise_on_error=True + ) + assert status_res.exit_code == 0, ( + status_res.output, + status_res.exception, + status_res.stderr, + ) + assert status_res.output.strip() == DeployStatus.STOPPED.value + + +def test_all_declared(runner: Runner, tmp_path, model_single_path): + """ + mlem declare env heroku --api_key lol prod.mlem + mlem declare deployment heroku --env prod.mlem --app_name myapp service.mlem + # error on depl/env type mismatch TODO + mlem deployment run --load service.mlem --model mdoel + """ + env_path = str(tmp_path / "env") + runner.invoke( + f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", + raise_on_error=True, + ) + deploy_path = str(tmp_path / "deploy") + runner.invoke( + f"declare deployment {MlemDeploymentMock.type} --param val --env {env_path} {deploy_path}", + raise_on_error=True, + ) + + _deploy_and_check(runner, deploy_path, model_single_path) + + +def test_declare_type_mismatch(runner: Runner, tmp_path, model_single_path): + """ + mlem declare env heroku --api_key lol prod.mlem + mlem declare deployment sagemaker --env prod.mlem --app_name myapp service.mlem + # error on depl/env type mismatch TODO + mlem deployment run --load service.mlem --model mdoel + """ + env_path = str(tmp_path / "env") + runner.invoke( + f"declare env {HerokuEnv.type} {env_path}", raise_on_error=True + ) + deploy_path = str(tmp_path / "deploy") + runner.invoke( + f"declare deployment {MlemDeploymentMock.type} --param a --env {env_path} {deploy_path}", + raise_on_error=True, + ) + with pytest.raises(WrongMetaSubType): + runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) + + +def test_deploy_declared(runner: Runner, tmp_path, model_single_path): + """ + mlem declare deployment heroku --env.api_key prod.mlem --app_name myapp service.mlem + mlem deployment run --load service.mlem --model mdoel + """ + deploy_path = str(tmp_path / "deploy") + declare_res = runner.invoke( + f"declare deployment {MlemDeploymentMock.type} {deploy_path} --param val --env.env_param env_val ", + raise_on_error=True, + ) + assert declare_res.exit_code == 0, ( + declare_res.output, + declare_res.exception, + declare_res.stderr, + ) + + _deploy_and_check(runner, deploy_path, model_single_path) + + +def test_env_declared(runner: Runner, tmp_path, model_single_path): + """ + mlem declare env heroku --api_key lol prod.mlem + mlem deployment run heroku service.mlem --model model --app_name myapp --env prod.mlem + # error on type mismatch + """ + env_path = str(tmp_path / "env") + declare_res = runner.invoke( + f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", + raise_on_error=True, + ) + assert declare_res.exit_code == 0, ( + 
declare_res.output, + declare_res.exception, + declare_res.stderr, + ) + deploy_path = str(tmp_path / "deploy") + _deploy_and_check( + runner, + deploy_path, + model_single_path, + load_deploy=False, + add_args=f"--env {env_path}", + ) + + +def test_none_declared(runner: Runner, tmp_path, model_single_path): + """ + mlem deployment run heroku service.mlem --model model --app_name myapp --env.api_key lol + # error on args mismatch + """ + deploy_path = str(tmp_path / "deploy") + _deploy_and_check( + runner, + deploy_path, + model_single_path, + load_deploy=False, + add_args="--env.env_param env_val", + ) diff --git a/tests/cli/test_types.py b/tests/cli/test_types.py index 181d4008..397285f6 100644 --- a/tests/cli/test_types.py +++ b/tests/cli/test_types.py @@ -31,6 +31,7 @@ def test_types_abs_name(runner: Runner, abs_name): (abs_name, subtype) for abs_name, root_type in MlemABC.abs_types.items() for subtype in list_implementations(root_type, include_hidden=False) + if not subtype.startswith("tests.") and "mock" not in subtype ], ) def test_types_abs_name_subtype(runner: Runner, abs_name, subtype): From 5bcc8598343e18128448709ede4f64829410db81 Mon Sep 17 00:00:00 2001 From: mike0sv Date: Fri, 23 Sep 2022 18:11:26 +0300 Subject: [PATCH 03/26] add rev to deploy API --- mlem/api/commands.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mlem/api/commands.py b/mlem/api/commands.py index 197fd1e3..306d6bef 100644 --- a/mlem/api/commands.py +++ b/mlem/api/commands.py @@ -417,6 +417,7 @@ def deploy( model: Union[MlemModel, str], env: Union[MlemEnv, str] = None, project: Optional[str] = None, + rev: Optional[str] = None, fs: Optional[AbstractFileSystem] = None, external: bool = None, index: bool = None, @@ -430,6 +431,7 @@ def deploy( deploy_meta = load_meta( path=deploy_meta_or_path, project=project, + rev=rev, fs=fs, force_type=MlemDeployment, ) From 15ca6d4567da87ae73fbacc0ab16ac4ccf062121 Mon Sep 17 00:00:00 2001 From: mike0sv Date: Thu, 29 Sep 2022 16:37:43 +0300 Subject: [PATCH 04/26] fix tests and pylint --- .pylintrc | 3 ++- tests/cli/test_deployment.py | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.pylintrc b/.pylintrc index af266a7f..9beb1074 100644 --- a/.pylintrc +++ b/.pylintrc @@ -170,7 +170,8 @@ disable=print-statement, redefined-builtin, # TODO: https://github.com/iterative/mlem/issues/60 no-self-use, # TODO: https://github.com/iterative/mlem/issues/60 maybe leave it import-outside-toplevel, - wrong-import-order # handeled by isort + wrong-import-order, # handeled by isort + cannot-enumerate-pytest-fixtures # TODO: https://github.com/iterative/mlem/issues/60 # Enable the message, report, category or checker with the given id(s). 
You can # either give multiple identifier separated by comma (,) or put this option diff --git a/tests/cli/test_deployment.py b/tests/cli/test_deployment.py index e85053dc..d3a8be1e 100644 --- a/tests/cli/test_deployment.py +++ b/tests/cli/test_deployment.py @@ -399,11 +399,12 @@ def test_declare_type_mismatch(runner: Runner, tmp_path, model_single_path): f"declare deployment {MlemDeploymentMock.type} --param a --env {env_path} {deploy_path}", raise_on_error=True, ) - with pytest.raises(WrongMetaSubType): - runner.invoke( - f"deploy run --load {deploy_path} --model {model_single_path}", - raise_on_error=True, - ) + + res = runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + ) + assert res.exit_code != 0 + assert isinstance(res.exception, WrongMetaSubType) def test_deploy_declared(runner: Runner, tmp_path, model_single_path): From c5429fb0e513bf2f4c3c7dbee166f91ea0cc700e Mon Sep 17 00:00:00 2001 From: mike0sv Date: Thu, 29 Sep 2022 17:30:26 +0300 Subject: [PATCH 05/26] add deployment declaration and model link to state --- mlem/api/commands.py | 1 + mlem/cli/deployment.py | 2 +- mlem/contrib/docker/base.py | 2 +- mlem/contrib/heroku/meta.py | 2 +- mlem/contrib/kubernetes/base.py | 2 +- mlem/contrib/sagemaker/meta.py | 2 +- mlem/core/objects.py | 39 ++++++++++++++++--- tests/cli/test_deployment.py | 67 +++++++++++++++++++++++++++++++-- tests/contrib/test_heroku.py | 2 +- 9 files changed, 105 insertions(+), 14 deletions(-) diff --git a/mlem/api/commands.py b/mlem/api/commands.py index 306d6bef..cbda2586 100644 --- a/mlem/api/commands.py +++ b/mlem/api/commands.py @@ -465,5 +465,6 @@ def deploy( deploy_meta.get_env() model_meta = get_model_meta(model) + deploy_meta.check_unchanged() deploy_meta.deploy(model_meta) return deploy_meta diff --git a/mlem/cli/deployment.py b/mlem/cli/deployment.py index 56eb1744..38b27555 100644 --- a/mlem/cli/deployment.py +++ b/mlem/cli/deployment.py @@ -250,7 +250,7 @@ def deploy_apply( ) state: DeployState = deploy_meta.get_state() if ( - state == deploy_meta.state_type() + state == deploy_meta.state_type(declaration=deploy_meta) and not deploy_meta.state_type.allow_default ): raise DeploymentError( diff --git a/mlem/contrib/docker/base.py b/mlem/contrib/docker/base.py index 78c74173..68e3ee4c 100644 --- a/mlem/contrib/docker/base.py +++ b/mlem/contrib/docker/base.py @@ -374,7 +374,7 @@ def deploy(self, model: MlemModel): force_overwrite=True, **self.args.dict(), ) - state.update_model_hash(model) + state.update_model(model) self.update_state(state) redeploy = True if state.container_id is None or redeploy: diff --git a/mlem/contrib/heroku/meta.py b/mlem/contrib/heroku/meta.py index cac69c8b..3b2052d2 100644 --- a/mlem/contrib/heroku/meta.py +++ b/mlem/contrib/heroku/meta.py @@ -97,7 +97,7 @@ def deploy(self, model: MlemModel): state.image = build_heroku_docker( model, state.app.name, api_key=self.get_env().api_key ) - state.update_model_hash(model) + state.update_model(model) self.update_state(state) redeploy = True if state.release_state is None or redeploy: diff --git a/mlem/contrib/kubernetes/base.py b/mlem/contrib/kubernetes/base.py index 3b7da29d..88be95cf 100644 --- a/mlem/contrib/kubernetes/base.py +++ b/mlem/contrib/kubernetes/base.py @@ -135,7 +135,7 @@ def deploy(self, model: MlemModel): daemon=self.daemon, server=self.get_server(), ) - state.update_model_hash(model) + state.update_model(model) redeploy = True if ( diff --git a/mlem/contrib/sagemaker/meta.py b/mlem/contrib/sagemaker/meta.py index 89012c59..437d5762 
100644 --- a/mlem/contrib/sagemaker/meta.py +++ b/mlem/contrib/sagemaker/meta.py @@ -211,7 +211,7 @@ def _upload_model_file( self.model_arch_location or generate_model_file_name(model.meta_hash()), ) - state.update_model_hash(model) + state.update_model(model) @updates_state def _update_model( diff --git a/mlem/core/objects.py b/mlem/core/objects.py index fc4c571e..24216af8 100644 --- a/mlem/core/objects.py +++ b/mlem/core/objects.py @@ -817,12 +817,28 @@ class Config: model_hash: Optional[str] = None """Hash of deployed model meta""" + model_link: Optional["ModelLink"] + """Link to deployed model""" + declaration: "MlemDeployment" + """Deployment declaration used""" - def update_model_hash( + def update_model( self, model: MlemModel, ): self.model_hash = model.meta_hash() + if model.is_saved: + self.model_link = model.make_link().typed + else: + self.model_link = None + + @validator("declaration") + def validate_declaration( # pylint: disable=no-self-argument + cls, value: "MlemDeployment" + ): + copy = value.copy() + copy.env = value.get_env() + return copy DT = TypeVar("DT", bound="MlemDeployment") @@ -1067,10 +1083,9 @@ def _state_manager(self) -> StateManager: return self.state_manager def get_state(self) -> ST: - return ( - self._state_manager.get_state(self, self.state_type) - or self.state_type() - ) + return self._state_manager.get_state( + self, self.state_type + ) or self.state_type(declaration=self) def lock_state(self): return self._state_manager.lock_state(self) @@ -1144,6 +1159,17 @@ def remove(self): def get_status(self, raise_on_error=True) -> "DeployStatus": raise NotImplementedError + def check_unchanged(self): + declaration = self.get_state().declaration + copy = declaration.copy() + copy.env = None + self_copy = self.copy() + self_copy.env = None + if copy != self_copy or declaration.env != self.get_env(): + raise DeploymentError( + "Deployment parameters changed, this is not supported yet. 
Please re-create deployment with new parameters" + ) + def wait_for_status( self, status: Union[DeployStatus, Iterable[DeployStatus]], @@ -1225,3 +1251,6 @@ def find_object( raise ValueError(f"Ambiguous object {path}: {source_paths}") type_, source_path = source_paths[0] return type_, source_path + + +DeployState.update_forward_refs() diff --git a/tests/cli/test_deployment.py b/tests/cli/test_deployment.py index d3a8be1e..aee7c8fc 100644 --- a/tests/cli/test_deployment.py +++ b/tests/cli/test_deployment.py @@ -9,7 +9,7 @@ from mlem.cli.declare import create_declare_mlem_object_subcommand, declare from mlem.cli.deployment import create_deploy_run_command from mlem.contrib.heroku.meta import HerokuEnv -from mlem.core.errors import WrongMetaSubType +from mlem.core.errors import DeploymentError, WrongMetaSubType from mlem.core.meta_io import MLEM_EXT from mlem.core.metadata import load_meta from mlem.core.objects import ( @@ -67,7 +67,7 @@ def deploy(self, model: MlemModel): state.status = DeployStatus.RUNNING state.deployment = self state.env = self.get_env() - state.update_model_hash(model) + state.update_model(model) self.update_state(state) def remove(self): @@ -254,7 +254,8 @@ def test_deploy_apply( ): path = os.path.join(tmp_path, "output") result = runner.invoke( - f"deploy apply {mock_deploy_path} {data_path} -o {path}".split() + f"deploy apply {mock_deploy_path} {data_path} -o {path}".split(), + raise_on_error=True, ) assert result.exit_code == 0, ( result.stdout, @@ -465,3 +466,63 @@ def test_none_declared(runner: Runner, tmp_path, model_single_path): load_deploy=False, add_args="--env.env_param env_val", ) + + +def test_redeploy_changed(runner: Runner, tmp_path, model_single_path): + env_path = str(tmp_path / "env") + runner.invoke( + f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", + raise_on_error=True, + ) + deploy_path = str(tmp_path / "deploy") + runner.invoke( + f"declare deployment {MlemDeploymentMock.type} --param val --env {env_path} {deploy_path}", + raise_on_error=True, + ) + + runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) + + runner.invoke( + f"declare deployment {MlemDeploymentMock.type} --param val1 --env {env_path} {deploy_path}", + raise_on_error=True, + ) + + res = runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}" + ) + + assert res.exit_code != 0 + assert isinstance(res.exception, DeploymentError) + + +def test_redeploy_env_changed(runner: Runner, tmp_path, model_single_path): + env_path = str(tmp_path / "env") + runner.invoke( + f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", + raise_on_error=True, + ) + deploy_path = str(tmp_path / "deploy") + runner.invoke( + f"declare deployment {MlemDeploymentMock.type} --param val --env {env_path} {deploy_path}", + raise_on_error=True, + ) + + runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) + + runner.invoke( + f"declare env {MlemEnvMock.type} --env_param env_val1 {env_path}", + raise_on_error=True, + ) + + res = runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}" + ) + + assert res.exit_code != 0 + assert isinstance(res.exception, DeploymentError) diff --git a/tests/contrib/test_heroku.py b/tests/contrib/test_heroku.py index aff9ad57..93f47046 100644 --- a/tests/contrib/test_heroku.py +++ b/tests/contrib/test_heroku.py @@ -109,7 +109,7 @@ def test_build_heroku_docker(model: MlemModel, uses_docker_build): def 
test_state_ensured_app(): - state = HerokuState() + state = HerokuState(declaration=HerokuDeployment(app_name="")) with pytest.raises(ValueError): assert state.ensured_app is not None From d40c005a2118a9998b9de09f3dd04b3920396b5f Mon Sep 17 00:00:00 2001 From: mike0sv Date: Thu, 29 Sep 2022 17:55:46 +0300 Subject: [PATCH 06/26] fix lint --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 0d10ac3b..a64c0d56 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,6 +7,7 @@ ignore = B008, # Do not perform function calls in argument defaults: conflicts with typer P1, # unindexed parameters in the str.format, see: B902, # Invalid first argument 'cls' used for instance method. + B024, # ABCs without methods # https://pypi.org/project/flake8-string-format/ max_line_length = 79 max-complexity = 15 From a2d1d4e7ecaa6039d938352ec68cc1ecb4ce16f2 Mon Sep 17 00:00:00 2001 From: mike0sv Date: Thu, 29 Sep 2022 21:54:38 +0300 Subject: [PATCH 07/26] fix tests --- tests/cli/test_deployment.py | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/tests/cli/test_deployment.py b/tests/cli/test_deployment.py index aee7c8fc..8f15920f 100644 --- a/tests/cli/test_deployment.py +++ b/tests/cli/test_deployment.py @@ -401,11 +401,11 @@ def test_declare_type_mismatch(runner: Runner, tmp_path, model_single_path): raise_on_error=True, ) - res = runner.invoke( - f"deploy run --load {deploy_path} --model {model_single_path}", - ) - assert res.exit_code != 0 - assert isinstance(res.exception, WrongMetaSubType) + with pytest.raises(WrongMetaSubType): + runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) def test_deploy_declared(runner: Runner, tmp_path, model_single_path): @@ -489,13 +489,11 @@ def test_redeploy_changed(runner: Runner, tmp_path, model_single_path): f"declare deployment {MlemDeploymentMock.type} --param val1 --env {env_path} {deploy_path}", raise_on_error=True, ) - - res = runner.invoke( - f"deploy run --load {deploy_path} --model {model_single_path}" - ) - - assert res.exit_code != 0 - assert isinstance(res.exception, DeploymentError) + with pytest.raises(DeploymentError): + runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) def test_redeploy_env_changed(runner: Runner, tmp_path, model_single_path): @@ -520,9 +518,8 @@ def test_redeploy_env_changed(runner: Runner, tmp_path, model_single_path): raise_on_error=True, ) - res = runner.invoke( - f"deploy run --load {deploy_path} --model {model_single_path}" - ) - - assert res.exit_code != 0 - assert isinstance(res.exception, DeploymentError) + with pytest.raises(DeploymentError): + runner.invoke( + f"deploy run --load {deploy_path} --model {model_single_path}", + raise_on_error=True, + ) From eec64f848b2209eb6675cac5abdb21cbaddaf9da Mon Sep 17 00:00:00 2001 From: mike0sv Date: Thu, 29 Sep 2022 23:08:42 +0300 Subject: [PATCH 08/26] fix tests --- .github/workflows/check-test-release.yml | 1 + tests/cli/conftest.py | 11 +++++ tests/cli/test_apply.py | 53 +++++++++++------------- tests/cli/test_stderr.py | 4 ++ 4 files changed, 41 insertions(+), 28 deletions(-) diff --git a/.github/workflows/check-test-release.yml b/.github/workflows/check-test-release.yml index ff8bb36e..2d84714b 100644 --- a/.github/workflows/check-test-release.yml +++ b/.github/workflows/check-test-release.yml @@ -7,6 +7,7 @@ on: env: MLEM_TESTS: "true" + MLEM_DEBUG: "true" jobs: authorize: diff --git 
a/tests/cli/conftest.py b/tests/cli/conftest.py index e39bb17a..a161069e 100644 --- a/tests/cli/conftest.py +++ b/tests/cli/conftest.py @@ -2,6 +2,7 @@ from click.testing import Result from typer.testing import CliRunner +from mlem import LOCAL_CONFIG from mlem.cli import app app.pretty_exceptions_short = False @@ -23,3 +24,13 @@ def invoke(self, *args, raise_on_error: bool = False, **kwargs) -> Result: @pytest.fixture def runner() -> Runner: return Runner() + + +@pytest.fixture +def no_debug(): + tmp = LOCAL_CONFIG.DEBUG + try: + LOCAL_CONFIG.DEBUG = False + yield + finally: + LOCAL_CONFIG.DEBUG = tmp diff --git a/tests/cli/test_apply.py b/tests/cli/test_apply.py index bd37eddb..e7c0f0f4 100644 --- a/tests/cli/test_apply.py +++ b/tests/cli/test_apply.py @@ -10,10 +10,11 @@ from mlem.api import load, save from mlem.core.data_type import ArrayType -from mlem.core.errors import MlemProjectNotFound +from mlem.core.errors import MlemProjectNotFound, UnsupportedDataBatchLoading from mlem.core.metadata import load_meta from mlem.core.objects import MlemData from mlem.runtime.client import HTTPClient +from tests.cli.conftest import Runner from tests.conftest import MLEM_TEST_REPO, long, need_test_repo_auth @@ -134,39 +135,35 @@ def test_apply_with_import(runner, model_meta_saved_single, tmp_path_factory): def test_apply_batch_with_import( - runner, model_meta_saved_single, tmp_path_factory + runner: Runner, model_meta_saved_single, tmp_path_factory ): data_path = os.path.join(tmp_path_factory.getbasetemp(), "import_data") load_iris(return_X_y=True, as_frame=True)[0].to_csv(data_path, index=False) with tempfile.TemporaryDirectory() as dir: path = posixpath.join(dir, "data") - result = runner.invoke( - [ - "apply", - model_meta_saved_single.loc.uri, - data_path, - "-m", - "predict", - "-o", - path, - "--no-index", - "--import", - "--it", - "pandas[csv]", - "-b", - "2", - ], - ) - assert result.exit_code == 1, ( - result.stdout, - result.stderr, - result.exception, - ) - assert ( - "Batch data loading is currently not supported for loading data on-the-fly" - in result.stderr - ) + with pytest.raises( + UnsupportedDataBatchLoading, + match="Batch data loading is currently not supported for loading data on-the-fly", + ): + runner.invoke( + [ + "apply", + model_meta_saved_single.loc.uri, + data_path, + "-m", + "predict", + "-o", + path, + "--no-index", + "--import", + "--it", + "pandas[csv]", + "-b", + "2", + ], + raise_on_error=True, + ) def test_apply_no_output(runner, model_path, data_path): diff --git a/tests/cli/test_stderr.py b/tests/cli/test_stderr.py index 87ae56ae..db8a65e2 100644 --- a/tests/cli/test_stderr.py +++ b/tests/cli/test_stderr.py @@ -1,12 +1,15 @@ from io import StringIO from unittest import mock +import pytest + from mlem.core.errors import MlemError from mlem.ui import echo, stderr_echo EXCEPTION_MESSAGE = "Test Exception Message" +@pytest.mark.usefixtures("no_debug") def test_stderr_exception(runner): # patch the ls command and ensure it throws an expection. with mock.patch( @@ -27,6 +30,7 @@ def test_stderr_exception(runner): MLEM_ERROR_MESSAGE = "Test Mlem Error Message" +@pytest.mark.usefixtures("no_debug") def test_stderr_mlem_error(runner): # patch the ls command and ensure it throws a mlem error. 
with mock.patch( From 00daf07b5ee5cee6a4d8c07a4e8bdfe3403703f3 Mon Sep 17 00:00:00 2001 From: mike0sv Date: Thu, 29 Sep 2022 23:11:10 +0300 Subject: [PATCH 09/26] fix tests --- tests/contrib/test_heroku.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/contrib/test_heroku.py b/tests/contrib/test_heroku.py index 93f47046..8cd80ba6 100644 --- a/tests/contrib/test_heroku.py +++ b/tests/contrib/test_heroku.py @@ -187,7 +187,7 @@ def test_env_deploy_full( if CLEAR_APPS: meta.remove() - assert meta.get_state() == HerokuState() + assert meta.get_state() == HerokuState(declaration=meta) meta.wait_for_status( DeployStatus.NOT_DEPLOYED, allowed_intermediate=DeployStatus.RUNNING, From 6f5e7132d38ec2f4679a746226c588be609be9f2 Mon Sep 17 00:00:00 2001 From: mike0sv Date: Fri, 30 Sep 2022 17:42:44 +0300 Subject: [PATCH 10/26] add debug env --- tests/conftest.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index d18bb539..fb848149 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -100,6 +100,12 @@ def add_test_env(): LOCAL_CONFIG.TESTS = True +@pytest.fixture(scope="session", autouse=True) +def add_debug_env(): + os.environ["MLEM_DEBUG"] = "true" + LOCAL_CONFIG.DEBUG = True + + def resource_path(test_file, *paths): resources_dir = os.path.join(os.path.dirname(test_file), RESOURCES) return os.path.join(resources_dir, *paths) From f150f571dc5f3b7dbde5a3f7a2fcb60a1d584c65 Mon Sep 17 00:00:00 2001 From: mike0sv Date: Sat, 1 Oct 2022 14:50:13 +0300 Subject: [PATCH 11/26] fix crash on missing deps --- mlem/cli/declare.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mlem/cli/declare.py b/mlem/cli/declare.py index d59b3714..65ef730d 100644 --- a/mlem/cli/declare.py +++ b/mlem/cli/declare.py @@ -53,7 +53,11 @@ def create_declare_mlem_object(type_name, cls: Type[MlemObject]): def add_env_params_deployment(subtype, parent_cls: Type[MlemDeployment]): - impl = load_impl_ext(parent_cls.object_type, subtype) + try: + impl = load_impl_ext(parent_cls.object_type, subtype) + except ImportError: + return lambda ctx: [] + assert issubclass(impl, MlemDeployment) # just to help mypy env_impl = impl.env_type return lambda ctx: itertools.chain( From 79bfb43cd71d5d662ce3ed6966a44b7291202daa Mon Sep 17 00:00:00 2001 From: mike0sv Date: Sun, 2 Oct 2022 12:43:44 +0300 Subject: [PATCH 12/26] linting --- mlem/contrib/pandas.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlem/contrib/pandas.py b/mlem/contrib/pandas.py index 80918822..9bd4c164 100644 --- a/mlem/contrib/pandas.py +++ b/mlem/contrib/pandas.py @@ -458,7 +458,9 @@ def read_pickle_with_unnamed(*args, **kwargs): def read_json_reset_index(*args, **kwargs): - return pd.read_json(*args, **kwargs).reset_index(drop=True) + return pd.read_json( # pylint: disable=no-member + *args, **kwargs + ).reset_index(drop=True) def read_html(*args, **kwargs): From de6e381f02dc4d1eb3bd6a20a31a7a1c8e66cae2 Mon Sep 17 00:00:00 2001 From: Madhur Tandon Date: Mon, 3 Oct 2022 18:14:19 +0530 Subject: [PATCH 13/26] fix issues with pandas pylint and flake8 --- mlem/contrib/pandas.py | 4 +++- setup.cfg | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/mlem/contrib/pandas.py b/mlem/contrib/pandas.py index 601c9c32..8dcbae87 100644 --- a/mlem/contrib/pandas.py +++ b/mlem/contrib/pandas.py @@ -459,7 +459,9 @@ def read_pickle_with_unnamed(*args, **kwargs): def read_json_reset_index(*args, **kwargs): - return pd.read_json(*args, 
**kwargs).reset_index(drop=True) + return pd.read_json( # pylint: disable=no-member + *args, **kwargs + ).reset_index(drop=True) def read_html(*args, **kwargs): diff --git a/setup.cfg b/setup.cfg index 2ed8d795..41416eef 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,8 +5,9 @@ ignore = E266, # Too many leading '#' for block comment W503, # Line break occurred before a binary operator B008, # Do not perform function calls in argument defaults: conflicts with typer - P1, # unindexed parameters in the str.format, see: + P1, # unindexed parameters in the str.format, see: B902, # Invalid first argument 'cls' used for instance method. + B024, # ABCs without methods # https://pypi.org/project/flake8-string-format/ max_line_length = 79 max-complexity = 15 From 0c25d2d400ffc58a33afaddf9d33cc5e3593c9ee Mon Sep 17 00:00:00 2001 From: mike0sv Date: Mon, 3 Oct 2022 17:18:21 +0300 Subject: [PATCH 14/26] fix requirements --- mlem/utils/module.py | 13 ++++++++++++- tests/contrib/test_pandas.py | 2 +- tests/contrib/test_sklearn.py | 9 +++++++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/mlem/utils/module.py b/mlem/utils/module.py index b03b62af..142aa0e1 100644 --- a/mlem/utils/module.py +++ b/mlem/utils/module.py @@ -411,7 +411,10 @@ def wrapper(pickler: "RequirementAnalyzer", obj): else: pickler.save(o) - if is_from_installable_module(obj): + if ( + is_from_installable_module(obj) + or get_object_base_module(obj) is mlem + ): return f(pickler, obj) # to add from local imports inside user (non PIP package) code @@ -514,6 +517,7 @@ def _should_ignore(self, mod: ModuleType): or is_private_module(mod) or is_pseudo_module(mod) or is_builtin_module(mod) + or mod in self._modules ) def add_requirement(self, obj_or_module): @@ -533,6 +537,11 @@ def add_requirement(self, obj_or_module): module = obj_or_module if module is not None and not self._should_ignore(module): + base_module = get_base_module(module) + if is_installable_module(base_module): + if base_module in self._modules: + return + module = base_module self._modules.add(module) if is_local_module(module): # add imports of this module @@ -553,6 +562,8 @@ def save(self, obj, save_persistent_id=True): if id(obj) in self.seen or isinstance(obj, IGNORE_TYPES_REQ): return None self.seen.add(id(obj)) + if get_object_base_module(obj) in self._modules: + return None self.add_requirement(obj) try: return super().save(obj, save_persistent_id) diff --git a/tests/contrib/test_pandas.py b/tests/contrib/test_pandas.py index 3d1c770e..d8c258cd 100644 --- a/tests/contrib/test_pandas.py +++ b/tests/contrib/test_pandas.py @@ -617,7 +617,7 @@ def f(x): sig = Signature.from_method(f, auto_infer=True, x=data) - assert set(get_object_requirements(sig).modules) == {"pandas", "numpy"} + assert set(get_object_requirements(sig).modules) == {"pandas"} # Copyright 2019 Zyfra diff --git a/tests/contrib/test_sklearn.py b/tests/contrib/test_sklearn.py index 9673e62f..72af8461 100644 --- a/tests/contrib/test_sklearn.py +++ b/tests/contrib/test_sklearn.py @@ -164,11 +164,16 @@ def test_model_type_lgb__dump_load(tmpdir, lgbm_model, inp_data): ] -def test_pipeline_requirements(lgbm_model): +def test_pipeline_requirements(lgbm_model, inp_data): model = Pipeline(steps=[("model", lgbm_model)]) meta = MlemModel.from_obj(model) - expected_requirements = {"sklearn", "lightgbm", "pandas", "numpy", "scipy"} + expected_requirements = {"sklearn", "lightgbm"} + assert set(meta.requirements.modules) == expected_requirements + + meta = MlemModel.from_obj(model, 
sample_data=np.array(inp_data)) + + expected_requirements = {"sklearn", "lightgbm", "numpy"} assert set(meta.requirements.modules) == expected_requirements From a84028208beb7c0f697a5a744eaa51d485508dfd Mon Sep 17 00:00:00 2001 From: mike0sv Date: Mon, 3 Oct 2022 17:22:13 +0300 Subject: [PATCH 15/26] fix req tests --- tests/contrib/test_lightgbm.py | 2 +- tests/contrib/test_sklearn.py | 2 +- tests/contrib/test_xgboost.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/contrib/test_lightgbm.py b/tests/contrib/test_lightgbm.py index 56ed8f7b..bd2e193a 100644 --- a/tests/contrib/test_lightgbm.py +++ b/tests/contrib/test_lightgbm.py @@ -195,7 +195,7 @@ def test_model__predict_not_dataset(model): @long def test_model__dump_load(tmpdir, model, data_np, local_fs): # pandas is not required, but if it is installed, it is imported by lightgbm - expected_requirements = {"lightgbm", "numpy", "scipy", "pandas"} + expected_requirements = {"lightgbm", "numpy"} assert set(model.get_requirements().modules) == expected_requirements artifacts = model.dump(LOCAL_STORAGE, tmpdir) diff --git a/tests/contrib/test_sklearn.py b/tests/contrib/test_sklearn.py index 72af8461..57dbde36 100644 --- a/tests/contrib/test_sklearn.py +++ b/tests/contrib/test_sklearn.py @@ -140,7 +140,7 @@ def test_model_type__dump_load(tmpdir, model, inp_data, request): def test_model_type_lgb__dump_load(tmpdir, lgbm_model, inp_data): model_type = ModelAnalyzer.analyze(lgbm_model, sample_data=inp_data) - expected_requirements = {"sklearn", "lightgbm", "pandas", "numpy", "scipy"} + expected_requirements = {"sklearn", "lightgbm", "numpy"} reqs = model_type.get_requirements().expanded assert set(reqs.modules) == expected_requirements assert reqs.of_type(UnixPackageRequirement) == [ diff --git a/tests/contrib/test_xgboost.py b/tests/contrib/test_xgboost.py index 4b385196..dfb431f1 100644 --- a/tests/contrib/test_xgboost.py +++ b/tests/contrib/test_xgboost.py @@ -133,7 +133,7 @@ def test_model__predict_not_dmatrix(model): @long def test_model__dump_load(tmpdir, model, dmatrix_np, local_fs): # pandas is not required, but it is conditionally imported by some Booster methods - expected_requirements = {"xgboost", "numpy", "scipy", "pandas"} + expected_requirements = {"xgboost", "numpy"} assert set(model.get_requirements().modules) == expected_requirements artifacts = model.dump(LOCAL_STORAGE, tmpdir) From bac1c3591a9d87532c161f920239f535be2c59ad Mon Sep 17 00:00:00 2001 From: mike0sv Date: Mon, 3 Oct 2022 17:22:37 +0300 Subject: [PATCH 16/26] remove comment --- tests/contrib/test_xgboost.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/contrib/test_xgboost.py b/tests/contrib/test_xgboost.py index dfb431f1..0a2feb1f 100644 --- a/tests/contrib/test_xgboost.py +++ b/tests/contrib/test_xgboost.py @@ -132,7 +132,6 @@ def test_model__predict_not_dmatrix(model): @long def test_model__dump_load(tmpdir, model, dmatrix_np, local_fs): - # pandas is not required, but it is conditionally imported by some Booster methods expected_requirements = {"xgboost", "numpy"} assert set(model.get_requirements().modules) == expected_requirements From 10a34fd075d31adadff6c0eb06c9b6517f6e57b1 Mon Sep 17 00:00:00 2001 From: mike0sv Date: Mon, 3 Oct 2022 17:28:46 +0300 Subject: [PATCH 17/26] fix docker tests --- tests/contrib/test_docker/test_deploy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/contrib/test_docker/test_deploy.py b/tests/contrib/test_docker/test_deploy.py index 9035f49f..2ae58ac7 100644 --- 
a/tests/contrib/test_docker/test_deploy.py +++ b/tests/contrib/test_docker/test_deploy.py @@ -134,7 +134,9 @@ def _check_runner(img, env: DockerEnv, model): instance.dump(os.path.join(tmpdir, "deploy")) instance.update_state( DockerContainerState( - image=DockerImage(name=img), model_hash=model.meta_hash() + image=DockerImage(name=img), + model_hash=model.meta_hash(), + declaration=instance, ) ) assert instance.get_status() == DeployStatus.NOT_DEPLOYED From 7613c1015a9b777d83c6550d712d582055873778 Mon Sep 17 00:00:00 2001 From: mike0sv Date: Mon, 3 Oct 2022 17:35:58 +0300 Subject: [PATCH 18/26] fix catboost req tests --- tests/contrib/test_catboost.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/contrib/test_catboost.py b/tests/contrib/test_catboost.py index 51ad753a..ada91728 100644 --- a/tests/contrib/test_catboost.py +++ b/tests/contrib/test_catboost.py @@ -48,7 +48,7 @@ def test_catboost_model(catboost_model_fixture, pandas_data, tmpdir, request): ), ) - expected_requirements = {"catboost", "pandas", "numpy", "scipy"} + expected_requirements = {"catboost", "pandas"} reqs = set(cbmw.get_requirements().modules) assert all(r in reqs for r in expected_requirements) assert cbmw.model is catboost_model From 779b8b320d0a93a030d9fdb6d0a4cf24199e86f8 Mon Sep 17 00:00:00 2001 From: Madhur Tandon <20173739+madhur-tandon@users.noreply.github.com> Date: Mon, 3 Oct 2022 21:13:36 +0530 Subject: [PATCH 19/26] fix issues with pandas pylint and flake8 (#427) * fix issues with pandas pylint and flake8 * fix requirements * fix req tests * remove comment * fix catboost req tests Co-authored-by: mike0sv --- mlem/contrib/pandas.py | 4 +++- mlem/utils/module.py | 13 ++++++++++++- setup.cfg | 3 ++- tests/contrib/test_catboost.py | 2 +- tests/contrib/test_lightgbm.py | 2 +- tests/contrib/test_pandas.py | 2 +- tests/contrib/test_sklearn.py | 11 ++++++++--- tests/contrib/test_xgboost.py | 3 +-- 8 files changed, 29 insertions(+), 11 deletions(-) diff --git a/mlem/contrib/pandas.py b/mlem/contrib/pandas.py index 601c9c32..8dcbae87 100644 --- a/mlem/contrib/pandas.py +++ b/mlem/contrib/pandas.py @@ -459,7 +459,9 @@ def read_pickle_with_unnamed(*args, **kwargs): def read_json_reset_index(*args, **kwargs): - return pd.read_json(*args, **kwargs).reset_index(drop=True) + return pd.read_json( # pylint: disable=no-member + *args, **kwargs + ).reset_index(drop=True) def read_html(*args, **kwargs): diff --git a/mlem/utils/module.py b/mlem/utils/module.py index b03b62af..142aa0e1 100644 --- a/mlem/utils/module.py +++ b/mlem/utils/module.py @@ -411,7 +411,10 @@ def wrapper(pickler: "RequirementAnalyzer", obj): else: pickler.save(o) - if is_from_installable_module(obj): + if ( + is_from_installable_module(obj) + or get_object_base_module(obj) is mlem + ): return f(pickler, obj) # to add from local imports inside user (non PIP package) code @@ -514,6 +517,7 @@ def _should_ignore(self, mod: ModuleType): or is_private_module(mod) or is_pseudo_module(mod) or is_builtin_module(mod) + or mod in self._modules ) def add_requirement(self, obj_or_module): @@ -533,6 +537,11 @@ def add_requirement(self, obj_or_module): module = obj_or_module if module is not None and not self._should_ignore(module): + base_module = get_base_module(module) + if is_installable_module(base_module): + if base_module in self._modules: + return + module = base_module self._modules.add(module) if is_local_module(module): # add imports of this module @@ -553,6 +562,8 @@ def save(self, obj, save_persistent_id=True): if id(obj) in 
self.seen or isinstance(obj, IGNORE_TYPES_REQ): return None self.seen.add(id(obj)) + if get_object_base_module(obj) in self._modules: + return None self.add_requirement(obj) try: return super().save(obj, save_persistent_id) diff --git a/setup.cfg b/setup.cfg index 2ed8d795..41416eef 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,8 +5,9 @@ ignore = E266, # Too many leading '#' for block comment W503, # Line break occurred before a binary operator B008, # Do not perform function calls in argument defaults: conflicts with typer - P1, # unindexed parameters in the str.format, see: + P1, # unindexed parameters in the str.format, see: B902, # Invalid first argument 'cls' used for instance method. + B024, # ABCs without methods # https://pypi.org/project/flake8-string-format/ max_line_length = 79 max-complexity = 15 diff --git a/tests/contrib/test_catboost.py b/tests/contrib/test_catboost.py index 51ad753a..ada91728 100644 --- a/tests/contrib/test_catboost.py +++ b/tests/contrib/test_catboost.py @@ -48,7 +48,7 @@ def test_catboost_model(catboost_model_fixture, pandas_data, tmpdir, request): ), ) - expected_requirements = {"catboost", "pandas", "numpy", "scipy"} + expected_requirements = {"catboost", "pandas"} reqs = set(cbmw.get_requirements().modules) assert all(r in reqs for r in expected_requirements) assert cbmw.model is catboost_model diff --git a/tests/contrib/test_lightgbm.py b/tests/contrib/test_lightgbm.py index 56ed8f7b..bd2e193a 100644 --- a/tests/contrib/test_lightgbm.py +++ b/tests/contrib/test_lightgbm.py @@ -195,7 +195,7 @@ def test_model__predict_not_dataset(model): @long def test_model__dump_load(tmpdir, model, data_np, local_fs): # pandas is not required, but if it is installed, it is imported by lightgbm - expected_requirements = {"lightgbm", "numpy", "scipy", "pandas"} + expected_requirements = {"lightgbm", "numpy"} assert set(model.get_requirements().modules) == expected_requirements artifacts = model.dump(LOCAL_STORAGE, tmpdir) diff --git a/tests/contrib/test_pandas.py b/tests/contrib/test_pandas.py index 3d1c770e..d8c258cd 100644 --- a/tests/contrib/test_pandas.py +++ b/tests/contrib/test_pandas.py @@ -617,7 +617,7 @@ def f(x): sig = Signature.from_method(f, auto_infer=True, x=data) - assert set(get_object_requirements(sig).modules) == {"pandas", "numpy"} + assert set(get_object_requirements(sig).modules) == {"pandas"} # Copyright 2019 Zyfra diff --git a/tests/contrib/test_sklearn.py b/tests/contrib/test_sklearn.py index 9673e62f..57dbde36 100644 --- a/tests/contrib/test_sklearn.py +++ b/tests/contrib/test_sklearn.py @@ -140,7 +140,7 @@ def test_model_type__dump_load(tmpdir, model, inp_data, request): def test_model_type_lgb__dump_load(tmpdir, lgbm_model, inp_data): model_type = ModelAnalyzer.analyze(lgbm_model, sample_data=inp_data) - expected_requirements = {"sklearn", "lightgbm", "pandas", "numpy", "scipy"} + expected_requirements = {"sklearn", "lightgbm", "numpy"} reqs = model_type.get_requirements().expanded assert set(reqs.modules) == expected_requirements assert reqs.of_type(UnixPackageRequirement) == [ @@ -164,11 +164,16 @@ def test_model_type_lgb__dump_load(tmpdir, lgbm_model, inp_data): ] -def test_pipeline_requirements(lgbm_model): +def test_pipeline_requirements(lgbm_model, inp_data): model = Pipeline(steps=[("model", lgbm_model)]) meta = MlemModel.from_obj(model) - expected_requirements = {"sklearn", "lightgbm", "pandas", "numpy", "scipy"} + expected_requirements = {"sklearn", "lightgbm"} + assert set(meta.requirements.modules) == expected_requirements + + meta = 
MlemModel.from_obj(model, sample_data=np.array(inp_data)) + + expected_requirements = {"sklearn", "lightgbm", "numpy"} assert set(meta.requirements.modules) == expected_requirements diff --git a/tests/contrib/test_xgboost.py b/tests/contrib/test_xgboost.py index 4b385196..0a2feb1f 100644 --- a/tests/contrib/test_xgboost.py +++ b/tests/contrib/test_xgboost.py @@ -132,8 +132,7 @@ def test_model__predict_not_dmatrix(model): @long def test_model__dump_load(tmpdir, model, dmatrix_np, local_fs): - # pandas is not required, but it is conditionally imported by some Booster methods - expected_requirements = {"xgboost", "numpy", "scipy", "pandas"} + expected_requirements = {"xgboost", "numpy"} assert set(model.get_requirements().modules) == expected_requirements artifacts = model.dump(LOCAL_STORAGE, tmpdir) From 8a3868a28ea9df387d7257281783e68959ecc7ca Mon Sep 17 00:00:00 2001 From: mike0sv Date: Tue, 4 Oct 2022 00:14:27 +0300 Subject: [PATCH 20/26] fix k8s tests --- tests/contrib/test_kubernetes/test_base.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/contrib/test_kubernetes/test_base.py b/tests/contrib/test_kubernetes/test_base.py index 269cd0e4..ed215b27 100644 --- a/tests/contrib/test_kubernetes/test_base.py +++ b/tests/contrib/test_kubernetes/test_base.py @@ -56,10 +56,9 @@ def model_meta(tmp_path_factory): @pytest.fixture(scope="session") -def k8s_deployment(minikube_env_variables, model_meta): +def k8s_deployment(minikube_env_variables): return K8sDeployment( - name="ml", - model=model_meta.make_link(), + namespace="ml", image_pull_policy=ImagePullPolicy.never, service_type=LoadBalancerService(), daemon=DockerDaemon(host=os.getenv("DOCKER_HOST", default="")), @@ -67,11 +66,11 @@ def k8s_deployment(minikube_env_variables, model_meta): @pytest.fixture(scope="session") -def docker_image(k8s_deployment): +def docker_image(k8s_deployment, model_meta): tmpdir = tempfile.mkdtemp() k8s_deployment.dump(os.path.join(tmpdir, "deploy")) return build_k8s_docker( - k8s_deployment.get_model(), + model_meta, k8s_deployment.image_name, DockerRegistry(), DockerDaemon(host=os.getenv("DOCKER_HOST", default="")), @@ -81,10 +80,11 @@ def docker_image(k8s_deployment): @pytest.fixture -def k8s_deployment_state(docker_image, model_meta): +def k8s_deployment_state(docker_image, model_meta, k8s_deployment): return K8sDeploymentState( image=docker_image, model_hash=model_meta.meta_hash(), + declaration=k8s_deployment, ) From 5515c23e99be1a595095ba89a2f5900f8235f3ef Mon Sep 17 00:00:00 2001 From: mike0sv Date: Tue, 4 Oct 2022 02:55:05 +0300 Subject: [PATCH 21/26] fix k8s tests --- tests/contrib/test_kubernetes/test_base.py | 41 ++++++++++------------ 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/tests/contrib/test_kubernetes/test_base.py b/tests/contrib/test_kubernetes/test_base.py index ed215b27..df8c13ec 100644 --- a/tests/contrib/test_kubernetes/test_base.py +++ b/tests/contrib/test_kubernetes/test_base.py @@ -12,15 +12,11 @@ from mlem.api import save from mlem.config import project_config from mlem.contrib.docker.base import DockerDaemon, DockerRegistry -from mlem.contrib.kubernetes.base import ( - K8sDeployment, - K8sDeploymentState, - K8sEnv, -) +from mlem.contrib.kubernetes.base import K8sDeployment, K8sDeploymentState from mlem.contrib.kubernetes.build import build_k8s_docker from mlem.contrib.kubernetes.context import ImagePullPolicy from mlem.contrib.kubernetes.service import LoadBalancerService -from mlem.core.objects import DeployStatus +from 
mlem.core.objects import DeployStatus, MlemModel from tests.contrib.test_kubernetes.conftest import k8s_test from tests.contrib.test_kubernetes.utils import Command @@ -88,41 +84,40 @@ def k8s_deployment_state(docker_image, model_meta, k8s_deployment): ) -@pytest.fixture -def k8s_env(): - return K8sEnv() - - @k8s_test @pytest.mark.usefixtures("load_kube_config") def test_deploy( - k8s_deployment, - k8s_deployment_state, - k8s_env, + k8s_deployment: K8sDeployment, + k8s_deployment_state: K8sDeploymentState, + model_meta: MlemModel, ): k8s_deployment.update_state(k8s_deployment_state) - assert k8s_env.get_status(k8s_deployment) == DeployStatus.NOT_DEPLOYED - k8s_env.deploy(k8s_deployment) + assert ( + k8s_deployment.get_status(k8s_deployment) == DeployStatus.NOT_DEPLOYED + ) + k8s_deployment.deploy(model_meta) k8s_deployment.wait_for_status( DeployStatus.RUNNING, allowed_intermediate=[DeployStatus.STARTING], timeout=10, times=5, ) - assert k8s_env.get_status(k8s_deployment) == DeployStatus.RUNNING - k8s_env.remove(k8s_deployment) - assert k8s_env.get_status(k8s_deployment) == DeployStatus.NOT_DEPLOYED + assert k8s_deployment.get_status(k8s_deployment) == DeployStatus.RUNNING + k8s_deployment.remove() + assert ( + k8s_deployment.get_status(k8s_deployment) == DeployStatus.NOT_DEPLOYED + ) @k8s_test @pytest.mark.usefixtures("load_kube_config") def test_deployed_service( - k8s_deployment, - k8s_deployment_state, - k8s_env, + k8s_deployment: K8sDeployment, + k8s_deployment_state: K8sDeploymentState, + model_meta: MlemModel, ): k8s_deployment.update_state(k8s_deployment_state) - k8s_env.deploy(k8s_deployment) + k8s_deployment.deploy(model_meta) cmd = Command("minikube tunnel") cmd.run(timeout=20, shell=True) client = k8s_deployment.get_client() From 79088d7d0424880d69e4614eb15aa4f30a1b6e4f Mon Sep 17 00:00:00 2001 From: mike0sv Date: Tue, 4 Oct 2022 09:27:08 +0300 Subject: [PATCH 22/26] fix windows bugs --- tests/cli/test_deployment.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/cli/test_deployment.py b/tests/cli/test_deployment.py index 8f15920f..f91470f4 100644 --- a/tests/cli/test_deployment.py +++ b/tests/cli/test_deployment.py @@ -370,12 +370,12 @@ def test_all_declared(runner: Runner, tmp_path, model_single_path): # error on depl/env type mismatch TODO mlem deployment run --load service.mlem --model mdoel """ - env_path = str(tmp_path / "env") + env_path = make_posix(str(tmp_path / "env")) runner.invoke( f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", raise_on_error=True, ) - deploy_path = str(tmp_path / "deploy") + deploy_path = make_posix(str(tmp_path / "deploy")) runner.invoke( f"declare deployment {MlemDeploymentMock.type} --param val --env {env_path} {deploy_path}", raise_on_error=True, @@ -391,11 +391,11 @@ def test_declare_type_mismatch(runner: Runner, tmp_path, model_single_path): # error on depl/env type mismatch TODO mlem deployment run --load service.mlem --model mdoel """ - env_path = str(tmp_path / "env") + env_path = make_posix(str(tmp_path / "env")) runner.invoke( f"declare env {HerokuEnv.type} {env_path}", raise_on_error=True ) - deploy_path = str(tmp_path / "deploy") + deploy_path = make_posix(str(tmp_path / "deploy")) runner.invoke( f"declare deployment {MlemDeploymentMock.type} --param a --env {env_path} {deploy_path}", raise_on_error=True, @@ -413,7 +413,7 @@ def test_deploy_declared(runner: Runner, tmp_path, model_single_path): mlem declare deployment heroku --env.api_key prod.mlem --app_name myapp 
service.mlem mlem deployment run --load service.mlem --model mdoel """ - deploy_path = str(tmp_path / "deploy") + deploy_path = make_posix(str(tmp_path / "deploy")) declare_res = runner.invoke( f"declare deployment {MlemDeploymentMock.type} {deploy_path} --param val --env.env_param env_val ", raise_on_error=True, @@ -433,7 +433,7 @@ def test_env_declared(runner: Runner, tmp_path, model_single_path): mlem deployment run heroku service.mlem --model model --app_name myapp --env prod.mlem # error on type mismatch """ - env_path = str(tmp_path / "env") + env_path = make_posix(str(tmp_path / "env")) declare_res = runner.invoke( f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", raise_on_error=True, @@ -443,7 +443,7 @@ def test_env_declared(runner: Runner, tmp_path, model_single_path): declare_res.exception, declare_res.stderr, ) - deploy_path = str(tmp_path / "deploy") + deploy_path = make_posix(str(tmp_path / "deploy")) _deploy_and_check( runner, deploy_path, @@ -458,7 +458,7 @@ def test_none_declared(runner: Runner, tmp_path, model_single_path): mlem deployment run heroku service.mlem --model model --app_name myapp --env.api_key lol # error on args mismatch """ - deploy_path = str(tmp_path / "deploy") + deploy_path = make_posix(str(tmp_path / "deploy")) _deploy_and_check( runner, deploy_path, @@ -469,12 +469,12 @@ def test_none_declared(runner: Runner, tmp_path, model_single_path): def test_redeploy_changed(runner: Runner, tmp_path, model_single_path): - env_path = str(tmp_path / "env") + env_path = make_posix(str(tmp_path / "env")) runner.invoke( f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", raise_on_error=True, ) - deploy_path = str(tmp_path / "deploy") + deploy_path = make_posix(str(tmp_path / "deploy")) runner.invoke( f"declare deployment {MlemDeploymentMock.type} --param val --env {env_path} {deploy_path}", raise_on_error=True, @@ -497,12 +497,12 @@ def test_redeploy_changed(runner: Runner, tmp_path, model_single_path): def test_redeploy_env_changed(runner: Runner, tmp_path, model_single_path): - env_path = str(tmp_path / "env") + env_path = make_posix(str(tmp_path / "env")) runner.invoke( f"declare env {MlemEnvMock.type} --env_param env_val {env_path}", raise_on_error=True, ) - deploy_path = str(tmp_path / "deploy") + deploy_path = make_posix(str(tmp_path / "deploy")) runner.invoke( f"declare deployment {MlemDeploymentMock.type} --param val --env {env_path} {deploy_path}", raise_on_error=True, From 4a7ce2ddd37aa4dca0d61117ffe38f6e41013f66 Mon Sep 17 00:00:00 2001 From: mike0sv Date: Tue, 4 Oct 2022 16:31:58 +0300 Subject: [PATCH 23/26] fix sagemaker circular import --- mlem/contrib/sagemaker/build.py | 3 ++- mlem/utils/entrypoints.py | 14 +++++++++++--- setup.py | 6 ++++++ tests/test_ext.py | 2 +- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/mlem/contrib/sagemaker/build.py b/mlem/contrib/sagemaker/build.py index 6fc8cb54..468f46f9 100644 --- a/mlem/contrib/sagemaker/build.py +++ b/mlem/contrib/sagemaker/build.py @@ -10,7 +10,6 @@ from ...ui import EMOJI_BUILD, EMOJI_KEY, echo, set_offset from ..docker.base import DockerEnv, DockerImage, RemoteRegistry from ..docker.helpers import build_model_image -from .runtime import SageMakerServer IMAGE_NAME = "mlem-sagemaker-runner" @@ -116,6 +115,8 @@ def build_sagemaker_docker( repository: str, aws_vars: AWSVars, ): + from .runtime import SageMakerServer # circular import + docker_env = DockerEnv( registry=ECRegistry(account=account, region=region).with_aws_vars( aws_vars diff --git 
a/mlem/utils/entrypoints.py b/mlem/utils/entrypoints.py index 1c5b2ca1..1ad1080e 100644 --- a/mlem/utils/entrypoints.py +++ b/mlem/utils/entrypoints.py @@ -100,7 +100,9 @@ def list_abstractions( def find_implementations( - base: Type[IT], root_module_name: str = MLEM_ENTRY_POINT + base: Type[IT], + root_module_name: str = MLEM_ENTRY_POINT, + raise_on_error: bool = False, ) -> Dict[Type[IT], str]: """Generates dict with MLEM entrypoints which should appear in setup.py. Can be used by plugin developers to check if they populated all existing @@ -125,6 +127,8 @@ def find_implementations( print( f"Cannot import module {module_name}: {e.__class__} {e.args}" ) + if raise_on_error: + raise continue for obj in module.__dict__.values(): @@ -140,8 +144,12 @@ def find_implementations( return impls -def find_abc_implementations(root_module_name: str = MLEM_ENTRY_POINT): - impls = find_implementations(MlemABC, root_module_name) +def find_abc_implementations( + root_module_name: str = MLEM_ENTRY_POINT, raise_on_error: bool = False +): + impls = find_implementations( + MlemABC, root_module_name, raise_on_error=raise_on_error + ) return { MLEM_ENTRY_POINT: [ f"{obj.abs_name}.{obj.__get_alias__()} = {name}" diff --git a/setup.py b/setup.py index e949b929..eff5d0e5 100644 --- a/setup.py +++ b/setup.py @@ -195,6 +195,12 @@ "builder.whl = mlem.contrib.pip.base:WhlBuilder", "client.rmq = mlem.contrib.rabbitmq:RabbitMQClient", "server.rmq = mlem.contrib.rabbitmq:RabbitMQServer", + "docker_registry.ecr = mlem.contrib.sagemaker.build:ECRegistry", + "deploy_state.sagemaker = mlem.contrib.sagemaker.meta:SagemakerDeployState", + "deployment.sagemaker = mlem.contrib.sagemaker.meta:SagemakerDeployment", + "env.sagemaker = mlem.contrib.sagemaker.meta:SagemakerEnv", + "server._sagemaker = mlem.contrib.sagemaker.runtime:SageMakerServer", + "client.sagemaker = mlem.contrib.sagemaker.runtime:SagemakerClient", "model_type.sklearn = mlem.contrib.sklearn:SklearnModel", "model_type.sklearn_pipeline = mlem.contrib.sklearn:SklearnPipelineType", "model_type.tf_keras = mlem.contrib.tensorflow:TFKerasModel", diff --git a/tests/test_ext.py b/tests/test_ext.py index 23665206..09156eed 100644 --- a/tests/test_ext.py +++ b/tests/test_ext.py @@ -45,7 +45,7 @@ def test_all_impls_in_entrypoints(): # reinstall your dev copy of mlem to re-populate them exts = load_entrypoints() exts = {e.entry for e in exts.values()} - impls = find_abc_implementations()[MLEM_ENTRY_POINT] + impls = find_abc_implementations(raise_on_error=True)[MLEM_ENTRY_POINT] impls_sorted = sorted( impls, key=lambda x: tuple(x.split(" = ")[1].split(":")) ) From 5a349d028046a06e75950160ddb5d40c41a1c457 Mon Sep 17 00:00:00 2001 From: mike0sv Date: Tue, 4 Oct 2022 17:04:01 +0300 Subject: [PATCH 24/26] fix configs --- setup.py | 1 + tests/test_ext.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index eff5d0e5..654956a9 100644 --- a/setup.py +++ b/setup.py @@ -225,6 +225,7 @@ "heroku = mlem.contrib.heroku.config:HerokuConfig", "pandas = mlem.contrib.pandas:PandasConfig", "aws = mlem.contrib.sagemaker.config:AWSConfig", + "sagemaker = mlem.contrib.sagemaker.runtime:SageMakerServerConfig", ], }, zip_safe=False, diff --git a/tests/test_ext.py b/tests/test_ext.py index 09156eed..3de02447 100644 --- a/tests/test_ext.py +++ b/tests/test_ext.py @@ -58,7 +58,7 @@ def test_all_impls_in_entrypoints(): def test_all_configs_in_entrypoints(): - impls = find_implementations(MlemConfigBase) + impls = find_implementations(MlemConfigBase, 
raise_on_error=True) impls[MlemConfig] = f"{MlemConfig.__module__}:{MlemConfig.__name__}" impls_sorted = sorted( {f"{i.__config__.section} = {k}" for i, k in impls.items()}, From 0bd86f9e200ba0e6ce1ecfeb70df43603251fc4e Mon Sep 17 00:00:00 2001 From: Mikhail Sveshnikov Date: Wed, 5 Oct 2022 13:29:54 +0300 Subject: [PATCH 25/26] Apply suggestions from code review Co-authored-by: Alexander Guschin <1aguschin@gmail.com> --- mlem/contrib/docker/base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mlem/contrib/docker/base.py b/mlem/contrib/docker/base.py index 68e3ee4c..43f27349 100644 --- a/mlem/contrib/docker/base.py +++ b/mlem/contrib/docker/base.py @@ -383,7 +383,6 @@ def deploy(self, model: MlemModel): echo(EMOJI_OK + f"Container {state.container_name} is up") def remove(self): - # self.check_type(meta) with self.lock_state(): state = self.get_state() if state.container_id is None: @@ -402,7 +401,6 @@ def remove(self): self.update_state(state) def get_status(self, raise_on_error=True) -> DeployStatus: - # self.check_type(meta) state = self.get_state() if state.container_id is None: return DeployStatus.NOT_DEPLOYED From d60f3cfe956b40fac1156ca27a9d71ebdfb4d8aa Mon Sep 17 00:00:00 2001 From: Mikhail Sveshnikov Date: Wed, 5 Oct 2022 13:30:12 +0300 Subject: [PATCH 26/26] Update mlem/cli/deployment.py Co-authored-by: Alexander Guschin <1aguschin@gmail.com> --- mlem/cli/deployment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlem/cli/deployment.py b/mlem/cli/deployment.py index 38b27555..c8377961 100644 --- a/mlem/cli/deployment.py +++ b/mlem/cli/deployment.py @@ -131,7 +131,7 @@ def deploy_run_command( try: meta = load_meta(path, project=project, force_type=MlemDeployment) raise DeploymentError( - f"Deployment meta already exists at {meta.loc}. Please use mlem deployment run --load ..." + f"Deployment meta already exists at {meta.loc}. Please use `mlem deployment run --load ...`" ) except MlemObjectNotFound: with wrap_build_error(type_name, MlemDeployment):
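
A note on the `raise_on_error` flag introduced in patches 23/26 and 24/26: `find_implementations` (and `find_abc_implementations`) previously only printed a warning and skipped any module that failed to import, so a module broken by, for example, the circular import between `mlem.contrib.sagemaker.build` and `mlem.contrib.sagemaker.runtime` (worked around in 23/26 by deferring the import into the function body) would silently drop out of the generated entrypoint list. The sketch below illustrates the same fail-fast pattern with only the standard library; `iter_package_modules` and its arguments are illustrative helpers, not MLEM API.

import importlib
import pkgutil


def iter_package_modules(package_name: str, raise_on_error: bool = False):
    """Import every submodule of a package, optionally failing fast.

    Production code wants to skip modules that fail to import;
    tests want the ImportError to propagate so broken modules
    (e.g. ones with circular imports) are caught early.
    """
    package = importlib.import_module(package_name)
    for info in pkgutil.walk_packages(package.__path__, package_name + "."):
        try:
            module = importlib.import_module(info.name)
        except ImportError as e:
            # mirror the original behaviour: report and skip...
            print(f"Cannot import module {info.name}: {e.__class__} {e.args}")
            if raise_on_error:
                # ...unless the caller asked to fail fast (as the tests do)
                raise
            continue
        yield module

In the test suite this corresponds to `find_abc_implementations(raise_on_error=True)` and `find_implementations(MlemConfigBase, raise_on_error=True)`: an extension module that no longer imports now fails `test_all_impls_in_entrypoints` / `test_all_configs_in_entrypoints` instead of silently disappearing from the generated entrypoint list.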