diff --git a/.github/workflows/check-test-release.yml b/.github/workflows/check-test-release.yml
index 2d84714b..c38f2b49 100644
--- a/.github/workflows/check-test-release.yml
+++ b/.github/workflows/check-test-release.yml
@@ -68,6 +68,11 @@ jobs:
       - uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python }}
+      - uses: conda-incubator/setup-miniconda@v2
+        with:
+          python-version: ${{ matrix.python }}
+          auto-activate-base: true
+          activate-environment: ""
       - name: get pip cache dir
         id: pip-cache-dir
         run: |
diff --git a/mlem/contrib/requirements.py b/mlem/contrib/requirements.py
new file mode 100644
index 00000000..89a37fa1
--- /dev/null
+++ b/mlem/contrib/requirements.py
@@ -0,0 +1,53 @@
+"""Requirements support
+Extension type: build
+
+MlemBuilder implementation for `Requirements` which includes
+installable, conda, unix, custom, file etc. based requirements.
+"""
+import logging
+from typing import ClassVar, Optional
+
+from pydantic import validator
+
+from mlem.core.base import load_impl_ext
+from mlem.core.objects import MlemBuilder, MlemModel
+from mlem.core.requirements import Requirement
+from mlem.ui import EMOJI_OK, EMOJI_PACK, echo
+from mlem.utils.entrypoints import list_implementations
+
+REQUIREMENTS = "requirements.txt"
+
+logger = logging.getLogger(__name__)
+
+
+class RequirementsBuilder(MlemBuilder):
+    """MlemBuilder implementation for building requirements"""
+
+    type: ClassVar = "requirements"
+
+    target: Optional[str] = None
+    """Target path for requirements"""
+    req_type: str = "installable"
+    """Type of requirements, example: unix"""
+
+    @validator("req_type")
+    def get_req_type(cls, req_type):  # pylint: disable=no-self-argument
+        allowed = list_implementations(Requirement)
+        if req_type not in allowed:
+            raise ValueError(
+                f"req_type {req_type} is not valid. Allowed options are: {allowed}"
+            )
+        return req_type
+
+    def build(self, obj: MlemModel):
+        req_type_cls = load_impl_ext(Requirement.abs_name, self.req_type)
+        assert issubclass(req_type_cls, Requirement)
+        reqs = obj.requirements.of_type(req_type_cls)
+        if self.target is None:
+            reqs_representation = [r.get_repr() for r in reqs]
+            requirement_string = " ".join(reqs_representation)
+            print(requirement_string)
+        else:
+            echo(EMOJI_PACK + "Materializing requirements...")
+            req_type_cls.materialize(reqs, self.target)
+            echo(EMOJI_OK + f"Materialized to {self.target}!")
diff --git a/mlem/contrib/venv.py b/mlem/contrib/venv.py
new file mode 100644
index 00000000..05136d34
--- /dev/null
+++ b/mlem/contrib/venv.py
@@ -0,0 +1,205 @@
+"""Virtual Environments support
+Extension type: build
+
+MlemBuilder implementations for `Environments` which includes
+conda based and venv based virtual environments.
+"""
+import os
+import platform
+import subprocess
+import sys
+import venv
+from abc import abstractmethod
+from typing import ClassVar, List, Optional
+
+from mlem.core.errors import MlemError
+from mlem.core.objects import MlemBuilder, MlemModel
+from mlem.core.requirements import Requirement
+from mlem.ui import EMOJI_OK, EMOJI_PACK, echo
+
+
+def get_python_exe_in_virtual_env(env_dir: str, use_conda_env: bool = False):
+    """Return the path of the python executable inside `env_dir`."""
+    if platform.system() == "Windows":
+        if not use_conda_env:
+            return os.path.join(env_dir, "Scripts", "python.exe")
+        return os.path.join(env_dir, "python.exe")
+    return os.path.join(env_dir, "bin", "python")
+
+
+def run_in_subprocess(cmd: List[str], error_msg: str, check_output=False):
+    """Run `cmd`, re-raising launch or exit failures as MlemError."""
+    try:
+        if check_output:
+            return subprocess.check_output(cmd)
+        return subprocess.run(cmd, check=True)
+    except (
+        FileNotFoundError,
+        subprocess.CalledProcessError,
+        subprocess.TimeoutExpired,
+    ) as e:
+        raise MlemError(f"{error_msg}\n{e}") from e
+
+
+class CondaPackageRequirement(Requirement):
+    """Represents a conda package that needs to be installed"""
+
+    type: ClassVar[str] = "conda"
+    package_name: str
+    """Denotes name of a package such as 'numpy'"""
+    spec: Optional[str] = None
+    """Denotes selectors for a package such as '>=1.8,<2'"""
+    channel_name: str = "conda-forge"
+    """Denotes channel from which a package is to be installed"""
+
+    def get_repr(self):
+        """
+        conda installable representation of this module
+        """
+        if self.spec is not None:
+            return f"{self.channel_name}::{self.package_name}{self.spec}"
+        return f"{self.channel_name}::{self.package_name}"
+
+    @classmethod
+    def materialize(cls, reqs, target: str):
+        raise NotImplementedError
+
+
+class EnvBuilder(MlemBuilder):
+    type: ClassVar = "env"
+
+    target: Optional[str] = "venv"
+    """Name of the virtual environment"""
+
+    @abstractmethod
+    def create_virtual_env(self):
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_installed_packages(self, env_dir: str):
+        raise NotImplementedError
+
+
+class VenvBuilder(EnvBuilder):
+    """MlemBuilder implementation for building virtual environments"""
+
+    type: ClassVar = "venv"
+
+    no_cache: bool = False
+    """Disable cache"""
+    current_env: bool = False
+    """Whether to install in the current virtual env, must be active"""
+
+    def create_virtual_env(self):
+        env_dir = os.path.abspath(self.target)
+        venv.create(env_dir, with_pip=True)
+
+    def get_installed_packages(self, env_dir):
+        env_exe = get_python_exe_in_virtual_env(env_dir)
+        return run_in_subprocess(
+            [env_exe, "-m", "pip", "freeze"],
+            error_msg="Error running pip",
+            check_output=True,
+        )
+
+    def build(self, obj: MlemModel):
+        if self.current_env:
+            if (
+                os.getenv("VIRTUAL_ENV") is None
+                or sys.prefix == sys.base_prefix
+            ):
+                raise MlemError("No virtual environment detected.")
+            echo(EMOJI_PACK + f"Detected the virtual env {sys.prefix}")
+            env_dir = sys.prefix
+        else:
+            assert self.target is not None
+            echo(EMOJI_PACK + f"Creating virtual env {self.target}...")
+            self.create_virtual_env()
+            env_dir = os.path.abspath(self.target)
+            os.environ["VIRTUAL_ENV"] = env_dir
+
+        env_exe = get_python_exe_in_virtual_env(env_dir)
+        echo(EMOJI_PACK + "Installing the required packages...")
+        # Based on recommendation given in https://pip.pypa.io/en/latest/user_guide/#using-pip-from-your-program
+        install_cmd = [env_exe, "-m", "pip", "install"]
+        if self.no_cache:
+            install_cmd.append("--no-cache-dir")
+        install_cmd.extend(obj.requirements.to_pip())
+        run_in_subprocess(install_cmd, error_msg="Error running pip")
+        if platform.system() == "Windows":
+            activate_cmd = f"`{self.target}\\Scripts\\activate`"
+        else:
+            activate_cmd = f"`source {self.target}/bin/activate`"
+        echo(
+            EMOJI_OK
+            + f"virtual environment `{self.target}` is ready, activate with {activate_cmd}"
+        )
+        return env_dir
+
+
+class CondaBuilder(EnvBuilder):
+    """MlemBuilder implementation for building conda environments"""
+
+    type: ClassVar = "conda"
+
+    python_version: str = f"{sys.version_info.major}.{sys.version_info.minor}"
+    """The python version to use"""
+    current_env: Optional[bool] = False
+    """Whether to install in the current conda env"""
+    conda_reqs: List[CondaPackageRequirement] = []
+    """List of conda package requirements"""
+
+    def create_virtual_env(self):
+        env_dir = os.path.abspath(self.target)
+        create_cmd = ["--prefix", env_dir, f"python={self.python_version}"]
+        run_in_subprocess(
+            ["conda", "create", "-y", *create_cmd],
+            error_msg="Error running conda",
+        )
+
+    def get_installed_packages(self, env_dir):
+        return run_in_subprocess(
+            ["conda", "list", "--prefix", env_dir],
+            error_msg="Error running conda",
+            check_output=True,
+        )
+
+    def build(self, obj: MlemModel):
+        pip_based_packages = obj.requirements.to_pip()
+        conda_based_packages = [r.get_repr() for r in self.conda_reqs]
+
+        if self.current_env:
+            conda_default_env = os.getenv("CONDA_DEFAULT_ENV", None)
+            if conda_default_env == "base" or conda_default_env is None:
+                raise MlemError("No conda environment detected.")
+            echo(EMOJI_PACK + f"Detected the conda env {sys.prefix}")
+            env_dir = sys.prefix
+            env_exe = sys.executable
+        else:
+            assert self.target is not None
+            self.create_virtual_env()
+            env_dir = os.path.abspath(self.target)
+            env_exe = get_python_exe_in_virtual_env(
+                env_dir, use_conda_env=True
+            )
+        if conda_based_packages:
+            run_in_subprocess(
+                [
+                    "conda",
+                    "install",
+                    "--prefix",
+                    env_dir,
+                    "-y",
+                    *conda_based_packages,
+                ],
+                error_msg="Error running conda",
+            )
+
+        # install pip packages in conda env
+        if pip_based_packages:
+            run_in_subprocess(
+                [env_exe, "-m", "pip", "install", *pip_based_packages],
+                error_msg="Error running pip",
+            )
+
+        return env_dir
diff --git a/mlem/core/requirements.py b/mlem/core/requirements.py
index 5fcc6473..df802a36 100644
--- a/mlem/core/requirements.py
+++ b/mlem/core/requirements.py
@@ -55,12 +55,28 @@ class Config:
     abs_name: ClassVar[str] = "requirement"
     type: ClassVar = ...
 
+    @abstractmethod
+    def get_repr(self):
+        raise NotImplementedError
+
+    @classmethod
+    @abstractmethod
+    def materialize(cls, reqs, target: str):
+        raise NotImplementedError
+
 
 class PythonRequirement(Requirement, ABC):
     type: ClassVar = "_python"
     module: str
     """Python module name"""
 
+    def get_repr(self):
+        raise NotImplementedError
+
+    @classmethod
+    def materialize(cls, reqs, target: str):
+        raise NotImplementedError
+
 
 class InstallableRequirement(PythonRequirement):
     """
@@ -85,7 +101,7 @@ def package(self):
             self.module, self.module
         )
 
-    def to_str(self):
+    def get_repr(self):
         """
         pip installable representation of this module
         """
@@ -93,6 +109,13 @@ def to_str(self):
             return f"{self.package}=={self.version}"
         return self.package
 
+    @classmethod
+    def materialize(cls, reqs, target: str):
+        reqs = [r.get_repr() for r in reqs]
+        requirement_string = "\n".join(reqs)
+        with open(target, "w", encoding="utf8") as fp:
+            fp.write(requirement_string + "\n")
+
     @classmethod
     def from_module(
         cls, mod: ModuleType, package_name: str = None
@@ -148,6 +171,18 @@ class CustomRequirement(PythonRequirement):
     is_package: bool
     """Whether this code should be in %name%/__init__.py"""
 
+    def get_repr(self):
+        raise NotImplementedError
+
+    @classmethod
+    def materialize(cls, reqs, target: str):
+        for cr in reqs:
+            for part, src in cr.to_sources_dict().items():
+                p = os.path.join(target, part)
+                os.makedirs(os.path.dirname(p), exist_ok=True)
+                with open(p, "wb") as f:
+                    f.write(src)
+
     @staticmethod
     def from_module(mod: ModuleType) -> "CustomRequirement":
         """
@@ -273,6 +308,9 @@ class FileRequirement(CustomRequirement):
     module: str = ""
     """Ignored"""
 
+    def get_repr(self):
+        raise NotImplementedError
+
     def to_sources_dict(self):
         """
         Mapping path -> source code for this requirement
@@ -296,6 +334,13 @@ class UnixPackageRequirement(Requirement):
     package_name: str
     """Name of the package"""
 
+    def get_repr(self):
+        return self.package_name
+
+    @classmethod
+    def materialize(cls, reqs, target: str):
+        raise NotImplementedError
+
 
 T = TypeVar("T", bound=Requirement)
 
@@ -399,11 +444,17 @@ def add(self, requirement: Requirement):
             if requirement not in self.__root__:
                 self.__root__.append(requirement)
 
+    def to_unix(self) -> List[str]:
+        """
+        :return: list of unix based packages
+        """
+        return [r.get_repr() for r in self.of_type(UnixPackageRequirement)]
+
     def to_pip(self) -> List[str]:
         """
         :return: list of pip installable packages
         """
-        return [r.to_str() for r in self.installable]
+        return [r.get_repr() for r in self.installable]
 
     def __add__(self, other: "AnyRequirements"):
         other = resolve_requirements(other)
@@ -426,12 +477,7 @@ def new(cls, requirements: "AnyRequirements" = None):
         return resolve_requirements(requirements)
 
     def materialize_custom(self, path: str):
-        for cr in self.custom:
-            for part, src in cr.to_sources_dict().items():
-                p = os.path.join(path, part)
-                os.makedirs(os.path.dirname(p), exist_ok=True)
-                with open(p, "wb") as f:
-                    f.write(src)
+        CustomRequirement.materialize(self.custom, path)
 
     @contextlib.contextmanager
     def import_custom(self):
diff --git a/mlem/ext.py b/mlem/ext.py
index 58660dcb..4b6d8849 100644
--- a/mlem/ext.py
+++ b/mlem/ext.py
@@ -116,6 +116,8 @@ class ExtensionLoader:
         ),
         Extension("mlem.contrib.pip", [], False),
         Extension("mlem.contrib.kubernetes", ["kubernetes", "docker"], False),
+        Extension("mlem.contrib.requirements", [], False),
+        Extension("mlem.contrib.venv", [], False),
     )
 
     _loaded_extensions: Dict[Extension, ModuleType] = {}
diff --git a/setup.cfg b/setup.cfg
index a4d4d328..de26369f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -26,6 +26,7 @@ markers =
     long: Marks long-running tests
     docker: Marks tests that needs Docker
     kubernetes: Marks tests that needs Kubernetes
+    conda: Marks tests that need conda
 testpaths = tests
 addopts = -rav --durations=0 --cov=mlem --cov-report=term-missing --cov-report=xml
 
diff --git a/setup.py b/setup.py
index 654956a9..2be1ea5a 100644
--- a/setup.py
+++ b/setup.py
@@ -193,6 +193,10 @@
             "data_type.series = mlem.contrib.pandas:SeriesType",
             "builder.pip = mlem.contrib.pip.base:PipBuilder",
             "builder.whl = mlem.contrib.pip.base:WhlBuilder",
+            "builder.requirements = mlem.contrib.requirements:RequirementsBuilder",
+            "builder.venv = mlem.contrib.venv:VenvBuilder",
+            "builder.conda = mlem.contrib.venv:CondaBuilder",
+            "requirement.conda = mlem.contrib.venv:CondaPackageRequirement",
             "client.rmq = mlem.contrib.rabbitmq:RabbitMQClient",
             "server.rmq = mlem.contrib.rabbitmq:RabbitMQServer",
             "docker_registry.ecr = mlem.contrib.sagemaker.build:ECRegistry",
diff --git a/tests/contrib/conftest.py b/tests/contrib/conftest.py
index 4932aab0..2bb07b15 100644
--- a/tests/contrib/conftest.py
+++ b/tests/contrib/conftest.py
@@ -1,9 +1,29 @@
+import subprocess
+
 import pytest
 
 from mlem.contrib.docker.context import use_mlem_source
+from tests.conftest import long
 
 
 @pytest.fixture()
 def uses_docker_build():
     with use_mlem_source("whl"):
         yield
+
+
+def has_conda():
+    """Tell whether the `conda` executable can be run successfully."""
+    try:
+        ret = subprocess.run(["conda"], check=True)
+        return ret.returncode == 0
+    except (FileNotFoundError, subprocess.CalledProcessError):
+        # missing binary or non-zero exit both mean conda is unusable
+        return False
+
+
+def conda_test(f):
+    """Mark `f` as a long conda test, skipped when conda is unavailable."""
+    mark = pytest.mark.conda
+    skip = pytest.mark.skipif(not has_conda(), reason="conda is unavailable")
+    return long(mark(skip(f)))
diff --git a/tests/contrib/test_requirements.py b/tests/contrib/test_requirements.py
new file mode 100644
index 00000000..ab571bce
--- /dev/null
+++ b/tests/contrib/test_requirements.py
@@ -0,0 +1,46 @@
+import lightgbm as lgb
+import numpy as np
+import pytest
+from pydantic.error_wrappers import ValidationError
+
+from mlem.contrib.requirements import RequirementsBuilder
+from mlem.core.objects import MlemModel
+
+
+def test_build_reqs(tmp_path, model_meta):
+    path = str(tmp_path / "reqs.txt")
+    builder = RequirementsBuilder(target=path)
+    builder.build(model_meta)
+    with open(path, "r", encoding="utf-8") as f:
+        assert model_meta.requirements.to_pip() == f.read().splitlines()
+
+
+def test_build_reqs_with_invalid_req_type():
+    with pytest.raises(
+        ValidationError, match="req_type invalid is not valid."
+    ):
+        RequirementsBuilder(req_type="invalid")
+
+
+def test_build_requirements_should_print_with_no_path(capsys, model_meta):
+    builder = RequirementsBuilder()
+    builder.build(model_meta)
+    captured = capsys.readouterr()
+    assert captured.out == " ".join(model_meta.requirements.to_pip()) + "\n"
+
+
+def test_unix_requirement(capsys):
+    np_payload = np.linspace(0, 2, 5).reshape((-1, 1))
+    data_np = lgb.Dataset(
+        np_payload,
+        label=np_payload.reshape((-1,)).tolist(),
+        free_raw_data=False,
+    )
+    booster = lgb.train({}, data_np, 1)
+    model = MlemModel.from_obj(booster, sample_data=data_np)
+    builder = RequirementsBuilder(req_type="unix")
+    builder.build(model)
+    captured = capsys.readouterr()
+    assert str(captured.out).endswith(
+        " ".join(model.requirements.to_unix()) + "\n"
+    )
diff --git a/tests/contrib/test_tensorflow.py b/tests/contrib/test_tensorflow.py
index 22d811b2..d8596ade 100644
--- a/tests/contrib/test_tensorflow.py
+++ b/tests/contrib/test_tensorflow.py
@@ -194,7 +194,7 @@ def test_model_wrapper(net, input_data, tmpdir, request):
 
     prediction = tmw.call_method("predict", input_data)
 
-    np.testing.assert_array_equal(orig_pred, prediction)
+    np.testing.assert_allclose(orig_pred, prediction)
 
     model_name = str(tmpdir / "tensorflow-model")
     artifacts = tmw.dump(LOCAL_STORAGE, model_name)
@@ -214,7 +214,7 @@ def test_model_wrapper(net, input_data, tmpdir, request):
 
     prediction2 = tmw.call_method("predict", input_data)
 
-    np.testing.assert_array_equal(prediction, prediction2)
+    np.testing.assert_allclose(prediction, prediction2)
 
     assert set(tmw.get_requirements().modules) == expected_requirements
 
diff --git a/tests/contrib/test_venv.py b/tests/contrib/test_venv.py
new file mode 100644
index 00000000..80a0c3d3
--- /dev/null
+++ b/tests/contrib/test_venv.py
@@ -0,0 +1,90 @@
+import os
+import re
+import sys
+
+import pytest
+
+from mlem.contrib.venv import (
+    CondaBuilder,
+    CondaPackageRequirement,
+    VenvBuilder,
+)
+from mlem.core.errors import MlemError
+from mlem.core.requirements import InstallableRequirement
+from tests.contrib.conftest import conda_test
+
+
+@pytest.fixture
+def sys_prefix_path(tmp_path):
+    old_sys_prefix = sys.prefix
+    path = str(tmp_path / "venv-act")
+    sys.prefix = os.path.abspath(path)
+
+    yield path
+
+    sys.prefix = old_sys_prefix
+
+
+def process_conda_list_output(installed_pkgs):
+    def get_words(line):
+        return re.findall(r"[^\s]+", line)
+
+    words = [get_words(x) for x in installed_pkgs.splitlines()[3:]]
+    keys = []
+    vals = []
+    for w in words:
+        if len(w) >= 4:
+            keys.append(w[0])
+            vals.append(w[3])
+    result = dict(zip(keys, vals))
+    return result
+
+
+@conda_test
+def test_build_conda(tmp_path, model_meta):
+    path = str(tmp_path / "conda-env")
+    builder = CondaBuilder(
+        target=path,
+        conda_reqs=[CondaPackageRequirement(package_name="xtensor")],
+    )
+    env_dir = builder.build(model_meta)
+    installed_pkgs = builder.get_installed_packages(env_dir).decode()
+    pkgs_info = process_conda_list_output(installed_pkgs)
+    for each_req in model_meta.requirements:
+        if isinstance(each_req, InstallableRequirement):
+            assert pkgs_info[each_req.package] == "pypi"
+        elif isinstance(each_req, CondaPackageRequirement):
+            assert pkgs_info[each_req.package_name] == each_req.channel_name
+
+
+def test_build_venv(tmp_path, model_meta):
+    path = str(tmp_path / "venv")
+    builder = VenvBuilder(target=path)
+    env_dir = builder.build(model_meta)
+    installed_pkgs = set(
+        builder.get_installed_packages(env_dir).decode().splitlines()
+    )
+    required_pkgs = set(model_meta.requirements.to_pip())
+    assert required_pkgs.issubset(installed_pkgs)
+
+
+def test_install_in_current_venv_not_active(tmp_path, model_meta):
+    path = str(tmp_path / "venv")
+    builder = VenvBuilder(target=path, current_env=True)
+    with pytest.raises(MlemError, match="No virtual environment detected"):
+        builder.build(model_meta)
+
+
+def test_install_in_current_active_venv(sys_prefix_path, model_meta):
+    builder = VenvBuilder(target=sys_prefix_path)
+    env_dir = os.path.abspath(sys_prefix_path)
+    builder.create_virtual_env()
+    assert builder.get_installed_packages(env_dir).decode() == ""
+    os.environ["VIRTUAL_ENV"] = env_dir
+    builder.current_env = True
+    builder.build(model_meta)
+    installed_pkgs = (
+        builder.get_installed_packages(env_dir).decode().splitlines()
+    )
+    for each_req in model_meta.requirements.to_pip():
+        assert each_req in installed_pkgs
diff --git a/tests/core/test_requirements.py b/tests/core/test_requirements.py
index e88c1b4a..06e060b1 100644
--- a/tests/core/test_requirements.py
+++ b/tests/core/test_requirements.py
@@ -46,39 +46,41 @@ def test_resolve_requirement_list_arg():
 def test_resolve_str_arg():
     req = "dumb==0.4.1"
     actual_reqs = resolve_requirements(req)
-    assert actual_reqs.installable[0].to_str() == req
+    assert actual_reqs.installable[0].get_repr() == req
 
 
 def test_resolve_str_list_arg():
     req = ["dumb==0.4.1", "art==4.0"]
     actual_reqs = resolve_requirements(req)
     assert len(actual_reqs.installable) == 2
-    assert sorted(req) == sorted([r.to_str() for r in actual_reqs.installable])
+    assert sorted(req) == sorted(
+        [r.get_repr() for r in actual_reqs.installable]
+    )
 
 
 def test_installable_requirement__from_module():
     import pandas as pd
 
     assert (
-        InstallableRequirement.from_module(pd).to_str()
+        InstallableRequirement.from_module(pd).get_repr()
         == f"pandas=={pd.__version__}"
     )
 
     import numpy as np
 
     assert (
-        InstallableRequirement.from_module(np).to_str()
+        InstallableRequirement.from_module(np).get_repr()
         == f"numpy=={np.__version__}"
     )
 
     import sklearn as sk
 
     assert (
-        InstallableRequirement.from_module(sk).to_str()
+        InstallableRequirement.from_module(sk).get_repr()
         == f"scikit-learn=={sk.__version__}"
     )
     assert (
-        InstallableRequirement.from_module(sk, "xyz").to_str()
+        InstallableRequirement.from_module(sk, "xyz").get_repr()
         == f"xyz=={sk.__version__}"
     )
 
@@ -138,8 +140,8 @@ def test_req_collection_main(tmpdir, postfix):
     assert res == 0
     meta = load_meta(model_path, force_type=MlemModel)
     assert set(meta.requirements.to_pip()) == {
-        InstallableRequirement.from_module(emoji).to_str(),
-        InstallableRequirement.from_module(numpy).to_str(),
+        InstallableRequirement.from_module(emoji).get_repr(),
+        InstallableRequirement.from_module(numpy).get_repr(),
     }
 
 