diff --git a/.github/workflows/check-test-release.yml b/.github/workflows/check-test-release.yml index e82b3663..f0fff764 100644 --- a/.github/workflows/check-test-release.yml +++ b/.github/workflows/check-test-release.yml @@ -88,6 +88,8 @@ jobs: HEROKU_TEAM: iterative-sandbox GITHUB_MATRIX_OS: ${{ matrix.os }} GITHUB_MATRIX_PYTHON: ${{ matrix.python }} + BITBUCKET_USERNAME: ${{ secrets.BITBUCKET_USERNAME }} + BITBUCKET_PASSWORD: ${{ secrets.BITBUCKET_PASSWORD }} - name: "Upload coverage to Codecov" uses: codecov/codecov-action@v1 with: diff --git a/.pylintrc b/.pylintrc index 4ea818e6..75fac659 100644 --- a/.pylintrc +++ b/.pylintrc @@ -389,7 +389,7 @@ ignore-comments=yes ignore-docstrings=yes # Ignore imports when computing similarities. -ignore-imports=no +ignore-imports=yes # Ignore function signatures when computing similarities. ignore-signatures=no diff --git a/mlem/contrib/catboost.py b/mlem/contrib/catboost.py index 655a2bee..a28b795f 100644 --- a/mlem/contrib/catboost.py +++ b/mlem/contrib/catboost.py @@ -1,15 +1,18 @@ -import os -import posixpath -import tempfile from enum import Enum from typing import Any, ClassVar, Optional import catboost from catboost import CatBoost, CatBoostClassifier, CatBoostRegressor -from mlem.core.artifacts import Artifacts, Storage +from mlem.core.artifacts import Artifacts from mlem.core.hooks import IsInstanceHookMixin -from mlem.core.model import ModelHook, ModelIO, ModelType, Signature +from mlem.core.model import ( + BufferModelIO, + ModelHook, + ModelIO, + ModelType, + Signature, +) from mlem.core.requirements import InstallableRequirement, Requirements @@ -18,7 +21,7 @@ class CBType(str, Enum): regressor = "reg" -class CatBoostModelIO(ModelIO): +class CatBoostModelIO(BufferModelIO): """ :class:`mlem.core.model.ModelIO` for CatBoost models. """ @@ -28,16 +31,8 @@ class CatBoostModelIO(ModelIO): regressor_file_name: ClassVar = "rgr.cb" model_type: CBType = CBType.regressor - def dump(self, storage: Storage, path, model) -> Artifacts: - with tempfile.TemporaryDirectory() as tmpdir: - model_name = self._get_model_file_name(model) - model_path = os.path.join(tmpdir, model_name) - model.save_model(model_path) - return { - self.art_name: storage.upload( - model_path, posixpath.join(path, model_name) - ) - } + def save_model(self, model: Any, path: str): + model.save_model(path) def load(self, artifacts: Artifacts): """ diff --git a/mlem/contrib/fastai.py b/mlem/contrib/fastai.py new file mode 100644 index 00000000..26a53e2c --- /dev/null +++ b/mlem/contrib/fastai.py @@ -0,0 +1,101 @@ +from typing import Any, ClassVar, Optional, Type, Union + +from fastai.data.transforms import Category +from fastai.learner import Learner, load_learner +from fastai.vision.core import PILImage +from pydantic import BaseModel + +from mlem.core.artifacts import Artifacts +from mlem.core.data_type import DataHook, DataSerializer, DataType, DataWriter +from mlem.core.hooks import IsInstanceHookMixin +from mlem.core.model import BufferModelIO, ModelHook, ModelType, Signature +from mlem.core.requirements import Requirements + + +class FastAIModelIO(BufferModelIO): + type: ClassVar = "fastai" + + def save_model(self, model: Any, path: str): + model.export(path) + + def load(self, artifacts: Artifacts): + with artifacts[self.art_name].open() as f: + return load_learner(f) + + +class FastAIModel(ModelType, ModelHook, IsInstanceHookMixin): + type: ClassVar = "fastai" + valid_types: ClassVar = (Learner,) + io: FastAIModelIO = FastAIModelIO() + + @classmethod + def process( + cls, obj: "Learner", sample_data: Optional[Any] = None, **kwargs + ) -> ModelType: + + return FastAIModel( + methods={ + "predict": Signature.from_method( + obj.predict, + item=sample_data, + auto_infer=sample_data is not None, + ) + } + ) + + +class CategoryDataType( + DataType, DataSerializer, DataHook, IsInstanceHookMixin +): + type: ClassVar = "fastai_category" + valid_types: ClassVar = (Category,) + value: str + + def serialize(self, instance: Any) -> dict: + raise NotImplementedError # TODO + + def deserialize(self, obj: dict) -> Any: + raise NotImplementedError # TODO + + def get_model(self, prefix: str = "") -> Union[Type[BaseModel], type]: + raise NotImplementedError # TODO + + def get_requirements(self) -> Requirements: + return Requirements.new("fastai") + + @classmethod + def process(cls, obj: Any, **kwargs): + return CategoryDataType(value=str(obj)) + + def get_writer( + self, project: str = None, filename: str = None, **kwargs + ) -> DataWriter: + raise NotImplementedError # TODO + + +class PILImageDataType( + DataType, DataSerializer, DataHook, IsInstanceHookMixin +): + type: ClassVar = "fastai_pil_image" + valid_types: ClassVar = (PILImage,) + + def serialize(self, instance: Any) -> dict: + raise NotImplementedError # TODO + + def deserialize(self, obj: dict) -> Any: + raise NotImplementedError # TODO + + def get_model(self, prefix: str = "") -> Union[Type[BaseModel], type]: + raise NotImplementedError # TODO + + def get_requirements(self) -> Requirements: + return Requirements.new("fastai") + + @classmethod + def process(cls, obj: Any, **kwargs): + return PILImageDataType() + + def get_writer( + self, project: str = None, filename: str = None, **kwargs + ) -> DataWriter: + raise NotImplementedError # TODO diff --git a/mlem/contrib/lightgbm.py b/mlem/contrib/lightgbm.py index b45fad44..ec27a54a 100644 --- a/mlem/contrib/lightgbm.py +++ b/mlem/contrib/lightgbm.py @@ -1,5 +1,4 @@ import os -import posixpath import tempfile from typing import Any, ClassVar, Iterator, List, Optional, Tuple, Type @@ -18,7 +17,13 @@ ) from mlem.core.errors import DeserializationError, SerializationError from mlem.core.hooks import IsInstanceHookMixin -from mlem.core.model import ModelHook, ModelIO, ModelType, Signature +from mlem.core.model import ( + BufferModelIO, + ModelHook, + ModelIO, + ModelType, + Signature, +) from mlem.core.requirements import ( AddRequirementHook, InstallableRequirement, @@ -122,7 +127,7 @@ def read_batch( raise NotImplementedError -class LightGBMModelIO(ModelIO): +class LightGBMModelIO(BufferModelIO): """ :class:`.ModelIO` implementation for `lightgbm.Booster` type """ @@ -130,12 +135,8 @@ class LightGBMModelIO(ModelIO): type: ClassVar[str] = "lightgbm_io" model_file_name = "model.lgb" - def dump(self, storage: Storage, path, model) -> Artifacts: - with tempfile.TemporaryDirectory(prefix="mlem_lightgbm_dump") as f: - model_path = os.path.join(f, self.model_file_name) - model.save_model(model_path) - fs_path = posixpath.join(path, self.model_file_name) - return {self.art_name: storage.upload(model_path, fs_path)} + def save_model(self, model: Any, path: str): + model.save_model(path) def load(self, artifacts: Artifacts): if len(artifacts) != 1: diff --git a/mlem/contrib/xgboost.py b/mlem/contrib/xgboost.py index c7db3fe8..b9b623fd 100644 --- a/mlem/contrib/xgboost.py +++ b/mlem/contrib/xgboost.py @@ -1,5 +1,4 @@ import os -import posixpath import tempfile from typing import Any, ClassVar, Dict, List, Optional, Type @@ -8,11 +7,17 @@ from mlem.constants import PREDICT_METHOD_NAME from mlem.contrib.numpy import python_type_from_np_string_repr -from mlem.core.artifacts import Artifacts, Storage +from mlem.core.artifacts import Artifacts from mlem.core.data_type import DataHook, DataSerializer, DataType, DataWriter from mlem.core.errors import DeserializationError, SerializationError from mlem.core.hooks import IsInstanceHookMixin -from mlem.core.model import ModelHook, ModelIO, ModelType, Signature +from mlem.core.model import ( + BufferModelIO, + ModelHook, + ModelIO, + ModelType, + Signature, +) from mlem.core.requirements import ( AddRequirementHook, InstallableRequirement, @@ -112,7 +117,7 @@ def get_model(self, prefix: str = "") -> Type[BaseModel]: raise NotImplementedError -class XGBoostModelIO(ModelIO): +class XGBoostModelIO(BufferModelIO): """ :class:`~.ModelIO` implementation for XGBoost models """ @@ -120,14 +125,8 @@ class XGBoostModelIO(ModelIO): type: ClassVar[str] = "xgboost_io" model_file_name = "model.xgb" - def dump( - self, storage: Storage, path, model: xgboost.Booster - ) -> Artifacts: - with tempfile.TemporaryDirectory(prefix="mlem_xgboost_dump") as f: - local_path = os.path.join(f, self.model_file_name) - model.save_model(local_path) - remote_path = posixpath.join(path, self.model_file_name) - return {self.art_name: storage.upload(local_path, remote_path)} + def save_model(self, model: Any, path: str): + model.save_model(path) def load(self, artifacts: Artifacts): if len(artifacts) != 1: diff --git a/mlem/core/model.py b/mlem/core/model.py index 0ebbef29..4085220f 100644 --- a/mlem/core/model.py +++ b/mlem/core/model.py @@ -2,7 +2,9 @@ Base classes to work with ML models in MLEM """ import inspect +import os import pickle +import tempfile from abc import ABC, abstractmethod from typing import ( Any, @@ -50,6 +52,18 @@ def load(self, artifacts: Artifacts): raise NotImplementedError +class BufferModelIO(ModelIO, ABC): + @abstractmethod + def save_model(self, model: Any, path: str): + raise NotImplementedError + + def dump(self, storage: Storage, path, model) -> Artifacts: + with tempfile.TemporaryDirectory() as tmpdir: + model_path = os.path.join(tmpdir, "model") + self.save_model(model, model_path) + return {self.art_name: storage.upload(model_path, path)} + + class SimplePickleIO(ModelIO): """IO with simple pickling of python model object""" diff --git a/mlem/ext.py b/mlem/ext.py index 865c739c..8b9b818c 100644 --- a/mlem/ext.py +++ b/mlem/ext.py @@ -99,6 +99,7 @@ class ExtensionLoader: Extension("mlem.contrib.fastapi", ["fastapi", "uvicorn"], False), Extension("mlem.contrib.callable", [], True), Extension("mlem.contrib.rabbitmq", ["pika"], False, extra="rmq"), + Extension("mlem.contrib.fastai", ["fastai"], False), ) _loaded_extensions: Dict[Extension, ModuleType] = {} diff --git a/mlem/utils/module.py b/mlem/utils/module.py index 4cb3fddd..53bdd6b9 100644 --- a/mlem/utils/module.py +++ b/mlem/utils/module.py @@ -545,7 +545,9 @@ def add_requirement(self, obj_or_module): ) if parent_package_name not in self._modules: parent_package = sys.modules[parent_package_name] - self.add_requirement(parent_package) + # exclude namespace packages + if parent_package.__file__ is not None: + self.add_requirement(parent_package) def save(self, obj, save_persistent_id=True): if id(obj) in self.seen or isinstance(obj, IGNORE_TYPES_REQ): diff --git a/tests/contrib/test_fastai.py b/tests/contrib/test_fastai.py new file mode 100644 index 00000000..35be0700 --- /dev/null +++ b/tests/contrib/test_fastai.py @@ -0,0 +1,5 @@ +# TODO + + +def test_learner(): + pass