From be1b5c7e58e7461c73aac467aacebef41041c34e Mon Sep 17 00:00:00 2001 From: MarleneKress79789 Date: Wed, 29 Nov 2023 13:21:24 +0100 Subject: [PATCH 01/10] added Huggingface model transfer class with save_pretrained in download --- ...gingface_hub_bucketfs_model_transfer_sp.py | 78 +++++++++++++++++ ...ingface_hub_bucketfs__model_transfer_sp.py | 85 +++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py create mode 100644 tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py diff --git a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py new file mode 100644 index 00000000..49d8e31a --- /dev/null +++ b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py @@ -0,0 +1,78 @@ +import os +import tempfile +from pathlib import Path +from typing import Protocol, Union, runtime_checkable + +import transformers +from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation + +from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploaderFactory +from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory + + +@runtime_checkable +class ModelFactoryProtocol(Protocol): + def from_pretrained(self, model_name: str, cache_dir: Path, use_auth_token: str) -> transformers.PreTrainedModel: + pass + + def save_pretrained(self, save_directory: Union[str, Path]): + pass + + +class HuggingFaceHubBucketFSModelTransferSP: + def __init__(self, + bucketfs_location: BucketFSLocation, + model_name: str, + model_path: Path, + local_model_save_path: Path, + token: str, + temporary_directory_factory: TemporaryDirectoryFactory = TemporaryDirectoryFactory(), + bucketfs_model_uploader_factory: BucketFSModelUploaderFactory = BucketFSModelUploaderFactory()): + self._token = token + self._model_name = model_name + self._local_model_save_path = Path(local_model_save_path) + self._temporary_directory_factory = temporary_directory_factory + self._bucketfs_model_uploader = bucketfs_model_uploader_factory.create( + model_path=model_path, + bucketfs_location=bucketfs_location) + self._tmpdir = temporary_directory_factory.create() + self._tmpdir_name = self._tmpdir.__enter__() + + def __enter__(self): + return self + + def __del__(self): + self._tmpdir.cleanup() + + def __exit__(self, exc_type, exc_val, exc_tb): + self._tmpdir.__exit__(exc_type, exc_val, exc_tb) + + def download_from_huggingface_hub_sp(self, model_factory: ModelFactoryProtocol): + """ + Download a model from HuggingFace Hub into a temporary directory and save it with save_pretrained + at _local_model_save_path / _model_name for local storing + """ + model = model_factory.from_pretrained(self._model_name, cache_dir=self._tmpdir_name, use_auth_token=self._token) + path = self._local_model_save_path / self._model_name + model.save_pretrained(path) #todo save in cachedir in assuption will be uploaded and then deleted? + + def upload_to_bucketfs(self) -> Path: + """ + Upload the downloaded models into the BucketFS + """ + return self._bucketfs_model_uploader.upload_directory(self._tmpdir_name) + + +class HuggingFaceHubBucketFSModelTransferSPFactory: + + def create(self, + bucketfs_location: BucketFSLocation, + model_name: str, + model_path: Path, + local_model_save_path: Path, + token: str) -> HuggingFaceHubBucketFSModelTransferSP: + return HuggingFaceHubBucketFSModelTransferSP(bucketfs_location=bucketfs_location, + model_name=model_name, + model_path=model_path, + local_model_save_path=local_model_save_path, + token=token) diff --git a/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py b/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py new file mode 100644 index 00000000..1cae7766 --- /dev/null +++ b/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py @@ -0,0 +1,85 @@ +import tempfile +from pathlib import Path +from typing import Union +from unittest.mock import create_autospec, MagicMock, call + +from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation +from transformers import AutoModel, PreTrainedModel + +from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploader, \ + BucketFSModelUploaderFactory +from exasol_transformers_extension.utils.huggingface_hub_bucketfs_model_transfer_sp import ModelFactoryProtocol, \ + HuggingFaceHubBucketFSModelTransferSP +from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory +from tests.utils.mock_cast import mock_cast + +from tests.utils.parameters import model_params + +class TestSetup: + def __init__(self, local_model_save_path: Path = "downloaded_models_test"): + self.bucketfs_location_mock: Union[BucketFSLocation, MagicMock] = create_autospec(BucketFSLocation) + self.model_factory_mock: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol) + self.temporary_directory_factory_mock: Union[TemporaryDirectoryFactory, MagicMock] = \ + create_autospec(TemporaryDirectoryFactory) + self.bucketfs_model_uploader_factory_mock: Union[BucketFSModelUploaderFactory, MagicMock] = \ + create_autospec(BucketFSModelUploaderFactory) + self.bucketfs_model_uploader_mock: Union[BucketFSModelUploader, MagicMock] = \ + create_autospec(BucketFSModelUploader) + mock_cast(self.bucketfs_model_uploader_factory_mock.create).side_effect = [self.bucketfs_model_uploader_mock] + + self.token = "token" + model_params_ = model_params.tiny_model + print(model_params_) + self.model_name = model_params_ + self.model_path = Path("test_model_path") + self.downloader = HuggingFaceHubBucketFSModelTransferSP( + bucketfs_location=self.bucketfs_location_mock, + model_path=self.model_path, + model_name=self.model_name, + local_model_save_path=local_model_save_path, + token=self.token, + temporary_directory_factory=self.temporary_directory_factory_mock, + bucketfs_model_uploader_factory=self.bucketfs_model_uploader_factory_mock + ) + + def reset_mocks(self): + self.bucketfs_location_mock.reset_mock() + self.temporary_directory_factory_mock.reset_mock() + self.model_factory_mock.reset_mock() + self.bucketfs_model_uploader_mock.reset_mock() + + +def test_init(): + test_setup = TestSetup() + assert test_setup.temporary_directory_factory_mock.mock_calls == [call.create(), call.create().__enter__()] \ + and test_setup.model_factory_mock.mock_calls == [] \ + and test_setup.bucketfs_location_mock.mock_calls == [] \ + and mock_cast(test_setup.bucketfs_model_uploader_factory_mock.create).mock_calls == [ + call.create(model_path=test_setup.model_path, bucketfs_location=test_setup.bucketfs_location_mock) + ] + + +def test_download_function_call(): + test_setup = TestSetup() + test_setup.downloader.download_from_huggingface_hub_sp(model_factory=test_setup.model_factory_mock) + cache_dir = test_setup.temporary_directory_factory_mock.create().__enter__() + model_save_path = (test_setup.downloader._local_model_save_path/test_setup.model_name) + assert test_setup.model_factory_mock.mock_calls == [ + call.from_pretrained(test_setup.model_name, cache_dir=cache_dir, + use_auth_token=test_setup.token), + call.from_pretrained().save_pretrained(model_save_path)] + + +# todo add test for model already downloaded? + +def test_download_with_model(): + with tempfile.TemporaryDirectory() as folder: + folder_path = Path(folder) + test_setup = TestSetup(local_model_save_path=folder_path/"downloaded_models") + base_model_factory: ModelFactoryProtocol = AutoModel + test_setup.downloader.download_from_huggingface_hub_sp(model_factory=base_model_factory) + assert AutoModel.from_pretrained(folder_path/"downloaded_models"/test_setup.model_name) + test_setup.downloader.__del__() + #todo delete model + + From c4f22e0ae40455e03d8ac76ef91d94f9b61cdcbb Mon Sep 17 00:00:00 2001 From: MarleneKress79789 Date: Wed, 29 Nov 2023 13:21:24 +0100 Subject: [PATCH 02/10] added Huggingface model transfer class with save_pretrained in download --- ...gingface_hub_bucketfs_model_transfer_sp.py | 78 +++++++++++++++++ ...ingface_hub_bucketfs__model_transfer_sp.py | 85 +++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py create mode 100644 tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py diff --git a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py new file mode 100644 index 00000000..49d8e31a --- /dev/null +++ b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py @@ -0,0 +1,78 @@ +import os +import tempfile +from pathlib import Path +from typing import Protocol, Union, runtime_checkable + +import transformers +from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation + +from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploaderFactory +from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory + + +@runtime_checkable +class ModelFactoryProtocol(Protocol): + def from_pretrained(self, model_name: str, cache_dir: Path, use_auth_token: str) -> transformers.PreTrainedModel: + pass + + def save_pretrained(self, save_directory: Union[str, Path]): + pass + + +class HuggingFaceHubBucketFSModelTransferSP: + def __init__(self, + bucketfs_location: BucketFSLocation, + model_name: str, + model_path: Path, + local_model_save_path: Path, + token: str, + temporary_directory_factory: TemporaryDirectoryFactory = TemporaryDirectoryFactory(), + bucketfs_model_uploader_factory: BucketFSModelUploaderFactory = BucketFSModelUploaderFactory()): + self._token = token + self._model_name = model_name + self._local_model_save_path = Path(local_model_save_path) + self._temporary_directory_factory = temporary_directory_factory + self._bucketfs_model_uploader = bucketfs_model_uploader_factory.create( + model_path=model_path, + bucketfs_location=bucketfs_location) + self._tmpdir = temporary_directory_factory.create() + self._tmpdir_name = self._tmpdir.__enter__() + + def __enter__(self): + return self + + def __del__(self): + self._tmpdir.cleanup() + + def __exit__(self, exc_type, exc_val, exc_tb): + self._tmpdir.__exit__(exc_type, exc_val, exc_tb) + + def download_from_huggingface_hub_sp(self, model_factory: ModelFactoryProtocol): + """ + Download a model from HuggingFace Hub into a temporary directory and save it with save_pretrained + at _local_model_save_path / _model_name for local storing + """ + model = model_factory.from_pretrained(self._model_name, cache_dir=self._tmpdir_name, use_auth_token=self._token) + path = self._local_model_save_path / self._model_name + model.save_pretrained(path) #todo save in cachedir in assuption will be uploaded and then deleted? + + def upload_to_bucketfs(self) -> Path: + """ + Upload the downloaded models into the BucketFS + """ + return self._bucketfs_model_uploader.upload_directory(self._tmpdir_name) + + +class HuggingFaceHubBucketFSModelTransferSPFactory: + + def create(self, + bucketfs_location: BucketFSLocation, + model_name: str, + model_path: Path, + local_model_save_path: Path, + token: str) -> HuggingFaceHubBucketFSModelTransferSP: + return HuggingFaceHubBucketFSModelTransferSP(bucketfs_location=bucketfs_location, + model_name=model_name, + model_path=model_path, + local_model_save_path=local_model_save_path, + token=token) diff --git a/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py b/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py new file mode 100644 index 00000000..1cae7766 --- /dev/null +++ b/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py @@ -0,0 +1,85 @@ +import tempfile +from pathlib import Path +from typing import Union +from unittest.mock import create_autospec, MagicMock, call + +from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation +from transformers import AutoModel, PreTrainedModel + +from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploader, \ + BucketFSModelUploaderFactory +from exasol_transformers_extension.utils.huggingface_hub_bucketfs_model_transfer_sp import ModelFactoryProtocol, \ + HuggingFaceHubBucketFSModelTransferSP +from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory +from tests.utils.mock_cast import mock_cast + +from tests.utils.parameters import model_params + +class TestSetup: + def __init__(self, local_model_save_path: Path = "downloaded_models_test"): + self.bucketfs_location_mock: Union[BucketFSLocation, MagicMock] = create_autospec(BucketFSLocation) + self.model_factory_mock: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol) + self.temporary_directory_factory_mock: Union[TemporaryDirectoryFactory, MagicMock] = \ + create_autospec(TemporaryDirectoryFactory) + self.bucketfs_model_uploader_factory_mock: Union[BucketFSModelUploaderFactory, MagicMock] = \ + create_autospec(BucketFSModelUploaderFactory) + self.bucketfs_model_uploader_mock: Union[BucketFSModelUploader, MagicMock] = \ + create_autospec(BucketFSModelUploader) + mock_cast(self.bucketfs_model_uploader_factory_mock.create).side_effect = [self.bucketfs_model_uploader_mock] + + self.token = "token" + model_params_ = model_params.tiny_model + print(model_params_) + self.model_name = model_params_ + self.model_path = Path("test_model_path") + self.downloader = HuggingFaceHubBucketFSModelTransferSP( + bucketfs_location=self.bucketfs_location_mock, + model_path=self.model_path, + model_name=self.model_name, + local_model_save_path=local_model_save_path, + token=self.token, + temporary_directory_factory=self.temporary_directory_factory_mock, + bucketfs_model_uploader_factory=self.bucketfs_model_uploader_factory_mock + ) + + def reset_mocks(self): + self.bucketfs_location_mock.reset_mock() + self.temporary_directory_factory_mock.reset_mock() + self.model_factory_mock.reset_mock() + self.bucketfs_model_uploader_mock.reset_mock() + + +def test_init(): + test_setup = TestSetup() + assert test_setup.temporary_directory_factory_mock.mock_calls == [call.create(), call.create().__enter__()] \ + and test_setup.model_factory_mock.mock_calls == [] \ + and test_setup.bucketfs_location_mock.mock_calls == [] \ + and mock_cast(test_setup.bucketfs_model_uploader_factory_mock.create).mock_calls == [ + call.create(model_path=test_setup.model_path, bucketfs_location=test_setup.bucketfs_location_mock) + ] + + +def test_download_function_call(): + test_setup = TestSetup() + test_setup.downloader.download_from_huggingface_hub_sp(model_factory=test_setup.model_factory_mock) + cache_dir = test_setup.temporary_directory_factory_mock.create().__enter__() + model_save_path = (test_setup.downloader._local_model_save_path/test_setup.model_name) + assert test_setup.model_factory_mock.mock_calls == [ + call.from_pretrained(test_setup.model_name, cache_dir=cache_dir, + use_auth_token=test_setup.token), + call.from_pretrained().save_pretrained(model_save_path)] + + +# todo add test for model already downloaded? + +def test_download_with_model(): + with tempfile.TemporaryDirectory() as folder: + folder_path = Path(folder) + test_setup = TestSetup(local_model_save_path=folder_path/"downloaded_models") + base_model_factory: ModelFactoryProtocol = AutoModel + test_setup.downloader.download_from_huggingface_hub_sp(model_factory=base_model_factory) + assert AutoModel.from_pretrained(folder_path/"downloaded_models"/test_setup.model_name) + test_setup.downloader.__del__() + #todo delete model + + From a747fbfc4cca05be12115dbf8b39ed8755938678 Mon Sep 17 00:00:00 2001 From: MarleneKress79789 Date: Wed, 29 Nov 2023 13:36:56 +0100 Subject: [PATCH 03/10] added changes file --- doc/changes/changelog.md | 1 + doc/changes/changes_0.7.0.md | 23 +++++++++++++++++++++++ pyproject.toml | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 doc/changes/changes_0.7.0.md diff --git a/doc/changes/changelog.md b/doc/changes/changelog.md index 232f6f79..a63fa416 100644 --- a/doc/changes/changelog.md +++ b/doc/changes/changelog.md @@ -1,5 +1,6 @@ # Changelog +* [0.7.0](changes_0.7.0.md) * [0.6.0](changes_0.6.0.md) * [0.5.0](changes_0.5.0.md) * [0.4.0](changes_0.4.0.md) diff --git a/doc/changes/changes_0.7.0.md b/doc/changes/changes_0.7.0.md new file mode 100644 index 00000000..151dd17e --- /dev/null +++ b/doc/changes/changes_0.7.0.md @@ -0,0 +1,23 @@ +# Transformers Extension 0.7.0, released T.B.D + +Code name: T.B.D + + +## Summary + +T.B.D + +### Features + - #143: Added HuggingfaceTransfer class with save_pretrained for saving model locally + - #152: Made the container uploading and language registration two separate actions + +### Bug Fixes + + + + +### Documentation + + + +### Security diff --git a/pyproject.toml b/pyproject.toml index 777da56f..93a3ca39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "exasol-transformers-extension" -version = "0.5.0" +version = "0.6.0" description = "An Exasol extension to use state-of-the-art pretrained machine learning models via the transformers api." authors = [ From 39bce567822b7406242ab778c911ce009c1549b6 Mon Sep 17 00:00:00 2001 From: MarleneKress79789 Date: Wed, 29 Nov 2023 13:43:11 +0100 Subject: [PATCH 04/10] added upload of save_pretrained model --- .../utils/huggingface_hub_bucketfs_model_transfer_sp.py | 2 +- .../test_huggingface_hub_bucketfs__model_transfer_sp.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py index 49d8e31a..f9e1a73b 100644 --- a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py +++ b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py @@ -60,7 +60,7 @@ def upload_to_bucketfs(self) -> Path: """ Upload the downloaded models into the BucketFS """ - return self._bucketfs_model_uploader.upload_directory(self._tmpdir_name) + return self._bucketfs_model_uploader.upload_directory(self._local_model_save_path / self._model_name) class HuggingFaceHubBucketFSModelTransferSPFactory: diff --git a/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py b/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py index 1cae7766..fd3dc894 100644 --- a/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py +++ b/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py @@ -83,3 +83,10 @@ def test_download_with_model(): #todo delete model +def test_upload_function_call(): + test_setup = TestSetup() + test_setup.downloader.download_from_huggingface_hub_sp(model_factory=test_setup.model_factory_mock) + test_setup.reset_mocks() + model_save_path = (test_setup.downloader._local_model_save_path / test_setup.model_name) + test_setup.downloader.upload_to_bucketfs() + assert mock_cast(test_setup.bucketfs_model_uploader_mock.upload_directory).mock_calls == [call(model_save_path)] \ No newline at end of file From 7fccc90341c3ff50cb5a057aacda03dbdb27fd83 Mon Sep 17 00:00:00 2001 From: MarleneKress79789 Date: Wed, 29 Nov 2023 13:21:24 +0100 Subject: [PATCH 05/10] added Huggingface model transfer class with save_pretrained in download --- ...gingface_hub_bucketfs_model_transfer_sp.py | 78 +++++++++++++++++ ...ingface_hub_bucketfs__model_transfer_sp.py | 85 +++++++++++++++++++ 2 files changed, 163 insertions(+) create mode 100644 exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py create mode 100644 tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py diff --git a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py new file mode 100644 index 00000000..49d8e31a --- /dev/null +++ b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py @@ -0,0 +1,78 @@ +import os +import tempfile +from pathlib import Path +from typing import Protocol, Union, runtime_checkable + +import transformers +from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation + +from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploaderFactory +from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory + + +@runtime_checkable +class ModelFactoryProtocol(Protocol): + def from_pretrained(self, model_name: str, cache_dir: Path, use_auth_token: str) -> transformers.PreTrainedModel: + pass + + def save_pretrained(self, save_directory: Union[str, Path]): + pass + + +class HuggingFaceHubBucketFSModelTransferSP: + def __init__(self, + bucketfs_location: BucketFSLocation, + model_name: str, + model_path: Path, + local_model_save_path: Path, + token: str, + temporary_directory_factory: TemporaryDirectoryFactory = TemporaryDirectoryFactory(), + bucketfs_model_uploader_factory: BucketFSModelUploaderFactory = BucketFSModelUploaderFactory()): + self._token = token + self._model_name = model_name + self._local_model_save_path = Path(local_model_save_path) + self._temporary_directory_factory = temporary_directory_factory + self._bucketfs_model_uploader = bucketfs_model_uploader_factory.create( + model_path=model_path, + bucketfs_location=bucketfs_location) + self._tmpdir = temporary_directory_factory.create() + self._tmpdir_name = self._tmpdir.__enter__() + + def __enter__(self): + return self + + def __del__(self): + self._tmpdir.cleanup() + + def __exit__(self, exc_type, exc_val, exc_tb): + self._tmpdir.__exit__(exc_type, exc_val, exc_tb) + + def download_from_huggingface_hub_sp(self, model_factory: ModelFactoryProtocol): + """ + Download a model from HuggingFace Hub into a temporary directory and save it with save_pretrained + at _local_model_save_path / _model_name for local storing + """ + model = model_factory.from_pretrained(self._model_name, cache_dir=self._tmpdir_name, use_auth_token=self._token) + path = self._local_model_save_path / self._model_name + model.save_pretrained(path) #todo save in cachedir in assuption will be uploaded and then deleted? + + def upload_to_bucketfs(self) -> Path: + """ + Upload the downloaded models into the BucketFS + """ + return self._bucketfs_model_uploader.upload_directory(self._tmpdir_name) + + +class HuggingFaceHubBucketFSModelTransferSPFactory: + + def create(self, + bucketfs_location: BucketFSLocation, + model_name: str, + model_path: Path, + local_model_save_path: Path, + token: str) -> HuggingFaceHubBucketFSModelTransferSP: + return HuggingFaceHubBucketFSModelTransferSP(bucketfs_location=bucketfs_location, + model_name=model_name, + model_path=model_path, + local_model_save_path=local_model_save_path, + token=token) diff --git a/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py b/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py new file mode 100644 index 00000000..1cae7766 --- /dev/null +++ b/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py @@ -0,0 +1,85 @@ +import tempfile +from pathlib import Path +from typing import Union +from unittest.mock import create_autospec, MagicMock, call + +from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation +from transformers import AutoModel, PreTrainedModel + +from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploader, \ + BucketFSModelUploaderFactory +from exasol_transformers_extension.utils.huggingface_hub_bucketfs_model_transfer_sp import ModelFactoryProtocol, \ + HuggingFaceHubBucketFSModelTransferSP +from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory +from tests.utils.mock_cast import mock_cast + +from tests.utils.parameters import model_params + +class TestSetup: + def __init__(self, local_model_save_path: Path = "downloaded_models_test"): + self.bucketfs_location_mock: Union[BucketFSLocation, MagicMock] = create_autospec(BucketFSLocation) + self.model_factory_mock: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol) + self.temporary_directory_factory_mock: Union[TemporaryDirectoryFactory, MagicMock] = \ + create_autospec(TemporaryDirectoryFactory) + self.bucketfs_model_uploader_factory_mock: Union[BucketFSModelUploaderFactory, MagicMock] = \ + create_autospec(BucketFSModelUploaderFactory) + self.bucketfs_model_uploader_mock: Union[BucketFSModelUploader, MagicMock] = \ + create_autospec(BucketFSModelUploader) + mock_cast(self.bucketfs_model_uploader_factory_mock.create).side_effect = [self.bucketfs_model_uploader_mock] + + self.token = "token" + model_params_ = model_params.tiny_model + print(model_params_) + self.model_name = model_params_ + self.model_path = Path("test_model_path") + self.downloader = HuggingFaceHubBucketFSModelTransferSP( + bucketfs_location=self.bucketfs_location_mock, + model_path=self.model_path, + model_name=self.model_name, + local_model_save_path=local_model_save_path, + token=self.token, + temporary_directory_factory=self.temporary_directory_factory_mock, + bucketfs_model_uploader_factory=self.bucketfs_model_uploader_factory_mock + ) + + def reset_mocks(self): + self.bucketfs_location_mock.reset_mock() + self.temporary_directory_factory_mock.reset_mock() + self.model_factory_mock.reset_mock() + self.bucketfs_model_uploader_mock.reset_mock() + + +def test_init(): + test_setup = TestSetup() + assert test_setup.temporary_directory_factory_mock.mock_calls == [call.create(), call.create().__enter__()] \ + and test_setup.model_factory_mock.mock_calls == [] \ + and test_setup.bucketfs_location_mock.mock_calls == [] \ + and mock_cast(test_setup.bucketfs_model_uploader_factory_mock.create).mock_calls == [ + call.create(model_path=test_setup.model_path, bucketfs_location=test_setup.bucketfs_location_mock) + ] + + +def test_download_function_call(): + test_setup = TestSetup() + test_setup.downloader.download_from_huggingface_hub_sp(model_factory=test_setup.model_factory_mock) + cache_dir = test_setup.temporary_directory_factory_mock.create().__enter__() + model_save_path = (test_setup.downloader._local_model_save_path/test_setup.model_name) + assert test_setup.model_factory_mock.mock_calls == [ + call.from_pretrained(test_setup.model_name, cache_dir=cache_dir, + use_auth_token=test_setup.token), + call.from_pretrained().save_pretrained(model_save_path)] + + +# todo add test for model already downloaded? + +def test_download_with_model(): + with tempfile.TemporaryDirectory() as folder: + folder_path = Path(folder) + test_setup = TestSetup(local_model_save_path=folder_path/"downloaded_models") + base_model_factory: ModelFactoryProtocol = AutoModel + test_setup.downloader.download_from_huggingface_hub_sp(model_factory=base_model_factory) + assert AutoModel.from_pretrained(folder_path/"downloaded_models"/test_setup.model_name) + test_setup.downloader.__del__() + #todo delete model + + From 2299a8a86b0f1496a13cf58667d454c074ced986 Mon Sep 17 00:00:00 2001 From: MarleneKress79789 Date: Wed, 29 Nov 2023 13:36:56 +0100 Subject: [PATCH 06/10] added changes file --- doc/changes/changelog.md | 1 + doc/changes/changes_0.7.0.md | 23 +++++++++++++++++++++++ pyproject.toml | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 doc/changes/changes_0.7.0.md diff --git a/doc/changes/changelog.md b/doc/changes/changelog.md index 232f6f79..a63fa416 100644 --- a/doc/changes/changelog.md +++ b/doc/changes/changelog.md @@ -1,5 +1,6 @@ # Changelog +* [0.7.0](changes_0.7.0.md) * [0.6.0](changes_0.6.0.md) * [0.5.0](changes_0.5.0.md) * [0.4.0](changes_0.4.0.md) diff --git a/doc/changes/changes_0.7.0.md b/doc/changes/changes_0.7.0.md new file mode 100644 index 00000000..151dd17e --- /dev/null +++ b/doc/changes/changes_0.7.0.md @@ -0,0 +1,23 @@ +# Transformers Extension 0.7.0, released T.B.D + +Code name: T.B.D + + +## Summary + +T.B.D + +### Features + - #143: Added HuggingfaceTransfer class with save_pretrained for saving model locally + - #152: Made the container uploading and language registration two separate actions + +### Bug Fixes + + + + +### Documentation + + + +### Security diff --git a/pyproject.toml b/pyproject.toml index 777da56f..93a3ca39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "exasol-transformers-extension" -version = "0.5.0" +version = "0.6.0" description = "An Exasol extension to use state-of-the-art pretrained machine learning models via the transformers api." authors = [ From cdcccc77dbc0181869858dd7acb1add901f2099e Mon Sep 17 00:00:00 2001 From: MarleneKress79789 Date: Wed, 29 Nov 2023 13:43:11 +0100 Subject: [PATCH 07/10] added upload of save_pretrained model --- .../utils/huggingface_hub_bucketfs_model_transfer_sp.py | 2 +- .../test_huggingface_hub_bucketfs__model_transfer_sp.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py index 49d8e31a..f9e1a73b 100644 --- a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py +++ b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py @@ -60,7 +60,7 @@ def upload_to_bucketfs(self) -> Path: """ Upload the downloaded models into the BucketFS """ - return self._bucketfs_model_uploader.upload_directory(self._tmpdir_name) + return self._bucketfs_model_uploader.upload_directory(self._local_model_save_path / self._model_name) class HuggingFaceHubBucketFSModelTransferSPFactory: diff --git a/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py b/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py index 1cae7766..fd3dc894 100644 --- a/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py +++ b/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py @@ -83,3 +83,10 @@ def test_download_with_model(): #todo delete model +def test_upload_function_call(): + test_setup = TestSetup() + test_setup.downloader.download_from_huggingface_hub_sp(model_factory=test_setup.model_factory_mock) + test_setup.reset_mocks() + model_save_path = (test_setup.downloader._local_model_save_path / test_setup.model_name) + test_setup.downloader.upload_to_bucketfs() + assert mock_cast(test_setup.bucketfs_model_uploader_mock.upload_directory).mock_calls == [call(model_save_path)] \ No newline at end of file From f30241424ea82dbba53e1bf6ed363f354cb02fde Mon Sep 17 00:00:00 2001 From: MarleneKress79789 Date: Wed, 13 Dec 2023 13:37:58 +0100 Subject: [PATCH 08/10] [CodeBuild] docstring and fix tests --- ...gingface_hub_bucketfs_model_transfer_sp.py | 50 +++++++++----- ...gingface_hub_bucketfs_model_transfer_sp.py | 65 +++++++++++++++++++ ...uggingface_hub_bucketfs_model_transfer.py} | 0 ...ingface_hub_bucketfs_model_transfer_sp.py} | 29 ++------- 4 files changed, 107 insertions(+), 37 deletions(-) create mode 100644 tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py rename tests/unit_tests/utils/{test_huggingface_hub_bucketfs__model_transfer.py => test_huggingface_hub_bucketfs_model_transfer.py} (100%) rename tests/unit_tests/utils/{test_huggingface_hub_bucketfs__model_transfer_sp.py => test_huggingface_hub_bucketfs_model_transfer_sp.py} (74%) diff --git a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py index f9e1a73b..3046a00d 100644 --- a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py +++ b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py @@ -1,5 +1,3 @@ -import os -import tempfile from pathlib import Path from typing import Protocol, Union, runtime_checkable @@ -12,6 +10,9 @@ @runtime_checkable class ModelFactoryProtocol(Protocol): + """ + Protocol for better type hints. + """ def from_pretrained(self, model_name: str, cache_dir: Path, use_auth_token: str) -> transformers.PreTrainedModel: pass @@ -20,23 +21,31 @@ def save_pretrained(self, save_directory: Union[str, Path]): class HuggingFaceHubBucketFSModelTransferSP: + """ + Class for downloading a model using the Huggingface Transformers API, and loading it into the BucketFS. + + :bucketfs_location: BucketFSLocation the model should be loaded to + :model_name: Name of the model to be downloaded using Huggingface Transformers API + :model_path: Path the model will be loaded into the BucketFS at + :token: Huggingface token, only needed for private models + :temporary_directory_factory: Optional. Default is TemporaryDirectoryFactory. Mainly change for testing. + :bucketfs_model_uploader_factory: Optional. Default is BucketFSModelUploaderFactory. Mainly change for testing. + """ def __init__(self, bucketfs_location: BucketFSLocation, model_name: str, model_path: Path, - local_model_save_path: Path, token: str, temporary_directory_factory: TemporaryDirectoryFactory = TemporaryDirectoryFactory(), bucketfs_model_uploader_factory: BucketFSModelUploaderFactory = BucketFSModelUploaderFactory()): self._token = token self._model_name = model_name - self._local_model_save_path = Path(local_model_save_path) self._temporary_directory_factory = temporary_directory_factory self._bucketfs_model_uploader = bucketfs_model_uploader_factory.create( model_path=model_path, bucketfs_location=bucketfs_location) self._tmpdir = temporary_directory_factory.create() - self._tmpdir_name = self._tmpdir.__enter__() + self._tmpdir_name = Path(self._tmpdir.__enter__()) def __enter__(self): return self @@ -47,32 +56,43 @@ def __del__(self): def __exit__(self, exc_type, exc_val, exc_tb): self._tmpdir.__exit__(exc_type, exc_val, exc_tb) - def download_from_huggingface_hub_sp(self, model_factory: ModelFactoryProtocol): + def download_from_huggingface_hub(self, model_factory: ModelFactoryProtocol): """ Download a model from HuggingFace Hub into a temporary directory and save it with save_pretrained - at _local_model_save_path / _model_name for local storing + in temporary directory / pretrained . """ - model = model_factory.from_pretrained(self._model_name, cache_dir=self._tmpdir_name, use_auth_token=self._token) - path = self._local_model_save_path / self._model_name - model.save_pretrained(path) #todo save in cachedir in assuption will be uploaded and then deleted? + model = model_factory.from_pretrained(self._model_name, cache_dir=self._tmpdir_name/"cache", use_auth_token=self._token) + model.save_pretrained(self._tmpdir_name/"pretrained"/self._model_name) def upload_to_bucketfs(self) -> Path: """ - Upload the downloaded models into the BucketFS + Upload the downloaded models into the BucketFS. + + returns: Path of the uploaded model in the BucketFS """ - return self._bucketfs_model_uploader.upload_directory(self._local_model_save_path / self._model_name) + return self._bucketfs_model_uploader.upload_directory(self._tmpdir_name/"pretrained"/self._model_name) class HuggingFaceHubBucketFSModelTransferSPFactory: - + """ + Class for creating a HuggingFaceHubBucketFSModelTransferSP object. + """ def create(self, bucketfs_location: BucketFSLocation, model_name: str, model_path: Path, - local_model_save_path: Path, token: str) -> HuggingFaceHubBucketFSModelTransferSP: + """ + Creates a HuggingFaceHubBucketFSModelTransferSP object. + + :bucketfs_location: BucketFSLocation the model should be loaded to + :model_name: Name of the model to be downloaded using Huggingface Transformers API + :model_path: Path the model will be loaded into the BucketFS at + :token: Huggingface token, only needed for private models + + returns: The created HuggingFaceHubBucketFSModelTransferSP object. + """ return HuggingFaceHubBucketFSModelTransferSP(bucketfs_location=bucketfs_location, model_name=model_name, model_path=model_path, - local_model_save_path=local_model_save_path, token=token) diff --git a/tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py b/tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py new file mode 100644 index 00000000..ea2bdf40 --- /dev/null +++ b/tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py @@ -0,0 +1,65 @@ +import pytest +import tempfile +from pathlib import Path +from typing import Union +from unittest.mock import create_autospec, MagicMock + +from transformers import AutoModel + +from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploader, \ + BucketFSModelUploaderFactory +from exasol_transformers_extension.utils.huggingface_hub_bucketfs_model_transfer_sp import ModelFactoryProtocol, \ + HuggingFaceHubBucketFSModelTransferSP +from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory +from tests.utils.mock_cast import mock_cast + +from tests.utils.parameters import model_params + + +class TestSetup: + def __init__(self, bucketfs_location): + self.bucketfs_location = bucketfs_location + self.model_factory_mock: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol) + self.temporary_directory_factory = TemporaryDirectoryFactory() + self.bucketfs_model_uploader_factory_mock: Union[BucketFSModelUploaderFactory, MagicMock] = \ + create_autospec(BucketFSModelUploaderFactory) + self.bucketfs_model_uploader_mock: Union[BucketFSModelUploader, MagicMock] = \ + create_autospec(BucketFSModelUploader) + mock_cast(self.bucketfs_model_uploader_factory_mock.create).side_effect = [self.bucketfs_model_uploader_mock] + + self.token = "token" + model_params_ = model_params.tiny_model + print(model_params_) + self.model_name = model_params_ + self.model_path = Path("test_model_path") + self.downloader = HuggingFaceHubBucketFSModelTransferSP( + bucketfs_location=self.bucketfs_location, + model_path=self.model_path, + model_name=self.model_name, + token=self.token, + temporary_directory_factory=self.temporary_directory_factory, + bucketfs_model_uploader_factory=self.bucketfs_model_uploader_factory_mock + ) + + def reset_mocks(self): + self.model_factory_mock.reset_mock() + self.bucketfs_model_uploader_mock.reset_mock() + + +def test_download_with_model(bucketfs_location): + with tempfile.TemporaryDirectory() as folder: + test_setup = TestSetup(bucketfs_location) + base_model_factory: ModelFactoryProtocol = AutoModel + test_setup.downloader.download_from_huggingface_hub(model_factory=base_model_factory) + assert AutoModel.from_pretrained(test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) + test_setup.downloader.__del__() + + +def test_download_with_duplicate_model(bucketfs_location): + with tempfile.TemporaryDirectory() as folder: + test_setup = TestSetup(bucketfs_location) + base_model_factory: ModelFactoryProtocol = AutoModel + test_setup.downloader.download_from_huggingface_hub(model_factory=base_model_factory) + test_setup.downloader.download_from_huggingface_hub(model_factory=base_model_factory) + assert AutoModel.from_pretrained(test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) + test_setup.downloader.__del__() diff --git a/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer.py b/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer.py similarity index 100% rename from tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer.py rename to tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer.py diff --git a/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py b/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py similarity index 74% rename from tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py rename to tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py index fd3dc894..009a03ac 100644 --- a/tests/unit_tests/utils/test_huggingface_hub_bucketfs__model_transfer_sp.py +++ b/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py @@ -1,10 +1,8 @@ -import tempfile from pathlib import Path from typing import Union from unittest.mock import create_autospec, MagicMock, call from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation -from transformers import AutoModel, PreTrainedModel from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploader, \ BucketFSModelUploaderFactory @@ -15,8 +13,9 @@ from tests.utils.parameters import model_params + class TestSetup: - def __init__(self, local_model_save_path: Path = "downloaded_models_test"): + def __init__(self): self.bucketfs_location_mock: Union[BucketFSLocation, MagicMock] = create_autospec(BucketFSLocation) self.model_factory_mock: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol) self.temporary_directory_factory_mock: Union[TemporaryDirectoryFactory, MagicMock] = \ @@ -36,7 +35,6 @@ def __init__(self, local_model_save_path: Path = "downloaded_models_test"): bucketfs_location=self.bucketfs_location_mock, model_path=self.model_path, model_name=self.model_name, - local_model_save_path=local_model_save_path, token=self.token, temporary_directory_factory=self.temporary_directory_factory_mock, bucketfs_model_uploader_factory=self.bucketfs_model_uploader_factory_mock @@ -61,32 +59,19 @@ def test_init(): def test_download_function_call(): test_setup = TestSetup() - test_setup.downloader.download_from_huggingface_hub_sp(model_factory=test_setup.model_factory_mock) - cache_dir = test_setup.temporary_directory_factory_mock.create().__enter__() - model_save_path = (test_setup.downloader._local_model_save_path/test_setup.model_name) + test_setup.downloader.download_from_huggingface_hub(model_factory=test_setup.model_factory_mock) + cache_dir = test_setup.temporary_directory_factory_mock.create().__enter__().__truediv__() + model_save_path = (test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) assert test_setup.model_factory_mock.mock_calls == [ call.from_pretrained(test_setup.model_name, cache_dir=cache_dir, use_auth_token=test_setup.token), call.from_pretrained().save_pretrained(model_save_path)] -# todo add test for model already downloaded? - -def test_download_with_model(): - with tempfile.TemporaryDirectory() as folder: - folder_path = Path(folder) - test_setup = TestSetup(local_model_save_path=folder_path/"downloaded_models") - base_model_factory: ModelFactoryProtocol = AutoModel - test_setup.downloader.download_from_huggingface_hub_sp(model_factory=base_model_factory) - assert AutoModel.from_pretrained(folder_path/"downloaded_models"/test_setup.model_name) - test_setup.downloader.__del__() - #todo delete model - - def test_upload_function_call(): test_setup = TestSetup() - test_setup.downloader.download_from_huggingface_hub_sp(model_factory=test_setup.model_factory_mock) + test_setup.downloader.download_from_huggingface_hub(model_factory=test_setup.model_factory_mock) test_setup.reset_mocks() - model_save_path = (test_setup.downloader._local_model_save_path / test_setup.model_name) + model_save_path = (test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) test_setup.downloader.upload_to_bucketfs() assert mock_cast(test_setup.bucketfs_model_uploader_mock.upload_directory).mock_calls == [call(model_save_path)] \ No newline at end of file From cb4dc19626d689c0bd822662585d66f18e301417 Mon Sep 17 00:00:00 2001 From: MarleneKress79789 Date: Wed, 13 Dec 2023 14:12:55 +0100 Subject: [PATCH 09/10] [CodeBuild] fix tests --- .../test_huggingface_hub_bucketfs_model_transfer_sp.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py b/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py index 009a03ac..5b5814f8 100644 --- a/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py +++ b/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py @@ -49,7 +49,9 @@ def reset_mocks(self): def test_init(): test_setup = TestSetup() - assert test_setup.temporary_directory_factory_mock.mock_calls == [call.create(), call.create().__enter__()] \ + assert test_setup.temporary_directory_factory_mock.mock_calls == [call.create(), + call.create().__enter__(), + call.create().__enter__().__fspath__()] \ and test_setup.model_factory_mock.mock_calls == [] \ and test_setup.bucketfs_location_mock.mock_calls == [] \ and mock_cast(test_setup.bucketfs_model_uploader_factory_mock.create).mock_calls == [ @@ -60,10 +62,10 @@ def test_init(): def test_download_function_call(): test_setup = TestSetup() test_setup.downloader.download_from_huggingface_hub(model_factory=test_setup.model_factory_mock) - cache_dir = test_setup.temporary_directory_factory_mock.create().__enter__().__truediv__() + cache_dir = test_setup.temporary_directory_factory_mock.create().__enter__() model_save_path = (test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) assert test_setup.model_factory_mock.mock_calls == [ - call.from_pretrained(test_setup.model_name, cache_dir=cache_dir, + call.from_pretrained(test_setup.model_name, cache_dir=Path(cache_dir)/"cache", use_auth_token=test_setup.token), call.from_pretrained().save_pretrained(model_save_path)] From 8be8f4aa605c96f7b92a597b3c4222aa170dcefa Mon Sep 17 00:00:00 2001 From: MarleneKress79789 Date: Tue, 19 Dec 2023 12:37:07 +0100 Subject: [PATCH 10/10] [CodeBuild] changes from code review --- doc/changes/changes_0.7.0.md | 7 +++---- ...huggingface_hub_bucketfs_model_transfer.py | 10 ++------- ...gingface_hub_bucketfs_model_transfer_sp.py | 21 ++++++------------- .../utils/model_factory_protocol.py | 16 ++++++++++++++ pyproject.toml | 2 +- ...gingface_hub_bucketfs_model_transfer_sp.py | 9 ++++---- ...gingface_hub_bucketfs_model_transfer_sp.py | 10 +++++---- 7 files changed, 38 insertions(+), 37 deletions(-) create mode 100644 exasol_transformers_extension/utils/model_factory_protocol.py diff --git a/doc/changes/changes_0.7.0.md b/doc/changes/changes_0.7.0.md index 5d0e9296..831981f9 100644 --- a/doc/changes/changes_0.7.0.md +++ b/doc/changes/changes_0.7.0.md @@ -14,16 +14,15 @@ T.B.D ### Bug Fixes - -### Bug Fixes + - n/a ### Refactorings - #144: Extracted base_model_udf.load_models into separate class - - + ### Documentation + - n/a ### Security diff --git a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer.py b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer.py index 180d21e4..f09e17fc 100644 --- a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer.py +++ b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer.py @@ -1,19 +1,13 @@ -import tempfile from pathlib import Path -from typing import Protocol, runtime_checkable + from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation +from exasol_transformers_extension.utils.model_factory_protocol import ModelFactoryProtocol from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploaderFactory from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory -@runtime_checkable -class ModelFactoryProtocol(Protocol): - def from_pretrained(self, model_name: str, cache_dir: Path, use_auth_token: str): - pass - - class HuggingFaceHubBucketFSModelTransfer: def __init__(self, diff --git a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py index 3046a00d..db90a7eb 100644 --- a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py +++ b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py @@ -1,28 +1,19 @@ from pathlib import Path -from typing import Protocol, Union, runtime_checkable -import transformers from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation +from exasol_transformers_extension.utils.model_factory_protocol import ModelFactoryProtocol from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploaderFactory from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory -@runtime_checkable -class ModelFactoryProtocol(Protocol): - """ - Protocol for better type hints. - """ - def from_pretrained(self, model_name: str, cache_dir: Path, use_auth_token: str) -> transformers.PreTrainedModel: - pass - def save_pretrained(self, save_directory: Union[str, Path]): - pass class HuggingFaceHubBucketFSModelTransferSP: """ - Class for downloading a model using the Huggingface Transformers API, and loading it into the BucketFS. + Class for downloading a model using the Huggingface Transformers API, and loading it into the BucketFS + using save_pretrained. :bucketfs_location: BucketFSLocation the model should be loaded to :model_name: Name of the model to be downloaded using Huggingface Transformers API @@ -61,8 +52,8 @@ def download_from_huggingface_hub(self, model_factory: ModelFactoryProtocol): Download a model from HuggingFace Hub into a temporary directory and save it with save_pretrained in temporary directory / pretrained . """ - model = model_factory.from_pretrained(self._model_name, cache_dir=self._tmpdir_name/"cache", use_auth_token=self._token) - model.save_pretrained(self._tmpdir_name/"pretrained"/self._model_name) + model = model_factory.from_pretrained(self._model_name, cache_dir=self._tmpdir_name / "cache", use_auth_token=self._token) + model.save_pretrained(self._tmpdir_name / "pretrained" / self._model_name) def upload_to_bucketfs(self) -> Path: """ @@ -70,7 +61,7 @@ def upload_to_bucketfs(self) -> Path: returns: Path of the uploaded model in the BucketFS """ - return self._bucketfs_model_uploader.upload_directory(self._tmpdir_name/"pretrained"/self._model_name) + return self._bucketfs_model_uploader.upload_directory(self._tmpdir_name / "pretrained" / self._model_name) class HuggingFaceHubBucketFSModelTransferSPFactory: diff --git a/exasol_transformers_extension/utils/model_factory_protocol.py b/exasol_transformers_extension/utils/model_factory_protocol.py new file mode 100644 index 00000000..48fc390f --- /dev/null +++ b/exasol_transformers_extension/utils/model_factory_protocol.py @@ -0,0 +1,16 @@ +from pathlib import Path +from typing import Protocol, Union, runtime_checkable + +import transformers + + +@runtime_checkable +class ModelFactoryProtocol(Protocol): + """ + Protocol for better type hints. + """ + def from_pretrained(self, model_name: str, cache_dir: Path, use_auth_token: str) -> transformers.PreTrainedModel: + pass + + def save_pretrained(self, save_directory: Union[str, Path]): + pass \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 93a3ca39..3d39bbc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "exasol-transformers-extension" -version = "0.6.0" +version = "0.7.0" description = "An Exasol extension to use state-of-the-art pretrained machine learning models via the transformers api." authors = [ diff --git a/tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py b/tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py index ea2bdf40..77ffdf00 100644 --- a/tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py +++ b/tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py @@ -29,7 +29,6 @@ def __init__(self, bucketfs_location): self.token = "token" model_params_ = model_params.tiny_model - print(model_params_) self.model_name = model_params_ self.model_path = Path("test_model_path") self.downloader = HuggingFaceHubBucketFSModelTransferSP( @@ -51,8 +50,8 @@ def test_download_with_model(bucketfs_location): test_setup = TestSetup(bucketfs_location) base_model_factory: ModelFactoryProtocol = AutoModel test_setup.downloader.download_from_huggingface_hub(model_factory=base_model_factory) - assert AutoModel.from_pretrained(test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) - test_setup.downloader.__del__() + assert AutoModel.from_pretrained(test_setup.downloader._tmpdir_name / "pretrained" / test_setup.model_name) + del test_setup.downloader def test_download_with_duplicate_model(bucketfs_location): @@ -61,5 +60,5 @@ def test_download_with_duplicate_model(bucketfs_location): base_model_factory: ModelFactoryProtocol = AutoModel test_setup.downloader.download_from_huggingface_hub(model_factory=base_model_factory) test_setup.downloader.download_from_huggingface_hub(model_factory=base_model_factory) - assert AutoModel.from_pretrained(test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) - test_setup.downloader.__del__() + assert AutoModel.from_pretrained(test_setup.downloader._tmpdir_name / "pretrained" / test_setup.model_name) + del test_setup.downloader diff --git a/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py b/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py index 5b5814f8..6595c15c 100644 --- a/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py +++ b/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py @@ -26,9 +26,9 @@ def __init__(self): create_autospec(BucketFSModelUploader) mock_cast(self.bucketfs_model_uploader_factory_mock.create).side_effect = [self.bucketfs_model_uploader_mock] + self.token = "token" model_params_ = model_params.tiny_model - print(model_params_) self.model_name = model_params_ self.model_path = Path("test_model_path") self.downloader = HuggingFaceHubBucketFSModelTransferSP( @@ -45,6 +45,7 @@ def reset_mocks(self): self.temporary_directory_factory_mock.reset_mock() self.model_factory_mock.reset_mock() self.bucketfs_model_uploader_mock.reset_mock() + self.bucketfs_model_uploader_factory_mock.reset_mock() def test_init(): @@ -62,8 +63,8 @@ def test_init(): def test_download_function_call(): test_setup = TestSetup() test_setup.downloader.download_from_huggingface_hub(model_factory=test_setup.model_factory_mock) - cache_dir = test_setup.temporary_directory_factory_mock.create().__enter__() - model_save_path = (test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) + cache_dir = mock_cast(test_setup.temporary_directory_factory_mock.create().__enter__).return_value + model_save_path = Path(cache_dir) / "pretrained" / test_setup.model_name assert test_setup.model_factory_mock.mock_calls == [ call.from_pretrained(test_setup.model_name, cache_dir=Path(cache_dir)/"cache", use_auth_token=test_setup.token), @@ -74,6 +75,7 @@ def test_upload_function_call(): test_setup = TestSetup() test_setup.downloader.download_from_huggingface_hub(model_factory=test_setup.model_factory_mock) test_setup.reset_mocks() - model_save_path = (test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) + cache_dir = mock_cast(test_setup.temporary_directory_factory_mock.create().__enter__).return_value + model_save_path = Path(cache_dir) / "pretrained" / test_setup.model_name test_setup.downloader.upload_to_bucketfs() assert mock_cast(test_setup.bucketfs_model_uploader_mock.upload_directory).mock_calls == [call(model_save_path)] \ No newline at end of file