From 8be8f4aa605c96f7b92a597b3c4222aa170dcefa Mon Sep 17 00:00:00 2001 From: MarleneKress79789 Date: Tue, 19 Dec 2023 12:37:07 +0100 Subject: [PATCH] [CodeBuild] changes from code review --- doc/changes/changes_0.7.0.md | 7 +++---- ...huggingface_hub_bucketfs_model_transfer.py | 10 ++------- ...gingface_hub_bucketfs_model_transfer_sp.py | 21 ++++++------------- .../utils/model_factory_protocol.py | 16 ++++++++++++++ pyproject.toml | 2 +- ...gingface_hub_bucketfs_model_transfer_sp.py | 9 ++++---- ...gingface_hub_bucketfs_model_transfer_sp.py | 10 +++++---- 7 files changed, 38 insertions(+), 37 deletions(-) create mode 100644 exasol_transformers_extension/utils/model_factory_protocol.py diff --git a/doc/changes/changes_0.7.0.md b/doc/changes/changes_0.7.0.md index 5d0e9296..831981f9 100644 --- a/doc/changes/changes_0.7.0.md +++ b/doc/changes/changes_0.7.0.md @@ -14,16 +14,15 @@ T.B.D ### Bug Fixes - -### Bug Fixes + - n/a ### Refactorings - #144: Extracted base_model_udf.load_models into separate class - - + ### Documentation + - n/a ### Security diff --git a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer.py b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer.py index 180d21e4..f09e17fc 100644 --- a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer.py +++ b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer.py @@ -1,19 +1,13 @@ -import tempfile from pathlib import Path -from typing import Protocol, runtime_checkable + from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation +from exasol_transformers_extension.utils.model_factory_protocol import ModelFactoryProtocol from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploaderFactory from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory -@runtime_checkable -class ModelFactoryProtocol(Protocol): - def from_pretrained(self, model_name: str, cache_dir: Path, use_auth_token: str): - pass - - class HuggingFaceHubBucketFSModelTransfer: def __init__(self, diff --git a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py index 3046a00d..db90a7eb 100644 --- a/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py +++ b/exasol_transformers_extension/utils/huggingface_hub_bucketfs_model_transfer_sp.py @@ -1,28 +1,19 @@ from pathlib import Path -from typing import Protocol, Union, runtime_checkable -import transformers from exasol_bucketfs_utils_python.bucketfs_location import BucketFSLocation +from exasol_transformers_extension.utils.model_factory_protocol import ModelFactoryProtocol from exasol_transformers_extension.utils.bucketfs_model_uploader import BucketFSModelUploaderFactory from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory -@runtime_checkable -class ModelFactoryProtocol(Protocol): - """ - Protocol for better type hints. - """ - def from_pretrained(self, model_name: str, cache_dir: Path, use_auth_token: str) -> transformers.PreTrainedModel: - pass - def save_pretrained(self, save_directory: Union[str, Path]): - pass class HuggingFaceHubBucketFSModelTransferSP: """ - Class for downloading a model using the Huggingface Transformers API, and loading it into the BucketFS. + Class for downloading a model using the Huggingface Transformers API, and loading it into the BucketFS + using save_pretrained. :bucketfs_location: BucketFSLocation the model should be loaded to :model_name: Name of the model to be downloaded using Huggingface Transformers API @@ -61,8 +52,8 @@ def download_from_huggingface_hub(self, model_factory: ModelFactoryProtocol): Download a model from HuggingFace Hub into a temporary directory and save it with save_pretrained in temporary directory / pretrained . """ - model = model_factory.from_pretrained(self._model_name, cache_dir=self._tmpdir_name/"cache", use_auth_token=self._token) - model.save_pretrained(self._tmpdir_name/"pretrained"/self._model_name) + model = model_factory.from_pretrained(self._model_name, cache_dir=self._tmpdir_name / "cache", use_auth_token=self._token) + model.save_pretrained(self._tmpdir_name / "pretrained" / self._model_name) def upload_to_bucketfs(self) -> Path: """ @@ -70,7 +61,7 @@ def upload_to_bucketfs(self) -> Path: returns: Path of the uploaded model in the BucketFS """ - return self._bucketfs_model_uploader.upload_directory(self._tmpdir_name/"pretrained"/self._model_name) + return self._bucketfs_model_uploader.upload_directory(self._tmpdir_name / "pretrained" / self._model_name) class HuggingFaceHubBucketFSModelTransferSPFactory: diff --git a/exasol_transformers_extension/utils/model_factory_protocol.py b/exasol_transformers_extension/utils/model_factory_protocol.py new file mode 100644 index 00000000..48fc390f --- /dev/null +++ b/exasol_transformers_extension/utils/model_factory_protocol.py @@ -0,0 +1,16 @@ +from pathlib import Path +from typing import Protocol, Union, runtime_checkable + +import transformers + + +@runtime_checkable +class ModelFactoryProtocol(Protocol): + """ + Protocol for better type hints. + """ + def from_pretrained(self, model_name: str, cache_dir: Path, use_auth_token: str) -> transformers.PreTrainedModel: + pass + + def save_pretrained(self, save_directory: Union[str, Path]): + pass \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 93a3ca39..3d39bbc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "exasol-transformers-extension" -version = "0.6.0" +version = "0.7.0" description = "An Exasol extension to use state-of-the-art pretrained machine learning models via the transformers api." authors = [ diff --git a/tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py b/tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py index ea2bdf40..77ffdf00 100644 --- a/tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py +++ b/tests/integration_tests/without_db/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py @@ -29,7 +29,6 @@ def __init__(self, bucketfs_location): self.token = "token" model_params_ = model_params.tiny_model - print(model_params_) self.model_name = model_params_ self.model_path = Path("test_model_path") self.downloader = HuggingFaceHubBucketFSModelTransferSP( @@ -51,8 +50,8 @@ def test_download_with_model(bucketfs_location): test_setup = TestSetup(bucketfs_location) base_model_factory: ModelFactoryProtocol = AutoModel test_setup.downloader.download_from_huggingface_hub(model_factory=base_model_factory) - assert AutoModel.from_pretrained(test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) - test_setup.downloader.__del__() + assert AutoModel.from_pretrained(test_setup.downloader._tmpdir_name / "pretrained" / test_setup.model_name) + del test_setup.downloader def test_download_with_duplicate_model(bucketfs_location): @@ -61,5 +60,5 @@ def test_download_with_duplicate_model(bucketfs_location): base_model_factory: ModelFactoryProtocol = AutoModel test_setup.downloader.download_from_huggingface_hub(model_factory=base_model_factory) test_setup.downloader.download_from_huggingface_hub(model_factory=base_model_factory) - assert AutoModel.from_pretrained(test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) - test_setup.downloader.__del__() + assert AutoModel.from_pretrained(test_setup.downloader._tmpdir_name / "pretrained" / test_setup.model_name) + del test_setup.downloader diff --git a/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py b/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py index 5b5814f8..6595c15c 100644 --- a/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py +++ b/tests/unit_tests/utils/test_huggingface_hub_bucketfs_model_transfer_sp.py @@ -26,9 +26,9 @@ def __init__(self): create_autospec(BucketFSModelUploader) mock_cast(self.bucketfs_model_uploader_factory_mock.create).side_effect = [self.bucketfs_model_uploader_mock] + self.token = "token" model_params_ = model_params.tiny_model - print(model_params_) self.model_name = model_params_ self.model_path = Path("test_model_path") self.downloader = HuggingFaceHubBucketFSModelTransferSP( @@ -45,6 +45,7 @@ def reset_mocks(self): self.temporary_directory_factory_mock.reset_mock() self.model_factory_mock.reset_mock() self.bucketfs_model_uploader_mock.reset_mock() + self.bucketfs_model_uploader_factory_mock.reset_mock() def test_init(): @@ -62,8 +63,8 @@ def test_init(): def test_download_function_call(): test_setup = TestSetup() test_setup.downloader.download_from_huggingface_hub(model_factory=test_setup.model_factory_mock) - cache_dir = test_setup.temporary_directory_factory_mock.create().__enter__() - model_save_path = (test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) + cache_dir = mock_cast(test_setup.temporary_directory_factory_mock.create().__enter__).return_value + model_save_path = Path(cache_dir) / "pretrained" / test_setup.model_name assert test_setup.model_factory_mock.mock_calls == [ call.from_pretrained(test_setup.model_name, cache_dir=Path(cache_dir)/"cache", use_auth_token=test_setup.token), @@ -74,6 +75,7 @@ def test_upload_function_call(): test_setup = TestSetup() test_setup.downloader.download_from_huggingface_hub(model_factory=test_setup.model_factory_mock) test_setup.reset_mocks() - model_save_path = (test_setup.downloader._tmpdir_name/"pretrained"/test_setup.model_name) + cache_dir = mock_cast(test_setup.temporary_directory_factory_mock.create().__enter__).return_value + model_save_path = Path(cache_dir) / "pretrained" / test_setup.model_name test_setup.downloader.upload_to_bucketfs() assert mock_cast(test_setup.bucketfs_model_uploader_mock.upload_directory).mock_calls == [call(model_save_path)] \ No newline at end of file