Skip to content

Commit

Permalink
#256: Removed import of fixtures (#257)
Browse files Browse the repository at this point in the history
* Remove remaining fixture imports and move fixture constants and utils to separate files
* Revert db_conn fixture to test scope, because we need to reset the current open schema after the script deployer tests ran
* Update slct to 0.21.0 and remove debug prints
* Added pytest-exasol-itde to dependencies, because itde 3.1.0 doesn't contain the plugin anymore
* Add fix for "ValueError: You are trying to save a non-contiguous tensor"
* Fix ports forwarded by the itde
* Adapt _create_bucketfs_connection_onprem to new itde
  • Loading branch information
tkilias authored Aug 22, 2024
1 parent 4cca904 commit 6990f02
Show file tree
Hide file tree
Showing 37 changed files with 924 additions and 926 deletions.
1 change: 1 addition & 0 deletions doc/changes/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Changelog

* [2.1.0](changes_2.1.0.md)
* [2.0.0](changes_2.0.0.md)
* [1.0.1](changes_1.0.1.md)
* [1.0.0](changes_1.0.0.md)
Expand Down
26 changes: 26 additions & 0 deletions doc/changes/changes_2.1.0.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Transformers Extension 2.1.0, t.b.d

Code name:

## Summary


### Features

n/a

### Bugs

- #256: Removed imports of pytest fixtures to avoid session fixtures running for each test

### Documentation

n/a

### Refactorings

n/a

### Security

n/a
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@
from exasol_transformers_extension.utils.temporary_directory_factory import TemporaryDirectoryFactory


def make_parameters_of_model_contiguous_tensors(model):
    """Ensure every parameter tensor of *model* is stored contiguously.

    Works around "ValueError: You are trying to save a non contiguous tensor"
    raised by ``save_pretrained``. Objects without a ``parameters`` attribute
    are left untouched.
    """
    if not hasattr(model, "parameters"):
        return
    for parameter in model.parameters():
        parameter.data = parameter.data.contiguous()


class HuggingFaceHubBucketFSModelTransferSP:
"""
Class for downloading a model using the Huggingface Transformers API, saving it locally using
Expand Down Expand Up @@ -56,9 +63,9 @@ def download_from_huggingface_hub(self, model_factory: ModelFactoryProtocol):
model_name = self._model_specification.model_name
model = model_factory.from_pretrained(model_name, cache_dir=self._tmpdir_name / "cache",
use_auth_token=self._token)
make_parameters_of_model_contiguous_tensors(model)
model.save_pretrained(self._save_pretrained_model_path)


def upload_to_bucketfs(self) -> Path:
"""
Upload the downloaded models into the BucketFS.
Expand Down
4 changes: 2 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def without_db_integration_tests(session):
def start_database(session):
    """Spawn the ITDE test environment for the integration tests.

    The flattened diff left both the old (8888/6666) and new (8563/2580)
    port-forward options in place; only one value per option is valid. The
    ports 8563 (database) and 2580 (BucketFS) are the ones expected by the
    updated itde ("Fix ports forwarded by the itde").
    """
    session.run('itde', 'spawn-test-environment',
                '--environment-name', 'test',
                '--database-port-forward', '8563',
                '--bucketfs-port-forward', '2580',
                '--db-mem-size', '8GB',
                '--nameserver', '8.8.8.8')
1,380 changes: 704 additions & 676 deletions poetry.lock

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "exasol-transformers-extension"
version = "2.0.0"
version = "2.1.0"
description = "An Exasol extension for using state-of-the-art pretrained machine learning models via the Hugging Face Transformers API."

authors = [
Expand Down Expand Up @@ -32,8 +32,9 @@ exasol-python-extension-common = ">=0.3.0"
[tool.poetry.dev-dependencies]
pytest = "^7.2.0"
exasol-udf-mock-python = "^0.1.0"
exasol-script-languages-container-tool = "^0.18.2"
exasol-script-languages-container-tool = ">=0.21.0"
toml = "^0.10.2"
pytest-exasol-itde = ">=0.2.0"

[tool.poetry.group.dev.dependencies]
nox = "^2023.4.22"
Expand Down
6 changes: 3 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@

pytest_plugins = [
"tests.fixtures.database_connection_fixture",
"tests.fixtures.script_deployment_fixture",
"tests.fixtures.bucketfs_fixture",
"tests.fixtures.language_container_fixture",
"tests.fixtures.setup_database_fixture",
"tests.fixtures.bucketfs_fixture",
"tests.fixtures.model_fixture",
"tests.fixtures.script_deployment_fixture"
]

# Name of the command-line option used to select the test backend.
_BACKEND_OPTION = '--backend'


def pytest_addoption(parser):
parser.addoption(
_BACKEND_OPTION,
Expand Down
6 changes: 3 additions & 3 deletions tests/fixtures/bucketfs_fixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

import exasol.bucketfs as bfs
import pytest
import pytest_itde
import exasol.pytest_itde

from exasol_transformers_extension.utils.bucketfs_operations import create_bucketfs_location
from tests.fixtures.database_connection_fixture import BACKEND_SAAS, BACKEND_ONPREM
from tests.fixtures.database_connection_fixture_constants import BACKEND_ONPREM, BACKEND_SAAS
from tests.utils.parameters import bucketfs_params


@pytest.fixture(scope="session")
def bucketfs_location_onprem(backend,
bucketfs_config: pytest_itde.config.BucketFs) -> bfs.path.PathLike | None:
bucketfs_config: exasol.pytest_itde.config.BucketFs) -> bfs.path.PathLike | None:
if backend == BACKEND_ONPREM:
return create_bucketfs_location(
path_in_bucket=bucketfs_params.path_in_bucket,
Expand Down
7 changes: 3 additions & 4 deletions tests/fixtures/database_connection_fixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@
import ssl
import pyexasol
import pytest
import exasol.bucketfs as bfs
from _pytest.fixtures import FixtureRequest
from exasol.saas.client.api_access import (
OpenApiAccess,
create_saas_client,
timestamp_name,
get_connection_params
)
from pytest_itde import config
from exasol.pytest_itde import config

from tests.fixtures.database_connection_fixture_constants import BACKEND_ONPREM, BACKEND_SAAS

CURRENT_SAAS_DATABASE_ID = pytest.StashKey[str]()

Expand All @@ -27,8 +28,6 @@ def _env(var: str) -> str:


_BACKEND_OPTION = '--backend'
BACKEND_ONPREM = 'onprem'
BACKEND_SAAS = 'saas'


@pytest.fixture(scope='session', params=[BACKEND_ONPREM, BACKEND_SAAS])
Expand Down
4 changes: 4 additions & 0 deletions tests/fixtures/database_connection_fixture_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from __future__ import annotations

# Identifiers of the two supported test backends; kept in a separate
# constants module so fixture files can share them without importing fixtures.
BACKEND_ONPREM = 'onprem'
BACKEND_SAAS = 'saas'
11 changes: 4 additions & 7 deletions tests/fixtures/language_container_fixture.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,17 @@
import os
import subprocess
from pathlib import Path
import time
from pathlib import Path
from typing import Dict

import pytest
from _pytest.fixtures import FixtureRequest
from exasol_script_languages_container_tool.lib.tasks.export.export_info import ExportInfo
from exasol.python_extension_common.deployment.language_container_deployer import LanguageContainerDeployer
import exasol.bucketfs as bfs
from exasol_script_languages_container_tool.lib.tasks.export.export_info import ExportInfo

from exasol_transformers_extension.deployment import language_container
from tests.fixtures.database_connection_fixture import BACKEND_SAAS

LANGUAGE_ALIAS = "PYTHON3_TE"
CONTAINER_FILE_NAME = "exasol_transformers_extension_container.tar.gz"
from tests.fixtures.database_connection_fixture_constants import BACKEND_SAAS
from tests.fixtures.language_container_fixture_constants import LANGUAGE_ALIAS, CONTAINER_FILE_NAME

SLC_EXPORT = pytest.StashKey[ExportInfo]()
SLC_UPLOADED = pytest.StashKey[Dict[str, bool]]()
Expand Down
2 changes: 2 additions & 0 deletions tests/fixtures/language_container_fixture_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Alias under which the script-language container is registered in the database.
LANGUAGE_ALIAS = "PYTHON3_TE"
# File name of the exported script-language container archive.
CONTAINER_FILE_NAME = "exasol_transformers_extension_container.tar.gz"
80 changes: 3 additions & 77 deletions tests/fixtures/model_fixture.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,11 @@
import time

import pytest
import transformers
from contextlib import contextmanager
from pathlib import PurePosixPath, Path
from pathlib import PurePosixPath

import exasol.bucketfs as bfs

from exasol_transformers_extension.utils.bucketfs_model_specification import BucketFSModelSpecification, \
get_BucketFSModelSpecification_from_model_Specs
from exasol_transformers_extension.utils.model_specification import ModelSpecification
from tests.utils import postprocessing
from tests.fixtures.model_fixture_utils import prepare_model_for_local_bucketfs, upload_model_to_bucketfs
from tests.utils.parameters import model_params
from exasol_transformers_extension.utils import bucketfs_operations


def download_model_to_standard_local_save_path(model_specification: ModelSpecification,
tmpdir_name: Path) -> Path:
tmpdir_name = Path(tmpdir_name)
local_model_save_path = bucketfs_operations.create_save_pretrained_model_path(tmpdir_name,
model_specification)
model_name = model_specification.model_name
model_factory = model_specification.get_model_factory()
for model in [model_factory, transformers.AutoTokenizer]:
downloaded_model = model.from_pretrained(model_name, cache_dir=tmpdir_name / "cache" / model_name)
downloaded_model.save_pretrained(local_model_save_path)
return local_model_save_path


def download_model_to_path(model_specification: ModelSpecification,
tmpdir_name: Path):
tmpdir_name = Path(tmpdir_name)
model_name = model_specification.model_name
# todo pull this download into a function? -> create ticket
for model_factory in [transformers.AutoModel, transformers.AutoTokenizer]:
model = model_factory.from_pretrained(model_name, cache_dir=tmpdir_name / "cache" / model_name)
model.save_pretrained(tmpdir_name)


@contextmanager
def upload_model(bucketfs_location: bfs.path.PathLike,
current_model_specification: BucketFSModelSpecification,
model_dir: Path) -> Path:
model_path = current_model_specification.get_bucketfs_model_save_path()
bucketfs_operations.upload_model_files_to_bucketfs(
model_directory=str(model_dir),
bucketfs_model_path=Path(model_path),
bucketfs_location=bucketfs_location)
time.sleep(20)
yield model_path


def prepare_model_for_local_bucketfs(model_specification: ModelSpecification,
tmpdir_factory):
current_model_specs = get_BucketFSModelSpecification_from_model_Specs(model_specification,
"",
model_params.sub_dir)

tmpdir = tmpdir_factory.mktemp(current_model_specs.task_type)
model_path_in_bucketfs = current_model_specs.get_bucketfs_model_save_path()

bucketfs_path_for_model = tmpdir / model_path_in_bucketfs
download_model_to_path(current_model_specs, bucketfs_path_for_model)
return tmpdir


@pytest.fixture(scope="session")
def prepare_filling_mask_model_for_local_bucketfs(tmpdir_factory) -> PurePosixPath:
Expand Down Expand Up @@ -121,23 +64,6 @@ def prepare_seq2seq_model_in_local_bucketfs(tmpdir_factory) -> PurePosixPath:
yield bucketfs_path


@contextmanager
def upload_model_to_bucketfs(
model_specification: ModelSpecification,
local_model_save_path: Path,
bucketfs_location: bfs.path.PathLike) -> str:
local_model_save_path = download_model_to_standard_local_save_path(model_specification, local_model_save_path)
current_model_specs = get_BucketFSModelSpecification_from_model_Specs(model_specification,
"",
model_params.sub_dir)
with upload_model(
bucketfs_location, current_model_specs, local_model_save_path) as model_path:
try:
yield model_path
finally:
postprocessing.cleanup_buckets(bucketfs_location, model_path)


@pytest.fixture(scope="session")
def upload_filling_mask_model_to_bucketfs(
bucketfs_location: bfs.path.PathLike, tmpdir_factory) -> PurePosixPath:
Expand Down
83 changes: 83 additions & 0 deletions tests/fixtures/model_fixture_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import time
from contextlib import contextmanager
from pathlib import Path

import transformers
from exasol import bucketfs as bfs

from exasol_transformers_extension.utils import bucketfs_operations
from exasol_transformers_extension.utils.bucketfs_model_specification import \
get_BucketFSModelSpecification_from_model_Specs, BucketFSModelSpecification
from exasol_transformers_extension.utils.huggingface_hub_bucketfs_model_transfer_sp import \
make_parameters_of_model_contiguous_tensors
from exasol_transformers_extension.utils.model_specification import ModelSpecification
from tests.utils import postprocessing
from tests.utils.parameters import model_params


def download_model_to_standard_local_save_path(model_specification: ModelSpecification,
                                               tmpdir_name: Path) -> Path:
    """Download the model and its tokenizer for *model_specification* into the
    canonical save-pretrained path below *tmpdir_name* and return that path."""
    base_dir = Path(tmpdir_name)
    save_path = bucketfs_operations.create_save_pretrained_model_path(base_dir,
                                                                      model_specification)
    name = model_specification.model_name
    cache_dir = base_dir / "cache" / name
    for factory in (model_specification.get_model_factory(), transformers.AutoTokenizer):
        downloaded = factory.from_pretrained(name, cache_dir=cache_dir)
        # Guard against non-contiguous tensors before save_pretrained.
        make_parameters_of_model_contiguous_tensors(downloaded)
        downloaded.save_pretrained(save_path)
    return save_path


def download_model_to_path(model_specification: ModelSpecification,
                           tmpdir_name: Path):
    """Download the model and its tokenizer for *model_specification* directly
    into *tmpdir_name*, using a per-model cache directory."""
    target_dir = Path(tmpdir_name)
    name = model_specification.model_name
    # todo pull this download into a function? -> create ticket
    for factory in (transformers.AutoModel, transformers.AutoTokenizer):
        pretrained = factory.from_pretrained(name, cache_dir=target_dir / "cache" / name)
        pretrained.save_pretrained(target_dir)


def prepare_model_for_local_bucketfs(model_specification: ModelSpecification,
                                     tmpdir_factory):
    """Download the model into a fresh temp directory laid out like its
    BucketFS save path and return that temp directory."""
    bucketfs_model_specs = get_BucketFSModelSpecification_from_model_Specs(
        model_specification, "", model_params.sub_dir)
    base_tmpdir = tmpdir_factory.mktemp(bucketfs_model_specs.task_type)
    target_path = base_tmpdir / bucketfs_model_specs.get_bucketfs_model_save_path()
    download_model_to_path(bucketfs_model_specs, target_path)
    return base_tmpdir


@contextmanager
def upload_model_to_bucketfs(
        model_specification: ModelSpecification,
        local_model_save_path: Path,
        bucketfs_location: bfs.path.PathLike) -> str:
    """Download the model locally, upload it into the BucketFS, and yield its
    BucketFS model path; the uploaded files are cleaned up on exit."""
    save_path = download_model_to_standard_local_save_path(model_specification,
                                                           local_model_save_path)
    bucketfs_model_specs = get_BucketFSModelSpecification_from_model_Specs(
        model_specification, "", model_params.sub_dir)
    with upload_model(bucketfs_location, bucketfs_model_specs, save_path) as model_path:
        try:
            yield model_path
        finally:
            postprocessing.cleanup_buckets(bucketfs_location, model_path)


@contextmanager
def upload_model(bucketfs_location: bfs.path.PathLike,
                 current_model_specification: BucketFSModelSpecification,
                 model_dir: Path) -> Path:
    """Upload the model files from *model_dir* into the BucketFS and yield
    the model's BucketFS save path."""
    model_path = current_model_specification.get_bucketfs_model_save_path()
    upload = bucketfs_operations.upload_model_files_to_bucketfs
    upload(model_directory=str(model_dir),
           bucketfs_model_path=Path(model_path),
           bucketfs_location=bucketfs_location)
    # Give the BucketFS time to synchronize the uploaded files across nodes.
    time.sleep(20)
    yield model_path
9 changes: 4 additions & 5 deletions tests/fixtures/script_deployment_fixture.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from __future__ import annotations
from typing import Any

from typing import Any
from urllib.parse import urlparse

import pytest
from pytest_itde import config
import exasol.bucketfs as bfs
from exasol.pytest_itde import config

from tests.fixtures.database_connection_fixture import BACKEND_SAAS, BACKEND_ONPREM
from tests.fixtures.database_connection_fixture_constants import BACKEND_ONPREM, BACKEND_SAAS
from tests.utils.parameters import bucketfs_params


Expand All @@ -21,7 +21,6 @@ def deploy_params_onprem(exasol_config: config.Exasol) -> dict[str, Any]:

@pytest.fixture(scope="session")
def upload_params_onprem(bucketfs_config: config.BucketFs):

parsed_url = urlparse(bucketfs_config.url)
host, port = parsed_url.netloc.split(":")
return {
Expand Down
Loading

0 comments on commit 6990f02

Please sign in to comment.