#264 enabled all tests [CodeBuild]
ahsimb committed Oct 16, 2024
1 parent 63d5eea commit 58f1370
Showing 11 changed files with 21 additions and 129 deletions.
1 change: 1 addition & 0 deletions doc/changes/changes_2.1.0.md
@@ -20,6 +20,7 @@ n/a
### Refactorings

- #252: Use the pytest plugins in the integration tests.
- #264: Created a single deployment CLI command.

n/a

11 changes: 4 additions & 7 deletions exasol_transformers_extension/udfs/models/base_model_udf.py
@@ -1,19 +1,17 @@
import os
from abc import abstractmethod, ABC
from typing import Iterator, List, Any
import torch
import traceback
import pandas as pd
import numpy as np
import transformers
from exasol.python_extension_common.connections.bucketfs_location import (
create_bucketfs_location_from_conn_object)

from exasol_transformers_extension.deployment import constants
from exasol_transformers_extension.utils import device_management, \
bucketfs_operations, dataframe_operations
from exasol_transformers_extension.utils import device_management, dataframe_operations
from exasol_transformers_extension.utils.bucketfs_model_specification import BucketFSModelSpecification
from exasol_transformers_extension.utils.load_local_model import LoadLocalModel
from exasol_transformers_extension.utils.model_factory_protocol import ModelFactoryProtocol
from exasol_transformers_extension.utils.model_specification import ModelSpecification


class BaseModelUDF(ABC):
@@ -189,8 +187,7 @@ def check_cache(self, model_df: pd.DataFrame) -> None:

if self.model_loader.current_model_specification != current_model_specification:
bucketfs_location = \
bucketfs_operations.create_bucketfs_location_from_conn_object(
self.exa.get_connection(bucketfs_conn))
create_bucketfs_location_from_conn_object(self.exa.get_connection(bucketfs_conn))

self.model_loader.clear_device_memory()
self.model_loader.set_current_model_specification(current_model_specification)
@@ -1,8 +1,9 @@
from typing import Tuple

import transformers
from exasol.python_extension_common.connections.bucketfs_location import (
create_bucketfs_location_from_conn_object)

from exasol_transformers_extension.utils import bucketfs_operations
from exasol_transformers_extension.utils.bucketfs_model_specification import \
BucketFSModelSpecificationFactory
from exasol_transformers_extension.utils.model_factory_protocol import ModelFactoryProtocol
@@ -63,7 +64,7 @@ def _download_model(self, ctx) -> Tuple[str, str]:

# create bucketfs location
bfs_conn_obj = self._exa.get_connection(bfs_conn)
bucketfs_location = bucketfs_operations.create_bucketfs_location_from_conn_object(bfs_conn_obj)
bucketfs_location = create_bucketfs_location_from_conn_object(bfs_conn_obj)

# download base model and tokenizer into the model path
with self._huggingface_hub_bucketfs_model_transfer.create(
70 changes: 0 additions & 70 deletions exasol_transformers_extension/utils/bucketfs_operations.py
@@ -4,80 +4,12 @@
from pathlib import PurePosixPath, Path
from typing import BinaryIO

import json
import exasol.bucketfs as bfs
from exasol.saas.client.api_access import get_database_id # type: ignore

from exasol_transformers_extension.utils.model_specification import ModelSpecification


def create_bucketfs_location_from_conn_object(bfs_conn_obj) -> bfs.path.PathLike:

bfs_params = json.loads(bfs_conn_obj.address)
bfs_params.update(json.loads(bfs_conn_obj.user))
bfs_params.update(json.loads(bfs_conn_obj.password))
return bfs.path.build_path(**bfs_params)
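
For reference, the helper removed above only merges the JSON fragments held in a connection's address, user and password fields and forwards them to bfs.path.build_path; the rest of this commit switches callers to the equivalent helper imported from exasol.python_extension_common.connections.bucketfs_location. A minimal, hypothetical sketch of that merging logic follows (the connection values mirror the removed unit test further down and stand in for a real exa.get_connection(...) result):

    import json
    from unittest.mock import MagicMock, patch

    import exasol.bucketfs as bfs

    # Stand-in for a UDF connection object; each field holds a JSON fragment.
    conn = MagicMock()
    conn.address = '{"url": "https://bucket-fs-service", "bucket": "my-bucket"}'
    conn.user = '{"user": "the-user"}'
    conn.password = '{"password": "the-password"}'

    # Merge the three fragments into one keyword-argument dict, exactly as the
    # removed create_bucketfs_location_from_conn_object did.
    bfs_params = json.loads(conn.address)
    bfs_params.update(json.loads(conn.user))
    bfs_params.update(json.loads(conn.password))

    # Patch build_path so the sketch runs without a live BucketFS service.
    with patch("exasol.bucketfs.path.build_path") as mock_build_path:
        bfs.path.build_path(**bfs_params)
        mock_build_path.assert_called_once_with(url="https://bucket-fs-service",
                                                bucket="my-bucket",
                                                user="the-user",
                                                password="the-password")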


def create_bucketfs_location(
path_in_bucket: str = '',
bucketfs_name: str | None = None,
bucketfs_url: str | None = None,
bucketfs_host: str | None = None,
bucketfs_port: int | None = None,
bucketfs_use_https: bool = True,
bucketfs_user: str | None = None,
bucketfs_password: str | None = None,
bucket: str | None = None,
saas_url: str | None = None,
saas_account_id: str | None = None,
saas_database_id: str | None = None,
saas_database_name: str | None = None,
saas_token: str | None = None,
use_ssl_cert_validation: bool = False
) -> bfs.path.PathLike:

# Infer where the database is - on-prem or SaaS.
is_on_prem = all((any((bucketfs_url, all((bucketfs_host, bucketfs_port)))), bucketfs_name,
bucket, bucketfs_user, bucketfs_password))
if is_on_prem:
if not bucketfs_url:
bucketfs_url = (f"{'https' if bucketfs_use_https else 'http'}://"
f"{bucketfs_host}:{bucketfs_port}")
return bfs.path.build_path(backend=bfs.path.StorageBackend.onprem,
url=bucketfs_url,
username=bucketfs_user,
password=bucketfs_password,
service_name=bucketfs_name,
bucket_name=bucket,
verify=use_ssl_cert_validation,
path=path_in_bucket)

is_saas = all((saas_url, saas_account_id, saas_token,
any((saas_database_id, saas_database_name))))
if is_saas:
saas_database_id = (saas_database_id or
get_database_id(
host=saas_url,
account_id=saas_account_id,
pat=saas_token,
database_name=saas_database_name
))
return bfs.path.build_path(backend=bfs.path.StorageBackend.saas,
url=saas_url,
account_id=saas_account_id,
database_id=saas_database_id,
pat=saas_token,
path=path_in_bucket)

raise ValueError('Incomplete parameter list. '
'Please either provide the parameters [bucketfs_host, '
'bucketfs_port, bucketfs_name, bucket, bucketfs_user, '
'bucketfs_password] for an On-Prem database or [saas_url, '
'saas_account_id, saas_database_id or saas_database_name, '
'saas_token] for a SaaS database.')


def upload_model_files_to_bucketfs(
model_directory: str,
bucketfs_model_path: Path,
@@ -116,5 +48,3 @@ def create_save_pretrained_model_path(_tmpdir_name, model_specification: ModelSp
"""
model_specific_path_suffix = model_specification.get_model_specific_path_suffix()
return Path(_tmpdir_name, "pretrained", model_specific_path_suffix)


6 changes: 3 additions & 3 deletions noxfile.py
@@ -26,21 +26,21 @@ def unit_tests(session):
def integration_tests(session):
# We need to use an external database here, because the itde plugin doesn't provide all necessary options to
# configure the database. See the start_database session.
session.run('pytest', '--setup-show', '-s', '--backend=onprem', '--itde-db-version=external', 'tests/integration_tests/with_db/deployment')
session.run('pytest', '--setup-show', '-s', '--backend=all', '--itde-db-version=external', 'tests/integration_tests')


@nox.session(python=False)
def saas_integration_tests(session):
# We need to use an external database here, because the itde plugin doesn't provide all necessary options to
# configure the database. See the start_database session.
session.run('pytest', '--setup-show', '-s', 'tests/integration_tests/with_db/deployment')
session.run('pytest', '--setup-show', '-s', '--backend=saas', 'tests/integration_tests/with_db')


@nox.session(python=False)
def onprem_integration_tests(session):
# We need to use an external database here, because the itde plugin doesn't provide all necessary options to
# configure the database. See the start_database session.
session.run('pytest', '--setup-show', '-s', '--backend=onprem', '--itde-db-version=external', 'tests/integration_tests/with_db/deployment')
session.run('pytest', '--setup-show', '-s', '--backend=onprem', '--itde-db-version=external', 'tests/integration_tests/with_db')


@nox.session(python=False)
@@ -9,7 +9,7 @@
from exasol_transformers_extension.deploy import (
deploy_command, DEPLOY_SLC_ARG, BUCKETFS_CONN_NAME_ARG, get_bool_opt_name)
from exasol_transformers_extension.deployment.language_container import export_slc
from tests.integration_tests.with_db.deployment.test_upload_model import run_model_upload_test
from tests.integration_tests.with_db.test_upload_model import run_model_upload_test

PATH_IN_BUCKET = 'te_end2end'
BUCKETFS_CONN_NAME = 'TE_E2E_BFS_CONN'
@@ -3,16 +3,19 @@
from typing import Dict, List

from exasol_udf_mock_python.connection import Connection
from exasol.python_extension_common.connections.bucketfs_location import (
create_bucketfs_location_from_conn_object)

from exasol_transformers_extension.udfs.models.model_downloader_udf import \
ModelDownloaderUDF
from exasol_transformers_extension.utils import bucketfs_operations
from exasol_transformers_extension.utils.bucketfs_model_specification import get_BucketFSModelSpecification_from_model_Specs
from exasol_transformers_extension.utils.bucketfs_model_specification import (
get_BucketFSModelSpecification_from_model_Specs)
from tests.utils.parameters import model_params
from tests.utils.mock_connections import (
create_mounted_bucketfs_connection, create_hf_token_connection)
from tests.utils.bucketfs_file_list import get_bucketfs_file_list


class ExaEnvironment:
def __init__(self, connections: Dict[str, Connection] = None):
self._connections = connections
@@ -83,8 +86,7 @@ def __init__(self, id: str, tmp_dir: Path, token_conn_name: str):
else create_hf_token_connection("valid")

def list_files_in_bucketfs(self):
bucketfs_location = bucketfs_operations.create_bucketfs_location_from_conn_object(
self.bucketfs_connection)
bucketfs_location = create_bucketfs_location_from_conn_object(self.bucketfs_connection)
return get_bucketfs_file_list(bucketfs_location)


@@ -4,14 +4,16 @@
from pathlib import Path
from transformers import AutoModel, AutoTokenizer, pipeline
import tarfile
from exasol.python_extension_common.connections.bucketfs_location import (
create_bucketfs_location_from_conn_object)

from exasol_transformers_extension.utils.bucketfs_model_specification import BucketFSModelSpecification
from exasol_transformers_extension.utils.load_local_model import LoadLocalModel
from exasol_transformers_extension.utils.model_factory_protocol import ModelFactoryProtocol
from exasol_transformers_extension.utils.huggingface_hub_bucketfs_model_transfer_sp import \
HuggingFaceHubBucketFSModelTransferSPFactory, make_parameters_of_model_contiguous_tensors
from exasol_transformers_extension.utils.bucketfs_operations import (
create_save_pretrained_model_path, create_bucketfs_location_from_conn_object)
create_save_pretrained_model_path)

from tests.utils.parameters import model_params
from tests.utils.mock_connections import create_mounted_bucketfs_connection
33 changes: 0 additions & 33 deletions tests/unit_tests/utils/test_bucketfs_operations.py
@@ -8,8 +8,6 @@
from exasol_udf_mock_python.connection import Connection

from exasol_transformers_extension.utils.bucketfs_operations import (
create_bucketfs_location,
create_bucketfs_location_from_conn_object,
upload_model_files_to_bucketfs,
create_tar_of_directory
)
@@ -25,37 +23,6 @@ def test_content(tmp_path):
return tmp_path


@patch("exasol.bucketfs.path.build_path")
def test_create_bucketfs_location_from_conn_object(mock_build_path):
url = 'https://bucket-fs-service'
bucket = 'my-bucket'
user = 'the-user'
password = 'the-password'
conn = Connection(
address=f'{{"url":"{url}", "bucket":"{bucket}"}}',
user=f'{{"user":"{user}"}}',
password=f'{{"password":"{password}"}}'
)
create_bucketfs_location_from_conn_object(conn)
mock_build_path.assert_called_with(url=url, bucket=bucket, user=user, password=password)


@patch("exasol.bucketfs.path.build_path")
def test_create_bucketfs_location_on_prem(mock_build_path):
create_bucketfs_location(bucketfs_host='https://bucket-fs-service', bucketfs_port=5678,
bucketfs_name='bfs-service', bucket='my-bucket', bucketfs_user='bfs-user',
bucketfs_password='bfs-password', path_in_bucket='bucket_path')
assert mock_build_path.call_args.kwargs['backend'] == bfs.path.StorageBackend.onprem


@patch("exasol.bucketfs.path.build_path")
def test_create_bucketfs_location_saas(mock_build_path):
create_bucketfs_location(saas_url='https://saas-service', saas_account_id='fake-account-id',
saas_database_id='fake-database-id', saas_token='fake-saas-token',
path_in_bucket='bucket_path')
assert mock_build_path.call_args.kwargs['backend'] == bfs.path.StorageBackend.saas


def test_upload_model_files_to_bucketfs(test_content, tmp_path):
path_in_backet = 'abcd'
bucket = bfs.MountedBucket(base_path=str(tmp_path))
8 changes: 0 additions & 8 deletions tests/utils/parameters.py
@@ -33,11 +33,3 @@ class ModelParams:
tiny_model_specs=ModelSpecification("prajjwal1/bert-tiny", "task"),
text_data='The database software company Exasol is based in Nuremberg',
sub_dir='model_sub_dir')


def get_arg_list(**kwargs) -> list[str]:
args_list: list[str] = []
for k, v in kwargs.items():
args_list.append(f'--{k.replace("_", "-")}')
args_list.append(str(v))
return args_list
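
For context, the removed get_arg_list helper turned keyword arguments into a flat CLI-style option list. A short, self-contained sketch of that behaviour (the option names below are illustrative, not taken from the real CLI):

    # Mirrors the removed helper: each keyword becomes a '--kebab-case' option
    # followed by its stringified value.
    def get_arg_list(**kwargs) -> list[str]:
        args_list: list[str] = []
        for k, v in kwargs.items():
            args_list.append(f'--{k.replace("_", "-")}')
            args_list.append(str(v))
        return args_list

    # Example: two keyword arguments become four list entries.
    assert get_arg_list(bucketfs_name="bfsdefault", bucketfs_port=2580) == [
        "--bucketfs-name", "bfsdefault", "--bucketfs-port", "2580"]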
