Skip to content

Commit

Permalink
[CodeBuild] changes from code review, security update
Browse files Browse the repository at this point in the history
  • Loading branch information
MarleneKress79789 committed Dec 5, 2023
1 parent aa871fe commit 3e751bc
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 77 deletions.
5 changes: 3 additions & 2 deletions doc/changes/changes_0.7.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@ T.B.D

### Refactorings

- #144: Extracted base_udf.load_models into separate class
- #144: Extracted base_model_udf.load_models into separate class


### Documentation



### Security
- #144: Updated Cryptography to version 41.0.7
14 changes: 3 additions & 11 deletions exasol_transformers_extension/udfs/models/base_model_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,11 @@ def run(self, ctx):
predictions_df = self.get_predictions_from_batch(batch_df)
ctx.emit(predictions_df)

self.clear_device_memory()
self.model_loader.clear_device_memory()

def create_model_loader(self):
"""
creates the model_loader. In separate function, so it can be replaced for tests since the pipeline
creation does not work with dummy data
Creates the model_loader.
"""
self.model_loader = LoadModel(self.pipeline,
self.base_model,
Expand Down Expand Up @@ -183,7 +182,7 @@ def check_cache(self, model_df: pd.DataFrame) -> None:
current_model_key = (bucketfs_conn, sub_dir, model_name, token_conn)
if self.model_loader.last_loaded_model_key != current_model_key:
self.set_cache_dir(model_name, bucketfs_conn, sub_dir)
self.clear_device_memory()
self.model_loader.clear_device_memory()
if token_conn:
token_conn_obj = self.exa.get_connection(token_conn)
else:
Expand Down Expand Up @@ -211,13 +210,6 @@ def set_cache_dir(
self.cache_dir = bucketfs_operations.get_local_bucketfs_path(
bucketfs_location=bucketfs_location, model_path=str(model_path))

def clear_device_memory(self):
"""
Delete models and free device memory
"""
self.model_loader.last_loaded_model = None
self.model_loader.last_loaded_tokenizer = None
torch.cuda.empty_cache()

def get_prediction(self, model_df: pd.DataFrame) -> pd.DataFrame:
"""
Expand Down
10 changes: 9 additions & 1 deletion exasol_transformers_extension/utils/load_model.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

import torch

class LoadModel:
def __init__(self,
Expand Down Expand Up @@ -46,3 +46,11 @@ def load_models(self, model_name: str,
framework="pt")
self.last_loaded_model_key = current_model_key
return last_created_pipeline

def clear_device_memory(self):
    """
    Drop the cached model/tokenizer references and release device memory.
    """
    # Clearing the references lets Python garbage-collect the loaded objects.
    self.last_loaded_tokenizer = None
    self.last_loaded_model = None
    # Hand cached allocator blocks back to the device (per torch docs this is
    # a no-op when CUDA is unavailable or uninitialized).
    torch.cuda.empty_cache()
48 changes: 24 additions & 24 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 0 additions & 38 deletions tests/unit_tests/udfs/base_model_dummy_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,6 @@
BaseModelUDF


class DummyModelLoader:
    """
    Stand-in for the real model loader used in unit tests.

    It deliberately skips building a transformers pipeline, since pipeline
    creation fails when fed the dummy test data.
    """

    def __init__(self,
                 base_model,
                 tokenizer,
                 task_name,
                 device
                 ):
        # Factories and configuration handed in by the UDF under test.
        self.base_model = base_model
        self.tokenizer = tokenizer
        self.task_name = task_name
        self.device = device
        # Bookkeeping attributes mirroring the real loader's interface.
        self.last_loaded_model = None
        self.last_loaded_tokenizer = None
        self.last_created_pipeline = None
        self.last_loaded_model_key = None

    def load_models(self, model_name: str,
                    current_model_key,
                    cache_dir,
                    token_conn_obj) -> None:
        """Load model and tokenizer via their factories; never builds a pipeline."""
        # The dummy never authenticates against a model hub.
        token = False
        self.last_loaded_model = self.base_model.from_pretrained(
            model_name, cache_dir=cache_dir, use_auth_token=token)
        self.last_loaded_tokenizer = self.tokenizer.from_pretrained(
            model_name, cache_dir=cache_dir, use_auth_token=token)
        return None


class DummyImplementationUDF(BaseModelUDF):
def __init__(self,
exa,
Expand Down Expand Up @@ -76,9 +44,3 @@ def create_dataframes_from_predictions(
results_df_list.append(result_df)
return results_df_list

def create_model_loader(self):
    """Override loader creation: use a DummyModelLoader instead of the real one."""
    self.model_loader = DummyModelLoader(
        self.base_model, self.tokenizer, self.task_name, self.device)
5 changes: 4 additions & 1 deletion tests/unit_tests/udfs/test_base_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,13 @@ def setup_tests_and_run(bucketfs_conn_name, bucketfs_conn, sub_dir, model_name):
mock_meta,
'',
None)

mock_pipeline = lambda task_name, model, tokenizer, device, framework: None
mock_ctx = create_mock_udf_context(input_data, mock_meta)
udf = DummyImplementationUDF(exa=mock_exa,
base_model=mock_base_model_factory,
tokenizer=mock_tokenizer_factory)
tokenizer=mock_tokenizer_factory,
pipeline=mock_pipeline)
udf.run(mock_ctx)
res = mock_ctx.output
return res, mock_meta
Expand Down

0 comments on commit 3e751bc

Please sign in to comment.