Skip to content

Commit

Permalink
converted error unit tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
MarleneKress79789 committed Nov 12, 2024
1 parent f8e72b0 commit a459d0a
Showing 1 changed file with 40 additions and 42 deletions.
82 changes: 40 additions & 42 deletions tests/unit_tests/udfs/test_token_classification.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,15 +4,21 @@
import pytest
import transformers
from exasol_udf_mock_python.column import Column
from exasol_udf_mock_python.group import Group
from exasol_udf_mock_python.mock_context import StandaloneMockContext
from exasol_udf_mock_python.mock_exa_environment import MockExaEnvironment
from exasol_udf_mock_python.mock_meta_data import MockMetaData
from exasol_udf_mock_python.udf_mock_executor import UDFMockExecutor
from transformers import Pipeline

from exasol_transformers_extension.udfs.models.token_classification_udf import TokenClassificationUDF
from exasol_transformers_extension.utils.model_factory_protocol import ModelFactoryProtocol
from tests.unit_tests.udf_wrapper_params.token_classification.error_not_cached_multiple_model_multiple_batch import \
ErrorNotCachedMultipleModelMultipleBatch
from tests.unit_tests.udf_wrapper_params.token_classification.error_not_cached_single_model_multiple_batch import \
ErrorNotCachedSingleModelMultipleBatch
from tests.unit_tests.udf_wrapper_params.token_classification.error_on_prediction_multiple_model_multiple_batch import \
ErrorOnPredictionMultipleModelMultipleBatch
from tests.unit_tests.udf_wrapper_params.token_classification.error_on_prediction_single_model_multiple_batch import \
ErrorOnPredictionSingleModelMultipleBatch
from tests.unit_tests.udf_wrapper_params.token_classification.multiple_bfsconn_single_subdir_single_model_multiple_batch import \
MultipleBucketFSConnSingleSubdirSingleModelNameMultipleBatch
from tests.unit_tests.udf_wrapper_params.token_classification.multiple_bfsconn_single_subdir_single_model_single_batch import \
Expand Down Expand Up @@ -134,10 +140,10 @@ def create_mock_metadata():
MultipleBucketFSConnSingleSubdirSingleModelNameMultipleBatch,
MultipleStrategySingleModelNameSingleBatch,
MultipleStrategySingleModelNameMultipleBatch,
#ErrorNotCachedSingleModelMultipleBatch,
#ErrorNotCachedMultipleModelMultipleBatch,
#ErrorOnPredictionMultipleModelMultipleBatch,
#ErrorOnPredictionSingleModelMultipleBatch
ErrorNotCachedSingleModelMultipleBatch,
ErrorNotCachedMultipleModelMultipleBatch,
ErrorOnPredictionMultipleModelMultipleBatch,
ErrorOnPredictionSingleModelMultipleBatch
])

@patch('exasol.python_extension_common.connections.bucketfs_location.create_bucketfs_location_from_conn_object')
Expand All @@ -149,25 +155,27 @@ def test_token_classification_with_span(mock_local_path, mock_create_loc, params
mock_meta = create_mock_metadata_with_span()
input = params.work_with_span_input_data
mock_ctx = StandaloneMockContext(inp=input, metadata=mock_meta)


mock_exa = MockExaEnvironment(
metadata=mock_meta,
connections=params.bfs_connections)

mock_base_model_factory: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol,
_name="mock_base_model_factory")
number_of_intendet_used_models = params.expected_model_counter # todo is this always same?
number_of_intended_used_models = params.expected_model_counter
mock_models: List[Union[transformers.AutoModel, MagicMock]] = [
create_autospec(transformers.AutoModel) for i in range (0,number_of_intendet_used_models)
create_autospec(transformers.AutoModel) for i in range (0,number_of_intended_used_models)
]
print(mock_models)
mock_cast(mock_base_model_factory.from_pretrained).side_effect = mock_models

mock_tokenizer_factory: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol)
mock_pipeline: List[Union[transformers.AutoModel, MagicMock]] = [
create_autospec(Pipeline, side_effect=params.tokenizer_models_output_df[i])
for i in range (0,number_of_intendet_used_models)
]

mock_pipeline: List[Union[transformers.AutoModel, MagicMock]] = [
create_autospec(Pipeline, side_effect=params.tokenizer_models_output_df[i]) if params.tokenizer_models_output_df[i][0][0][0]["word"]
else Exception("Traceback mock_pipeline is throwing an error intentionally")
for i in range(0, number_of_intended_used_models)
]

mock_pipeline_factory: Union[Pipeline, MagicMock] = create_autospec(Pipeline,
side_effect=mock_pipeline)
udf = TokenClassificationUDF(exa=mock_exa,
Expand All @@ -180,13 +188,12 @@ def test_token_classification_with_span(mock_local_path, mock_create_loc, params
udf.run(mock_ctx)
result = mock_ctx.output

assert result[0][-1] is None and len(result[0]) == len(mock_meta.output_columns)
assert len(result[0]) == len(mock_meta.output_columns)

expected_output = Output(params.work_with_span_output_data)
actual_output = Output(result)
n_input_columns = len(mock_meta.input_columns) - 1
assert (
OutputMatcher(actual_output, n_input_columns) == expected_output and
assert (OutputMatcher(actual_output, n_input_columns) == expected_output and
len(mock_pipeline_factory.mock_calls) == params.expected_model_counter)


Expand All @@ -207,10 +214,10 @@ def test_token_classification_with_span(mock_local_path, mock_create_loc, params
MultipleBucketFSConnSingleSubdirSingleModelNameMultipleBatch,
MultipleStrategySingleModelNameSingleBatch,
MultipleStrategySingleModelNameMultipleBatch,
# ErrorNotCachedSingleModelMultipleBatch,
# ErrorNotCachedMultipleModelMultipleBatch,
# ErrorOnPredictionMultipleModelMultipleBatch,
# ErrorOnPredictionSingleModelMultipleBatch
ErrorNotCachedSingleModelMultipleBatch,
ErrorNotCachedMultipleModelMultipleBatch,
ErrorOnPredictionMultipleModelMultipleBatch,
ErrorOnPredictionSingleModelMultipleBatch
])
@patch('exasol.python_extension_common.connections.bucketfs_location.create_bucketfs_location_from_conn_object')
@patch('exasol_transformers_extension.utils.bucketfs_operations.get_local_bucketfs_path')
Expand All @@ -221,32 +228,25 @@ def test_token_classification(mock_local_path, mock_create_loc, params):
mock_meta = create_mock_metadata()
input = params.input_data
mock_ctx = StandaloneMockContext(inp=input, metadata=mock_meta)


mock_exa = MockExaEnvironment(
metadata=mock_meta,
connections=params.bfs_connections)

mock_base_model_factory: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol,
_name="mock_base_model_factory")
number_of_intendet_used_models = params.expected_model_counter# todo is this always same?
number_of_intended_used_models = params.expected_model_counter
mock_models: List[Union[transformers.AutoModel, MagicMock]] = [
create_autospec(transformers.AutoModel) for i in range (0,number_of_intendet_used_models)
create_autospec(transformers.AutoModel) for i in range (0,number_of_intended_used_models)
]
print(mock_models)
mock_cast(mock_base_model_factory.from_pretrained).side_effect = mock_models

mock_tokenizer_factory: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol)
print(params.batch_size)
print(params.work_with_span_input_data)
print(len(params.work_with_span_output_data))
print("tokenizer_model_output_df")
print(params.tokenizer_models_output_df)
for i in range(0, number_of_intendet_used_models):
print(params.tokenizer_models_output_df[i])
mock_pipeline: List[Union[transformers.AutoModel, MagicMock]] = [
create_autospec(Pipeline, side_effect=params.tokenizer_models_output_df[i]) for i in range (0,number_of_intendet_used_models)
]

mock_pipeline: List[Union[transformers.AutoModel, MagicMock]] = [
create_autospec(Pipeline, side_effect=params.tokenizer_models_output_df[i]) if params.tokenizer_models_output_df[i][0][0][0]["word"]
else Exception("Traceback mock_pipeline is throwing an error intentionally")
for i in range(0, number_of_intended_used_models)
]
mock_pipeline_factory: Union[Pipeline, MagicMock] = create_autospec(Pipeline,
side_effect=mock_pipeline)
udf = TokenClassificationUDF(exa=mock_exa,
Expand All @@ -257,13 +257,11 @@ def test_token_classification(mock_local_path, mock_create_loc, params):

udf.run(mock_ctx)
result = mock_ctx.output
print("result_____________________________________________")
print(result)
print("_____________________________________________")
assert result[0][-1] is None and len(result[0]) == len(mock_meta.output_columns)

assert len(result[0]) == len(mock_meta.output_columns)

expected_output = Output(params.output_data)
actual_output = Output(result)
n_input_columns = len(mock_meta.input_columns) - 1
assert (OutputMatcher(actual_output, n_input_columns) == expected_output )#and
#len(mock_pipeline_factory.mock_calls) == params.expected_model_counter)
assert (OutputMatcher(actual_output, n_input_columns) == expected_output and
len(mock_pipeline_factory.mock_calls) == params.expected_model_counter)

0 comments on commit a459d0a

Please sign in to comment.