diff --git a/tests/unit_tests/udfs/test_token_classification.py b/tests/unit_tests/udfs/test_token_classification.py index 5a8567ee..53f96f6e 100644 --- a/tests/unit_tests/udfs/test_token_classification.py +++ b/tests/unit_tests/udfs/test_token_classification.py @@ -4,15 +4,21 @@ import pytest import transformers from exasol_udf_mock_python.column import Column -from exasol_udf_mock_python.group import Group from exasol_udf_mock_python.mock_context import StandaloneMockContext from exasol_udf_mock_python.mock_exa_environment import MockExaEnvironment from exasol_udf_mock_python.mock_meta_data import MockMetaData -from exasol_udf_mock_python.udf_mock_executor import UDFMockExecutor from transformers import Pipeline from exasol_transformers_extension.udfs.models.token_classification_udf import TokenClassificationUDF from exasol_transformers_extension.utils.model_factory_protocol import ModelFactoryProtocol +from tests.unit_tests.udf_wrapper_params.token_classification.error_not_cached_multiple_model_multiple_batch import \ + ErrorNotCachedMultipleModelMultipleBatch +from tests.unit_tests.udf_wrapper_params.token_classification.error_not_cached_single_model_multiple_batch import \ + ErrorNotCachedSingleModelMultipleBatch +from tests.unit_tests.udf_wrapper_params.token_classification.error_on_prediction_multiple_model_multiple_batch import \ + ErrorOnPredictionMultipleModelMultipleBatch +from tests.unit_tests.udf_wrapper_params.token_classification.error_on_prediction_single_model_multiple_batch import \ + ErrorOnPredictionSingleModelMultipleBatch from tests.unit_tests.udf_wrapper_params.token_classification.multiple_bfsconn_single_subdir_single_model_multiple_batch import \ MultipleBucketFSConnSingleSubdirSingleModelNameMultipleBatch from tests.unit_tests.udf_wrapper_params.token_classification.multiple_bfsconn_single_subdir_single_model_single_batch import \ @@ -134,10 +140,10 @@ def create_mock_metadata(): MultipleBucketFSConnSingleSubdirSingleModelNameMultipleBatch, MultipleStrategySingleModelNameSingleBatch, MultipleStrategySingleModelNameMultipleBatch, - #ErrorNotCachedSingleModelMultipleBatch, - #ErrorNotCachedMultipleModelMultipleBatch, - #ErrorOnPredictionMultipleModelMultipleBatch, - #ErrorOnPredictionSingleModelMultipleBatch + ErrorNotCachedSingleModelMultipleBatch, + ErrorNotCachedMultipleModelMultipleBatch, + ErrorOnPredictionMultipleModelMultipleBatch, + ErrorOnPredictionSingleModelMultipleBatch ]) @patch('exasol.python_extension_common.connections.bucketfs_location.create_bucketfs_location_from_conn_object') @@ -149,25 +155,27 @@ def test_token_classification_with_span(mock_local_path, mock_create_loc, params mock_meta = create_mock_metadata_with_span() input = params.work_with_span_input_data mock_ctx = StandaloneMockContext(inp=input, metadata=mock_meta) - - mock_exa = MockExaEnvironment( metadata=mock_meta, connections=params.bfs_connections) mock_base_model_factory: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol, _name="mock_base_model_factory") - number_of_intendet_used_models = params.expected_model_counter # todo is this always same? + number_of_intended_used_models = params.expected_model_counter mock_models: List[Union[transformers.AutoModel, MagicMock]] = [ - create_autospec(transformers.AutoModel) for i in range (0,number_of_intendet_used_models) + create_autospec(transformers.AutoModel) for i in range (0,number_of_intended_used_models) ] + print(mock_models) mock_cast(mock_base_model_factory.from_pretrained).side_effect = mock_models mock_tokenizer_factory: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol) - mock_pipeline: List[Union[transformers.AutoModel, MagicMock]] = [ - create_autospec(Pipeline, side_effect=params.tokenizer_models_output_df[i]) - for i in range (0,number_of_intendet_used_models) - ] + + mock_pipeline: List[Union[transformers.AutoModel, MagicMock]] = [ + create_autospec(Pipeline, side_effect=params.tokenizer_models_output_df[i]) if params.tokenizer_models_output_df[i][0][0][0]["word"] + else Exception("Traceback mock_pipeline is throwing an error intentionally") + for i in range(0, number_of_intended_used_models) + ] + mock_pipeline_factory: Union[Pipeline, MagicMock] = create_autospec(Pipeline, side_effect=mock_pipeline) udf = TokenClassificationUDF(exa=mock_exa, @@ -180,13 +188,12 @@ def test_token_classification_with_span(mock_local_path, mock_create_loc, params udf.run(mock_ctx) result = mock_ctx.output - assert result[0][-1] is None and len(result[0]) == len(mock_meta.output_columns) + assert len(result[0]) == len(mock_meta.output_columns) expected_output = Output(params.work_with_span_output_data) actual_output = Output(result) n_input_columns = len(mock_meta.input_columns) - 1 - assert ( - OutputMatcher(actual_output, n_input_columns) == expected_output and + assert (OutputMatcher(actual_output, n_input_columns) == expected_output and len(mock_pipeline_factory.mock_calls) == params.expected_model_counter) @@ -207,10 +214,10 @@ def test_token_classification_with_span(mock_local_path, mock_create_loc, params MultipleBucketFSConnSingleSubdirSingleModelNameMultipleBatch, MultipleStrategySingleModelNameSingleBatch, MultipleStrategySingleModelNameMultipleBatch, - # ErrorNotCachedSingleModelMultipleBatch, - # ErrorNotCachedMultipleModelMultipleBatch, - # ErrorOnPredictionMultipleModelMultipleBatch, - # ErrorOnPredictionSingleModelMultipleBatch + ErrorNotCachedSingleModelMultipleBatch, + ErrorNotCachedMultipleModelMultipleBatch, + ErrorOnPredictionMultipleModelMultipleBatch, + ErrorOnPredictionSingleModelMultipleBatch ]) @patch('exasol.python_extension_common.connections.bucketfs_location.create_bucketfs_location_from_conn_object') @patch('exasol_transformers_extension.utils.bucketfs_operations.get_local_bucketfs_path') @@ -221,32 +228,25 @@ def test_token_classification(mock_local_path, mock_create_loc, params): mock_meta = create_mock_metadata() input = params.input_data mock_ctx = StandaloneMockContext(inp=input, metadata=mock_meta) - - mock_exa = MockExaEnvironment( metadata=mock_meta, connections=params.bfs_connections) mock_base_model_factory: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol, _name="mock_base_model_factory") - number_of_intendet_used_models = params.expected_model_counter# todo is this always same? + number_of_intended_used_models = params.expected_model_counter mock_models: List[Union[transformers.AutoModel, MagicMock]] = [ - create_autospec(transformers.AutoModel) for i in range (0,number_of_intendet_used_models) + create_autospec(transformers.AutoModel) for i in range (0,number_of_intended_used_models) ] - print(mock_models) mock_cast(mock_base_model_factory.from_pretrained).side_effect = mock_models mock_tokenizer_factory: Union[ModelFactoryProtocol, MagicMock] = create_autospec(ModelFactoryProtocol) - print(params.batch_size) - print(params.work_with_span_input_data) - print(len(params.work_with_span_output_data)) - print("tokenizer_model_output_df") - print(params.tokenizer_models_output_df) - for i in range(0, number_of_intendet_used_models): - print(params.tokenizer_models_output_df[i]) - mock_pipeline: List[Union[transformers.AutoModel, MagicMock]] = [ - create_autospec(Pipeline, side_effect=params.tokenizer_models_output_df[i]) for i in range (0,number_of_intendet_used_models) - ] + + mock_pipeline: List[Union[transformers.AutoModel, MagicMock]] = [ + create_autospec(Pipeline, side_effect=params.tokenizer_models_output_df[i]) if params.tokenizer_models_output_df[i][0][0][0]["word"] + else Exception("Traceback mock_pipeline is throwing an error intentionally") + for i in range(0, number_of_intended_used_models) + ] mock_pipeline_factory: Union[Pipeline, MagicMock] = create_autospec(Pipeline, side_effect=mock_pipeline) udf = TokenClassificationUDF(exa=mock_exa, @@ -257,13 +257,11 @@ def test_token_classification(mock_local_path, mock_create_loc, params): udf.run(mock_ctx) result = mock_ctx.output - print("result_____________________________________________") - print(result) - print("_____________________________________________") - assert result[0][-1] is None and len(result[0]) == len(mock_meta.output_columns) + + assert len(result[0]) == len(mock_meta.output_columns) expected_output = Output(params.output_data) actual_output = Output(result) n_input_columns = len(mock_meta.input_columns) - 1 - assert (OutputMatcher(actual_output, n_input_columns) == expected_output )#and - #len(mock_pipeline_factory.mock_calls) == params.expected_model_counter) + assert (OutputMatcher(actual_output, n_input_columns) == expected_output and + len(mock_pipeline_factory.mock_calls) == params.expected_model_counter)