+- #275: Fixed a bug where models returning unexpected results were not handled correctly
+    # todo do we want tests for multiple models? multiple inputs where one works and one does not? how many test cases are too many test cases?
+ # todo these should be moved to the base model tests together with the others + + input_data = make_input_row(text_data=text_data) * data_size + output_data = make_output_row(text_data=text_data, score=None, error_msg="Traceback") * n_entities * data_size + + work_with_span_input_data = make_input_row_with_span(text_data=text_data) * data_size + work_with_span_output_data = make_output_row_with_span(score=None, + error_msg="Traceback") * n_entities * data_size + + + number_complete_batches = data_size // batch_size + number_remaining_data_entries_in_last_batch = data_size % batch_size + + model_output_row_missing_key = [[model_output_row[0].pop("score")] + for model_output_row in make_model_output_for_one_input_row(number_entities=n_entities)] + + tokenizer_model_output_df_model1 = [model_output_row_missing_key * batch_size] * \ + number_complete_batches + \ + [model_output_row_missing_key * number_remaining_data_entries_in_last_batch] + tokenizer_models_output_df = [tokenizer_model_output_df_model1] + + tmpdir_name = "_".join(("/tmpdir", __qualname__)) + base_cache_dir1 = PurePosixPath(tmpdir_name, bucketfs_conn) + bfs_connections = { + bucketfs_conn: Connection(address=f"file://{base_cache_dir1}") + } \ No newline at end of file diff --git a/tests/unit_tests/udf_wrapper_params/token_classification/prediction_returns_empty_result.py b/tests/unit_tests/udf_wrapper_params/token_classification/prediction_returns_empty_result.py new file mode 100644 index 00000000..6d526cb4 --- /dev/null +++ b/tests/unit_tests/udf_wrapper_params/token_classification/prediction_returns_empty_result.py @@ -0,0 +1,36 @@ +from pathlib import PurePosixPath +from exasol_udf_mock_python.connection import Connection +from tests.unit_tests.udf_wrapper_params.token_classification.make_data_row_functions import make_input_row, \ + make_output_row, make_input_row_with_span, make_output_row_with_span, bucketfs_conn, \ + text_docid, text_start, text_end, agg_strategy_simple, 
+    # todo throws an error but the message could be better
+# todo do we want to throw in this case? or just ignore additional results?
\ + ErrorPredictionOnlyContainsUnknownFields +from tests.unit_tests.udf_wrapper_params.token_classification.error_prediction_missing_expected_field import \ + ErrorPredictionMissingExpectedFields +from tests.unit_tests.udf_wrapper_params.token_classification.prediction_returns_empty_result import \ + PredictionReturnsEmptyResult +from tests.unit_tests.udf_wrapper_params.token_classification.result_contains_additional_keys import \ + ErrorPredictionContainsAdditionalFields from tests.unit_tests.udfs.output_matcher import Output, OutputMatcher from tests.utils.mock_bucketfs_location import fake_bucketfs_location_from_conn_object, fake_local_bucketfs_path from tests.utils.mock_cast import mock_cast @@ -55,14 +63,10 @@ SingleModelSingleBatchIncomplete -def udf_wrapper_empty(): - # placeholder to use for MockMetaData creation. - # todo replace with newer version and then delete this - pass def create_mock_metadata_with_span(): meta = MockMetaData( - script_code_wrapper_function=udf_wrapper_empty, + script_code_wrapper_function=None, input_type="SET", input_columns=[ Column("device_id", int, "INTEGER"), @@ -97,7 +101,7 @@ def create_mock_metadata_with_span(): def create_mock_metadata(): meta = MockMetaData( - script_code_wrapper_function=udf_wrapper_empty, + script_code_wrapper_function=None, input_type="SET", input_columns=[ Column("device_id", int, "INTEGER"), @@ -159,7 +163,7 @@ def create_mock_pipeline_factory(tokenizer_models_output_df, number_of_intended_ This mock_pipeline is feed into a mock_pipeline_factory. 
""" mock_pipeline: List[Union[AutoModel, MagicMock]] = [ - create_autospec(Pipeline, side_effect=tokenizer_models_output_df[i]) if tokenizer_models_output_df[i][0][0][0]["word"] + create_autospec(Pipeline, side_effect=tokenizer_models_output_df[i]) if tokenizer_models_output_df[i][0][0][0]["start"]#todo fix else [Exception("Traceback mock_pipeline is throwing an error intentionally")] # todo we could probably put this exception into the tokenizer_models_output_df in the params files instead for i in range(0, number_of_intended_used_models) ] @@ -204,7 +208,11 @@ def assert_result_matches_expected_output(result, expected_output_data, input_co ErrorNotCachedSingleModelMultipleBatch, ErrorNotCachedMultipleModelMultipleBatch, ErrorOnPredictionMultipleModelMultipleBatch, - ErrorOnPredictionSingleModelMultipleBatch + ErrorOnPredictionSingleModelMultipleBatch, + PredictionReturnsEmptyResult, + ErrorPredictionMissingExpectedFields, + ErrorPredictionOnlyContainsUnknownFields, + ErrorPredictionContainsAdditionalFields ]) @patch('exasol.python_extension_common.connections.bucketfs_location.create_bucketfs_location_from_conn_object') @@ -264,7 +272,11 @@ def test_token_classification_with_span(mock_local_path, mock_create_loc, params ErrorNotCachedSingleModelMultipleBatch, ErrorNotCachedMultipleModelMultipleBatch, ErrorOnPredictionMultipleModelMultipleBatch, - ErrorOnPredictionSingleModelMultipleBatch + ErrorOnPredictionSingleModelMultipleBatch, + PredictionReturnsEmptyResult, + ErrorPredictionMissingExpectedFields, + ErrorPredictionOnlyContainsUnknownFields, + ErrorPredictionContainsAdditionalFields ]) @patch('exasol.python_extension_common.connections.bucketfs_location.create_bucketfs_location_from_conn_object') @patch('exasol_transformers_extension.utils.bucketfs_operations.get_local_bucketfs_path') @@ -296,6 +308,9 @@ def test_token_classification(mock_local_path, mock_create_loc, params): udf.run(mock_ctx) result = mock_ctx.output + print(model_input_data) + 
print("____________________________________________") + print(result) assert_correct_number_of_results(result, mock_meta.output_columns, expected_output_data) assert_result_matches_expected_output(result, expected_output_data, mock_meta.input_columns)