#269: fixed token class span output (#270)

* token_classification_udf with spans now returns the input span * started changing unit tests to use StandAloneUDFMock and to be easier to maintain * fixed error in unit test asserts * fixed wrong assumptions in failing unit tests * version update of pandas and udf_mock_python
exasol · Nov 21, 2024 · 4091c57 · 4091c57
1 parent 594bf64
commit 4091c57
Show file tree

Hide file tree

Showing 147 changed files with 1,097 additions and 2,082 deletions.
diff --git a/doc/changes/changelog.md b/doc/changes/changelog.md
@@ -1,5 +1,6 @@
 # Changelog
 
+* [2.2.0](changes_2.2.0.md)
 * [2.1.0](changes_2.1.0.md)
 * [2.0.0](changes_2.0.0.md)
 * [1.0.1](changes_1.0.1.md)

diff --git a/doc/changes/changes_2.2.0.md b/doc/changes/changes_2.2.0.md
@@ -0,0 +1,27 @@
+# Transformers Extension 2.2.0, T.B.D
+
+Code name: T.B.D
+
+## Summary
+
+T.B.D
+
+### Features
+
+n/a
+
+### Bugs
+
+- #272: Fixed unit test assertions not working correctly
+
+### Documentation
+
+n/a
+
+### Refactorings
+
+- #273: Refactored unit tests for token_classification_udf to use StandAloneUDFMock, made params files more maintainable
+
+### Security
+
+n/a
diff --git a/...sformers_extension/resources/templates/with_spans/span_token_classification_udf.jinja.sql b/...sformers_extension/resources/templates/with_spans/span_token_classification_udf.jinja.sql
@@ -13,6 +13,9 @@ CREATE OR REPLACE {{ language_alias }} SET SCRIPT "TE_TOKEN_CLASSIFICATION_UDF_W
     bucketfs_conn VARCHAR(2000000),
     sub_dir VARCHAR(2000000),
     model_name VARCHAR(2000000),
+    text_data_docid INTEGER,
+    text_data_char_begin INTEGER,
+    text_data_char_end INTEGER,
     aggregation_strategy VARCHAR(2000000),
     entity_covered_text VARCHAR(2000000),
     entity_type VARCHAR(2000000),

diff --git a/exasol_transformers_extension/udfs/models/token_classification_udf.py b/exasol_transformers_extension/udfs/models/token_classification_udf.py
@@ -24,7 +24,6 @@ def __init__(self,
                          tokenizer, task_type='token-classification',
                          work_with_spans=work_with_spans)
         self._default_aggregation_strategy = 'simple'
-        #self.work_with_spans = work_with_spans
         self._desired_fields_in_prediction = [
             "start", "end", "word", "entity", "score"]
         self.new_columns = [
@@ -92,8 +91,7 @@ def create_new_span_columns(self, model_df: pd.DataFrame) -> pd.DataFrame:
 
     def drop_old_data_for_span_execution(self, model_df: pd.DataFrame) -> pd.DataFrame:
         # drop columns which are made superfluous by the spans to save data transfer
-        model_df = model_df.drop(columns=["text_data", "text_data_docid", "text_data_char_begin",
-                                          "text_data_char_end", "start_pos", "end_pos"])
+        model_df = model_df.drop(columns=["text_data", "start_pos", "end_pos"])
         return model_df
 
     def make_entity_span(self, df_row):

diff --git a/exasol_transformers_extension/utils/load_local_model.py b/exasol_transformers_extension/utils/load_local_model.py
@@ -57,7 +57,7 @@ def load_models(self) -> transformers.pipelines.Pipeline:
         loaded_tokenizer = self._tokenizer_factory.from_pretrained(str(self._bucketfs_model_cache_dir))
 
         last_created_pipeline = self.pipeline_factory(
-            self.task_type,
+            task=self.task_type,
             model=loaded_model,
             tokenizer=loaded_tokenizer,
             device=self.device,

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "exasol-transformers-extension"
-version = "2.1.0"
+version = "2.2.0"
 description = "An Exasol extension for using state-of-the-art pretrained machine learning models via the Hugging Face Transformers API."
 
 authors = [
@@ -17,7 +17,7 @@ keywords = ['exasol']
 
 [tool.poetry.dependencies]
 python = "^3.10.0"
-pandas = ">=1.4.2,<2.0.0"
+pandas = ">=1.4.2,<3.0.0"
 torch = "^2.0.1"
 transformers = {extras = ["torch"], version = "^4.36.2"}
 Jinja2 = "^3.0.3"
@@ -31,7 +31,7 @@ exasol-python-extension-common = ">=0.8.0,<1"
 
 [tool.poetry.dev-dependencies]
 pytest = "^7.2.0"
-exasol-udf-mock-python = "^0.1.0"
+exasol-udf-mock-python = ">=0.3.0"
 toml = "^0.10.2"
 
 [tool.poetry.group.dev.dependencies]

diff --git a/tests/integration_tests/with_db/udfs/test_token_classification_script.py b/tests/integration_tests/with_db/udfs/test_token_classification_script.py
@@ -101,9 +101,9 @@ def test_token_classification_script_with_span(
     # assertions
     assert result[0][-1] is None
     # added_columns: entity_covered_text, entity_type, score, entity_docid, entity_char_begin, entity_char_end, error_message
-    # removed_columns: # device_id, text_data, text_data_docid, text_data_char_begin, text_data_char_end
-    assert_correct_number_of_results(7, 5,
+    # removed_columns: # device_id, text_data
+    assert_correct_number_of_results(7, 2,
                                      input_data[0], result, n_rows)
     # lenient test for quality of results, will be replaced by deterministic test later
-    results = [[result[i][4], result[i][5]] for i in range(len(result))]
+    results = [[result[i][7], result[i][8]] for i in range(len(result))]
     assert_lenient_check_of_output_quality(results)
diff --git a/tests/integration_tests/without_db/udfs/matcher.py b/tests/integration_tests/without_db/udfs/matcher.py
@@ -52,7 +52,7 @@ def _is_rank_monotonic(self, score_rank_df: pd.DataFrame, row: int) -> bool:
         return (
             score_rank_df[row * self._results_per_row: self._results_per_row + row * self._results_per_row]
             .sort_values(by='score', ascending=False)['rank']
-            .is_monotonic
+            .is_monotonic_increasing
         )
 
     def __eq__(self, other) -> bool:

diff --git a/tests/integration_tests/without_db/udfs/test_token_classification_udf.py b/tests/integration_tests/without_db/udfs/test_token_classification_udf.py
@@ -163,6 +163,9 @@ def test_token_classification_udf_with_span(
         'bucketfs_conn',
         'sub_dir',
         'model_name',
+        "text_data_docid",
+        "text_data_char_begin",
+        "text_data_char_end",
         'aggregation_strategy'
     ]
 

diff --git a/tests/unit_tests/udf_wrapper_params/filling_mask/mock_filling_mask.py b/tests/unit_tests/udf_wrapper_params/filling_mask/mock_filling_mask.py
@@ -29,12 +29,12 @@ class MockPipeline:
     counter = 0
 
     def __init__(self,
-                 task_type: str,
+                 task: str,
                  model: "MockFillingMaskModel",
                  tokenizer: MockSequenceTokenizer,
                  device: str,
                  framework: str):
-        self.task_type = task_type
+        self.task_type = task
         self.model = model
         self.tokenizer = tokenizer
         self.device = device

diff --git a/...s/udf_wrapper_params/question_answering/error_not_cached_multiple_model_multiple_batch.py b/...s/udf_wrapper_params/question_answering/error_not_cached_multiple_model_multiple_batch.py
@@ -54,9 +54,9 @@ class ErrorNotCachedMultipleModelMultipleBatch:
         "bfs_conn2": Connection(address=f"file://{base_cache_dir2}")
     }
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir2, "sub_dir2", "model2"):
+        PurePosixPath(base_cache_dir2, "sub_dir2", "model2_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
     })
 

diff --git a/...sts/udf_wrapper_params/question_answering/error_not_cached_single_model_multiple_batch.py b/...sts/udf_wrapper_params/question_answering/error_not_cached_single_model_multiple_batch.py
@@ -48,7 +48,7 @@ class ErrorNotCachedSingleModelMultipleBatch:
     }
 
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1)
     })
 

diff --git a/...df_wrapper_params/question_answering/error_on_prediction_multiple_model_multiple_batch.py b/...df_wrapper_params/question_answering/error_on_prediction_multiple_model_multiple_batch.py
@@ -53,9 +53,9 @@ class ErrorOnPredictionMultipleModelMultipleBatch:
         "bfs_conn2": Connection(address=f"file://{base_cache_dir2}")
     }
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir2, "sub_dir2", "model2"):
+        PurePosixPath(base_cache_dir2, "sub_dir2", "model2_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
     })
 

diff --git a/.../udf_wrapper_params/question_answering/error_on_prediction_single_model_multiple_batch.py b/.../udf_wrapper_params/question_answering/error_on_prediction_single_model_multiple_batch.py
@@ -47,7 +47,7 @@ class ErrorOnPredictionSingleModelMultipleBatch:
     }
 
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1)
     })
 

diff --git a/tests/unit_tests/udf_wrapper_params/question_answering/mock_question_answering.py b/tests/unit_tests/udf_wrapper_params/question_answering/mock_question_answering.py
@@ -9,7 +9,7 @@ def __init__(self, answer: str, score: float, rank: int):
         self.result = {"answer": answer, "score": score, "rank": rank}
 
     @classmethod
-    def from_pretrained(cls, model_name, cache_dir, use_auth_token):
+    def from_pretrained(cls, model_path):
         return cls
 
 
@@ -18,22 +18,21 @@ def __init__(self, mock_models: Dict[PurePosixPath,
                                          MockQuestionAnsweringModel]):
         self.mock_models = mock_models
 
-    def from_pretrained(self, model_name, cache_dir):
-        # the cache_dir path already has model_name
-        return self.mock_models[cache_dir]
+    def from_pretrained(self, model_path):
+        return self.mock_models[PurePosixPath(model_path)]
 
 
 class MockPipeline:
     ResultDict = NewType("ResultDict", Dict[str, Union[str, float]])
     counter = 0
 
     def __init__(self,
-                 task_type: str,
+                 task: str,
                  model: MockQuestionAnsweringModel,
                  tokenizer: MockSequenceTokenizer,
                  device: str,
                  framework: str):
-        self.task_type = task_type
+        self.task_type = task
         self.model = model
         self.tokenizer = tokenizer
         self.device = device

diff --git a/tests/unit_tests/udf_wrapper_params/question_answering/mock_sequence_tokenizer.py b/tests/unit_tests/udf_wrapper_params/question_answering/mock_sequence_tokenizer.py
@@ -2,5 +2,5 @@
 
 class MockSequenceTokenizer:
     @classmethod
-    def from_pretrained(cls, model_name, cache_dir, use_auth_token):
+    def from_pretrained(cls, model_path):
         return cls
diff --git a/...r_params/question_answering/multiple_bfsconn_single_subdir_single_model_multiple_batch.py b/...r_params/question_answering/multiple_bfsconn_single_subdir_single_model_multiple_batch.py
@@ -53,9 +53,9 @@ class MultipleBucketFSConnSingleSubdirSingleModelNameMultipleBatch:
         "bfs_conn2": Connection(address=f"file://{base_cache_dir2}")
     }
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir2, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir2, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
     })
 

diff --git a/...per_params/question_answering/multiple_bfsconn_single_subdir_single_model_single_batch.py b/...per_params/question_answering/multiple_bfsconn_single_subdir_single_model_single_batch.py
@@ -53,9 +53,9 @@ class MultipleBucketFSConnSingleSubdirSingleModelNameSingleBatch:
         "bfs_conn2": Connection(address=f"file://{base_cache_dir2}")
     }
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir2, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir2, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
     })
 

diff --git a/...nit_tests/udf_wrapper_params/question_answering/multiple_model_multiple_batch_complete.py b/...nit_tests/udf_wrapper_params/question_answering/multiple_model_multiple_batch_complete.py
@@ -53,9 +53,9 @@ class MultipleModelMultipleBatchComplete:
         "bfs_conn2": Connection(address=f"file://{base_cache_dir2}")
     }
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir2, "sub_dir2", "model2"):
+        PurePosixPath(base_cache_dir2, "sub_dir2", "model2_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
     })
 

diff --git a/...t_tests/udf_wrapper_params/question_answering/multiple_model_multiple_batch_incomplete.py b/...t_tests/udf_wrapper_params/question_answering/multiple_model_multiple_batch_incomplete.py
@@ -53,9 +53,9 @@ class MultipleModelMultipleBatchIncomplete:
         "bfs_conn2": Connection(address=f"file://{base_cache_dir2}")
     }
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir2, "sub_dir2", "model2"):
+        PurePosixPath(base_cache_dir2, "sub_dir2", "model2_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
     })
 

diff --git a/...pper_params/question_answering/multiple_model_multiple_batch_multiple_models_per_batch.py b/...pper_params/question_answering/multiple_model_multiple_batch_multiple_models_per_batch.py
@@ -65,13 +65,13 @@ class MultipleModelMultipleBatchMultipleModelsPerBatch:
         "bfs_conn4": Connection(address=f"file://{cache_dir4}")}
 
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir2, "sub_dir2", "model2"):
+        PurePosixPath(base_cache_dir2, "sub_dir2", "model2_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
-        PurePosixPath(cache_dir3, "sub_dir3", "model3"):
+        PurePosixPath(cache_dir3, "sub_dir3", "model3_question-answering"):
             MockQuestionAnsweringModel(answer="answer 3", score=0.3, rank=1),
-        PurePosixPath(cache_dir4, "sub_dir4", "model4"):
+        PurePosixPath(cache_dir4, "sub_dir4", "model4_question-answering"):
             MockQuestionAnsweringModel(answer="answer 4", score=0.4, rank=1),
     })
 

diff --git a/.../unit_tests/udf_wrapper_params/question_answering/multiple_model_single_batch_complete.py b/.../unit_tests/udf_wrapper_params/question_answering/multiple_model_single_batch_complete.py
@@ -53,9 +53,9 @@ class MultipleModelSingleBatchComplete:
         "bfs_conn2": Connection(address=f"file://{base_cache_dir2}")
     }
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir2, "sub_dir2", "model2"):
+        PurePosixPath(base_cache_dir2, "sub_dir2", "model2_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
     })
 

diff --git a/...nit_tests/udf_wrapper_params/question_answering/multiple_model_single_batch_incomplete.py b/...nit_tests/udf_wrapper_params/question_answering/multiple_model_single_batch_incomplete.py
@@ -53,9 +53,9 @@ class MultipleModelSingleBatchIncomplete:
         "bfs_conn2": Connection(address=f"file://{base_cache_dir2}")
     }
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir2, "sub_dir2", "model2"):
+        PurePosixPath(base_cache_dir2, "sub_dir2", "model2_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
     })
 

diff --git a/...ms/question_answering/multiple_topk_multiple_size_single_model_multiple_batch_complete.py b/...ms/question_answering/multiple_topk_multiple_size_single_model_multiple_batch_complete.py
@@ -68,13 +68,13 @@ class MultipleTopkMultipleSizeSingleModelNameMultipleBatch:
     }
 
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model2"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model2_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model3"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model3_question-answering"):
             MockQuestionAnsweringModel(answer="answer 3", score=0.3, rank=1),
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model4"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model4_question-answering"):
             MockQuestionAnsweringModel(answer="answer 4", score=0.4, rank=1),
     })
 

diff --git a/...rams/question_answering/multiple_topk_multiple_size_single_model_single_batch_complete.py b/...rams/question_answering/multiple_topk_multiple_size_single_model_single_batch_complete.py
@@ -68,13 +68,13 @@ class MultipleTopkMultipleSizeSingleModelNameSingleBatch:
     }
 
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model2"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model2_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model3"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model3_question-answering"):
             MockQuestionAnsweringModel(answer="answer 3", score=0.3, rank=1),
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model4"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model4_question-answering"):
             MockQuestionAnsweringModel(answer="answer 4", score=0.4, rank=1),
     })
 

diff --git a/...rams/question_answering/multiple_topk_single_size_single_model_multiple_batch_complete.py b/...rams/question_answering/multiple_topk_single_size_single_model_multiple_batch_complete.py
@@ -52,7 +52,7 @@ class MultipleTopkSingleSizeSingleModelNameMultipleBatch:
     }
 
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1)
     })
 

diff --git a/...params/question_answering/multiple_topk_single_size_single_model_single_batch_complete.py b/...params/question_answering/multiple_topk_single_size_single_model_single_batch_complete.py
@@ -52,7 +52,7 @@ class MultipleTopkSingleSizeSingleModelNameSingleBatch:
     }
 
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1)
     })
 

diff --git a/...r_params/question_answering/single_bfsconn_multiple_subdir_single_model_multiple_batch.py b/...r_params/question_answering/single_bfsconn_multiple_subdir_single_model_multiple_batch.py
@@ -51,9 +51,9 @@ class SingleBucketFSConnMultipleSubdirSingleModelNameMultipleBatch:
     }
 
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir1, "sub_dir2", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir2", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
     })
 

diff --git a/...per_params/question_answering/single_bfsconn_multiple_subdir_single_model_single_batch.py b/...per_params/question_answering/single_bfsconn_multiple_subdir_single_model_single_batch.py
@@ -51,9 +51,9 @@ class SingleBucketFSConnMultipleSubdirSingleModelNameSingleBatch:
     }
 
     mock_factory = MockQuestionAnsweringFactory({
-        PurePosixPath(base_cache_dir1, "sub_dir1", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir1", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 1", score=0.1, rank=1),
-        PurePosixPath(base_cache_dir1, "sub_dir2", "model1"):
+        PurePosixPath(base_cache_dir1, "sub_dir2", "model1_question-answering"):
             MockQuestionAnsweringModel(answer="answer 2", score=0.2, rank=1),
     })