fix Document process ut
Signed-off-by: xinyual <[email protected]>
xinyual committed Mar 11, 2024
1 parent 2d91130 commit a97011c
Showing 2 changed files with 15 additions and 15 deletions.
@@ -76,12 +76,12 @@ public DocumentChunkingProcessor(
         AnalysisRegistry analysisRegistry
     ) {
         super(tag, description);
-        validateAndParseAlgorithmMap(algorithmMap);
         this.fieldMap = fieldMap;
         this.environment = environment;
         this.clusterService = clusterService;
         this.indicesService = indicesService;
         this.analysisRegistry = analysisRegistry;
+        validateAndParseAlgorithmMap(algorithmMap);
     }

     public String getType() {
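The only production change is the position of this one call: validateAndParseAlgorithmMap(algorithmMap) now runs after the constructor has assigned its fields. A plausible motivation, an assumption on our part since the commit message does not spell it out, is that the validation path reads one of those fields (for example analysisRegistry when building a chunker) and would have observed null when invoked first. A minimal, hypothetical sketch of that initialization-order hazard, with invented names:

// Hypothetical sketch (names invented, not the processor's real code) of the
// initialization-order hazard such a move avoids: a validator that reads an
// instance field must run after that field is assigned, or it sees null.
public class InitOrderSketch {
    private Object registry;

    InitOrderSketch(Object registry, boolean validateFirst) {
        if (validateFirst) {
            validate(); // this.registry is still null here -> throws
        }
        this.registry = registry;
        if (!validateFirst) {
            validate(); // safe: runs after the field is assigned
        }
    }

    private void validate() {
        if (this.registry == null) {
            throw new IllegalStateException("registry not initialized yet");
        }
    }

    public static void main(String[] args) {
        new InitOrderSketch(new Object(), false); // succeeds
        try {
            new InitOrderSketch(new Object(), true); // validator runs too early
        } catch (IllegalStateException e) {
            System.out.println("failed as expected: " + e.getMessage());
        }
    }
}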
@@ -360,8 +360,8 @@ public void testExecute_withFixedTokenLength_andSourceDataStringWithMaxChunkNum_
         Object passages = document.getSourceAndMetadata().get(OUTPUT_FIELD);
         assert (passages instanceof List<?>);
         List<String> expectedPassages = new ArrayList<>();
-        expectedPassages.add("This is an example document to be chunked. The document");
-        expectedPassages.add("contains a single paragraph, two sentences and 24 tokens by");
+        expectedPassages.add("This is an example document to be chunked The document");
+        expectedPassages.add("contains a single paragraph two sentences and 24 tokens by");
         expectedPassages.add("standard tokenizer in OpenSearch");
         assertEquals(expectedPassages, passages);
     }
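Every expectation change in this test file is the same: the '.' and ',' disappear from the expected passages, and the identical edit repeats in the remaining hunks below. That is consistent with the behavior of the tokenizer the strings themselves mention: Lucene's StandardTokenizer, which backs the standard tokenizer in OpenSearch, discards punctuation, so passage text reassembled from its tokens contains none. A standalone check of that behavior (not part of this commit; assumes lucene-core on the classpath):

import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class StandardTokenizerDemo {
    public static void main(String[] args) throws Exception {
        // Tokenize a sentence containing '.' and collect the emitted terms.
        StandardTokenizer tokenizer = new StandardTokenizer();
        tokenizer.setReader(new StringReader(
            "This is an example document to be chunked. The document"));
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        List<String> tokens = new ArrayList<>();
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            tokens.add(term.toString());
        }
        tokenizer.end();
        tokenizer.close();
        // Prints the terms with no punctuation:
        // [This, is, an, example, document, to, be, chunked, The, document]
        System.out.println(tokens);
    }
}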
@@ -376,8 +376,8 @@ public void testExecute_withFixedTokenLength_andSourceDataStringWithMaxChunkNumT
         Object passages = document.getSourceAndMetadata().get(OUTPUT_FIELD);
         assert (passages instanceof List<?>);
         List<String> expectedPassages = new ArrayList<>();
-        expectedPassages.add("This is an example document to be chunked. The document");
-        expectedPassages.add("contains a single paragraph, two sentences and 24 tokens by");
+        expectedPassages.add("This is an example document to be chunked The document");
+        expectedPassages.add("contains a single paragraph two sentences and 24 tokens by");
         expectedPassages.add("standard tokenizer in OpenSearch");
         assertEquals(expectedPassages, passages);
     }
@@ -407,8 +407,8 @@ public void testExecute_withFixedTokenLength_andSourceDataString_thenSucceed() {
         Object passages = document.getSourceAndMetadata().get(OUTPUT_FIELD);
         assert (passages instanceof List<?>);
         List<String> expectedPassages = new ArrayList<>();
-        expectedPassages.add("This is an example document to be chunked. The document");
-        expectedPassages.add("contains a single paragraph, two sentences and 24 tokens by");
+        expectedPassages.add("This is an example document to be chunked The document");
+        expectedPassages.add("contains a single paragraph two sentences and 24 tokens by");
         expectedPassages.add("standard tokenizer in OpenSearch");
         assertEquals(expectedPassages, passages);
     }
@@ -440,11 +440,11 @@ public void testExecute_withFixedTokenLength_andSourceDataListStrings_thenSuccee
         assert (passages instanceof List<?>);

         List<String> expectedPassages = new ArrayList<>();
-        expectedPassages.add("This is the first document to be chunked. The document");
-        expectedPassages.add("contains a single paragraph, two sentences and 24 tokens by");
+        expectedPassages.add("This is the first document to be chunked The document");
+        expectedPassages.add("contains a single paragraph two sentences and 24 tokens by");
         expectedPassages.add("standard tokenizer in OpenSearch");
-        expectedPassages.add("This is the second document to be chunked. The document");
-        expectedPassages.add("contains a single paragraph, two sentences and 24 tokens by");
+        expectedPassages.add("This is the second document to be chunked The document");
+        expectedPassages.add("contains a single paragraph two sentences and 24 tokens by");
         expectedPassages.add("standard tokenizer in OpenSearch");
         assertEquals(expectedPassages, passages);
     }
@@ -488,8 +488,8 @@ public void testExecute_withFixedTokenLength_andFieldMapNestedMap_thenSucceed()
         assert (passages instanceof List);

         List<String> expectedPassages = new ArrayList<>();
-        expectedPassages.add("This is an example document to be chunked. The document");
-        expectedPassages.add("contains a single paragraph, two sentences and 24 tokens by");
+        expectedPassages.add("This is an example document to be chunked The document");
+        expectedPassages.add("contains a single paragraph two sentences and 24 tokens by");
         expectedPassages.add("standard tokenizer in OpenSearch");
         assertEquals(expectedPassages, passages);
     }
@@ -532,8 +532,8 @@ public void testExecute_withFixedTokenLength_andFieldMapNestedMap_sourceList_the
         Object nestedResult = document.getSourceAndMetadata().get(INPUT_NESTED_FIELD_KEY);
         List<String> expectedPassages = new ArrayList<>();

-        expectedPassages.add("This is an example document to be chunked. The document");
-        expectedPassages.add("contains a single paragraph, two sentences and 24 tokens by");
+        expectedPassages.add("This is an example document to be chunked The document");
+        expectedPassages.add("contains a single paragraph two sentences and 24 tokens by");
         expectedPassages.add("standard tokenizer in OpenSearch");
         assert (nestedResult instanceof List);
         assertEquals(((List<?>) nestedResult).size(), 2);
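For context on what these tests assert: each expected passage is a window of exactly ten tokens (the last holds the remaining four), which is what a fixed-token-length chunker produces for a 24-token input. A toy re-implementation over whitespace tokens reproduces the three passages; it is a simplification of the real processor, which tokenizes through the AnalysisRegistry and exposes further options:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class FixedTokenLengthChunkDemo {
    // Toy fixed-token-length chunker: split on whitespace and group the
    // tokens into windows of `tokenLimit`. The real processor tokenizes
    // through OpenSearch's AnalysisRegistry rather than String.split.
    static List<String> chunk(String text, int tokenLimit) {
        List<String> tokens = Arrays.asList(text.split("\\s+"));
        List<String> passages = new ArrayList<>();
        for (int i = 0; i < tokens.size(); i += tokenLimit) {
            int end = Math.min(i + tokenLimit, tokens.size());
            passages.add(String.join(" ", tokens.subList(i, end)));
        }
        return passages;
    }

    public static void main(String[] args) {
        String doc = "This is an example document to be chunked The document "
            + "contains a single paragraph two sentences and 24 tokens by "
            + "standard tokenizer in OpenSearch";
        // With a limit of 10 tokens this prints exactly the three
        // expectedPassages from the tests above.
        chunk(doc, 10).forEach(System.out::println);
    }
}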
