From 35588a29125aad75e6244f62b5f584beb074f8c5 Mon Sep 17 00:00:00 2001 From: xinyual <xinyual@amazon.com> Date: Mon, 11 Mar 2024 13:48:30 +0800 Subject: [PATCH] fix UT and chunker factory Signed-off-by: xinyual <xinyual@amazon.com> --- .../neuralsearch/processor/DocumentChunkingProcessor.java | 6 +----- .../neuralsearch/processor/chunker/ChunkerFactory.java | 2 +- .../processor/chunker/FixedTokenLengthChunker.java | 6 +++--- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessor.java index c3432dfaa..275438a88 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/DocumentChunkingProcessor.java @@ -30,7 +30,6 @@ import org.opensearch.neuralsearch.processor.chunker.Chunker; import org.opensearch.index.mapper.IndexFieldMapper; import org.opensearch.neuralsearch.processor.chunker.FixedTokenLengthChunker; -import static org.opensearch.neuralsearch.processor.chunker.ChunkerFactory.FIXED_TOKEN_LENGTH_ALGORITHM; /** * This processor is used for chunking user input data and chunked data could be used for downstream embedding processor, @@ -112,10 +111,7 @@ private void validateAndParseAlgorithmMap(Map<String, Object> algorithmMap) { ); } Map<String, Object> chunkerParameters = (Map<String, Object>) algorithmValue; - if (Objects.equals(algorithmKey, FIXED_TOKEN_LENGTH_ALGORITHM)) { - chunkerParameters.put(FixedTokenLengthChunker.ANALYSIS_REGISTRY_FIELD, analysisRegistry); - } - this.chunker = ChunkerFactory.create(algorithmKey, chunkerParameters); + this.chunker = ChunkerFactory.create(algorithmKey, analysisRegistry, chunkerParameters); if (chunkerParameters.containsKey(MAX_CHUNK_LIMIT_FIELD)) { String maxChunkLimitString = chunkerParameters.get(MAX_CHUNK_LIMIT_FIELD).toString(); if (!(NumberUtils.isParsable(maxChunkLimitString))) { diff --git a/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java b/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java index 99460c762..332c62c4f 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/chunker/ChunkerFactory.java @@ -25,7 +25,7 @@ public static Chunker create(String type, AnalysisRegistry analysisRegistry, Map return new DelimiterChunker(parameters); default: throw new IllegalArgumentException( - "chunker type [" + type + "] is not supported. Supported chunkers types are " + ChunkerFactory.getAllChunkers() + "chunker type [" + type + "] is not supported. Supported chunkers types are " + ChunkerFactory.getAllChunkers() ); } } diff --git a/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java b/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java index b63bed987..95b80363d 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.java @@ -75,13 +75,13 @@ public void validateParameters(Map<String, Object> parameters) { String overlapRateString = parameters.get(OVERLAP_RATE_FIELD).toString(); if (!(NumberUtils.isParsable(overlapRateString))) { throw new IllegalArgumentException( - "fixed length parameter [" + OVERLAP_RATE_FIELD + "] cannot be cast to [" + Number.class.getName() + "]" + "fixed length parameter [" + OVERLAP_RATE_FIELD + "] cannot be cast to [" + Number.class.getName() + "]" ); } Double overlapRate = Double.valueOf(overlapRateString); if (overlapRate < 0 || overlapRate.compareTo(OVERLAP_RATE_UPPER_BOUND) > 0) { throw new IllegalArgumentException( - "fixed length parameter [" + OVERLAP_RATE_FIELD + "] must be between 0 and " + OVERLAP_RATE_UPPER_BOUND + "fixed length parameter [" + OVERLAP_RATE_FIELD + "] must be between 0 and " + OVERLAP_RATE_UPPER_BOUND ); } this.overlapRate = overlapRate; @@ -119,7 +119,7 @@ private int validatePositiveIntegerParameter(Map<String, Object> parameters, Str String fieldValue = parameters.get(fieldName).toString(); if (!(NumberUtils.isParsable(fieldValue))) { throw new IllegalArgumentException( - "fixed length parameter [" + fieldName + "] cannot be cast to [" + Number.class.getName() + "]" + "fixed length parameter [" + fieldName + "] cannot be cast to [" + Number.class.getName() + "]" ); } if (NumberUtils.createInteger(fieldValue) <= 0) {