From 5a4824768751f3690781767b16f726a39d3d76bb Mon Sep 17 00:00:00 2001 From: Daniel Widdis Date: Wed, 17 Jul 2024 12:03:18 -0700 Subject: [PATCH 1/4] Add SplitResponseProcessor for search pipelines Signed-off-by: Daniel Widdis --- CHANGELOG.md | 1 + .../common/SplitResponseProcessor.java | 166 ++++++++++++++++ .../common/SplitResponseProcessorTests.java | 182 ++++++++++++++++++ 3 files changed, 349 insertions(+) create mode 100644 modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java create mode 100644 modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SplitResponseProcessorTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 29e70c5026bb8..327997d1a42b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add Plugin interface for loading application based configuration templates (([#14659](https://github.com/opensearch-project/OpenSearch/issues/14659))) - Refactor remote-routing-table service inline with remote state interfaces([#14668](https://github.com/opensearch-project/OpenSearch/pull/14668)) - Add prefix mode verification setting for repository verification (([#14790](https://github.com/opensearch-project/OpenSearch/pull/14790))) +- Add SplitResponseProcessor to Search Pipelines (([#14800](https://github.com/opensearch-project/OpenSearch/issues/14800))) ### Dependencies - Bump `org.gradle.test-retry` from 1.5.8 to 1.5.9 ([#13442](https://github.com/opensearch-project/OpenSearch/pull/13442)) diff --git a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java new file mode 100644 index 0000000000000..6bbf7386622a4 --- /dev/null +++ b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java @@ -0,0 +1,166 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.pipeline.common; + +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.common.collect.Tuple; +import org.opensearch.common.document.DocumentField; +import org.opensearch.common.xcontent.XContentHelper; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.xcontent.MediaType; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.ingest.ConfigurationUtils; +import org.opensearch.search.SearchHit; +import org.opensearch.search.pipeline.AbstractProcessor; +import org.opensearch.search.pipeline.Processor; +import org.opensearch.search.pipeline.SearchResponseProcessor; + +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Processor that sorts an array of items. + * Throws exception is the specified field is not an array. + */ +public class SplitResponseProcessor extends AbstractProcessor implements SearchResponseProcessor { + /** Key to reference this processor type from a search pipeline. */ + public static final String TYPE = "split"; + /** Key defining the string field to be split. */ + public static final String SPLIT_FIELD = "field"; + /** Key defining the delimiter used to split the string. This can be a regular expression pattern. */ + public static final String SEPARATOR = "separator"; + /** Optional key for handling empty trailing fields. */ + public static final String PRESERVE_TRAILING = "preserve_trailing"; + /** Optional key to put the split values in a different field. */ + public static final String TARGET_FIELD = "target_field"; + + private final String splitField; + private final String separator; + private final boolean preserveTrailing; + private final String targetField; + + SplitResponseProcessor( + String tag, + String description, + boolean ignoreFailure, + String splitField, + String separator, + boolean preserveTrailing, + String targetField + ) { + super(tag, description, ignoreFailure); + this.splitField = Objects.requireNonNull(splitField); + this.separator = Objects.requireNonNull(separator); + this.preserveTrailing = preserveTrailing; + this.targetField = targetField == null ? splitField : targetField; + } + + /** + * Getter function for splitField + * @return sortField + */ + public String getSplitField() { + return splitField; + } + + /** + * Getter function for separator + * @return separator + */ + public String getSeparator() { + return separator; + } + + /** + * Getter function for preserveTrailing + * @return preserveTrailing; + */ + public boolean isPreserveTrailing() { + return preserveTrailing; + } + + /** + * Getter function for targetField + * @return targetField + */ + public String getTargetField() { + return targetField; + } + + @Override + public String getType() { + return TYPE; + } + + @Override + public SearchResponse processResponse(SearchRequest request, SearchResponse response) throws Exception { + SearchHit[] hits = response.getHits().getHits(); + for (SearchHit hit : hits) { + Map fields = hit.getFields(); + if (fields.containsKey(splitField)) { + DocumentField docField = hit.getFields().get(splitField); + if (docField == null) { + throw new IllegalArgumentException("field [" + splitField + "] is null, cannot split."); + } + Object val = docField.getValue(); + if (val == null || !String.class.isAssignableFrom(val.getClass())) { + throw new IllegalArgumentException("field [" + splitField + "] is not a string, cannot split"); + } + String[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); + List splitList = Stream.of(strings).collect(Collectors.toList()); + hit.setDocumentField(targetField, new DocumentField(targetField, splitList)); + } + if (hit.hasSource()) { + BytesReference sourceRef = hit.getSourceRef(); + Tuple> typeAndSourceMap = XContentHelper.convertToMap( + sourceRef, + false, + (MediaType) null + ); + + Map sourceAsMap = typeAndSourceMap.v2(); + if (sourceAsMap.containsKey(splitField)) { + Object val = sourceAsMap.get(splitField); + if (val instanceof String) { + String[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); + List splitList = Stream.of(strings).collect(Collectors.toList()); + sourceAsMap.put(targetField, splitList); + } + XContentBuilder builder = XContentBuilder.builder(typeAndSourceMap.v1().xContent()); + builder.map(sourceAsMap); + hit.sourceRef(BytesReference.bytes(builder)); + } + } + } + return response; + } + + static class Factory implements Processor.Factory { + + @Override + public SplitResponseProcessor create( + Map> processorFactories, + String tag, + String description, + boolean ignoreFailure, + Map config, + PipelineContext pipelineContext + ) { + String splitField = ConfigurationUtils.readStringProperty(TYPE, tag, config, "field"); + String separator = ConfigurationUtils.readStringProperty(TYPE, tag, config, "separator"); + boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, tag, config, "preserve_trailing", false); + String targetField = ConfigurationUtils.readStringProperty(TYPE, tag, config, "target_field", splitField); + return new SplitResponseProcessor(tag, description, ignoreFailure, splitField, separator, preserveTrailing, targetField); + } + } +} diff --git a/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SplitResponseProcessorTests.java b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SplitResponseProcessorTests.java new file mode 100644 index 0000000000000..394f4d696befd --- /dev/null +++ b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SplitResponseProcessorTests.java @@ -0,0 +1,182 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a.java + * compatible open source license. + */ + +package org.opensearch.search.pipeline.common; + +import org.apache.lucene.search.TotalHits; +import org.opensearch.OpenSearchParseException; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.action.search.SearchResponseSections; +import org.opensearch.common.document.DocumentField; +import org.opensearch.core.common.bytes.BytesArray; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.ingest.RandomDocumentPicks; +import org.opensearch.search.SearchHit; +import org.opensearch.search.SearchHits; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class SplitResponseProcessorTests extends OpenSearchTestCase { + + private static final String NO_TRAILING = "one,two,three"; + private static final String TRAILING = "alpha,beta,gamma,"; + + private SearchRequest createDummyRequest() { + QueryBuilder query = new TermQueryBuilder("field", "value"); + SearchSourceBuilder source = new SearchSourceBuilder().query(query); + return new SearchRequest().source(source); + } + + private SearchResponse createTestResponse() { + SearchHit[] hits = new SearchHit[2]; + + // one response with source + Map csvMap = new HashMap<>(); + csvMap.put("csv", new DocumentField("csv", List.of(NO_TRAILING))); + hits[0] = new SearchHit(0, "doc 1", csvMap, Collections.emptyMap()); + hits[0].sourceRef(new BytesArray("{ \"csv\" : \"" + NO_TRAILING + "\" }")); + hits[0].score(1f); + + // one without source + csvMap = new HashMap<>(); + csvMap.put("csv", new DocumentField("csv", List.of(TRAILING))); + hits[1] = new SearchHit(1, "doc 2", csvMap, Collections.emptyMap()); + hits[1].score(2f); + + SearchHits searchHits = new SearchHits(hits, new TotalHits(2, TotalHits.Relation.EQUAL_TO), 2); + SearchResponseSections searchResponseSections = new SearchResponseSections(searchHits, null, null, false, false, null, 0); + return new SearchResponse(searchResponseSections, null, 1, 1, 0, 10, null, null); + } + + private SearchResponse createTestResponseNullField() { + SearchHit[] hits = new SearchHit[1]; + + Map map = new HashMap<>(); + map.put("csv", null); + hits[0] = new SearchHit(0, "doc 1", map, Collections.emptyMap()); + hits[0].sourceRef(new BytesArray("{ \"csv\" : null }")); + hits[0].score(1f); + + SearchHits searchHits = new SearchHits(hits, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1); + SearchResponseSections searchResponseSections = new SearchResponseSections(searchHits, null, null, false, false, null, 0); + return new SearchResponse(searchResponseSections, null, 1, 1, 0, 10, null, null); + } + + private SearchResponse createTestResponseEmptyList() { + SearchHit[] hits = new SearchHit[1]; + + Map map = new HashMap<>(); + map.put("empty", new DocumentField("empty", List.of())); + hits[0] = new SearchHit(0, "doc 1", map, Collections.emptyMap()); + hits[0].sourceRef(new BytesArray("{ \"empty\" : [] }")); + hits[0].score(1f); + + SearchHits searchHits = new SearchHits(hits, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1); + SearchResponseSections searchResponseSections = new SearchResponseSections(searchHits, null, null, false, false, null, 0); + return new SearchResponse(searchResponseSections, null, 1, 1, 0, 10, null, null); + } + + private SearchResponse createTestResponseNotString() { + SearchHit[] hits = new SearchHit[1]; + + Map piMap = new HashMap<>(); + piMap.put("maps", new DocumentField("maps", List.of(Map.of("foo", "I'm the Map!")))); + hits[0] = new SearchHit(0, "doc 1", piMap, Collections.emptyMap()); + hits[0].sourceRef(new BytesArray("{ \"maps\" : [{ \"foo\" : \"I'm the Map!\"}]] }")); + hits[0].score(1f); + + SearchHits searchHits = new SearchHits(hits, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1); + SearchResponseSections searchResponseSections = new SearchResponseSections(searchHits, null, null, false, false, null, 0); + return new SearchResponse(searchResponseSections, null, 1, 1, 0, 10, null, null); + } + + public void testSplitResponse() throws Exception { + SearchRequest request = createDummyRequest(); + + SplitResponseProcessor splitResponseProcessor = new SplitResponseProcessor(null, null, false, "csv", ",", false, "split"); + SearchResponse response = createTestResponse(); + SearchResponse splitResponse = splitResponseProcessor.processResponse(request, response); + + assertEquals(response.getHits(), splitResponse.getHits()); + + assertEquals(NO_TRAILING, splitResponse.getHits().getHits()[0].field("csv").getValue()); + assertEquals(List.of("one", "two", "three"), splitResponse.getHits().getHits()[0].field("split").getValues()); + Map map = splitResponse.getHits().getHits()[0].getSourceAsMap(); + assertNotNull(map); + assertEquals(List.of("one", "two", "three"), map.get("split")); + + assertEquals(TRAILING, splitResponse.getHits().getHits()[1].field("csv").getValue()); + assertEquals(List.of("alpha", "beta", "gamma"), splitResponse.getHits().getHits()[1].field("split").getValues()); + assertNull(splitResponse.getHits().getHits()[1].getSourceAsMap()); + } + + public void testSplitResponseSameField() throws Exception { + SearchRequest request = createDummyRequest(); + + SplitResponseProcessor splitResponseProcessor = new SplitResponseProcessor(null, null, false, "csv", ",", true, null); + SearchResponse response = createTestResponse(); + SearchResponse splitResponse = splitResponseProcessor.processResponse(request, response); + + assertEquals(response.getHits(), splitResponse.getHits()); + assertEquals(List.of("one", "two", "three"), splitResponse.getHits().getHits()[0].field("csv").getValues()); + assertEquals(List.of("alpha", "beta", "gamma", ""), splitResponse.getHits().getHits()[1].field("csv").getValues()); + } + + public void testSplitResponseEmptyList() { + SearchRequest request = createDummyRequest(); + + SplitResponseProcessor splitResponseProcessor = new SplitResponseProcessor(null, null, false, "empty", ",", false, null); + assertThrows(IllegalArgumentException.class, () -> splitResponseProcessor.processResponse(request, createTestResponseEmptyList())); + } + + public void testNullField() { + SearchRequest request = createDummyRequest(); + + SplitResponseProcessor splitResponseProcessor = new SplitResponseProcessor(null, null, false, "csv", ",", false, null); + + assertThrows(IllegalArgumentException.class, () -> splitResponseProcessor.processResponse(request, createTestResponseNullField())); + } + + public void testNotStringField() { + SearchRequest request = createDummyRequest(); + + SplitResponseProcessor splitResponseProcessor = new SplitResponseProcessor(null, null, false, "maps", ",", false, null); + + assertThrows(IllegalArgumentException.class, () -> splitResponseProcessor.processResponse(request, createTestResponseNotString())); + } + + public void testFactory() { + String splitField = RandomDocumentPicks.randomFieldName(random()); + String targetField = RandomDocumentPicks.randomFieldName(random()); + Map config = new HashMap<>(); + config.put("field", splitField); + config.put("separator", ","); + config.put("preserve_trailing", true); + config.put("target_field", targetField); + + SplitResponseProcessor.Factory factory = new SplitResponseProcessor.Factory(); + SplitResponseProcessor processor = factory.create(Collections.emptyMap(), null, null, false, config, null); + assertEquals("split", processor.getType()); + assertEquals(splitField, processor.getSplitField()); + assertEquals(",", processor.getSeparator()); + assertTrue(processor.isPreserveTrailing()); + assertEquals(targetField, processor.getTargetField()); + + expectThrows( + OpenSearchParseException.class, + () -> factory.create(Collections.emptyMap(), null, null, false, Collections.emptyMap(), null) + ); + } +} From 3d2f770e3c1238768c37cc868f60a4dca78c21cf Mon Sep 17 00:00:00 2001 From: Daniel Widdis Date: Wed, 17 Jul 2024 15:46:03 -0700 Subject: [PATCH 2/4] Register the split processor factory Signed-off-by: Daniel Widdis --- .../pipeline/common/SearchPipelineCommonModulePlugin.java | 4 +++- .../common/SearchPipelineCommonModulePluginTests.java | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SearchPipelineCommonModulePlugin.java b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SearchPipelineCommonModulePlugin.java index 1574621a8200e..d05101da2817c 100644 --- a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SearchPipelineCommonModulePlugin.java +++ b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SearchPipelineCommonModulePlugin.java @@ -96,7 +96,9 @@ public Map> getResponseProces TruncateHitsResponseProcessor.TYPE, new TruncateHitsResponseProcessor.Factory(), CollapseResponseProcessor.TYPE, - new CollapseResponseProcessor.Factory() + new CollapseResponseProcessor.Factory(), + SplitResponseProcessor.TYPE, + new SplitResponseProcessor.Factory() ) ); } diff --git a/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SearchPipelineCommonModulePluginTests.java b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SearchPipelineCommonModulePluginTests.java index 519468ebe17ff..d4f9ae2490a10 100644 --- a/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SearchPipelineCommonModulePluginTests.java +++ b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SearchPipelineCommonModulePluginTests.java @@ -82,7 +82,7 @@ public void testAllowlistNotSpecified() throws IOException { try (SearchPipelineCommonModulePlugin plugin = new SearchPipelineCommonModulePlugin()) { assertEquals(Set.of("oversample", "filter_query", "script"), plugin.getRequestProcessors(createParameters(settings)).keySet()); assertEquals( - Set.of("rename_field", "truncate_hits", "collapse"), + Set.of("rename_field", "truncate_hits", "collapse", "split"), plugin.getResponseProcessors(createParameters(settings)).keySet() ); assertEquals(Set.of(), plugin.getSearchPhaseResultsProcessors(createParameters(settings)).keySet()); From 8d11fa2037075e2879a2f6f15ef226e8d82bb940 Mon Sep 17 00:00:00 2001 From: Daniel Widdis Date: Fri, 19 Jul 2024 13:35:41 -0700 Subject: [PATCH 3/4] Address code review comments Signed-off-by: Daniel Widdis --- .../common/SplitResponseProcessor.java | 15 +++++---- .../common/SplitResponseProcessorTests.java | 31 +++++++++++++++++++ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java index 6bbf7386622a4..bd9ecac67dae0 100644 --- a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java +++ b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java @@ -22,11 +22,10 @@ import org.opensearch.search.pipeline.Processor; import org.opensearch.search.pipeline.SearchResponseProcessor; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.stream.Collectors; -import java.util.stream.Stream; /** * Processor that sorts an array of items. @@ -117,7 +116,7 @@ public SearchResponse processResponse(SearchRequest request, SearchResponse resp throw new IllegalArgumentException("field [" + splitField + "] is not a string, cannot split"); } String[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); - List splitList = Stream.of(strings).collect(Collectors.toList()); + List splitList = List.copyOf(Arrays.asList(strings)); hit.setDocumentField(targetField, new DocumentField(targetField, splitList)); } if (hit.hasSource()) { @@ -133,7 +132,7 @@ public SearchResponse processResponse(SearchRequest request, SearchResponse resp Object val = sourceAsMap.get(splitField); if (val instanceof String) { String[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); - List splitList = Stream.of(strings).collect(Collectors.toList()); + List splitList = List.copyOf(Arrays.asList(strings)); sourceAsMap.put(targetField, splitList); } XContentBuilder builder = XContentBuilder.builder(typeAndSourceMap.v1().xContent()); @@ -156,10 +155,10 @@ public SplitResponseProcessor create( Map config, PipelineContext pipelineContext ) { - String splitField = ConfigurationUtils.readStringProperty(TYPE, tag, config, "field"); - String separator = ConfigurationUtils.readStringProperty(TYPE, tag, config, "separator"); - boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, tag, config, "preserve_trailing", false); - String targetField = ConfigurationUtils.readStringProperty(TYPE, tag, config, "target_field", splitField); + String splitField = ConfigurationUtils.readStringProperty(TYPE, tag, config, SPLIT_FIELD); + String separator = ConfigurationUtils.readStringProperty(TYPE, tag, config, SEPARATOR); + boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, tag, config, PRESERVE_TRAILING, false); + String targetField = ConfigurationUtils.readStringProperty(TYPE, tag, config, TARGET_FIELD, splitField); return new SplitResponseProcessor(tag, description, ignoreFailure, splitField, separator, preserveTrailing, targetField); } } diff --git a/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SplitResponseProcessorTests.java b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SplitResponseProcessorTests.java index 394f4d696befd..fcbc8ccf43cff 100644 --- a/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SplitResponseProcessorTests.java +++ b/modules/search-pipeline-common/src/test/java/org/opensearch/search/pipeline/common/SplitResponseProcessorTests.java @@ -32,6 +32,7 @@ public class SplitResponseProcessorTests extends OpenSearchTestCase { private static final String NO_TRAILING = "one,two,three"; private static final String TRAILING = "alpha,beta,gamma,"; + private static final String REGEX_DELIM = "one1two2three"; private SearchRequest createDummyRequest() { QueryBuilder query = new TermQueryBuilder("field", "value"); @@ -60,6 +61,20 @@ private SearchResponse createTestResponse() { return new SearchResponse(searchResponseSections, null, 1, 1, 0, 10, null, null); } + private SearchResponse createTestResponseRegex() { + SearchHit[] hits = new SearchHit[1]; + + Map dsvMap = new HashMap<>(); + dsvMap.put("dsv", new DocumentField("dsv", List.of(REGEX_DELIM))); + hits[0] = new SearchHit(0, "doc 1", dsvMap, Collections.emptyMap()); + hits[0].sourceRef(new BytesArray("{ \"dsv\" : \"" + REGEX_DELIM + "\" }")); + hits[0].score(1f); + + SearchHits searchHits = new SearchHits(hits, new TotalHits(1, TotalHits.Relation.EQUAL_TO), 1); + SearchResponseSections searchResponseSections = new SearchResponseSections(searchHits, null, null, false, false, null, 0); + return new SearchResponse(searchResponseSections, null, 1, 1, 0, 10, null, null); + } + private SearchResponse createTestResponseNullField() { SearchHit[] hits = new SearchHit[1]; @@ -122,6 +137,22 @@ public void testSplitResponse() throws Exception { assertNull(splitResponse.getHits().getHits()[1].getSourceAsMap()); } + public void testSplitResponseRegex() throws Exception { + SearchRequest request = createDummyRequest(); + + SplitResponseProcessor splitResponseProcessor = new SplitResponseProcessor(null, null, false, "dsv", "\\d", false, "split"); + SearchResponse response = createTestResponseRegex(); + SearchResponse splitResponse = splitResponseProcessor.processResponse(request, response); + + assertEquals(response.getHits(), splitResponse.getHits()); + + assertEquals(REGEX_DELIM, splitResponse.getHits().getHits()[0].field("dsv").getValue()); + assertEquals(List.of("one", "two", "three"), splitResponse.getHits().getHits()[0].field("split").getValues()); + Map map = splitResponse.getHits().getHits()[0].getSourceAsMap(); + assertNotNull(map); + assertEquals(List.of("one", "two", "three"), map.get("split")); + } + public void testSplitResponseSameField() throws Exception { SearchRequest request = createDummyRequest(); From f20efd45948819cd452f4bd9f4f2baea7d5f670f Mon Sep 17 00:00:00 2001 From: Daniel Widdis Date: Sun, 21 Jul 2024 15:26:37 -0700 Subject: [PATCH 4/4] Avoid list copy by casting array Signed-off-by: Daniel Widdis --- .../pipeline/common/SplitResponseProcessor.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java index bd9ecac67dae0..0762f8f59b76e 100644 --- a/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java +++ b/modules/search-pipeline-common/src/main/java/org/opensearch/search/pipeline/common/SplitResponseProcessor.java @@ -23,7 +23,6 @@ import org.opensearch.search.pipeline.SearchResponseProcessor; import java.util.Arrays; -import java.util.List; import java.util.Map; import java.util.Objects; @@ -115,9 +114,8 @@ public SearchResponse processResponse(SearchRequest request, SearchResponse resp if (val == null || !String.class.isAssignableFrom(val.getClass())) { throw new IllegalArgumentException("field [" + splitField + "] is not a string, cannot split"); } - String[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); - List splitList = List.copyOf(Arrays.asList(strings)); - hit.setDocumentField(targetField, new DocumentField(targetField, splitList)); + Object[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); + hit.setDocumentField(targetField, new DocumentField(targetField, Arrays.asList(strings))); } if (hit.hasSource()) { BytesReference sourceRef = hit.getSourceRef(); @@ -131,9 +129,8 @@ public SearchResponse processResponse(SearchRequest request, SearchResponse resp if (sourceAsMap.containsKey(splitField)) { Object val = sourceAsMap.get(splitField); if (val instanceof String) { - String[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); - List splitList = List.copyOf(Arrays.asList(strings)); - sourceAsMap.put(targetField, splitList); + Object[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); + sourceAsMap.put(targetField, Arrays.asList(strings)); } XContentBuilder builder = XContentBuilder.builder(typeAndSourceMap.v1().xContent()); builder.map(sourceAsMap);