-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add SplitResponseProcessor to Search Pipelines #14800
Changes from all commits
5a48247
3d2f770
8d11fa2
f20efd4
65127cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.search.pipeline.common; | ||
|
||
import org.opensearch.action.search.SearchRequest; | ||
import org.opensearch.action.search.SearchResponse; | ||
import org.opensearch.common.collect.Tuple; | ||
import org.opensearch.common.document.DocumentField; | ||
import org.opensearch.common.xcontent.XContentHelper; | ||
import org.opensearch.core.common.bytes.BytesReference; | ||
import org.opensearch.core.xcontent.MediaType; | ||
import org.opensearch.core.xcontent.XContentBuilder; | ||
import org.opensearch.ingest.ConfigurationUtils; | ||
import org.opensearch.search.SearchHit; | ||
import org.opensearch.search.pipeline.AbstractProcessor; | ||
import org.opensearch.search.pipeline.Processor; | ||
import org.opensearch.search.pipeline.SearchResponseProcessor; | ||
|
||
import java.util.Arrays; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
|
||
/** | ||
* Processor that sorts an array of items. | ||
* Throws exception is the specified field is not an array. | ||
*/ | ||
public class SplitResponseProcessor extends AbstractProcessor implements SearchResponseProcessor { | ||
/** Key to reference this processor type from a search pipeline. */ | ||
public static final String TYPE = "split"; | ||
/** Key defining the string field to be split. */ | ||
public static final String SPLIT_FIELD = "field"; | ||
/** Key defining the delimiter used to split the string. This can be a regular expression pattern. */ | ||
public static final String SEPARATOR = "separator"; | ||
/** Optional key for handling empty trailing fields. */ | ||
public static final String PRESERVE_TRAILING = "preserve_trailing"; | ||
/** Optional key to put the split values in a different field. */ | ||
public static final String TARGET_FIELD = "target_field"; | ||
|
||
private final String splitField; | ||
private final String separator; | ||
private final boolean preserveTrailing; | ||
private final String targetField; | ||
|
||
SplitResponseProcessor( | ||
String tag, | ||
String description, | ||
boolean ignoreFailure, | ||
String splitField, | ||
String separator, | ||
boolean preserveTrailing, | ||
String targetField | ||
) { | ||
super(tag, description, ignoreFailure); | ||
this.splitField = Objects.requireNonNull(splitField); | ||
this.separator = Objects.requireNonNull(separator); | ||
this.preserveTrailing = preserveTrailing; | ||
this.targetField = targetField == null ? splitField : targetField; | ||
} | ||
|
||
/** | ||
* Getter function for splitField | ||
* @return sortField | ||
*/ | ||
public String getSplitField() { | ||
return splitField; | ||
} | ||
|
||
/** | ||
* Getter function for separator | ||
* @return separator | ||
*/ | ||
public String getSeparator() { | ||
return separator; | ||
} | ||
|
||
/** | ||
* Getter function for preserveTrailing | ||
* @return preserveTrailing; | ||
*/ | ||
public boolean isPreserveTrailing() { | ||
return preserveTrailing; | ||
} | ||
|
||
/** | ||
* Getter function for targetField | ||
* @return targetField | ||
*/ | ||
public String getTargetField() { | ||
return targetField; | ||
} | ||
|
||
@Override | ||
public String getType() { | ||
return TYPE; | ||
} | ||
|
||
@Override | ||
public SearchResponse processResponse(SearchRequest request, SearchResponse response) throws Exception { | ||
SearchHit[] hits = response.getHits().getHits(); | ||
for (SearchHit hit : hits) { | ||
Map<String, DocumentField> fields = hit.getFields(); | ||
if (fields.containsKey(splitField)) { | ||
DocumentField docField = hit.getFields().get(splitField); | ||
if (docField == null) { | ||
throw new IllegalArgumentException("field [" + splitField + "] is null, cannot split."); | ||
} | ||
Object val = docField.getValue(); | ||
if (val == null || !String.class.isAssignableFrom(val.getClass())) { | ||
throw new IllegalArgumentException("field [" + splitField + "] is not a string, cannot split"); | ||
} | ||
Object[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); | ||
hit.setDocumentField(targetField, new DocumentField(targetField, Arrays.asList(strings))); | ||
} | ||
if (hit.hasSource()) { | ||
BytesReference sourceRef = hit.getSourceRef(); | ||
Tuple<? extends MediaType, Map<String, Object>> typeAndSourceMap = XContentHelper.convertToMap( | ||
sourceRef, | ||
false, | ||
(MediaType) null | ||
); | ||
|
||
Map<String, Object> sourceAsMap = typeAndSourceMap.v2(); | ||
if (sourceAsMap.containsKey(splitField)) { | ||
Object val = sourceAsMap.get(splitField); | ||
if (val instanceof String) { | ||
Object[] strings = ((String) val).split(separator, preserveTrailing ? -1 : 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A method for splitting string to avoid duplication. Can be used here and on L117 private String[] splitString(String stringVal) {
return stringVal.split(separator, preserveTrailing ? -1 : 0);
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That already is a single method. It also matches exactly the ingest split processor implementation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And it needs to return |
||
sourceAsMap.put(targetField, Arrays.asList(strings)); | ||
} | ||
XContentBuilder builder = XContentBuilder.builder(typeAndSourceMap.v1().xContent()); | ||
builder.map(sourceAsMap); | ||
hit.sourceRef(BytesReference.bytes(builder)); | ||
} | ||
} | ||
} | ||
return response; | ||
} | ||
|
||
static class Factory implements Processor.Factory<SearchResponseProcessor> { | ||
|
||
@Override | ||
public SplitResponseProcessor create( | ||
Map<String, Processor.Factory<SearchResponseProcessor>> processorFactories, | ||
String tag, | ||
String description, | ||
boolean ignoreFailure, | ||
Map<String, Object> config, | ||
PipelineContext pipelineContext | ||
) { | ||
String splitField = ConfigurationUtils.readStringProperty(TYPE, tag, config, SPLIT_FIELD); | ||
String separator = ConfigurationUtils.readStringProperty(TYPE, tag, config, SEPARATOR); | ||
boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, tag, config, PRESERVE_TRAILING, false); | ||
String targetField = ConfigurationUtils.readStringProperty(TYPE, tag, config, TARGET_FIELD, splitField); | ||
return new SplitResponseProcessor(tag, description, ignoreFailure, splitField, separator, preserveTrailing, targetField); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should also check for
if (val instanceof String) {
before L117.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We do:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But
if (!(val instanceof String))
would be more performant :|