Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to split processor for preserving trailing empty fields #48664

Merged
merged 1 commit into from
Oct 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions docs/reference/ingest/processors/split.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Splits a field into an array using a separator character. Only works on string f
| `separator` | yes | - | A regex which matches the separator, e.g. `,` or `\s+`
| `target_field` | no | `field` | The field to assign the split value to, by default `field` is updated in-place
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
| `preserve_trailing` | no | `false` | If `true`, empty trailing fields in the input are preserved instead of being discarded
include::common-options.asciidoc[]
|======

Expand All @@ -25,3 +26,20 @@ include::common-options.asciidoc[]
--------------------------------------------------
// NOTCONSOLE
<1> Treat all consecutive whitespace characters as a single separator

If the `preserve_trailing` option is enabled, any trailing empty fields in the input will be preserved. For example,
with the configuration below, a value of `A,,B,,` in the `my_field` field will be split into an array of five elements
`["A", "", "B", "", ""]`, the last two of which are empty trailing fields. If the `preserve_trailing` option were not
enabled, the two empty trailing fields would be discarded, resulting in the three-element array `["A", "", "B"]`.

[source,js]
--------------------------------------------------
{
"split": {
"field": "my_field",
"separator": ",",
"preserve_trailing": true
}
}
--------------------------------------------------
// NOTCONSOLE
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,15 @@ public final class SplitProcessor extends AbstractProcessor {
private final String field;
private final String separator;
private final boolean ignoreMissing;
private final boolean preserveTrailing;
private final String targetField;

/**
 * Creates a split processor.
 *
 * @param tag              processor tag, handed to the superclass constructor
 * @param field            name of the field whose value is split
 * @param separator        regular expression passed to {@link String#split(String, int)}
 * @param ignoreMissing    stored as-is and exposed through {@code isIgnoreMissing()}
 * @param preserveTrailing when {@code true} the split uses limit {@code -1} so trailing empty
 *                         strings are kept; when {@code false} it uses limit {@code 0}
 * @param targetField      name of the field that receives the resulting list
 */
SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, boolean preserveTrailing, String targetField) {
    super(tag);
    this.field = field;
    this.separator = separator;
    this.ignoreMissing = ignoreMissing;
    this.preserveTrailing = preserveTrailing;
    this.targetField = targetField;
}

Expand All @@ -63,6 +65,8 @@ boolean isIgnoreMissing() {
return ignoreMissing;
}

/**
 * @return the {@code preserveTrailing} flag this processor was constructed with
 */
boolean isPreserveTrailing() {
    return this.preserveTrailing;
}

/**
 * @return the name of the field that the split result is written to
 */
String getTargetField() {
    return this.targetField;
}
Expand All @@ -77,7 +81,7 @@ public IngestDocument execute(IngestDocument document) {
throw new IllegalArgumentException("field [" + field + "] is null, cannot split.");
}

String[] strings = oldVal.split(separator);
String[] strings = oldVal.split(separator, preserveTrailing ? -1 : 0);
List<String> splitList = new ArrayList<>(strings.length);
Collections.addAll(splitList, strings);
document.setFieldValue(targetField, splitList);
Expand All @@ -95,9 +99,10 @@ public SplitProcessor create(Map<String, Processor.Factory> registry, String pro
Map<String, Object> config) throws Exception {
String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "preserve_trailing", false);
String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", field);
return new SplitProcessor(processorTag, field,
ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator"), ignoreMissing, targetField);
String separator = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator");
return new SplitProcessor(processorTag, field, separator, ignoreMissing, preserveTrailing, targetField);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,24 @@ public void testCreateWithTargetField() throws Exception {
assertThat(splitProcessor.getField(), equalTo("field1"));
assertThat(splitProcessor.getSeparator(), equalTo("\\."));
assertFalse(splitProcessor.isIgnoreMissing());
assertFalse(splitProcessor.isPreserveTrailing());
assertThat(splitProcessor.getTargetField(), equalTo("target"));
}

/**
 * Builds a processor through the factory with {@code preserve_trailing} set to {@code true}
 * and checks that every configured property, including the flag itself, is carried over.
 */
public void testCreateWithPreserveTrailing() throws Exception {
    SplitProcessor.Factory factory = new SplitProcessor.Factory();
    Map<String, Object> config = new HashMap<>();
    config.put("field", "field1");
    config.put("separator", "\\.");
    config.put("target_field", "target");
    config.put("preserve_trailing", true);
    String processorTag = randomAlphaOfLength(10);
    SplitProcessor splitProcessor = factory.create(null, processorTag, config);
    assertThat(splitProcessor.getTag(), equalTo(processorTag));
    assertThat(splitProcessor.getField(), equalTo("field1"));
    assertThat(splitProcessor.getSeparator(), equalTo("\\."));
    assertFalse(splitProcessor.isIgnoreMissing());
    // The point of this test: without this check it would pass even if the factory ignored the option.
    assertTrue(splitProcessor.isPreserveTrailing());
    assertThat(splitProcessor.getTargetField(), equalTo("target"));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ public class SplitProcessorTests extends ESTestCase {
/**
 * Splits the value {@code 127.0.0.1} in place on literal dots, with {@code ignoreMissing} and
 * {@code preserveTrailing} both disabled, and expects the four octets as a list.
 */
public void testSplit() throws Exception {
    IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
    String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
    // Single declaration using the six-argument constructor (tag, field, separator, ignoreMissing,
    // preserveTrailing, targetField); the superseded five-argument call has been removed.
    Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
    processor.execute(ingestDocument);
    assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
}

public void testSplitFieldNotFound() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
String fieldName = RandomDocumentPicks.randomFieldName(random());
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
try {
processor.execute(ingestDocument);
fail("split processor should have failed");
Expand All @@ -59,7 +59,7 @@ public void testSplitFieldNotFound() throws Exception {
public void testSplitNullValue() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(),
Collections.singletonMap("field", null));
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, "field");
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, false, "field");
try {
processor.execute(ingestDocument);
fail("split processor should have failed");
Expand All @@ -73,15 +73,15 @@ public void testSplitNullValueWithIgnoreMissing() throws Exception {
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
Collections.singletonMap(fieldName, null));
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, fieldName);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, false, fieldName);
processor.execute(ingestDocument);
assertIngestDocument(originalIngestDocument, ingestDocument);
}

/**
 * Runs the processor with {@code ignoreMissing} enabled against a document built from an empty
 * source map and expects the document to be unchanged afterwards.
 */
public void testSplitNonExistentWithIgnoreMissing() throws Exception {
    IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
    // Work on a copy so the original can serve as the expected state.
    IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
    // Single declaration using the six-argument constructor; the superseded five-argument call has been removed.
    Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, false, "field");
    processor.execute(ingestDocument);
    assertIngestDocument(originalIngestDocument, ingestDocument);
}
Expand All @@ -90,7 +90,7 @@ public void testSplitNonStringValue() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
String fieldName = RandomDocumentPicks.randomFieldName(random());
ingestDocument.setFieldValue(fieldName, randomInt());
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
try {
processor.execute(ingestDocument);
fail("split processor should have failed");
Expand Down Expand Up @@ -121,8 +121,24 @@ public void testSplitWithTargetField() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
String targetFieldName = fieldName + randomAlphaOfLength(5);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, targetFieldName);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, targetFieldName);
processor.execute(ingestDocument);
assertThat(ingestDocument.getFieldValue(targetFieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
}

/**
 * With {@code preserveTrailing} enabled, the two empty strings after the last {@code |}
 * separators are expected in the result.
 */
public void testSplitWithPreserveTrailing() throws Exception {
    String input = "foo|bar|baz||";
    List<String> withTrailingEmpties = Arrays.asList("foo", "bar", "baz", "", "");
    doTestSplitWithPreserveTrailing(true, input, withTrailingEmpties);
}

/**
 * With {@code preserveTrailing} disabled, only the three non-empty tokens are expected;
 * the trailing empty strings are dropped.
 */
public void testSplitWithoutPreserveTrailing() throws Exception {
    String input = "foo|bar|baz||";
    List<String> withoutTrailingEmpties = Arrays.asList("foo", "bar", "baz");
    doTestSplitWithPreserveTrailing(false, input, withoutTrailingEmpties);
}

/**
 * Shared driver for the trailing-field tests: stores {@code fieldValue} in a random field of a
 * random document, splits it in place on a literal {@code |}, and compares the outcome.
 *
 * @param preserveTrailing flag handed to the processor under test
 * @param fieldValue       raw string to split
 * @param expected         list the field must hold after the processor has run
 */
private void doTestSplitWithPreserveTrailing(boolean preserveTrailing, String fieldValue, List<String> expected) throws Exception {
    IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random());
    String sourceField = RandomDocumentPicks.addRandomField(random(), doc, fieldValue);
    String tag = randomAlphaOfLength(10);
    // Source and target are the same field, so the split happens in place.
    Processor splitter = new SplitProcessor(tag, sourceField, "\\|", false, preserveTrailing, sourceField);
    splitter.execute(doc);
    assertThat(doc.getFieldValue(sourceField, List.class), equalTo(expected));
}
}