Skip to content

Commit

Permalink
Add option to split processor for preserving trailing empty fields (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
danhermann authored Oct 30, 2019
1 parent 63831fa commit fcc18dc
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 11 deletions.
18 changes: 18 additions & 0 deletions docs/reference/ingest/processors/split.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Splits a field into an array using a separator character. Only works on string f
| `separator` | yes | - | A regex which matches the separator, eg `,` or `\s+`
| `target_field` | no | `field` | The field to assign the split value to, by default `field` is updated in-place
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
| `preserve_trailing` | no | `false` | If `true`, empty trailing fields in the input are preserved instead of being discarded
include::common-options.asciidoc[]
|======

Expand All @@ -25,3 +26,20 @@ include::common-options.asciidoc[]
--------------------------------------------------
// NOTCONSOLE
<1> Treat all consecutive whitespace characters as a single separator

If the `preserve_trailing` option is enabled, any trailing empty fields in the input will be preserved. For example,
in the configuration below, a value of `A,,B,,` in the `my_field` field will be split into an array of five elements
`["A", "", "B", "", ""]`, the last two of which are empty trailing fields. If the `preserve_trailing` option were not
enabled, the two empty trailing fields would be discarded, resulting in the three-element array `["A", "", "B"]`.

[source,js]
--------------------------------------------------
{
"split": {
"field": "my_field",
"separator": ",",
"preserve_trailing": true
}
}
--------------------------------------------------
// NOTCONSOLE
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,15 @@ public final class SplitProcessor extends AbstractProcessor {
private final String field;
private final String separator;
private final boolean ignoreMissing;
private final boolean preserveTrailing;
private final String targetField;

SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, String targetField) {
SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, boolean preserveTrailing, String targetField) {
super(tag);
this.field = field;
this.separator = separator;
this.ignoreMissing = ignoreMissing;
this.preserveTrailing = preserveTrailing;
this.targetField = targetField;
}

Expand All @@ -63,6 +65,8 @@ boolean isIgnoreMissing() {
return ignoreMissing;
}

/**
 * Returns the {@code preserveTrailing} flag this processor was constructed with.
 *
 * @return {@code true} if empty trailing fields are kept when splitting
 */
boolean isPreserveTrailing() {
    return this.preserveTrailing;
}

/**
 * Returns the name of the field that the split result is written to.
 *
 * @return the configured target field name
 */
String getTargetField() {
    return this.targetField;
}
Expand All @@ -77,7 +81,7 @@ public IngestDocument execute(IngestDocument document) {
throw new IllegalArgumentException("field [" + field + "] is null, cannot split.");
}

String[] strings = oldVal.split(separator);
String[] strings = oldVal.split(separator, preserveTrailing ? -1 : 0);
List<String> splitList = new ArrayList<>(strings.length);
Collections.addAll(splitList, strings);
document.setFieldValue(targetField, splitList);
Expand All @@ -95,9 +99,10 @@ public SplitProcessor create(Map<String, Processor.Factory> registry, String pro
Map<String, Object> config) throws Exception {
String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "preserve_trailing", false);
String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", field);
return new SplitProcessor(processorTag, field,
ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator"), ignoreMissing, targetField);
String separator = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator");
return new SplitProcessor(processorTag, field, separator, ignoreMissing, preserveTrailing, targetField);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,24 @@ public void testCreateWithTargetField() throws Exception {
assertThat(splitProcessor.getField(), equalTo("field1"));
assertThat(splitProcessor.getSeparator(), equalTo("\\."));
assertFalse(splitProcessor.isIgnoreMissing());
assertFalse(splitProcessor.isPreserveTrailing());
assertThat(splitProcessor.getTargetField(), equalTo("target"));
}

/**
 * Verifies that the factory reads the {@code preserve_trailing} option from the
 * configuration map and hands it to the processor it creates, alongside the
 * other configured properties.
 */
public void testCreateWithPreserveTrailing() throws Exception {
    SplitProcessor.Factory factory = new SplitProcessor.Factory();
    Map<String, Object> config = new HashMap<>();
    config.put("field", "field1");
    config.put("separator", "\\.");
    config.put("target_field", "target");
    config.put("preserve_trailing", true);
    String processorTag = randomAlphaOfLength(10);
    SplitProcessor splitProcessor = factory.create(null, processorTag, config);
    assertThat(splitProcessor.getTag(), equalTo(processorTag));
    assertThat(splitProcessor.getField(), equalTo("field1"));
    assertThat(splitProcessor.getSeparator(), equalTo("\\."));
    // ignore_missing is not set in the config, so it must keep its default.
    assertFalse(splitProcessor.isIgnoreMissing());
    // The purpose of this test: the flag set in the config must reach the processor.
    // Without this assertion the test would pass even if the factory ignored the option.
    assertTrue(splitProcessor.isPreserveTrailing());
    assertThat(splitProcessor.getTargetField(), equalTo("target"));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ public class SplitProcessorTests extends ESTestCase {
public void testSplit() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
processor.execute(ingestDocument);
assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
}

public void testSplitFieldNotFound() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
String fieldName = RandomDocumentPicks.randomFieldName(random());
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
try {
processor.execute(ingestDocument);
fail("split processor should have failed");
Expand All @@ -59,7 +59,7 @@ public void testSplitFieldNotFound() throws Exception {
public void testSplitNullValue() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(),
Collections.singletonMap("field", null));
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, "field");
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, false, "field");
try {
processor.execute(ingestDocument);
fail("split processor should have failed");
Expand All @@ -73,15 +73,15 @@ public void testSplitNullValueWithIgnoreMissing() throws Exception {
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
Collections.singletonMap(fieldName, null));
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, fieldName);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, false, fieldName);
processor.execute(ingestDocument);
assertIngestDocument(originalIngestDocument, ingestDocument);
}

public void testSplitNonExistentWithIgnoreMissing() throws Exception {
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, "field");
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, false, "field");
processor.execute(ingestDocument);
assertIngestDocument(originalIngestDocument, ingestDocument);
}
Expand All @@ -90,7 +90,7 @@ public void testSplitNonStringValue() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
String fieldName = RandomDocumentPicks.randomFieldName(random());
ingestDocument.setFieldValue(fieldName, randomInt());
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
try {
processor.execute(ingestDocument);
fail("split processor should have failed");
Expand Down Expand Up @@ -121,8 +121,24 @@ public void testSplitWithTargetField() throws Exception {
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
String targetFieldName = fieldName + randomAlphaOfLength(5);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, targetFieldName);
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, targetFieldName);
processor.execute(ingestDocument);
assertThat(ingestDocument.getFieldValue(targetFieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
}

/**
 * With trailing-field preservation switched on, the two empty strings that follow
 * the last non-empty value are expected to appear in the split result.
 */
public void testSplitWithPreserveTrailing() throws Exception {
    String input = "foo|bar|baz||";
    List<String> expectedParts = Arrays.asList("foo", "bar", "baz", "", "");
    doTestSplitWithPreserveTrailing(true, input, expectedParts);
}

/**
 * With trailing-field preservation switched off, the empty strings after the last
 * non-empty value are expected to be absent from the split result.
 */
public void testSplitWithoutPreserveTrailing() throws Exception {
    String input = "foo|bar|baz||";
    List<String> expectedParts = Arrays.asList("foo", "bar", "baz");
    doTestSplitWithPreserveTrailing(false, input, expectedParts);
}

/**
 * Shared driver for the trailing-field tests: stores {@code fieldValue} in a random
 * document, splits it in place on a literal {@code |}, and compares the resulting list
 * with {@code expected}.
 *
 * @param preserveTrailing value passed to the processor's preserve-trailing flag
 * @param fieldValue       the string to split
 * @param expected         the list the field must hold after the processor has run
 */
private void doTestSplitWithPreserveTrailing(boolean preserveTrailing, String fieldValue, List<String> expected) throws Exception {
    IngestDocument doc = RandomDocumentPicks.randomIngestDocument(random());
    String sourceField = RandomDocumentPicks.addRandomField(random(), doc, fieldValue);
    String tag = randomAlphaOfLength(10);
    // Source and target are the same field, so the value is replaced in place.
    Processor splitter = new SplitProcessor(tag, sourceField, "\\|", false, preserveTrailing, sourceField);
    splitter.execute(doc);
    List<?> actual = doc.getFieldValue(sourceField, List.class);
    assertThat(actual, equalTo(expected));
}
}

0 comments on commit fcc18dc

Please sign in to comment.