From f4c82a640aed88485cbef6f474fbe8a72bc469a0 Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Tue, 29 Oct 2019 17:24:41 -0500 Subject: [PATCH] Add option to split processor for preserving trailing empty fields --- .../ingest/processors/split.asciidoc | 18 +++++++++++ .../ingest/common/SplitProcessor.java | 13 +++++--- .../common/SplitProcessorFactoryTests.java | 18 +++++++++++ .../ingest/common/SplitProcessorTests.java | 30 ++++++++++++++----- 4 files changed, 68 insertions(+), 11 deletions(-) diff --git a/docs/reference/ingest/processors/split.asciidoc b/docs/reference/ingest/processors/split.asciidoc index 7d1487b8ca047..60b82fdaa9441 100644 --- a/docs/reference/ingest/processors/split.asciidoc +++ b/docs/reference/ingest/processors/split.asciidoc @@ -11,6 +11,7 @@ Splits a field into an array using a separator character. Only works on string f | `separator` | yes | - | A regex which matches the separator, eg `,` or `\s+` | `target_field` | no | `field` | The field to assign the split value to, by default `field` is updated in-place | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document +| `preserve_trailing`| no | `false` | Preserves empty trailing fields, if any. include::common-options.asciidoc[] |====== @@ -25,3 +26,20 @@ include::common-options.asciidoc[] -------------------------------------------------- // NOTCONSOLE <1> Treat all consecutive whitespace characters as a single separator + +If the `preserve_trailing` option is enabled, any trailing empty fields in the input will be preserved. For example, +in the configuration below, a value of `A,,B,,` in the `my_field` property will be split into an array of five elements +`["A", "", "B", "", ""]` with two empty trailing fields. If the `preserve_trailing` property were not enabled, the two +empty trailing fields would be discarded resulting in the three-element array `["A", "", "B"]`. + +[source,js] +-------------------------------------------------- +{ + "split": { + "field": "my_field", + "separator": ",", + "preserve_trailing": true + } +} +-------------------------------------------------- +// NOTCONSOLE diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/SplitProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/SplitProcessor.java index 96a765b5ba7a3..e43c0d4c20fb5 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/SplitProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/SplitProcessor.java @@ -41,13 +41,15 @@ public final class SplitProcessor extends AbstractProcessor { private final String field; private final String separator; private final boolean ignoreMissing; + private final boolean preserveTrailing; private final String targetField; - SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, String targetField) { + SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, boolean preserveTrailing, String targetField) { super(tag); this.field = field; this.separator = separator; this.ignoreMissing = ignoreMissing; + this.preserveTrailing = preserveTrailing; this.targetField = targetField; } @@ -63,6 +65,8 @@ boolean isIgnoreMissing() { return ignoreMissing; } + boolean isPreserveTrailing() { return preserveTrailing; } + String getTargetField() { return targetField; } @@ -77,7 +81,7 @@ public IngestDocument execute(IngestDocument document) { throw new IllegalArgumentException("field [" + field + "] is null, cannot split."); } - String[] strings = oldVal.split(separator); + String[] strings = oldVal.split(separator, preserveTrailing ? -1 : 0); List splitList = new ArrayList<>(strings.length); Collections.addAll(splitList, strings); document.setFieldValue(targetField, splitList); @@ -95,9 +99,10 @@ public SplitProcessor create(Map registry, String pro Map config) throws Exception { String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field"); boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); + boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "preserve_trailing", false); String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", field); - return new SplitProcessor(processorTag, field, - ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator"), ignoreMissing, targetField); + String separator = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator"); + return new SplitProcessor(processorTag, field, separator, ignoreMissing, preserveTrailing, targetField); } } } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorFactoryTests.java index cb7857f6b459f..8850943c1367f 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorFactoryTests.java @@ -79,6 +79,24 @@ public void testCreateWithTargetField() throws Exception { assertThat(splitProcessor.getField(), equalTo("field1")); assertThat(splitProcessor.getSeparator(), equalTo("\\.")); assertFalse(splitProcessor.isIgnoreMissing()); + assertFalse(splitProcessor.isPreserveTrailing()); assertThat(splitProcessor.getTargetField(), equalTo("target")); } + + public void testCreateWithPreserveTrailing() throws Exception { + SplitProcessor.Factory factory = new SplitProcessor.Factory(); + Map config = new HashMap<>(); + config.put("field", "field1"); + config.put("separator", "\\."); + config.put("target_field", "target"); + config.put("preserve_trailing", true); + String processorTag = randomAlphaOfLength(10); + SplitProcessor splitProcessor = factory.create(null, processorTag, config); + assertThat(splitProcessor.getTag(), equalTo(processorTag)); + assertThat(splitProcessor.getField(), equalTo("field1")); + assertThat(splitProcessor.getSeparator(), equalTo("\\.")); + assertFalse(splitProcessor.isIgnoreMissing()); + assertThat(splitProcessor.getTargetField(), equalTo("target")); + } + } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorTests.java index a11df5a372664..a979c171ce8cf 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorTests.java @@ -39,7 +39,7 @@ public class SplitProcessorTests extends ESTestCase { public void testSplit() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1"); - Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName); + Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName); processor.execute(ingestDocument); assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1"))); } @@ -47,7 +47,7 @@ public void testSplit() throws Exception { public void testSplitFieldNotFound() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); String fieldName = RandomDocumentPicks.randomFieldName(random()); - Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName); + Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName); try { processor.execute(ingestDocument); fail("split processor should have failed"); @@ -59,7 +59,7 @@ public void testSplitFieldNotFound() throws Exception { public void testSplitNullValue() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.singletonMap("field", null)); - Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, "field"); + Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, false, "field"); try { processor.execute(ingestDocument); fail("split processor should have failed"); @@ -73,7 +73,7 @@ public void testSplitNullValueWithIgnoreMissing() throws Exception { IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.singletonMap(fieldName, null)); IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); - Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, fieldName); + Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, false, fieldName); processor.execute(ingestDocument); assertIngestDocument(originalIngestDocument, ingestDocument); } @@ -81,7 +81,7 @@ public void testSplitNullValueWithIgnoreMissing() throws Exception { public void testSplitNonExistentWithIgnoreMissing() throws Exception { IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap()); IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); - Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, "field"); + Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, false, "field"); processor.execute(ingestDocument); assertIngestDocument(originalIngestDocument, ingestDocument); } @@ -90,7 +90,7 @@ public void testSplitNonStringValue() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>()); String fieldName = RandomDocumentPicks.randomFieldName(random()); ingestDocument.setFieldValue(fieldName, randomInt()); - Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName); + Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName); try { processor.execute(ingestDocument); fail("split processor should have failed"); @@ -121,8 +121,24 @@ public void testSplitWithTargetField() throws Exception { IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1"); String targetFieldName = fieldName + randomAlphaOfLength(5); - Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, targetFieldName); + Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, targetFieldName); processor.execute(ingestDocument); assertThat(ingestDocument.getFieldValue(targetFieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1"))); } + + public void testSplitWithPreserveTrailing() throws Exception { + doTestSplitWithPreserveTrailing(true, "foo|bar|baz||", Arrays.asList("foo", "bar", "baz", "", "")); + } + + public void testSplitWithoutPreserveTrailing() throws Exception { + doTestSplitWithPreserveTrailing(false, "foo|bar|baz||", Arrays.asList("foo", "bar", "baz")); + } + + private void doTestSplitWithPreserveTrailing(boolean preserveTrailing, String fieldValue, List expected) throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, fieldValue); + Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\|", false, preserveTrailing, fieldName); + processor.execute(ingestDocument); + assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(expected)); + } }