Skip to content

Commit

Permalink
INGEST: Extend KV Processor (elastic#31789) (elastic#32232)
Browse files Browse the repository at this point in the history
* INGEST: Extend KV Processor (elastic#31789)

Added more capabilities supported by LS to the KV processor:
* Stripping of brackets and quotes from values (`include_brackets` in corresponding LS filter)
* Adding key prefixes
* Trimming specified chars from keys and values

Refactored the way the filter is configured to avoid conditionals during execution.
Refactored Tests a little to not have to add more redundant getters for new parameters.

Relates elastic#31786
* Add documentation
  • Loading branch information
original-brownbear committed Jul 21, 2018
1 parent f98ebb2 commit 66e1603
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 37 deletions.
4 changes: 4 additions & 0 deletions docs/reference/ingest/ingest-node.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -1732,6 +1732,10 @@ For example, if you have a log message which contains `ip=1.2.3.4 error=REFUSED`
| `include_keys` | no | `null` | List of keys to filter and insert into document. Defaults to including all keys
| `exclude_keys` | no | `null` | List of keys to exclude from document
| `ignore_missing` | no | `false` | If `true` and `field` does not exist or is `null`, the processor quietly exits without modifying the document
| `prefix` | no | `null` | Prefix to be added to extracted keys
| `trim_key` | no | `null` | String of characters to trim from extracted keys
| `trim_value` | no | `null` | String of characters to trim from extracted values
| `strip_brackets` | no | `false` | If `true`, strips a single leading or trailing bracket (`(`, `)`, `<`, `>`, `[`, `]`) or quote (`'`, `"`) from extracted values
|======


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,14 @@
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Pattern;

/**
* The KeyValueProcessor parses messages of the `key=value` variety and extracts each pair into a field named after the key and holding its value.
Expand All @@ -38,16 +41,20 @@ public final class KeyValueProcessor extends AbstractProcessor {

// Processor type name; also passed to ConfigurationUtils when the factory reads this processor's settings.
public static final String TYPE = "kv";

// Matches one opening bracket or quote -- ( [ < " ' -- at the very start of a value, or one closing
// bracket or quote -- ] ) > " ' -- at its end. The two alternatives are independent of each other, so a
// delimiter without a matching partner on the other side is matched as well.
private static final Pattern STRIP_BRACKETS = Pattern.compile("(^[\\(\\[<\"'])|([\\]\\)>\"']$)");

private final String field;
private final String fieldSplit;
private final String valueSplit;
private final Set<String> includeKeys;
private final Set<String> excludeKeys;
private final String targetField;
private final boolean ignoreMissing;
// Document-handling routine assembled once by buildExecution(...) in the constructor; execute() delegates to it.
private final Consumer<IngestDocument> execution;

KeyValueProcessor(String tag, String field, String fieldSplit, String valueSplit, Set<String> includeKeys,
Set<String> excludeKeys, String targetField, boolean ignoreMissing) {
Set<String> excludeKeys, String targetField, boolean ignoreMissing,
String trimKey, String trimValue, boolean stripBrackets, String prefix) {
super(tag);
this.field = field;
this.targetField = targetField;
Expand All @@ -56,6 +63,92 @@ public final class KeyValueProcessor extends AbstractProcessor {
this.includeKeys = includeKeys;
this.excludeKeys = excludeKeys;
this.ignoreMissing = ignoreMissing;
this.execution = buildExecution(
fieldSplit, valueSplit, field, includeKeys, excludeKeys, targetField, ignoreMissing, trimKey, trimValue,
stripBrackets, prefix
);
}

/**
 * Assembles the routine that extracts key/value pairs from a document.
 * <p>
 * Every configuration-dependent decision (key filtering, key prefixing, splitting, trimming and bracket
 * stripping) is resolved here into a small function object, so the returned consumer only chains those
 * functions and does not re-evaluate the configuration for each document.
 *
 * @param fieldSplit    regex separating the individual {@code key=value} pairs
 * @param valueSplit    regex separating a key from its value; only the first occurrence is used
 * @param field         name of the document field holding the text to parse
 * @param includeKeys   keys to keep, or {@code null} to keep all keys
 * @param excludeKeys   keys to drop, or {@code null} to drop none
 * @param targetField   field under which extracted keys are placed, or {@code null} for the document root
 * @param ignoreMissing whether a missing or {@code null} source field is skipped silently
 * @param trimKey       characters to trim from both ends of each key, or {@code null}
 * @param trimValue     characters to trim from both ends of each value, or {@code null}
 * @param stripBrackets whether {@code STRIP_BRACKETS} matches are removed from each value before trimming
 * @param prefix        text prepended to each extracted key, or {@code null}
 * @return consumer that performs the extraction on a document; it throws {@link IllegalArgumentException}
 *         when the source value is {@code null} and {@code ignoreMissing} is {@code false}, or when a part
 *         does not split into exactly a key and a value
 */
private static Consumer<IngestDocument> buildExecution(String fieldSplit, String valueSplit, String field,
                                                       Set<String> includeKeys, Set<String> excludeKeys,
                                                       String targetField, boolean ignoreMissing,
                                                       String trimKey, String trimValue, boolean stripBrackets,
                                                       String prefix) {
    // Decide once which of the four include/exclude combinations applies.
    final boolean hasIncludes = includeKeys != null;
    final boolean hasExcludes = excludeKeys != null;
    final Predicate<String> acceptKey;
    if (hasIncludes && hasExcludes) {
        acceptKey = candidate -> includeKeys.contains(candidate) && excludeKeys.contains(candidate) == false;
    } else if (hasIncludes) {
        acceptKey = includeKeys::contains;
    } else if (hasExcludes) {
        acceptKey = candidate -> excludeKeys.contains(candidate) == false;
    } else {
        acceptKey = candidate -> true;
    }

    // Full path prefix: optional target field followed by the optional key prefix.
    final String effectivePrefix = prefix == null ? "" : prefix;
    final String pathPrefix = targetField == null ? effectivePrefix : targetField + "." + effectivePrefix;
    final Function<String, String> toFieldPath;
    if (pathPrefix.isEmpty()) {
        toFieldPath = Function.identity();
    } else {
        toFieldPath = name -> pathPrefix + name;
    }

    final Function<String, String[]> splitIntoPairs = buildSplitter(fieldSplit, true);
    final Function<String, String[]> splitPair = buildSplitter(valueSplit, false);
    final Function<String, String> cleanKey = buildTrimmer(trimKey);
    final Function<String, String> unwrapValue;
    if (stripBrackets) {
        unwrapValue = raw -> STRIP_BRACKETS.matcher(raw).replaceAll("");
    } else {
        unwrapValue = Function.identity();
    }
    final Function<String, String> cleanValue = buildTrimmer(trimValue);

    return doc -> {
        final String source = doc.getFieldValue(field, String.class, ignoreMissing);
        if (source == null) {
            if (ignoreMissing == false) {
                throw new IllegalArgumentException("field [" + field + "] is null, cannot extract key-value pairs.");
            }
            return;
        }
        for (String pair : splitIntoPairs.apply(source)) {
            final String[] keyAndValue = splitPair.apply(pair);
            if (keyAndValue.length != 2) {
                throw new IllegalArgumentException("field [" + field + "] does not contain value_split [" + valueSplit + "]");
            }
            final String key = cleanKey.apply(keyAndValue[0]);
            if (acceptKey.test(key) == false) {
                continue;
            }
            // Brackets/quotes are stripped first, then the configured characters are trimmed.
            final String fieldPath = toFieldPath.apply(key);
            final String cleaned = cleanValue.apply(unwrapValue.apply(keyAndValue[1]));
            append(doc, fieldPath, cleaned);
        }
    };
}

/**
 * Builds a function that removes every leading and trailing occurrence of the given characters from a string.
 * <p>
 * Each character of {@code trim} is taken literally. Characters that carry a special meaning inside a regular
 * expression character class (such as {@code ]}, {@code ^}, {@code -} or a backslash) are therefore trimmed
 * like any other character instead of breaking or altering a pattern, and an empty {@code trim} is a no-op.
 *
 * @param trim the characters to strip from both ends; {@code null} or empty disables trimming
 * @return a function returning its input without leading and trailing {@code trim} characters
 */
private static Function<String, String> buildTrimmer(String trim) {
    if (trim == null || trim.isEmpty()) {
        return val -> val;
    }
    return val -> {
        int start = 0;
        int end = val.length();
        // Walk by code point so that characters outside the BMP are compared as a whole.
        while (start < end) {
            final int codePoint = val.codePointAt(start);
            if (trim.indexOf(codePoint) < 0) {
                break;
            }
            start += Character.charCount(codePoint);
        }
        while (end > start) {
            final int codePoint = val.codePointBefore(end);
            if (trim.indexOf(codePoint) < 0) {
                break;
            }
            end -= Character.charCount(codePoint);
        }
        return val.substring(start, end);
    };
}

/**
 * Creates the function used to split either the whole input into pairs or a single pair into key and value.
 * <p>
 * Separators of fewer than two characters, or of exactly two characters starting with a backslash, are handed
 * to {@link String#split(String, int)} on every call; any other separator is compiled into a {@link Pattern}
 * once, up front.
 *
 * @param split  the separator, interpreted as a regular expression
 * @param fields {@code true} to split without a limit (limit {@code 0}); {@code false} to split into at most
 *               two parts
 * @return a function splitting its argument around {@code split}
 */
private static Function<String, String[]> buildSplitter(String split, boolean fields) {
    final int maxParts = fields ? 0 : 2;
    final int separatorLength = split.length();
    final boolean delegateToStringSplit =
        separatorLength < 2 || (separatorLength == 2 && split.charAt(0) == '\\');
    if (delegateToStringSplit) {
        return input -> input.split(split, maxParts);
    }
    final Pattern compiled = Pattern.compile(split);
    return input -> compiled.split(input, maxParts);
}

String getField() {
Expand Down Expand Up @@ -86,7 +179,7 @@ boolean isIgnoreMissing() {
return ignoreMissing;
}

public void append(IngestDocument document, String targetField, String value) {
private static void append(IngestDocument document, String targetField, String value) {
if (document.hasField(targetField)) {
document.appendFieldValue(targetField, value);
} else {
Expand All @@ -96,27 +189,7 @@ public void append(IngestDocument document, String targetField, String value) {

@Override
public void execute(IngestDocument document) {
String oldVal = document.getFieldValue(field, String.class, ignoreMissing);

if (oldVal == null && ignoreMissing) {
return;
} else if (oldVal == null) {
throw new IllegalArgumentException("field [" + field + "] is null, cannot extract key-value pairs.");
}

String fieldPathPrefix = (targetField == null) ? "" : targetField + ".";
Arrays.stream(oldVal.split(fieldSplit))
.map((f) -> {
String[] kv = f.split(valueSplit, 2);
if (kv.length != 2) {
throw new IllegalArgumentException("field [" + field + "] does not contain value_split [" + valueSplit + "]");
}
return kv;
})
.filter((p) ->
(includeKeys == null || includeKeys.contains(p[0])) &&
(excludeKeys == null || excludeKeys.contains(p[0]) == false))
.forEach((p) -> append(document, fieldPathPrefix + p[0], p[1]));
execution.accept(document);
}

@Override
Expand All @@ -132,6 +205,11 @@ public KeyValueProcessor create(Map<String, Processor.Factory> registry, String
String targetField = ConfigurationUtils.readOptionalStringProperty(TYPE, processorTag, config, "target_field");
String fieldSplit = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field_split");
String valueSplit = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "value_split");
String trimKey = ConfigurationUtils.readOptionalStringProperty(TYPE, processorTag, config, "trim_key");
String trimValue = ConfigurationUtils.readOptionalStringProperty(TYPE, processorTag, config, "trim_value");
String prefix = ConfigurationUtils.readOptionalStringProperty(TYPE, processorTag, config, "prefix");
boolean stripBrackets =
ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "strip_brackets", false);
Set<String> includeKeys = null;
Set<String> excludeKeys = null;
List<String> includeKeysList = ConfigurationUtils.readOptionalList(TYPE, processorTag, config, "include_keys");
Expand All @@ -143,7 +221,10 @@ public KeyValueProcessor create(Map<String, Processor.Factory> registry, String
excludeKeys = Collections.unmodifiableSet(Sets.newHashSet(excludeKeysList));
}
boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
return new KeyValueProcessor(processorTag, field, fieldSplit, valueSplit, includeKeys, excludeKeys, targetField, ignoreMissing);
return new KeyValueProcessor(
processorTag, field, fieldSplit, valueSplit, includeKeys, excludeKeys, targetField, ignoreMissing,
trimKey, trimValue, stripBrackets, prefix
);
}
}
}
Loading

0 comments on commit 66e1603

Please sign in to comment.