Skip to content

Commit

Permalink
Add ECS schema for user-agent ingest processor (#37727)
Browse files Browse the repository at this point in the history
This switches the format of the user agent processor to use the schema from [ECS](https://github.com/elastic/ecs).
So rather than something like this:

```
{
  "patch" : "3538",
  "major" : "70",
  "minor" : "0",
  "os" : "Mac OS X 10.14.1",
  "os_minor" : "14",
  "os_major" : "10",
  "name" : "Chrome",
  "os_name" : "Mac OS X",
  "device" : "Other"
}
```

The structure is now like this:

```
{
  "name" : "Chrome",
  "original" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36",
  "os" : {
    "name" : "Mac OS X",
    "version" : "10.14.1",
    "full" : "Mac OS X 10.14.1"
  },
  "device" : "Other",
  "version" : "70.0.3538.102"
}
```

This new format can be enabled by setting `"ecs": true` in the processor
configuration, and will be the default for 7.0. Leaving `ecs` unset or set as
`false` is deprecated.

Resolves #37329

This PR is against the 6.x branch and will be forward-ported with the deprecated parts removed (I will open a subsequent PR for that).
  • Loading branch information
dakrone authored Jan 25, 2019
1 parent 1586cac commit 5dfe193
Show file tree
Hide file tree
Showing 6 changed files with 294 additions and 128 deletions.
18 changes: 10 additions & 8 deletions docs/reference/ingest/processors/user-agent.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The ingest-user-agent module ships by default with the regexes.yaml made availab
| `regex_file` | no | - | The name of the file in the `config/ingest-user-agent` directory containing the regular expressions for parsing the user agent string. Both the directory and the file have to be created before starting Elasticsearch. If not specified, ingest-user-agent will use the regexes.yaml from uap-core it ships with (see below).
| `properties` | no | [`name`, `major`, `minor`, `patch`, `build`, `os`, `os_name`, `os_major`, `os_minor`, `device`] | Controls what properties are added to `target_field`.
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
| `ecs` | no | `false` | Whether to return the output in Elastic Common Schema format. NOTE: ECS format will be the default in Elasticsearch 7.0 and non-ECS format is deprecated.
|======

Here is an example that adds the user agent details to the `user_agent` field based on the `agent` field:
Expand All @@ -31,7 +32,8 @@ PUT _ingest/pipeline/user_agent
"processors" : [
{
"user_agent" : {
"field" : "agent"
"field" : "agent",
"ecs" : true
}
}
]
Expand Down Expand Up @@ -60,13 +62,13 @@ Which returns
"agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
"user_agent": {
"name": "Chrome",
"major": "51",
"minor": "0",
"patch": "2704",
"os_name": "Mac OS X",
"os": "Mac OS X 10.10.5",
"os_major": "10",
"os_minor": "10",
"original": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
"version": "51.0.2704",
"os": {
"name": "Mac OS X",
"version": "10.10.5",
"full": "Mac OS X 10.10.5"
},
"device": "Other"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,19 @@

package org.elasticsearch.ingest.useragent;

import org.apache.logging.log4j.LogManager;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;
import org.elasticsearch.ingest.useragent.UserAgentParser.Details;
import org.elasticsearch.ingest.useragent.UserAgentParser.VersionedName;

import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
Expand All @@ -40,30 +44,34 @@

public class UserAgentProcessor extends AbstractProcessor {

private static final DeprecationLogger deprecationLogger = new DeprecationLogger(LogManager.getLogger(UserAgentProcessor.class));

public static final String TYPE = "user_agent";

private final String field;
private final String targetField;
private final Set<Property> properties;
private final UserAgentParser parser;
private final boolean ignoreMissing;
private final boolean useECS;

public UserAgentProcessor(String tag, String field, String targetField, UserAgentParser parser, Set<Property> properties,
boolean ignoreMissing) {
boolean ignoreMissing, boolean useECS) {
super(tag);
this.field = field;
this.targetField = targetField;
this.parser = parser;
this.properties = properties;
this.ignoreMissing = ignoreMissing;
this.useECS = useECS;
}

/**
 * Reports the {@code ignore_missing} setting this processor was built with.
 *
 * @return {@code true} if the processor quietly exits, leaving the document unmodified,
 *         when the source field does not exist
 */
boolean isIgnoreMissing() {
    return this.ignoreMissing;
}

@Override
public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
public IngestDocument execute(IngestDocument ingestDocument) {
String userAgent = ingestDocument.getFieldValue(field, String.class, ignoreMissing);

if (userAgent == null && ignoreMissing) {
Expand All @@ -75,71 +83,134 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
Details uaClient = parser.parse(userAgent);

Map<String, Object> uaDetails = new HashMap<>();
for (Property property : this.properties) {
switch (property) {
case NAME:
if (uaClient.userAgent != null && uaClient.userAgent.name != null) {
uaDetails.put("name", uaClient.userAgent.name);
}
else {
uaDetails.put("name", "Other");
}
break;
case MAJOR:
if (uaClient.userAgent != null && uaClient.userAgent.major != null) {
uaDetails.put("major", uaClient.userAgent.major);
}
break;
case MINOR:
if (uaClient.userAgent != null && uaClient.userAgent.minor != null) {
uaDetails.put("minor", uaClient.userAgent.minor);
}
break;
case PATCH:
if (uaClient.userAgent != null && uaClient.userAgent.patch != null) {
uaDetails.put("patch", uaClient.userAgent.patch);
}
break;
case BUILD:
if (uaClient.userAgent != null && uaClient.userAgent.build != null) {
uaDetails.put("build", uaClient.userAgent.build);
}
break;
case OS:
if (uaClient.operatingSystem != null) {
uaDetails.put("os", buildFullOSName(uaClient.operatingSystem));
}
else {
uaDetails.put("os", "Other");
}

break;
case OS_NAME:
if (uaClient.operatingSystem != null && uaClient.operatingSystem.name != null) {
uaDetails.put("os_name", uaClient.operatingSystem.name);
}
else {
uaDetails.put("os_name", "Other");
}
break;
case OS_MAJOR:
if (uaClient.operatingSystem != null && uaClient.operatingSystem.major != null) {
uaDetails.put("os_major", uaClient.operatingSystem.major);
}
break;
case OS_MINOR:
if (uaClient.operatingSystem != null && uaClient.operatingSystem.minor != null) {
uaDetails.put("os_minor", uaClient.operatingSystem.minor);
}
break;
case DEVICE:
if (uaClient.device != null && uaClient.device.name != null) {
uaDetails.put("device", uaClient.device.name);
}
else {
uaDetails.put("device", "Other");
}
break;
if (useECS) {
// Parse the user agent in the ECS (Elastic Common Schema) format
for (Property property : this.properties) {
switch (property) {
case ORIGINAL:
uaDetails.put("original", userAgent);
break;
case NAME:
if (uaClient.userAgent != null && uaClient.userAgent.name != null) {
uaDetails.put("name", uaClient.userAgent.name);
} else {
uaDetails.put("name", "Other");
}
break;
case VERSION:
StringBuilder version = new StringBuilder();
if (uaClient.userAgent != null && uaClient.userAgent.major != null) {
version.append(uaClient.userAgent.major);
if (uaClient.userAgent.minor != null) {
version.append(".").append(uaClient.userAgent.minor);
if (uaClient.userAgent.patch != null) {
version.append(".").append(uaClient.userAgent.patch);
if (uaClient.userAgent.build != null) {
version.append(".").append(uaClient.userAgent.build);
}
}
}
uaDetails.put("version", version.toString());
}
break;
case OS:
if (uaClient.operatingSystem != null) {
Map<String, String> osDetails = new HashMap<>(3);
if (uaClient.operatingSystem.name != null) {
osDetails.put("name", uaClient.operatingSystem.name);
StringBuilder sb = new StringBuilder();
if (uaClient.operatingSystem.major != null) {
sb.append(uaClient.operatingSystem.major);
if (uaClient.operatingSystem.minor != null) {
sb.append(".").append(uaClient.operatingSystem.minor);
if (uaClient.operatingSystem.patch != null) {
sb.append(".").append(uaClient.operatingSystem.patch);
if (uaClient.operatingSystem.build != null) {
sb.append(".").append(uaClient.operatingSystem.build);
}
}
}
osDetails.put("version", sb.toString());
osDetails.put("full", uaClient.operatingSystem.name + " " + sb.toString());
}
uaDetails.put("os", osDetails);
}
}
break;
case DEVICE:
if (uaClient.device != null && uaClient.device.name != null) {
uaDetails.put("device", uaClient.device.name);
} else {
uaDetails.put("device", "Other");
}
break;
}
}
} else {
// Deprecated format, removed in 7.0
for (Property property : this.properties) {
switch (property) {
case NAME:
if (uaClient.userAgent != null && uaClient.userAgent.name != null) {
uaDetails.put("name", uaClient.userAgent.name);
} else {
uaDetails.put("name", "Other");
}
break;
case MAJOR:
if (uaClient.userAgent != null && uaClient.userAgent.major != null) {
uaDetails.put("major", uaClient.userAgent.major);
}
break;
case MINOR:
if (uaClient.userAgent != null && uaClient.userAgent.minor != null) {
uaDetails.put("minor", uaClient.userAgent.minor);
}
break;
case PATCH:
if (uaClient.userAgent != null && uaClient.userAgent.patch != null) {
uaDetails.put("patch", uaClient.userAgent.patch);
}
break;
case BUILD:
if (uaClient.userAgent != null && uaClient.userAgent.build != null) {
uaDetails.put("build", uaClient.userAgent.build);
}
break;
case OS:
if (uaClient.operatingSystem != null) {
uaDetails.put("os", buildFullOSName(uaClient.operatingSystem));
} else {
uaDetails.put("os", "Other");
}

break;
case OS_NAME:
if (uaClient.operatingSystem != null && uaClient.operatingSystem.name != null) {
uaDetails.put("os_name", uaClient.operatingSystem.name);
} else {
uaDetails.put("os_name", "Other");
}
break;
case OS_MAJOR:
if (uaClient.operatingSystem != null && uaClient.operatingSystem.major != null) {
uaDetails.put("os_major", uaClient.operatingSystem.major);
}
break;
case OS_MINOR:
if (uaClient.operatingSystem != null && uaClient.operatingSystem.minor != null) {
uaDetails.put("os_minor", uaClient.operatingSystem.minor);
}
break;
case DEVICE:
if (uaClient.device != null && uaClient.device.name != null) {
uaDetails.put("device", uaClient.device.name);
} else {
uaDetails.put("device", "Other");
}
break;
}
}
}

Expand Down Expand Up @@ -199,6 +270,10 @@ UserAgentParser getUaParser() {
return parser;
}

/**
 * Reports the {@code ecs} setting this processor was built with.
 *
 * @return {@code true} if the user agent details are written in the ECS
 *         (Elastic Common Schema) layout, {@code false} for the deprecated flat layout
 */
public boolean isUseECS() {
    return this.useECS;
}

public static final class Factory implements Processor.Factory {

private final Map<String, UserAgentParser> userAgentParsers;
Expand All @@ -215,6 +290,7 @@ public UserAgentProcessor create(Map<String, Processor.Factory> factories, Strin
String regexFilename = readStringProperty(TYPE, processorTag, config, "regex_file", IngestUserAgentPlugin.DEFAULT_PARSER_NAME);
List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");
boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
boolean useECS = readBooleanProperty(TYPE, processorTag, config, "ecs", false);

UserAgentParser parser = userAgentParsers.get(regexFilename);
if (parser == null) {
Expand All @@ -236,17 +312,51 @@ public UserAgentProcessor create(Map<String, Processor.Factory> factories, Strin
properties = EnumSet.allOf(Property.class);
}

return new UserAgentProcessor(processorTag, field, targetField, parser, properties, ignoreMissing);
if (useECS == false) {
deprecationLogger.deprecated("setting [ecs] to false for non-common schema " +
"format is deprecated and will be removed in 7.0, set to true to use the non-deprecated format");
}

return new UserAgentProcessor(processorTag, field, targetField, parser, properties, ignoreMissing, useECS);
}
}

enum Property {

NAME, MAJOR, MINOR, PATCH, OS, OS_NAME, OS_MAJOR, OS_MINOR, DEVICE, BUILD;
NAME,
// Deprecated in 6.7 (superseded by VERSION), to be removed in 7.0
@Deprecated MAJOR,
@Deprecated MINOR,
@Deprecated PATCH,
OS,
// Deprecated in 6.7 (superseded by just using OS), to be removed in 7.0
@Deprecated OS_NAME,
@Deprecated OS_MAJOR,
@Deprecated OS_MINOR,
DEVICE,
@Deprecated BUILD, // Same deprecated as OS_* above
ORIGINAL,
VERSION;

/**
 * The enum constants annotated with {@link Deprecated}. Assigned exactly once by the
 * static initializer below, hence {@code final}.
 */
private static final Set<Property> DEPRECATED_PROPERTIES;

static {
    // Discover the deprecated constants reflectively so that the @Deprecated annotation
    // on each constant is the single source of truth; no separate list to keep in sync.
    Set<Property> deprecated = new HashSet<>();
    for (Field field : Property.class.getFields()) {
        if (field.isEnumConstant() && field.isAnnotationPresent(Deprecated.class)) {
            deprecated.add(valueOf(field.getName()));
        }
    }
    DEPRECATED_PROPERTIES = deprecated;
}

public static Property parseProperty(String propertyName) {
try {
return valueOf(propertyName.toUpperCase(Locale.ROOT));
Property value = valueOf(propertyName.toUpperCase(Locale.ROOT));
if (DEPRECATED_PROPERTIES.contains(value)) {
deprecationLogger.deprecated("the [{}] property is deprecated for the user-agent processor", propertyName);
}
return value;
}
catch (IllegalArgumentException e) {
throw new IllegalArgumentException("illegal property value [" + propertyName + "]. valid values are " +
Expand Down
Loading

0 comments on commit 5dfe193

Please sign in to comment.