From 865fad435e6855140236c4381450b56f1ee7bb74 Mon Sep 17 00:00:00 2001 From: DeDe Morton Date: Mon, 18 Nov 2019 11:37:43 -0800 Subject: [PATCH] [DOCS] Create separate topics for processor docs (#14410) (#14576) --- libbeat/docs/processors-list.asciidoc | 81 + libbeat/docs/processors-using.asciidoc | 1581 +---------------- libbeat/docs/shared-beats-attributes.asciidoc | 2 + .../actions/docs/add_fields.asciidoc | 37 + .../actions/docs/add_labels.asciidoc | 43 + .../processors/actions/docs/add_tags.asciidoc | 28 + .../actions/docs/decode_base64_field.asciidoc | 34 + .../actions/docs/decode_json_fields.asciidoc | 36 + .../docs/decompress_gzip_field.asciidoc | 34 + .../actions/docs/drop_event.asciidoc | 16 + .../actions/docs/drop_fields.asciidoc | 27 + .../actions/docs/include_fields.asciidoc | 24 + .../processors/actions/docs/rename.asciidoc | 42 + .../docs/add_cloud_metadata.asciidoc | 164 ++ .../docs/add_docker_metadata.asciidoc | 80 + .../docs/add_host_metadata.asciidoc | 74 + .../docs/add_kubernetes_metadata.asciidoc | 92 + .../add_locale/docs/add_locale.asciidoc | 30 + .../docs/add_observer_metadata.asciidoc | 73 + .../docs/add_process_metadata.asciidoc | 63 + .../communityid/docs/communityid.asciidoc | 47 + .../processors/convert/docs/convert.asciidoc | 45 + .../docs/decode_csv_fields.asciidoc | 44 + .../processors/dissect/docs/dissect.asciidoc | 30 + libbeat/processors/dns/docs/dns.asciidoc | 102 ++ .../extract_array/docs/extract_array.asciidoc | 42 + .../docs/registered_domain.asciidoc | 33 + .../processors/script/docs/script.asciidoc | 170 ++ .../timestamp/docs/timestamp.asciidoc | 67 + .../decode_cef/docs/decode_cef.asciidoc | 39 + 30 files changed, 1601 insertions(+), 1579 deletions(-) create mode 100644 libbeat/docs/processors-list.asciidoc create mode 100644 libbeat/processors/actions/docs/add_fields.asciidoc create mode 100644 libbeat/processors/actions/docs/add_labels.asciidoc create mode 100644 libbeat/processors/actions/docs/add_tags.asciidoc create mode 100644 libbeat/processors/actions/docs/decode_base64_field.asciidoc create mode 100644 libbeat/processors/actions/docs/decode_json_fields.asciidoc create mode 100644 libbeat/processors/actions/docs/decompress_gzip_field.asciidoc create mode 100644 libbeat/processors/actions/docs/drop_event.asciidoc create mode 100644 libbeat/processors/actions/docs/drop_fields.asciidoc create mode 100644 libbeat/processors/actions/docs/include_fields.asciidoc create mode 100644 libbeat/processors/actions/docs/rename.asciidoc create mode 100644 libbeat/processors/add_cloud_metadata/docs/add_cloud_metadata.asciidoc create mode 100644 libbeat/processors/add_docker_metadata/docs/add_docker_metadata.asciidoc create mode 100644 libbeat/processors/add_host_metadata/docs/add_host_metadata.asciidoc create mode 100644 libbeat/processors/add_kubernetes_metadata/docs/add_kubernetes_metadata.asciidoc create mode 100644 libbeat/processors/add_locale/docs/add_locale.asciidoc create mode 100644 libbeat/processors/add_observer_metadata/docs/add_observer_metadata.asciidoc create mode 100644 libbeat/processors/add_process_metadata/docs/add_process_metadata.asciidoc create mode 100644 libbeat/processors/communityid/docs/communityid.asciidoc create mode 100644 libbeat/processors/convert/docs/convert.asciidoc create mode 100644 libbeat/processors/decode_csv_fields/docs/decode_csv_fields.asciidoc create mode 100644 libbeat/processors/dissect/docs/dissect.asciidoc create mode 100644 libbeat/processors/dns/docs/dns.asciidoc create mode 100644 
libbeat/processors/extract_array/docs/extract_array.asciidoc create mode 100644 libbeat/processors/registered_domain/docs/registered_domain.asciidoc create mode 100644 libbeat/processors/script/docs/script.asciidoc create mode 100644 libbeat/processors/timestamp/docs/timestamp.asciidoc create mode 100644 x-pack/filebeat/processors/decode_cef/docs/decode_cef.asciidoc diff --git a/libbeat/docs/processors-list.asciidoc b/libbeat/docs/processors-list.asciidoc new file mode 100644 index 00000000000..24d0048d210 --- /dev/null +++ b/libbeat/docs/processors-list.asciidoc @@ -0,0 +1,81 @@ +// TODO: Create script that generates this file. Conditional coding needs to +// be preserved. + +//# tag::processors-list[] +* <<add-cloud-metadata>> +* <<add-docker-metadata>> +* <<add-fields>> +* <<add-host-metadata>> +* <<add-kubernetes-metadata>> +* <<add-labels>> +* <<add-locale>> +* <<add-observer-metadata>> +* <<add-process-metadata>> +* <<add-tags>> +* <<community-id>> +* <<convert>> +* <<decode-base64-field>> +ifdef::has_decode_cef_processor[] +* <<processor-decode-cef>> +endif::[] +ifdef::has_decode_csv_fields_processor[] +* <<decode-csv-fields>> +endif::[] +* <<decode-json-fields>> +* <<decompress-gzip-field>> +* <<dissect>> +* <<processor-dns>> +* <<drop-event>> +* <<drop-fields>> +* <<extract-array>> +* <<include-fields>> +* <<processor-registered-domain>> +* <<rename-fields>> +ifdef::has_script_processor[] +* <<processor-script>> +endif::[] +ifdef::has_timestamp_processor[] +* <<processor-timestamp>> +endif::[] +//# end::processors-list[] + +//# tag::processors-include[] +include::{libbeat-processors-dir}/add_cloud_metadata/docs/add_cloud_metadata.asciidoc[] +include::{libbeat-processors-dir}/add_docker_metadata/docs/add_docker_metadata.asciidoc[] +include::{libbeat-processors-dir}/actions/docs/add_fields.asciidoc[] +include::{libbeat-processors-dir}/add_host_metadata/docs/add_host_metadata.asciidoc[] +include::{libbeat-processors-dir}/add_kubernetes_metadata/docs/add_kubernetes_metadata.asciidoc[] +include::{libbeat-processors-dir}/actions/docs/add_labels.asciidoc[] +include::{libbeat-processors-dir}/add_locale/docs/add_locale.asciidoc[] +include::{libbeat-processors-dir}/add_observer_metadata/docs/add_observer_metadata.asciidoc[] +include::{libbeat-processors-dir}/add_process_metadata/docs/add_process_metadata.asciidoc[] +include::{libbeat-processors-dir}/actions/docs/add_tags.asciidoc[] +include::{libbeat-processors-dir}/communityid/docs/communityid.asciidoc[] +include::{libbeat-processors-dir}/convert/docs/convert.asciidoc[] +include::{libbeat-processors-dir}/actions/docs/decode_base64_field.asciidoc[] +ifdef::has_decode_cef_processor[] +include::{x-filebeat-processors-dir}/decode_cef/docs/decode_cef.asciidoc[] +endif::[] +ifdef::has_decode_csv_fields_processor[] +include::{libbeat-processors-dir}/decode_csv_fields/docs/decode_csv_fields.asciidoc[] +endif::[] +include::{libbeat-processors-dir}/actions/docs/decode_json_fields.asciidoc[] +include::{libbeat-processors-dir}/actions/docs/decompress_gzip_field.asciidoc[] +include::{libbeat-processors-dir}/dissect/docs/dissect.asciidoc[] +include::{libbeat-processors-dir}/dns/docs/dns.asciidoc[] +include::{libbeat-processors-dir}/actions/docs/drop_event.asciidoc[] +include::{libbeat-processors-dir}/actions/docs/drop_fields.asciidoc[] +include::{libbeat-processors-dir}/extract_array/docs/extract_array.asciidoc[] +include::{libbeat-processors-dir}/actions/docs/include_fields.asciidoc[] +include::{libbeat-processors-dir}/registered_domain/docs/registered_domain.asciidoc[] +include::{libbeat-processors-dir}/actions/docs/rename.asciidoc[] +ifdef::has_script_processor[] +include::{libbeat-processors-dir}/script/docs/script.asciidoc[] +endif::[] +ifdef::has_timestamp_processor[] +include::{libbeat-processors-dir}/timestamp/docs/timestamp.asciidoc[] +endif::[] + +//# end::processors-include[] + + diff --git a/libbeat/docs/processors-using.asciidoc b/libbeat/docs/processors-using.asciidoc index
f0b886e5c58..87dd305aabb 100644 --- a/libbeat/docs/processors-using.asciidoc +++ b/libbeat/docs/processors-using.asciidoc @@ -194,41 +194,7 @@ endif::[] The supported processors are: - * <> - * <> - * <> - * <> - * <> - * <> - * <> - * <> - * <> - * <> - * <> - * <> - * <> -ifdef::has_decode_cef_processor[] -* <> -endif::[] -ifdef::has_decode_csv_fields_processor[] - * <> -endif::[] - * <> - * <> - * <> - * <> - * <> - * <> - * <> - * <> - * <> - * <> -ifdef::has_script_processor[] - * <> -endif::[] -ifdef::has_timestamp_processor[] - * <> -endif::[] +include::processors-list.asciidoc[tag=processors-list] [[conditions]] ==== Conditions @@ -501,1547 +467,4 @@ not: status: OK ------ -[[add-cloud-metadata]] -=== Add cloud metadata - -The `add_cloud_metadata` processor enriches each event with instance metadata -from the machine's hosting provider. At startup it will query a list of hosting -providers and cache the instance metadata. - -The following cloud providers are supported: - -- Amazon Web Services (AWS) -- Digital Ocean -- Google Compute Engine (GCE) -- https://www.qcloud.com/?lang=en[Tencent Cloud] (QCloud) -- Alibaba Cloud (ECS) -- Azure Virtual Machine -- Openstack Nova - -The Alibaba Cloud and Tencent cloud providers are disabled by default, because -they require to access a remote host. The `providers` setting allows users to -select a list of default providers to query. - -The simple configuration below enables the processor. - -[source,yaml] -------------------------------------------------------------------------------- -processors: -- add_cloud_metadata: ~ -------------------------------------------------------------------------------- - -The `add_cloud_metadata` processor has three optional configuration settings. -The first one is `timeout` which specifies the maximum amount of time to wait -for a successful response when detecting the hosting provider. The default -timeout value is `3s`. - -If a timeout occurs then no instance metadata will be added to the events. This -makes it possible to enable this processor for all your deployments (in the -cloud or on-premise). - -The second optional setting is `providers`. The `providers` settings accepts a -list of cloud provider names to be used. If `providers` is not configured, then -all providers that do not access a remote endpoint are enabled by default. - -List of names the `providers` setting supports: - -- "alibaba", or "ecs" for the Alibaba Cloud provider (disabled by default). -- "azure" for Azure Virtual Machine (enabled by default). -- "digitalocean" for Digital Ocean (enabled by default). -- "aws", or "ec2" for Amazon Web Services (enabled by default). -- "gcp" for Google Copmute Enging (enabled by default). -- "openstack", or "nova" for Openstack Nova (enabled by default). -- "tencent", or "qcloud" for Tencent Cloud (disabled by default). - -The third optional configuration setting is `overwrite`. When `overwrite` is -`true`, `add_cloud_metadata` overwrites existing `cloud.*` fields (`false` by -default). - -The metadata that is added to events varies by hosting provider. Below are -examples for each of the supported providers. 
- -_AWS_ - -[source,json] -------------------------------------------------------------------------------- -{ - "cloud": { - "account.id": "123456789012", - "availability_zone": "us-east-1c", - "instance.id": "i-4e123456", - "machine.type": "t2.medium", - "image.id": "ami-abcd1234", - "provider": "aws", - "region": "us-east-1" - } -} -------------------------------------------------------------------------------- - -_Digital Ocean_ - -[source,json] -------------------------------------------------------------------------------- -{ - "cloud": { - "instance.id": "1234567", - "provider": "digitalocean", - "region": "nyc2" - } -} -------------------------------------------------------------------------------- - -_GCP_ - -[source,json] -------------------------------------------------------------------------------- -{ - "cloud": { - "availability_zone": "us-east1-b", - "instance.id": "1234556778987654321", - "machine.type": "f1-micro", - "project.id": "my-dev", - "provider": "gcp" - } -} -------------------------------------------------------------------------------- - -_Tencent Cloud_ - -[source,json] -------------------------------------------------------------------------------- -{ - "cloud": { - "availability_zone": "gz-azone2", - "instance.id": "ins-qcloudv5", - "provider": "qcloud", - "region": "china-south-gz" - } -} -------------------------------------------------------------------------------- - -_Alibaba Cloud_ - -This metadata is only available when VPC is selected as the network type of the -ECS instance. - -[source,json] -------------------------------------------------------------------------------- -{ - "cloud": { - "availability_zone": "cn-shenzhen", - "instance.id": "i-wz9g2hqiikg0aliyun2b", - "provider": "ecs", - "region": "cn-shenzhen-a" - } -} -------------------------------------------------------------------------------- - -_Azure Virtual Machine_ - -[source,json] -------------------------------------------------------------------------------- -{ - "cloud": { - "provider": "az", - "instance.id": "04ab04c3-63de-4709-a9f9-9ab8c0411d5e", - "instance.name": "test-az-vm", - "machine.type": "Standard_D3_v2", - "region": "eastus2" - } -} -------------------------------------------------------------------------------- - -_Openstack Nova_ - -[source,json] -------------------------------------------------------------------------------- -{ - "cloud": { - "instance.name": "test-998d932195.mycloud.tld", - "instance.id": "i-00011a84", - "availability_zone": "xxxx-az-c", - "provider": "openstack", - "machine.type": "m2.large" - } -} -------------------------------------------------------------------------------- - -[[add-fields]] -=== Add fields - -The `add_fields` processor adds additional fields to the event. Fields can be -scalar values, arrays, dictionaries, or any nested combination of these. By -default the fields that you specify will be grouped under the `fields` -sub-dictionary in the event. To group the fields under a different -sub-dictionary, use the `target` setting. To store the fields as -top-level fields, set `target: ''`. - -`target`:: (Optional) Sub-dictionary to put all fields into. Defaults to `fields`. -`fields`:: Fields to be added. 
- - -For example, this configuration: - -[source,yaml] ------------------------------------------------------------------------------- -processors: -- add_fields: - target: project - fields: - name: myproject - id: '574734885120952459' ------------------------------------------------------------------------------- - -Adds these fields to any event: - -[source,json] -------------------------------------------------------------------------------- -{ - "project": { - "name": "myproject", - "id": "574734885120952459" - } -} -------------------------------------------------------------------------------- - - -[[add-labels]] -=== Add labels - -The `add_labels` processors adds a set of key-value pairs to an event. -The processor will flatten nested configuration objects like arrays or -dictionaries into a fully qualified name by merging nested names with a `.`. -Array entries create numeric names starting with 0. Labels are always stored -under the Elastic Common Schema compliant `labels` sub-dictionary. - -`labels`:: dictionaries of labels to be added. - -For example, this configuration: - -[source,yaml] ------------------------------------------------------------------------------- -processors: -- add_labels: - labels: - number: 1 - with.dots: test - nested: - with.dots: nested - array: - - do - - re - - with.field: mi ------------------------------------------------------------------------------- - -Adds these fields to every event: - -[source,json] -------------------------------------------------------------------------------- -{ - "labels": { - "number": 1, - "with.dots": "test", - "nested.with.dots": "nested", - "array.0": "do", - "array.1": "re", - "array.2.with.field": "mi" - } -} -------------------------------------------------------------------------------- - - -[[add-locale]] -=== Add the local time zone - -The `add_locale` processor enriches each event with the machine's time zone -offset from UTC or with the name of the time zone. It supports one configuration -option named `format` that controls whether an offset or time zone abbreviation -is added to the event. The default format is `offset`. The processor adds the -a `event.timezone` value to each event. - -The configuration below enables the processor with the default settings. - -[source,yaml] -------------------------------------------------------------------------------- -processors: -- add_locale: ~ -------------------------------------------------------------------------------- - -This configuration enables the processor and configures it to add the time zone -abbreviation to events. - -[source,yaml] -------------------------------------------------------------------------------- -processors: -- add_locale: - format: abbreviation -------------------------------------------------------------------------------- - -NOTE: Please note that `add_locale` differentiates between daylight savings -time (DST) and regular time. For example `CEST` indicates DST and and `CET` is -regular time. - -[[add-tags]] -=== Add tags - -The `add_tags` processor adds tags to a list of tags. If the target field already exists, -the tags are appended to the existing list of tags. - -`tags`:: List of tags to add. -`target`:: (Optional) Field the tags will be added to. Defaults to `tags`. 
- -For example, this configuration: - - -[source,yaml] ------------------------------------------------------------------------------- -processors: -- add_tags: - tags: [web, production] - target: "environment" ------------------------------------------------------------------------------- - -Adds the environment field to every event: - -[source,json] -------------------------------------------------------------------------------- -{ - "environment": ["web", "production"] -} -------------------------------------------------------------------------------- - -ifdef::has_decode_cef_processor[] -[[processor-decode-cef]] -[role="xpack"] -=== Decode CEF - -beta[] - -The `decode_cef` processor decodes Common Event Format (CEF) messages. This -processor is available in Filebeat. - -Below is an example configuration that decodes the `message` field as CEF after -renaming it to `event.original`. It is best to rename `message` to -`event.original` because the decoded CEF data contains its own `message` field. - -[source,yaml] ----- -processors: -- rename: - fields: - - {from: "message", to: "event.original"} -- decode_cef: - field: event.original ----- - -The `decode_cef` processor has the following configuration settings. - -.Decode CEF options -[options="header"] -|====== -| `field` | no | message | Source field containing the CEF message to be parsed. | -| `target_field` | no | cef | Target field where the parsed CEF object will be written. | -| `ecs` | no | true | Generate Elastic Common Schema (ECS) fields from the CEF data. - Certain CEF header and extension values will be used to populate ECS fields. | -| `ignore_missing` | no | false | Ignore errors when the source field is missing. | -| `ignore_failure` | no | false | Ignore failures when the source field does not contain a CEF message. | -| `id` | no | | An identifier for this processor instance. Useful for debugging. | -|====== -endif::[] - -ifdef::has_decode_csv_fields_processor[] -[[decode-csv-fields]] -=== Decode CSV fields - -experimental[] - -The `decode_csv_fields` processor decodes fields containing records in -comma-separated format (CSV). It will output the values as an array of strings. -This processor is available for Filebeat and Journalbeat. - -[source,yaml] ------------------------------------------------------ -processors: - - decode_csv_fields: - fields: - message: decoded.csv - separator: , - ignore_missing: false - overwrite_keys: true - trim_leading_whitespace: false - fail_on_error: true ------------------------------------------------------ - -The `decode_csv_fields` has the following settings: - -`fields`:: This is a mapping from the source field containing the CSV data to - the destination field to which the decoded array will be written. -`separator`:: (Optional) Character to be used as a column separator. - The default is the comma character. For using a TAB character you - must set it to "\t". -`ignore_missing`:: (Optional) Whether to ignore events which lack the source - field. The default is `false`, which will fail processing of - an event if a field is missing. -`overwrite_keys`:: Whether the target field is overwritten if it - already exists. The default is false, which will fail - processing of an event when `target` already exists. -`trim_leading_space`:: Whether extra space after the separator is trimmed from - values. This works even if the separator is also a space. - The default is `false`. 
-`fail_on_error`:: (Optional) If set to true, in case of an error the changes to -the event are reverted, and the original event is returned. If set to `false`, -processing continues also if an error happens. Default is `true`. - -endif::[] - -[[decode-json-fields]] -=== Decode JSON fields - -The `decode_json_fields` processor decodes fields containing JSON strings and -replaces the strings with valid JSON objects. - -[source,yaml] ------------------------------------------------------ -processors: - - decode_json_fields: - fields: ["field1", "field2", ...] - process_array: false - max_depth: 1 - target: "" - overwrite_keys: false - add_error_key: true ------------------------------------------------------ - -The `decode_json_fields` processor has the following configuration settings: - -`fields`:: The fields containing JSON strings to decode. -`process_array`:: (Optional) A boolean that specifies whether to process -arrays. The default is false. -`max_depth`:: (Optional) The maximum parsing depth. The default is 1. -`target`:: (Optional) The field under which the decoded JSON will be written. By -default the decoded JSON object replaces the string field from which it was -read. To merge the decoded JSON fields into the root of the event, specify -`target` with an empty string (`target: ""`). Note that the `null` value (`target:`) -is treated as if the field was not set at all. -`overwrite_keys`:: (Optional) A boolean that specifies whether keys that already -exist in the event are overwritten by keys from the decoded JSON object. The -default value is false. -`add_error_key`:: (Optional) If it set to true, in case of error while decoding json keys -`error` field is going to be part of event with error message. If it set to false, there -will not be any error in event's field. Even error occurs while decoding json keys. The -default value is false - - -[[decode-base64-field]] -=== Decode Base64 fields - -The `decode_base64_field` processor specifies a field to base64 decode. -The `field` key contains a `from: old-key` and a `to: new-key` pair. `from` is -the origin and `to` the target name of the field. - -To overwrite fields either first rename the target field or use the `drop_fields` -processor to drop the field and then rename the field. - -[source,yaml] -------- -processors: -- decode_base64_field: - field: - from: "field1" - to: "field2" - ignore_missing: false - fail_on_error: true -------- - -In the example above: - - field1 is decoded in field2 - -The `decode_base64_field` processor has the following configuration settings: - -`ignore_missing`:: (Optional) If set to true, no error is logged in case a key -which should be base64 decoded is missing. Default is `false`. - -`fail_on_error`:: (Optional) If set to true, in case of an error the base6 4decode -of fields is stopped and the original event is returned. If set to false, decoding -continues also if an error happened during decoding. Default is `true`. - -See <> for a list of supported conditions. - -[[decompress-gzip-field]] -=== Decompress gzip fields - -The `decompress_gzip_field` processor specifies a field to gzip decompress. -The `field` key contains a `from: old-key` and a `to: new-key` pair. `from` is -the origin and `to` the target name of the field. - -To overwrite fields either first rename the target field or use the `drop_fields` -processor to drop the field and then rename the field. 
- -[source,yaml] -------- -processors: -- decompress_gzip_field: - field: - from: "field1" - to: "field2" - ignore_missing: false - fail_on_error: true -------- - -In the example above: - - field1 is decoded in field2 - -The `decompress_gzip_field` processor has the following configuration settings: - -`ignore_missing`:: (Optional) If set to true, no error is logged in case a key -which should be base64 decoded is missing. Default is `false`. - -`fail_on_error`:: (Optional) If set to true, in case of an error the base64 decode -of fields is stopped and the original event is returned. If set to false, decoding -continues also if an error happened during decoding. Default is `true`. - -See <> for a list of supported conditions. - -[[community-id]] -=== Community ID Network Flow Hash - -The `community_id` processor computes a network flow hash according to the -https://github.com/corelight/community-id-spec[Community ID Flow Hash -specification]. - -The flow hash is useful for correlating all network events related to a -single flow. For example you can filter on a community ID value and you might -get back the Netflow records from multiple collectors and layer 7 protocol -records from Packetbeat. - -By default the processor is configured to read the flow parameters from the -appropriate Elastic Common Schema (ECS) fields. If you are processing ECS data -then no parameters are required. - -[source,yaml] ----- -processors: - - community_id: ----- - -If the data does not conform to ECS then you can customize the field names -that the processor reads from. You can also change the `target` field which -is where the computed hash is written to. - -[source,yaml] ----- -processors: - - community_id: - fields: - source_ip: my_source_ip - source_port: my_source_port - destination_ip: my_dest_ip - destination_port: my_dest_port - iana_number: my_iana_number - transport: my_transport - icmp_type: my_icmp_type - icmp_code: my_icmp_code - target: network.community_id ----- - -If the necessary fields are not present in the event then the processor will -silently continue without adding the target field. - -The processor also accepts an optional `seed` parameter that must be a 16-bit -unsigned integer. This value gets incorporated into all generated hashes. - -[[convert]] -=== Convert - -The `convert` processor converts a field in the event to a different type, such -as converting a string to an integer. - -The supported types include: `integer`, `long`, `float`, `double`, `string`, -`boolean`, and `ip`. - -The `ip` type is effectively an alias for `string`, but with an added validation -that the value is an IPv4 or IPv6 address. - -[source,yaml] ----- -processors: - - convert: - fields: - - {from: "src_ip", to: "source.ip", type: "ip"} - - {from: "src_port", to: "source.port", type: "integer"} - ignore_missing: true - fail_on_error: false ----- - -The `convert` processor has the following configuration settings: - -`fields`:: (Required) This is the list of fields to convert. At least one item -must be contained in the list. Each item in the list must have a `from` key that -specifies the source field. The `to` key is optional and specifies where to -assign the converted value. If `to` is omitted then the `from` field is updated -in-place. The `type` key specifies the data type to convert the value to. If -`type` is omitted then the processor copies or renames the field without any -type conversion. 
- -`ignore_missing`:: (Optional) If `true` the processor continues to the next -field when the `from` key is not found in the event. If false then the processor -returns an error and does not process the remaining fields. Default is `false`. - -`fail_on_error`:: (Optional) If false type conversion failures are ignored and -the processor continues to the next field. Default is `true`. - -`tag`:: (Optional) An identifier for this processor. Useful for debugging. - -`mode`:: (Optional) When both `from` and `to` are defined for a field then -`mode` controls whether to `copy` or `rename` the field when the type conversion -is successful. Default is `copy`. - -[[drop-event]] -=== Drop events - -The `drop_event` processor drops the entire event if the associated condition -is fulfilled. The condition is mandatory, because without one, all the events -are dropped. - -[source,yaml] ------- -processors: - - drop_event: - when: - condition ------- - -See <> for a list of supported conditions. - -[[drop-fields]] -=== Drop fields from events - -The `drop_fields` processor specifies which fields to drop if a certain -condition is fulfilled. The condition is optional. If it's missing, the -specified fields are always dropped. The `@timestamp` and `type` fields cannot -be dropped, even if they show up in the `drop_fields` list. - -[source,yaml] ------------------------------------------------------ -processors: - - drop_fields: - when: - condition - fields: ["field1", "field2", ...] - ignore_missing: false ------------------------------------------------------ - -See <> for a list of supported conditions. - -NOTE: If you define an empty list of fields under `drop_fields`, then no fields -are dropped. - -The `drop_fields` processor has the following configuration settings: - -`ignore_missing`:: (Optional) If `true` the processor will not return an error -when a specified field does not exist. Defaults to `false`. - -[[extract-array]] -=== Extract array - -experimental[] - -The `extract_array` processor populates fields with values read from an array -field. The following example will populate `source.ip` with the first element of -the `my_array` field, `destination.ip` with the second element, and -`network.transport` with the third. - -[source,yaml] ------------------------------------------------------ -processors: - - extract_array: - field: my_array - mappings: - source.ip: 0 - destination.ip: 1 - network.transport: 2 ------------------------------------------------------ - -The following settings are supported: - -`field`:: The array field whose elements are to be extracted. -`mappings`:: Maps each field name to an array index. Use 0 for the first element in - the array. Multiple fields can be mapped to the same array element. -`ignore_missing`:: (Optional) Whether to ignore events where the array field is - missing. The default is `false`, which will fail processing - of an event if the specified field does not exist. Set it to - `true` to ignore this condition. -`overwrite_keys`:: Whether the target fields specified in the mapping are - overwritten if they already exist. The default is `false`, - which will fail processing if a target field already exists. -`fail_on_error`:: (Optional) If set to `true` and an error happens, changes to - the event are reverted, and the original event is returned. If - set to `false`, processing continues despite errors. - Default is `true`. -`omit_empty`:: (Optional) Whether empty values are extracted from the array. 
If - set to `true`, instead of the target field being set to an - empty value, it is left unset. The empty string (`""`), an - empty array (`[]`) or an empty object (`{}`) are considered - empty values. Default is `false`. - -[[include-fields]] -=== Keep fields from events - -The `include_fields` processor specifies which fields to export if a certain -condition is fulfilled. The condition is optional. If it's missing, the -specified fields are always exported. The `@timestamp` and `type` fields are -always exported, even if they are not defined in the `include_fields` list. - -[source,yaml] -------- -processors: - - include_fields: - when: - condition - fields: ["field1", "field2", ...] -------- - -See <> for a list of supported conditions. - -You can specify multiple `include_fields` processors under the `processors` -section. - -NOTE: If you define an empty list of fields under `include_fields`, then only -the required fields, `@timestamp` and `type`, are exported. - -[[processor-registered-domain]] -=== Registered Domain - -The `registered_domain` processor reads a field containing a hostname and then -writes the "registered domain" contained in the hostname to the target field. -For example, given `www.google.co.uk` the processor would output `google.co.uk`. -In other words the "registered domain" is the effective top-level domain -(`co.uk`) plus one level (`google`). - -This processor uses the Mozilla Public Suffix list to determine the value. - -[source,yaml] ----- -processors: -- registered_domain: - field: dns.question.name - target_field: dns.question.registered_domain - ignore_missing: true - ignore_failure: true ----- - -The `registered_domain` processor has the following configuration settings: - -.Registered Domain options -[options="header"] -|====== -| Name | Required | Default | Description | -| `field` | yes | | Source field containing a fully qualified domain name (FQDN). | -| `target_field` | yes | | Target field for the registered domain value. | -| `ignore_missing` | no | false | Ignore errors when the source field is missing. | -| `ignore_failure` | no | false | Ignore all errors produced by the processor. | -| `id` | no | | An identifier for this processor instance. Useful for debugging. | -|====== - -[[rename-fields]] -=== Rename fields from events - -The `rename` processor specifies a list of fields to rename. Under the `fields` -key each entry contains a `from: old-key` and a `to: new-key` pair. `from` is -the origin and `to` the target name of the field. - -Renaming fields can be useful in cases where field names cause conflicts. For -example if an event has two fields, `c` and `c.b`, that are both assigned scalar -values (e.g. `{"c": 1, "c.b": 2}`) this will result in an Elasticsearch error at -ingest time. This is because the value of a cannot simultaneously be a scalar -and an object. To prevent this rename_fields can be used to rename `c` to -`c.value`. - -Rename fields cannot be used to overwrite fields. To overwrite fields either -first rename the target field or use the `drop_fields` processor to drop the -field and then rename the field. - -[source,yaml] -------- -processors: -- rename: - fields: - - from: "a.g" - to: "e.d" - ignore_missing: false - fail_on_error: true -------- - -The `rename` processor has the following configuration settings: - -`ignore_missing`:: (Optional) If set to true, no error is logged in case a key -which should be renamed is missing. Default is `false`. 
- -`fail_on_error`:: (Optional) If set to true, in case of an error the renaming of -fields is stopped and the original event is returned. If set to false, renaming -continues also if an error happened during renaming. Default is `true`. - -See <> for a list of supported conditions. - -You can specify multiple `ignore_missing` processors under the `processors` -section. - -[[add-kubernetes-metadata]] -=== Add Kubernetes metadata - -The `add_kubernetes_metadata` processor annotates each event with relevant -metadata based on which Kubernetes pod the event originated from. -At startup it detects an `in_cluster` environment and caches the -Kubernetes-related metadata. Events are only annotated if a valid configuration -is detected. If it's not able to detect a valid Kubernetes configuration, -the events are not annotated with Kubernetes-related metadata. - -Each event is annotated with: - -* Pod Name -* Pod UID -* Namespace -* Labels - -The `add_kubernetes_metadata` processor has two basic building blocks which are: - -* Indexers -* Matchers - -Indexers take in a pod's metadata and builds indices based on the pod metadata. -For example, the `ip_port` indexer can take a Kubernetes pod and index the pod -metadata based on all `pod_ip:container_port` combinations. - -Matchers are used to construct lookup keys for querying indices. For example, -when the `fields` matcher takes `["metricset.host"]` as a lookup field, it would -construct a lookup key with the value of the field `metricset.host`. - -Each Beat can define its own default indexers and matchers which are enabled by -default. For example, FileBeat enables the `container` indexer, which indexes -pod metadata based on all container IDs, and a `logs_path` matcher, which takes -the `log.file.path` field, extracts the container ID, and uses it to retrieve -metadata. - -The configuration below enables the processor when {beatname_lc} is run as a pod in -Kubernetes. - -[source,yaml] -------------------------------------------------------------------------------- -processors: -- add_kubernetes_metadata: -------------------------------------------------------------------------------- - -The configuration below enables the processor on a Beat running as a process on -the Kubernetes node. - -[source,yaml] -------------------------------------------------------------------------------- -processors: -- add_kubernetes_metadata: - host: - # If kube_config is not set, KUBECONFIG environment variable will be checked - # and if not present it will fall back to InCluster - kube_config: ${HOME}/.kube/config -------------------------------------------------------------------------------- - -The configuration below has the default indexers and matchers disabled and -enables ones that the user is interested in. 
- -[source,yaml] -------------------------------------------------------------------------------- -processors: -- add_kubernetes_metadata: - host: - # If kube_config is not set, KUBECONFIG environment variable will be checked - # and if not present it will fall back to InCluster - kube_config: ~/.kube/config - default_indexers.enabled: false - default_matchers.enabled: false - indexers: - - ip_port: - matchers: - - fields: - lookup_fields: ["metricset.host"] -------------------------------------------------------------------------------- - -The `add_kubernetes_metadata` processor has the following configuration settings: - -`host`:: (Optional) Specify the node to scope {beatname_lc} to in case it -cannot be accurately detected, as when running {beatname_lc} in host network -mode. -`namespace`:: (Optional) Select the namespace from which to collect the -metadata. If it is not set, the processor collects metadata from all namespaces. -It is unset by default. -`kube_config`:: (Optional) Use given config file as configuration for Kubernetes -client. It defaults to `KUBECONFIG` environment variable if present. -`default_indexers.enabled`:: (Optional) Enable/Disable default pod indexers, in -case you want to specify your own. -`default_matchers.enabled`:: (Optional) Enable/Disable default pod matchers, in -case you want to specify your own. - -[[add-docker-metadata]] -=== Add Docker metadata - -The `add_docker_metadata` processor annotates each event with relevant metadata -from Docker containers. At startup it detects a docker environment and caches the metadata. -The events are annotated with Docker metadata, only if a valid configuration -is detected and the processor is able to reach Docker API. - -Each event is annotated with: - -* Container ID -* Name -* Image -* Labels - -[NOTE] -===== -When running {beatname_uc} in a container, you need to provide access to -Docker’s unix socket in order for the `add_docker_metadata` processor to work. -You can do this by mounting the socket inside the container. For example: - -`docker run -v /var/run/docker.sock:/var/run/docker.sock ...` - -To avoid privilege issues, you may also need to add `--user=root` to the -`docker run` flags. Because the user must be part of the docker group in order -to access `/var/run/docker.sock`, root access is required if {beatname_uc} is -running as non-root inside the container. -===== - -[source,yaml] -------------------------------------------------------------------------------- -processors: -- add_docker_metadata: - host: "unix:///var/run/docker.sock" - #match_fields: ["system.process.cgroup.id"] - #match_pids: ["process.pid", "process.ppid"] - #match_source: true - #match_source_index: 4 - #match_short_id: true - #cleanup_timeout: 60 - #labels.dedot: false - # To connect to Docker over TLS you must specify a client and CA certificate. - #ssl: - # certificate_authority: "/etc/pki/root/ca.pem" - # certificate: "/etc/pki/client/cert.pem" - # key: "/etc/pki/client/cert.key" -------------------------------------------------------------------------------- - -It has the following settings: - -`host`:: (Optional) Docker socket (UNIX or TCP socket). It uses -`unix:///var/run/docker.sock` by default. - -`ssl`:: (Optional) SSL configuration to use when connecting to the Docker -socket. - -`match_fields`:: (Optional) A list of fields to match a container ID, at least -one of them should hold a container ID to get the event enriched. - -`match_pids`:: (Optional) A list of fields that contain process IDs. 
If the -process is running in Docker then the event will be enriched. The default value -is `["process.pid", "process.ppid"]`. - -`match_source`:: (Optional) Match container ID from a log path present in the -`log.file.path` field. Enabled by default. - -`match_short_id`:: (Optional) Match container short ID from a log path present -in the `log.file.path` field. Disabled by default. -This allows to match directories names that have the first 12 characters -of the container ID. For example, `/var/log/containers/b7e3460e2b21/*.log`. - -`match_source_index`:: (Optional) Index in the source path split by `/` to look -for container ID. It defaults to 4 to match -`/var/lib/docker/containers//*.log` - -`cleanup_timeout`:: (Optional) Time of inactivity to consider we can clean and -forget metadata for a container, 60s by default. - -`labels.dedot`:: (Optional) Default to be false. If set to true, replace dots in - labels with `_`. - -[[add-host-metadata]] -=== Add Host metadata - -[source,yaml] -------------------------------------------------------------------------------- -processors: -- add_host_metadata: - netinfo.enabled: false - cache.ttl: 5m - geo: - name: nyc-dc1-rack1 - location: 40.7128, -74.0060 - continent_name: North America - country_iso_code: US - region_name: New York - region_iso_code: NY - city_name: New York -------------------------------------------------------------------------------- - -It has the following settings: - -`netinfo.enabled`:: (Optional) Default false. Include IP addresses and MAC addresses as fields host.ip and host.mac - -`cache.ttl`:: (Optional) The processor uses an internal cache for the host metadata. This sets the cache expiration time. The default is 5m, negative values disable caching altogether. - -`geo.name`:: (Optional) User definable token to be used for identifying a discrete location. Frequently a datacenter, rack, or similar. - -`geo.location`:: (Optional) Longitude and latitude in comma separated format. - -`geo.continent_name`:: (Optional) Name of the continent. - -`geo.country_name`:: (Optional) Name of the country. - -`geo.region_name`:: (Optional) Name of the region. - -`geo.city_name`:: (Optional) Name of the city. - -`geo.country_iso_code`:: (Optional) ISO country code. - -`geo.region_iso_code`:: (Optional) ISO region code. - - -The `add_host_metadata` processor annotates each event with relevant metadata from the host machine. 
-The fields added to the event look like the following: - -[source,json] -------------------------------------------------------------------------------- -{ - "host":{ - "architecture":"x86_64", - "name":"example-host", - "id":"", - "os":{ - "family":"darwin", - "build":"16G1212", - "platform":"darwin", - "version":"10.12.6", - "kernel":"16.7.0", - "name":"Mac OS X" - }, - "ip": ["192.168.0.1", "10.0.0.1"], - "mac": ["00:25:96:12:34:56", "72:00:06:ff:79:f1"], - "geo": { - "continent_name": "North America", - "country_iso_code": "US", - "region_name": "New York", - "region_iso_code": "NY", - "city_name": "New York", - "name": "nyc-dc1-rack1", - "location": "40.7128, -74.0060" - } - } -} -------------------------------------------------------------------------------- - -[[add-observer-metadata]] -=== Add Observer metadata - -beta[] - -[source,yaml] -------------------------------------------------------------------------------- -processors: -- add_observer_metadata: - netinfo.enabled: false - cache.ttl: 5m - geo: - name: nyc-dc1-rack1 - location: 40.7128, -74.0060 - continent_name: North America - country_iso_code: US - region_name: New York - region_iso_code: NY - city_name: New York -------------------------------------------------------------------------------- - -It has the following settings: - -`netinfo.enabled`:: (Optional) Default false. Include IP addresses and MAC addresses as fields observer.ip and observer.mac - -`cache.ttl`:: (Optional) The processor uses an internal cache for the observer metadata. This sets the cache expiration time. The default is 5m, negative values disable caching altogether. - -`geo.name`:: (Optional) User definable token to be used for identifying a discrete location. Frequently a datacenter, rack, or similar. - -`geo.location`:: (Optional) Longitude and latitude in comma separated format. - -`geo.continent_name`:: (Optional) Name of the continent. - -`geo.country_name`:: (Optional) Name of the country. - -`geo.region_name`:: (Optional) Name of the region. - -`geo.city_name`:: (Optional) Name of the city. - -`geo.country_iso_code`:: (Optional) ISO country code. - -`geo.region_iso_code`:: (Optional) ISO region code. - - -The `add_geo_metadata` processor annotates each event with relevant metadata from the observer machine. -The fields added to the event look like the following: - -[source,json] -------------------------------------------------------------------------------- -{ - "observer" : { - "hostname" : "avce", - "type" : "heartbeat", - "vendor" : "elastic", - "ip" : [ - "192.168.1.251", - "fe80::64b2:c3ff:fe5b:b974", - ], - "mac" : [ - "dc:c1:02:6f:1b:ed", - ], - "geo": { - "continent_name": "North America", - "country_iso_code": "US", - "region_name": "New York", - "region_iso_code": "NY", - "city_name": "New York", - "name": "nyc-dc1-rack1", - "location": "40.7128, -74.0060" - } - } -} -------------------------------------------------------------------------------- - -[[dissect]] -=== Dissect strings - -The dissect processor tokenizes incoming strings using defined patterns. - -[source,yaml] -------- -processors: -- dissect: - tokenizer: "%{key1} %{key2}" - field: "message" - target_prefix: "dissect" -------- - -The `dissect` processor has the following configuration settings: - -`field`:: (Optional) The event field to tokenize. Default is `message`. - -`target_prefix`:: (Optional) The name of the field where the values will be extracted. When an empty -string is defined, the processor will create the keys at the root of the event. 
Default is -`dissect`. When the target key already exists in the event, the processor won't replace it and log -an error; you need to either drop or rename the key before using dissect. - -For tokenization to be successful, all keys must be found and extracted, if one of them cannot be -found an error will be logged and no modification is done on the original event. - -NOTE: A key can contain any characters except reserved suffix or prefix modifiers: `/`,`&`, `+` -and `?`. - -See <> for a list of supported conditions. - -[[processor-dns]] -=== DNS Reverse Lookup - -The DNS processor performs reverse DNS lookups of IP addresses. It caches the -responses that it receives in accordance to the time-to-live (TTL) value -contained in the response. It also caches failures that occur during lookups. -Each instance of this processor maintains its own independent cache. - -The processor uses its own DNS resolver to send requests to nameservers and does -not use the operating system's resolver. It does not read any values contained -in `/etc/hosts`. - -This processor can significantly slow down your pipeline's throughput if you -have a high latency network or slow upstream nameserver. The cache will help -with performance, but if the addresses being resolved have a high cardinality -then the cache benefits will be diminished due to the high miss ratio. - -By way of example, if each DNS lookup takes 2 milliseconds, the maximum -throughput you can achieve is 500 events per second (1000 milliseconds / 2 -milliseconds). If you have a high cache hit ratio then your throughput can be -higher. - -This is a minimal configuration example that resolves the IP addresses contained -in two fields. - -[source,yaml] ----- -processors: -- dns: - type: reverse - fields: - source.ip: source.hostname - destination.ip: destination.hostname ----- - -Next is a configuration example showing all options. - -[source,yaml] ----- -processors: -- dns: - type: reverse - action: append - fields: - server.ip: server.hostname - client.ip: client.hostname - success_cache: - capacity.initial: 1000 - capacity.max: 10000 - failure_cache: - capacity.initial: 1000 - capacity.max: 10000 - ttl: 1m - nameservers: ['192.0.2.1', '203.0.113.1'] - timeout: 500ms - tag_on_failure: [_dns_reverse_lookup_failed] ----- - -The `dns` processor has the following configuration settings: - -`type`:: The type of DNS lookup to perform. The only supported type is -`reverse` which queries for a PTR record. - -`action`:: This defines the behavior of the processor when the target field -already exists in the event. The options are `append` (default) and `replace`. - -`fields`:: This is a mapping of source field names to target field names. The -value of the source field will be used in the DNS query and result will be -written to the target field. - -`success_cache.capacity.initial`:: The initial number of items that the success -cache will be allocated to hold. When initialized the processor will allocate -the memory for this number of items. Default value is `1000`. - -`success_cache.capacity.max`:: The maximum number of items that the success -cache can hold. When the maximum capacity is reached a random item is evicted. -Default value is `10000`. - -`failure_cache.capacity.initial`:: The initial number of items that the failure -cache will be allocated to hold. When initialized the processor will allocate -the memory for this number of items. Default value is `1000`. 
- -`failure_cache.capacity.max`:: The maximum number of items that the failure -cache can hold. When the maximum capacity is reached a random item is evicted. -Default value is `10000`. - -`failure_cache.ttl`:: The duration for which failures are cached. Valid time -units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Default value is `1m`. - -`nameservers`:: A list of nameservers to query. If there are multiple servers, -the resolver queries them in the order listed. If none are specified then it -will read the nameservers listed in `/etc/resolv.conf` once at initialization. -On Windows you must always supply at least one nameserver. - -`timeout`:: The duration after which a DNS query will timeout. This is timeout -for each DNS request so if you have 2 nameservers then the total timeout will be -2 times this value. Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", -"h". Default value is `500ms`. - -`tag_on_failure`:: A list of tags to add to the event when any lookup fails. The -tags are only added once even if multiple lookups fail. By default no tags are -added upon failure. - -[[add-process-metadata]] -=== Add process metadata - -The Add process metadata processor enriches events with information from running -processes, identified by their process ID (PID). - -[source,yaml] -------------------------------------------------------------------------------- -processors: -- add_process_metadata: - match_pids: [system.process.ppid] - target: system.process.parent -------------------------------------------------------------------------------- - -The fields added to the event look as follows: -[source,json] -------------------------------------------------------------------------------- -"process": { - "name": "systemd", - "title": "/usr/lib/systemd/systemd --switched-root --system --deserialize 22", - "exe": "/usr/lib/systemd/systemd", - "args": ["/usr/lib/systemd/systemd", "--switched-root", "--system", "--deserialize", "22"], - "pid": 1, - "ppid": 0, - "start_time": "2018-08-22T08:44:50.684Z", -} -------------------------------------------------------------------------------- - -Optionally, the process environment can be included, too: -[source,json] -------------------------------------------------------------------------------- - ... - "env": { - "HOME": "/", - "TERM": "linux", - "BOOT_IMAGE": "/boot/vmlinuz-4.11.8-300.fc26.x86_64", - "LANG": "en_US.UTF-8", - } - ... -------------------------------------------------------------------------------- -It has the following settings: - -`match_pids`:: List of fields to lookup for a PID. The processor will -search the list sequentially until the field is found in the current event, and -the PID lookup will be applied to the value of this field. - -`target`:: (Optional) Destination prefix where the `process` object will be -created. The default is the event's root. - -`include_fields`:: (Optional) List of fields to add. By default, the processor -will add all the available fields except `process.env`. - -`ignore_missing`:: (Optional) When set to `false`, events that don't contain any -of the fields in match_pids will be discarded and an error will be generated. By -default, this condition is ignored. - -`overwrite_keys`:: (Optional) By default, if a target field already exists, it -will not be overwritten and an error will be logged. If `overwrite_keys` is -set to `true`, this condition will be ignored. - -`restricted_fields`:: (Optional) By default, the `process.env` field is not -output, to avoid leaking sensitive data. 
If `restricted_fields` is `true`, the -field will be present in the output. - -ifdef::has_script_processor[] -[[processor-script]] -=== Script Processor - -experimental[] - -The script processor executes Javascript code to process an event. The processor -uses a pure Go implementation of ECMAScript 5.1 and has no external -dependencies. This can be useful in situations where one of the other processors -doesn't provide the functionality you need to filter events. - -The processor can be configured by embedding Javascript in your configuration -file or by pointing the processor at external file(s). - -[source,yaml] ----- -processors: -- script: - lang: javascript - id: my_filter - source: > - function process(event) { - event.Tag("js"); - } ----- - -This loads `filter.js` from disk. - -[source,yaml] ----- -processors: -- script: - lang: javascript - id: my_filter - file: ${path.config}/filter.js ----- - -Parameters can be passed to the script by adding `params` to the config. -This allows for a script to be made reusable. When using `params` the -code must define a `register(params)` function to receive the parameters. - -[source,yaml] ----- -processors: -- script: - lang: javascript - id: my_filter - params: - threshold: 15 - source: > - var params = {threshold: 42}; - function register(scriptParams) { - params = scriptParams; - } - function process(event) { - if (event.Get("severity") < params.threshold) { - event.Cancel(); - } - } ----- - -If the script defines a `test()` function it will be invoked when the processor -is loaded. Any exceptions thrown will cause the processor to fail to load. This -can be used to make assertions about the behavior of the script. - -[source,javascript] ----- -function process(event) { - if (event.Get("event.code") === 1102) { - event.Put("event.action", "cleared"); - } -} - -function test() { - var event = process(new Event({event: {code: 1102})); - if (event.Get("event.action") !== "cleared") { - throw "expected event.action === cleared"; - } -} ----- - -[float] -==== Configuration options - -The `script` processor has the following configuration settings: - -`lang`:: This field is required and its value must be `javascript`. - -`tag`:: This is an optional identifier that is added to log messages. If defined -it enables metrics logging for this instance of the processor. The metrics -include the number of exceptions and a histogram of the execution times for -the `process` function. - -`source`:: Inline Javascript source code. - -`file`:: Path to a script file to load. Relative paths are interpreted as -relative to the `path.config` directory. Globs are expanded. - -`files`:: List of script files to load. The scripts are concatenated together. -Relative paths are interpreted as relative to the `path.config` directory. -And globs are expanded. - -`params`:: A dictionary of parameters that are passed to the `register` of the -script. - -`tag_on_exception`:: Tag to add to events in case the Javascript code causes an -exception while processing an event. Defaults to `_js_exception`. - -`timeout`:: This sets an execution timeout for the `process` function. When -the `process` function takes longer than the `timeout` period the function -is interrupted. You can set this option to prevent a script from running for -too long (like preventing an infinite `while` loop). By default there is no -timeout. - -[float] -==== Event API - -The `Event` object passed to the `process` method has the following API. 
- -[frame="topbot",options="header"] -|=== -|Method |Description - -|`Get(string)` -|Get a value from the event (either a scalar or an object). If the key does not -exist `null` is returned. If no key is provided then an object containing all -fields is returned. - -*Example*: `var value = event.Get(key);` - -|`Put(string, value)` -|Put a value into the event. If the key was already set then the -previous value is returned. It throws an exception if the key cannot be set -because one of the intermediate values is not an object. - -*Example*: `var old = event.Put(key, value);` - -|`Rename(string, string)` -|Rename a key in the event. The target key must not exist. It -returns true if the source key was successfully renamed to the target key. - -*Example*: `var success = event.Rename("source", "target");` - -|`Delete(string)` -|Delete a field from the event. It returns true on success. - -*Example*: `var deleted = event.Delete("user.email");` - -|`Cancel()` -|Flag the event as cancelled which causes the processor to drop -event. - -*Example*: `event.Cancel(); return;` - -|`Tag(string)` -|Append a tag to the `tags` field if the tag does not already -exist. Throws an exception if `tags` exists and is not a string or a list of -strings. - -*Example*: `event.Tag("user_event");` - -|`AppendTo(string, string)` -|`AppendTo` is a specialized `Put` method that converts the existing value to an -array and appends the value if it does not already exist. If there is an -existing value that's not a string or array of strings then an exception is -thrown. - -*Example*: `event.AppendTo("error.message", "invalid file hash");` -|=== -endif::[] - -ifdef::has_timestamp_processor[] -[[processor-timestamp]] -=== Timestamp - -beta[] - -The `timestamp` processor parses a timestamp from a field. By default the -timestamp processor writes the parsed result to the `@timestamp` field. You can -specify a different field by setting the `target_field` parameter. The timestamp -value is parsed according to the `layouts` parameter. Multiple layouts can be -specified and they will be used sequentially to attempt parsing the timestamp -field. - -NOTE: The timestamp layouts used by this processor are different than the - formats supported by date processors in Logstash and Elasticsearch Ingest - Node. - -The `layouts` are described using a reference time that is based on this -specific time: - - Mon Jan 2 15:04:05 MST 2006 - -Since MST is GMT-0700, the reference time is: - - 01/02 03:04:05PM '06 -0700 - -To define your own layout, rewrite the reference time in a format that matches -the timestamps you expect to parse. For more layout examples and details see the -https://godoc.org/time#pkg-constants[Go time package documentation]. - -If a layout does not contain a year then the current year in the specified -`timezone` is added to the time value. - -.Timestamp options -[options="header"] -|====== -| Name | Required | Default | Description | -| `field` | yes | | Source field containing the time to be parsed. | -| `target_field` | no | @timestamp | Target field for the parsed time value. The target value is always written as UTC. | -| `layouts` | yes | | Timestamp layouts that define the expected time value format. In addition layouts, `UNIX` and `UNIX_MS` are accepted. | -| `timezone` | no | UTC | Timezone (e.g. America/New_York) to use when parsing a timestamp not containing a timezone. | -| `ignore_missing` | no | false | Ignore errors when the source field is missing. 
| -| `ignore_failure` | no | false | Ignore all errors produced by the processor. | -| `test` | no | | A list of timestamps that must parse successfully when loading the processor. | -| `id` | no | | An identifier for this processor instance. Useful for debugging. | -|====== - -Here is an example that parses the `start_time` field and writes the result -to the `@timestamp` field then deletes the `start_time` field. When the -processor is loaded it will immediately validate that the two `test` timestamps -parse with this configuration. - -[source,yaml] ----- -processors: -- timestamp: - field: start_time - layouts: - - '2006-01-02T15:04:05Z' - - '2006-01-02T15:04:05.999Z' - test: - - '2019-06-22T16:33:51Z' - - '2019-11-18T04:59:51.123Z' -- drop_fields: - fields: [start_time] ----- -endif::[] +include::processors-list.asciidoc[tag=processors-include] diff --git a/libbeat/docs/shared-beats-attributes.asciidoc b/libbeat/docs/shared-beats-attributes.asciidoc index 7588a0c70fd..ffba38f84b0 100644 --- a/libbeat/docs/shared-beats-attributes.asciidoc +++ b/libbeat/docs/shared-beats-attributes.asciidoc @@ -3,6 +3,8 @@ :dockerimage: docker.elastic.co/beats/{beatname_lc}:{version} :dockerconfig: https://raw.githubusercontent.com/elastic/beats/{branch}/deploy/docker/{beatname_lc}.docker.yml :downloads: https://artifacts.elastic.co/downloads/beats +:libbeat-processors-dir: {beats-root}/libbeat/processors +:x-filebeat-processors-dir: {beats-root}/x-pack/filebeat/processors :cm-ui: Central Management :libbeat-docs: Beats Platform Reference diff --git a/libbeat/processors/actions/docs/add_fields.asciidoc b/libbeat/processors/actions/docs/add_fields.asciidoc new file mode 100644 index 00000000000..a71ddd1b456 --- /dev/null +++ b/libbeat/processors/actions/docs/add_fields.asciidoc @@ -0,0 +1,37 @@ +[[add-fields]] +=== Add fields + +The `add_fields` processor adds additional fields to the event. Fields can be +scalar values, arrays, dictionaries, or any nested combination of these. By +default the fields that you specify will be grouped under the `fields` +sub-dictionary in the event. To group the fields under a different +sub-dictionary, use the `target` setting. To store the fields as +top-level fields, set `target: ''`. + +`target`:: (Optional) Sub-dictionary to put all fields into. Defaults to `fields`. +`fields`:: Fields to be added. + + +For example, this configuration: + +[source,yaml] +------------------------------------------------------------------------------ +processors: +- add_fields: + target: project + fields: + name: myproject + id: '574734885120952459' +------------------------------------------------------------------------------ + +Adds these fields to any event: + +[source,json] +------------------------------------------------------------------------------- +{ + "project": { + "name": "myproject", + "id": "574734885120952459" + } +} +------------------------------------------------------------------------------- diff --git a/libbeat/processors/actions/docs/add_labels.asciidoc b/libbeat/processors/actions/docs/add_labels.asciidoc new file mode 100644 index 00000000000..8a35066dcdd --- /dev/null +++ b/libbeat/processors/actions/docs/add_labels.asciidoc @@ -0,0 +1,43 @@ +[[add-labels]] +=== Add labels + +The `add_labels` processors adds a set of key-value pairs to an event. +The processor will flatten nested configuration objects like arrays or +dictionaries into a fully qualified name by merging nested names with a `.`. +Array entries create numeric names starting with 0. 
Labels are always stored +under the Elastic Common Schema compliant `labels` sub-dictionary. + +`labels`:: dictionaries of labels to be added. + +For example, this configuration: + +[source,yaml] +------------------------------------------------------------------------------ +processors: +- add_labels: + labels: + number: 1 + with.dots: test + nested: + with.dots: nested + array: + - do + - re + - with.field: mi +------------------------------------------------------------------------------ + +Adds these fields to every event: + +[source,json] +------------------------------------------------------------------------------- +{ + "labels": { + "number": 1, + "with.dots": "test", + "nested.with.dots": "nested", + "array.0": "do", + "array.1": "re", + "array.2.with.field": "mi" + } +} +------------------------------------------------------------------------------- diff --git a/libbeat/processors/actions/docs/add_tags.asciidoc b/libbeat/processors/actions/docs/add_tags.asciidoc new file mode 100644 index 00000000000..7aaec3ee9ee --- /dev/null +++ b/libbeat/processors/actions/docs/add_tags.asciidoc @@ -0,0 +1,28 @@ +[[add-tags]] +=== Add tags + +The `add_tags` processor adds tags to a list of tags. If the target field already exists, +the tags are appended to the existing list of tags. + +`tags`:: List of tags to add. +`target`:: (Optional) Field the tags will be added to. Defaults to `tags`. + +For example, this configuration: + + +[source,yaml] +------------------------------------------------------------------------------ +processors: +- add_tags: + tags: [web, production] + target: "environment" +------------------------------------------------------------------------------ + +Adds the environment field to every event: + +[source,json] +------------------------------------------------------------------------------- +{ + "environment": ["web", "production"] +} +------------------------------------------------------------------------------- diff --git a/libbeat/processors/actions/docs/decode_base64_field.asciidoc b/libbeat/processors/actions/docs/decode_base64_field.asciidoc new file mode 100644 index 00000000000..26399353c0d --- /dev/null +++ b/libbeat/processors/actions/docs/decode_base64_field.asciidoc @@ -0,0 +1,34 @@ +[[decode-base64-field]] +=== Decode Base64 fields + +The `decode_base64_field` processor specifies a field to base64 decode. +The `field` key contains a `from: old-key` and a `to: new-key` pair. `from` is +the origin and `to` the target name of the field. + +To overwrite fields either first rename the target field or use the `drop_fields` +processor to drop the field and then rename the field. + +[source,yaml] +------- +processors: +- decode_base64_field: + field: + from: "field1" + to: "field2" + ignore_missing: false + fail_on_error: true +------- + +In the example above: + - field1 is decoded in field2 + +The `decode_base64_field` processor has the following configuration settings: + +`ignore_missing`:: (Optional) If set to true, no error is logged in case a key +which should be base64 decoded is missing. Default is `false`. + +`fail_on_error`:: (Optional) If set to true, in case of an error the base64 decode +of fields is stopped and the original event is returned. If set to false, decoding +continues also if an error happened during decoding. Default is `true`. + +See <> for a list of supported conditions. 
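+
+As a hypothetical illustration (the field names follow the configuration above
+and the values are only examples), an event processed with that configuration
+would end up containing both the original and the decoded field:
+
+[source,json]
+-------
+{
+  "field1": "aGVsbG8gd29ybGQ=",
+  "field2": "hello world"
+}
+-------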
diff --git a/libbeat/processors/actions/docs/decode_json_fields.asciidoc b/libbeat/processors/actions/docs/decode_json_fields.asciidoc new file mode 100644 index 00000000000..fb86cf24fb0 --- /dev/null +++ b/libbeat/processors/actions/docs/decode_json_fields.asciidoc @@ -0,0 +1,36 @@ +[[decode-json-fields]] +=== Decode JSON fields + +The `decode_json_fields` processor decodes fields containing JSON strings and +replaces the strings with valid JSON objects. + +[source,yaml] +----------------------------------------------------- +processors: + - decode_json_fields: + fields: ["field1", "field2", ...] + process_array: false + max_depth: 1 + target: "" + overwrite_keys: false + add_error_key: true +----------------------------------------------------- + +The `decode_json_fields` processor has the following configuration settings: + +`fields`:: The fields containing JSON strings to decode. +`process_array`:: (Optional) A boolean that specifies whether to process +arrays. The default is false. +`max_depth`:: (Optional) The maximum parsing depth. The default is 1. +`target`:: (Optional) The field under which the decoded JSON will be written. By +default the decoded JSON object replaces the string field from which it was +read. To merge the decoded JSON fields into the root of the event, specify +`target` with an empty string (`target: ""`). Note that the `null` value (`target:`) +is treated as if the field was not set at all. +`overwrite_keys`:: (Optional) A boolean that specifies whether keys that already +exist in the event are overwritten by keys from the decoded JSON object. The +default value is false. +`add_error_key`:: (Optional) If set to true and an error occurs while decoding the JSON, +an `error` field containing the error message is added to the event. If set to false, no +error field is added to the event even if decoding fails. The +default value is false. diff --git a/libbeat/processors/actions/docs/decompress_gzip_field.asciidoc b/libbeat/processors/actions/docs/decompress_gzip_field.asciidoc new file mode 100644 index 00000000000..334f0fdb246 --- /dev/null +++ b/libbeat/processors/actions/docs/decompress_gzip_field.asciidoc @@ -0,0 +1,34 @@ +[[decompress-gzip-field]] +=== Decompress gzip fields + +The `decompress_gzip_field` processor specifies a field to gzip decompress. +The `field` key contains a `from: old-key` and a `to: new-key` pair. `from` is +the origin and `to` the target name of the field. + +To overwrite fields either first rename the target field or use the `drop_fields` +processor to drop the field and then rename the field. + +[source,yaml] +------- +processors: +- decompress_gzip_field: + field: + from: "field1" + to: "field2" + ignore_missing: false + fail_on_error: true +------- + +In the example above: + - field1 is decompressed into field2 + +The `decompress_gzip_field` processor has the following configuration settings: + +`ignore_missing`:: (Optional) If set to true, no error is logged in case a key +which should be decompressed is missing. Default is `false`. + +`fail_on_error`:: (Optional) If set to true, in case of an error the decompression +of fields is stopped and the original event is returned. If set to false, decompression +continues even if an error happens. Default is `true`. + +See <> for a list of supported conditions.
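+
+For example, the following sketch (the `content_encoding` field name is
+hypothetical) only decompresses the field when another field indicates that the
+content is gzip compressed:
+
+[source,yaml]
+-------
+processors:
+- decompress_gzip_field:
+    when:
+      equals:
+        content_encoding: "gzip"
+    field:
+      from: "field1"
+      to: "field2"
+    ignore_missing: false
+    fail_on_error: true
+-------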
diff --git a/libbeat/processors/actions/docs/drop_event.asciidoc b/libbeat/processors/actions/docs/drop_event.asciidoc new file mode 100644 index 00000000000..d5a1d2464d6 --- /dev/null +++ b/libbeat/processors/actions/docs/drop_event.asciidoc @@ -0,0 +1,16 @@ +[[drop-event]] +=== Drop events + +The `drop_event` processor drops the entire event if the associated condition +is fulfilled. The condition is mandatory, because without one, all the events +are dropped. + +[source,yaml] +------ +processors: + - drop_event: + when: + condition +------ + +See <> for a list of supported conditions. diff --git a/libbeat/processors/actions/docs/drop_fields.asciidoc b/libbeat/processors/actions/docs/drop_fields.asciidoc new file mode 100644 index 00000000000..73cb7422ec9 --- /dev/null +++ b/libbeat/processors/actions/docs/drop_fields.asciidoc @@ -0,0 +1,27 @@ +[[drop-fields]] +=== Drop fields from events + +The `drop_fields` processor specifies which fields to drop if a certain +condition is fulfilled. The condition is optional. If it's missing, the +specified fields are always dropped. The `@timestamp` and `type` fields cannot +be dropped, even if they show up in the `drop_fields` list. + +[source,yaml] +----------------------------------------------------- +processors: + - drop_fields: + when: + condition + fields: ["field1", "field2", ...] + ignore_missing: false +----------------------------------------------------- + +See <> for a list of supported conditions. + +NOTE: If you define an empty list of fields under `drop_fields`, then no fields +are dropped. + +The `drop_fields` processor has the following configuration settings: + +`ignore_missing`:: (Optional) If `true` the processor will not return an error +when a specified field does not exist. Defaults to `false`. diff --git a/libbeat/processors/actions/docs/include_fields.asciidoc b/libbeat/processors/actions/docs/include_fields.asciidoc new file mode 100644 index 00000000000..71cc2a327bf --- /dev/null +++ b/libbeat/processors/actions/docs/include_fields.asciidoc @@ -0,0 +1,24 @@ +[[include-fields]] +=== Keep fields from events + +The `include_fields` processor specifies which fields to export if a certain +condition is fulfilled. The condition is optional. If it's missing, the +specified fields are always exported. The `@timestamp` and `type` fields are +always exported, even if they are not defined in the `include_fields` list. + +[source,yaml] +------- +processors: + - include_fields: + when: + condition + fields: ["field1", "field2", ...] +------- + +See <> for a list of supported conditions. + +You can specify multiple `include_fields` processors under the `processors` +section. + +NOTE: If you define an empty list of fields under `include_fields`, then only +the required fields, `@timestamp` and `type`, are exported. diff --git a/libbeat/processors/actions/docs/rename.asciidoc b/libbeat/processors/actions/docs/rename.asciidoc new file mode 100644 index 00000000000..be4c9ccf199 --- /dev/null +++ b/libbeat/processors/actions/docs/rename.asciidoc @@ -0,0 +1,42 @@ +[[rename-fields]] +=== Rename fields from events + +The `rename` processor specifies a list of fields to rename. Under the `fields` +key each entry contains a `from: old-key` and a `to: new-key` pair. `from` is +the origin and `to` the target name of the field. + +Renaming fields can be useful in cases where field names cause conflicts. For +example if an event has two fields, `c` and `c.b`, that are both assigned scalar +values (e.g. 
`{"c": 1, "c.b": 2}`) this will result in an Elasticsearch error at +ingest time. This is because the value of `c` cannot simultaneously be a scalar +and an object. To prevent this, the `rename` processor can be used to rename `c` to +`c.value`. + +The `rename` processor cannot be used to overwrite fields. To overwrite fields either +first rename the target field or use the `drop_fields` processor to drop the +field and then rename the field. + +[source,yaml] +------- +processors: +- rename: + fields: + - from: "a.g" + to: "e.d" + ignore_missing: false + fail_on_error: true +------- + +The `rename` processor has the following configuration settings: + +`ignore_missing`:: (Optional) If set to true, no error is logged in case a key +which should be renamed is missing. Default is `false`. + +`fail_on_error`:: (Optional) If set to true, in case of an error the renaming of +fields is stopped and the original event is returned. If set to false, renaming +continues even if an error happens during renaming. Default is `true`. + +See <> for a list of supported conditions. + +You can specify multiple `rename` processors under the `processors` +section. diff --git a/libbeat/processors/add_cloud_metadata/docs/add_cloud_metadata.asciidoc b/libbeat/processors/add_cloud_metadata/docs/add_cloud_metadata.asciidoc new file mode 100644 index 00000000000..8e4e5249554 --- /dev/null +++ b/libbeat/processors/add_cloud_metadata/docs/add_cloud_metadata.asciidoc @@ -0,0 +1,164 @@ +[[add-cloud-metadata]] +=== Add cloud metadata + +The `add_cloud_metadata` processor enriches each event with instance metadata +from the machine's hosting provider. At startup it will query a list of hosting +providers and cache the instance metadata. + +The following cloud providers are supported: + +- Amazon Web Services (AWS) +- Digital Ocean +- Google Compute Engine (GCE) +- https://www.qcloud.com/?lang=en[Tencent Cloud] (QCloud) +- Alibaba Cloud (ECS) +- Azure Virtual Machine +- Openstack Nova + +The Alibaba Cloud and Tencent Cloud providers are disabled by default, because +they require access to a remote host. The `providers` setting allows users to +select a list of default providers to query. + +The simple configuration below enables the processor. + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_cloud_metadata: ~ +------------------------------------------------------------------------------- + +The `add_cloud_metadata` processor has three optional configuration settings. +The first one is `timeout` which specifies the maximum amount of time to wait +for a successful response when detecting the hosting provider. The default +timeout value is `3s`. + +If a timeout occurs then no instance metadata will be added to the events. This +makes it possible to enable this processor for all your deployments (in the +cloud or on-premise). + +The second optional setting is `providers`. The `providers` setting accepts a +list of cloud provider names to be used. If `providers` is not configured, then +all providers that do not access a remote endpoint are enabled by default. + +List of names the `providers` setting supports: + +- "alibaba", or "ecs" for the Alibaba Cloud provider (disabled by default). +- "azure" for Azure Virtual Machine (enabled by default). +- "digitalocean" for Digital Ocean (enabled by default). +- "aws", or "ec2" for Amazon Web Services (enabled by default). +- "gcp" for Google Compute Engine (enabled by default).
+- "openstack", or "nova" for Openstack Nova (enabled by default). +- "tencent", or "qcloud" for Tencent Cloud (disabled by default). + +The third optional configuration setting is `overwrite`. When `overwrite` is +`true`, `add_cloud_metadata` overwrites existing `cloud.*` fields (`false` by +default). + +The metadata that is added to events varies by hosting provider. Below are +examples for each of the supported providers. + +_AWS_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "account.id": "123456789012", + "availability_zone": "us-east-1c", + "instance.id": "i-4e123456", + "machine.type": "t2.medium", + "image.id": "ami-abcd1234", + "provider": "aws", + "region": "us-east-1" + } +} +------------------------------------------------------------------------------- + +_Digital Ocean_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "instance.id": "1234567", + "provider": "digitalocean", + "region": "nyc2" + } +} +------------------------------------------------------------------------------- + +_GCP_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "availability_zone": "us-east1-b", + "instance.id": "1234556778987654321", + "machine.type": "f1-micro", + "project.id": "my-dev", + "provider": "gcp" + } +} +------------------------------------------------------------------------------- + +_Tencent Cloud_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "availability_zone": "gz-azone2", + "instance.id": "ins-qcloudv5", + "provider": "qcloud", + "region": "china-south-gz" + } +} +------------------------------------------------------------------------------- + +_Alibaba Cloud_ + +This metadata is only available when VPC is selected as the network type of the +ECS instance. 
+ +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "availability_zone": "cn-shenzhen", + "instance.id": "i-wz9g2hqiikg0aliyun2b", + "provider": "ecs", + "region": "cn-shenzhen-a" + } +} +------------------------------------------------------------------------------- + +_Azure Virtual Machine_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "provider": "az", + "instance.id": "04ab04c3-63de-4709-a9f9-9ab8c0411d5e", + "instance.name": "test-az-vm", + "machine.type": "Standard_D3_v2", + "region": "eastus2" + } +} +------------------------------------------------------------------------------- + +_Openstack Nova_ + +[source,json] +------------------------------------------------------------------------------- +{ + "cloud": { + "instance.name": "test-998d932195.mycloud.tld", + "instance.id": "i-00011a84", + "availability_zone": "xxxx-az-c", + "provider": "openstack", + "machine.type": "m2.large" + } +} +------------------------------------------------------------------------------- diff --git a/libbeat/processors/add_docker_metadata/docs/add_docker_metadata.asciidoc b/libbeat/processors/add_docker_metadata/docs/add_docker_metadata.asciidoc new file mode 100644 index 00000000000..aed8e205468 --- /dev/null +++ b/libbeat/processors/add_docker_metadata/docs/add_docker_metadata.asciidoc @@ -0,0 +1,80 @@ +[[add-docker-metadata]] +=== Add Docker metadata + +The `add_docker_metadata` processor annotates each event with relevant metadata +from Docker containers. At startup it detects a docker environment and caches the metadata. +The events are annotated with Docker metadata, only if a valid configuration +is detected and the processor is able to reach Docker API. + +Each event is annotated with: + +* Container ID +* Name +* Image +* Labels + +[NOTE] +===== +When running {beatname_uc} in a container, you need to provide access to +Docker’s unix socket in order for the `add_docker_metadata` processor to work. +You can do this by mounting the socket inside the container. For example: + +`docker run -v /var/run/docker.sock:/var/run/docker.sock ...` + +To avoid privilege issues, you may also need to add `--user=root` to the +`docker run` flags. Because the user must be part of the docker group in order +to access `/var/run/docker.sock`, root access is required if {beatname_uc} is +running as non-root inside the container. +===== + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_docker_metadata: + host: "unix:///var/run/docker.sock" + #match_fields: ["system.process.cgroup.id"] + #match_pids: ["process.pid", "process.ppid"] + #match_source: true + #match_source_index: 4 + #match_short_id: true + #cleanup_timeout: 60 + #labels.dedot: false + # To connect to Docker over TLS you must specify a client and CA certificate. + #ssl: + # certificate_authority: "/etc/pki/root/ca.pem" + # certificate: "/etc/pki/client/cert.pem" + # key: "/etc/pki/client/cert.key" +------------------------------------------------------------------------------- + +It has the following settings: + +`host`:: (Optional) Docker socket (UNIX or TCP socket). It uses +`unix:///var/run/docker.sock` by default. + +`ssl`:: (Optional) SSL configuration to use when connecting to the Docker +socket. + +`match_fields`:: (Optional) A list of fields to match a container ID, at least +one of them should hold a container ID to get the event enriched. 
+ +`match_pids`:: (Optional) A list of fields that contain process IDs. If the +process is running in Docker then the event will be enriched. The default value +is `["process.pid", "process.ppid"]`. + +`match_source`:: (Optional) Match container ID from a log path present in the +`log.file.path` field. Enabled by default. + +`match_short_id`:: (Optional) Match container short ID from a log path present +in the `log.file.path` field. Disabled by default. +This allows matching directory names that consist of the first 12 characters +of the container ID. For example, `/var/log/containers/b7e3460e2b21/*.log`. + +`match_source_index`:: (Optional) Index in the source path split by `/` to look +for the container ID. It defaults to 4 to match +`/var/lib/docker/containers//*.log` + +`cleanup_timeout`:: (Optional) Time of inactivity before metadata for a container +is cleaned up and forgotten. 60s by default. + +`labels.dedot`:: (Optional) Defaults to false. If set to true, dots in + labels are replaced with `_`. diff --git a/libbeat/processors/add_host_metadata/docs/add_host_metadata.asciidoc b/libbeat/processors/add_host_metadata/docs/add_host_metadata.asciidoc new file mode 100644 index 00000000000..bd3d47d8e86 --- /dev/null +++ b/libbeat/processors/add_host_metadata/docs/add_host_metadata.asciidoc @@ -0,0 +1,74 @@ +[[add-host-metadata]] +=== Add Host metadata + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_host_metadata: + netinfo.enabled: false + cache.ttl: 5m + geo: + name: nyc-dc1-rack1 + location: 40.7128, -74.0060 + continent_name: North America + country_iso_code: US + region_name: New York + region_iso_code: NY + city_name: New York +------------------------------------------------------------------------------- + +It has the following settings: + +`netinfo.enabled`:: (Optional) Default false. Include IP addresses and MAC addresses as fields `host.ip` and `host.mac`. + +`cache.ttl`:: (Optional) The processor uses an internal cache for the host metadata. This sets the cache expiration time. The default is 5m, negative values disable caching altogether. + +`geo.name`:: (Optional) User definable token to be used for identifying a discrete location. Frequently a datacenter, rack, or similar. + +`geo.location`:: (Optional) Latitude and longitude in comma-separated format. + +`geo.continent_name`:: (Optional) Name of the continent. + +`geo.country_name`:: (Optional) Name of the country. + +`geo.region_name`:: (Optional) Name of the region. + +`geo.city_name`:: (Optional) Name of the city. + +`geo.country_iso_code`:: (Optional) ISO country code. + +`geo.region_iso_code`:: (Optional) ISO region code. + + +The `add_host_metadata` processor annotates each event with relevant metadata from the host machine.
+The fields added to the event look like the following: + +[source,json] +------------------------------------------------------------------------------- +{ + "host":{ + "architecture":"x86_64", + "name":"example-host", + "id":"", + "os":{ + "family":"darwin", + "build":"16G1212", + "platform":"darwin", + "version":"10.12.6", + "kernel":"16.7.0", + "name":"Mac OS X" + }, + "ip": ["192.168.0.1", "10.0.0.1"], + "mac": ["00:25:96:12:34:56", "72:00:06:ff:79:f1"], + "geo": { + "continent_name": "North America", + "country_iso_code": "US", + "region_name": "New York", + "region_iso_code": "NY", + "city_name": "New York", + "name": "nyc-dc1-rack1", + "location": "40.7128, -74.0060" + } + } +} +------------------------------------------------------------------------------- diff --git a/libbeat/processors/add_kubernetes_metadata/docs/add_kubernetes_metadata.asciidoc b/libbeat/processors/add_kubernetes_metadata/docs/add_kubernetes_metadata.asciidoc new file mode 100644 index 00000000000..94181a44167 --- /dev/null +++ b/libbeat/processors/add_kubernetes_metadata/docs/add_kubernetes_metadata.asciidoc @@ -0,0 +1,92 @@ +[[add-kubernetes-metadata]] +=== Add Kubernetes metadata + +The `add_kubernetes_metadata` processor annotates each event with relevant +metadata based on which Kubernetes pod the event originated from. +At startup it detects an `in_cluster` environment and caches the +Kubernetes-related metadata. Events are only annotated if a valid configuration +is detected. If it's not able to detect a valid Kubernetes configuration, +the events are not annotated with Kubernetes-related metadata. + +Each event is annotated with: + +* Pod Name +* Pod UID +* Namespace +* Labels + +The `add_kubernetes_metadata` processor has two basic building blocks which are: + +* Indexers +* Matchers + +Indexers take in a pod's metadata and builds indices based on the pod metadata. +For example, the `ip_port` indexer can take a Kubernetes pod and index the pod +metadata based on all `pod_ip:container_port` combinations. + +Matchers are used to construct lookup keys for querying indices. For example, +when the `fields` matcher takes `["metricset.host"]` as a lookup field, it would +construct a lookup key with the value of the field `metricset.host`. + +Each Beat can define its own default indexers and matchers which are enabled by +default. For example, FileBeat enables the `container` indexer, which indexes +pod metadata based on all container IDs, and a `logs_path` matcher, which takes +the `log.file.path` field, extracts the container ID, and uses it to retrieve +metadata. + +The configuration below enables the processor when {beatname_lc} is run as a pod in +Kubernetes. + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_kubernetes_metadata: +------------------------------------------------------------------------------- + +The configuration below enables the processor on a Beat running as a process on +the Kubernetes node. + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_kubernetes_metadata: + host: + # If kube_config is not set, KUBECONFIG environment variable will be checked + # and if not present it will fall back to InCluster + kube_config: ${HOME}/.kube/config +------------------------------------------------------------------------------- + +The configuration below has the default indexers and matchers disabled and +enables ones that the user is interested in. 
+ +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_kubernetes_metadata: + host: + # If kube_config is not set, KUBECONFIG environment variable will be checked + # and if not present it will fall back to InCluster + kube_config: ~/.kube/config + default_indexers.enabled: false + default_matchers.enabled: false + indexers: + - ip_port: + matchers: + - fields: + lookup_fields: ["metricset.host"] +------------------------------------------------------------------------------- + +The `add_kubernetes_metadata` processor has the following configuration settings: + +`host`:: (Optional) Specify the node to scope {beatname_lc} to in case it +cannot be accurately detected, as when running {beatname_lc} in host network +mode. +`namespace`:: (Optional) Select the namespace from which to collect the +metadata. If it is not set, the processor collects metadata from all namespaces. +It is unset by default. +`kube_config`:: (Optional) Use given config file as configuration for Kubernetes +client. It defaults to `KUBECONFIG` environment variable if present. +`default_indexers.enabled`:: (Optional) Enable/Disable default pod indexers, in +case you want to specify your own. +`default_matchers.enabled`:: (Optional) Enable/Disable default pod matchers, in +case you want to specify your own. diff --git a/libbeat/processors/add_locale/docs/add_locale.asciidoc b/libbeat/processors/add_locale/docs/add_locale.asciidoc new file mode 100644 index 00000000000..69a0bc7029a --- /dev/null +++ b/libbeat/processors/add_locale/docs/add_locale.asciidoc @@ -0,0 +1,30 @@ +[[add-locale]] +=== Add the local time zone + +The `add_locale` processor enriches each event with the machine's time zone +offset from UTC or with the name of the time zone. It supports one configuration +option named `format` that controls whether an offset or time zone abbreviation +is added to the event. The default format is `offset`. The processor adds +an `event.timezone` value to each event. + +The configuration below enables the processor with the default settings. + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_locale: ~ +------------------------------------------------------------------------------- + +This configuration enables the processor and configures it to add the time zone +abbreviation to events. + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_locale: + format: abbreviation +------------------------------------------------------------------------------- + +NOTE: The `add_locale` processor differentiates between daylight savings +time (DST) and regular time. For example, `CEST` indicates DST and `CET` is +regular time.
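+
+For reference, with the default `offset` format the added field looks roughly
+like the following (the value shown here is only illustrative and depends on
+the host's configured time zone):
+
+[source,json]
+-------------------------------------------------------------------------------
+{
+  "event": {
+    "timezone": "+02:00"
+  }
+}
+-------------------------------------------------------------------------------
+
+With `format: abbreviation` the value would instead be an abbreviation such as
+`CEST`.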
diff --git a/libbeat/processors/add_observer_metadata/docs/add_observer_metadata.asciidoc b/libbeat/processors/add_observer_metadata/docs/add_observer_metadata.asciidoc new file mode 100644 index 00000000000..1bf3e12eab7 --- /dev/null +++ b/libbeat/processors/add_observer_metadata/docs/add_observer_metadata.asciidoc @@ -0,0 +1,73 @@ +[[add-observer-metadata]] +=== Add Observer metadata + +beta[] + +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_observer_metadata: + netinfo.enabled: false + cache.ttl: 5m + geo: + name: nyc-dc1-rack1 + location: 40.7128, -74.0060 + continent_name: North America + country_iso_code: US + region_name: New York + region_iso_code: NY + city_name: New York +------------------------------------------------------------------------------- + +It has the following settings: + +`netinfo.enabled`:: (Optional) Default false. Include IP addresses and MAC addresses as fields `observer.ip` and `observer.mac`. + +`cache.ttl`:: (Optional) The processor uses an internal cache for the observer metadata. This sets the cache expiration time. The default is 5m, negative values disable caching altogether. + +`geo.name`:: (Optional) User definable token to be used for identifying a discrete location. Frequently a datacenter, rack, or similar. + +`geo.location`:: (Optional) Latitude and longitude in comma-separated format. + +`geo.continent_name`:: (Optional) Name of the continent. + +`geo.country_name`:: (Optional) Name of the country. + +`geo.region_name`:: (Optional) Name of the region. + +`geo.city_name`:: (Optional) Name of the city. + +`geo.country_iso_code`:: (Optional) ISO country code. + +`geo.region_iso_code`:: (Optional) ISO region code. + + +The `add_observer_metadata` processor annotates each event with relevant metadata from the observer machine. +The fields added to the event look like the following: + +[source,json] +------------------------------------------------------------------------------- +{ + "observer" : { + "hostname" : "avce", + "type" : "heartbeat", + "vendor" : "elastic", + "ip" : [ + "192.168.1.251", + "fe80::64b2:c3ff:fe5b:b974" + ], + "mac" : [ + "dc:c1:02:6f:1b:ed" + ], + "geo": { + "continent_name": "North America", + "country_iso_code": "US", + "region_name": "New York", + "region_iso_code": "NY", + "city_name": "New York", + "name": "nyc-dc1-rack1", + "location": "40.7128, -74.0060" + } + } +} +------------------------------------------------------------------------------- diff --git a/libbeat/processors/add_process_metadata/docs/add_process_metadata.asciidoc b/libbeat/processors/add_process_metadata/docs/add_process_metadata.asciidoc new file mode 100644 index 00000000000..a7d54e9b56f --- /dev/null +++ b/libbeat/processors/add_process_metadata/docs/add_process_metadata.asciidoc @@ -0,0 +1,63 @@ +[[add-process-metadata]] +=== Add process metadata + +The `add_process_metadata` processor enriches events with information from running +processes, identified by their process ID (PID).
+ +[source,yaml] +------------------------------------------------------------------------------- +processors: +- add_process_metadata: + match_pids: [system.process.ppid] + target: system.process.parent +------------------------------------------------------------------------------- + +The fields added to the event look as follows: +[source,json] +------------------------------------------------------------------------------- +"process": { + "name": "systemd", + "title": "/usr/lib/systemd/systemd --switched-root --system --deserialize 22", + "exe": "/usr/lib/systemd/systemd", + "args": ["/usr/lib/systemd/systemd", "--switched-root", "--system", "--deserialize", "22"], + "pid": 1, + "ppid": 0, + "start_time": "2018-08-22T08:44:50.684Z", +} +------------------------------------------------------------------------------- + +Optionally, the process environment can be included, too: +[source,json] +------------------------------------------------------------------------------- + ... + "env": { + "HOME": "/", + "TERM": "linux", + "BOOT_IMAGE": "/boot/vmlinuz-4.11.8-300.fc26.x86_64", + "LANG": "en_US.UTF-8", + } + ... +------------------------------------------------------------------------------- +It has the following settings: + +`match_pids`:: List of fields to lookup for a PID. The processor will +search the list sequentially until the field is found in the current event, and +the PID lookup will be applied to the value of this field. + +`target`:: (Optional) Destination prefix where the `process` object will be +created. The default is the event's root. + +`include_fields`:: (Optional) List of fields to add. By default, the processor +will add all the available fields except `process.env`. + +`ignore_missing`:: (Optional) When set to `false`, events that don't contain any +of the fields in match_pids will be discarded and an error will be generated. By +default, this condition is ignored. + +`overwrite_keys`:: (Optional) By default, if a target field already exists, it +will not be overwritten and an error will be logged. If `overwrite_keys` is +set to `true`, this condition will be ignored. + +`restricted_fields`:: (Optional) By default, the `process.env` field is not +output, to avoid leaking sensitive data. If `restricted_fields` is `true`, the +field will be present in the output. diff --git a/libbeat/processors/communityid/docs/communityid.asciidoc b/libbeat/processors/communityid/docs/communityid.asciidoc new file mode 100644 index 00000000000..5d620d1c2a4 --- /dev/null +++ b/libbeat/processors/communityid/docs/communityid.asciidoc @@ -0,0 +1,47 @@ +[[community-id]] +=== Community ID Network Flow Hash + +The `community_id` processor computes a network flow hash according to the +https://github.com/corelight/community-id-spec[Community ID Flow Hash +specification]. + +The flow hash is useful for correlating all network events related to a +single flow. For example you can filter on a community ID value and you might +get back the Netflow records from multiple collectors and layer 7 protocol +records from Packetbeat. + +By default the processor is configured to read the flow parameters from the +appropriate Elastic Common Schema (ECS) fields. If you are processing ECS data +then no parameters are required. + +[source,yaml] +---- +processors: + - community_id: +---- + +If the data does not conform to ECS then you can customize the field names +that the processor reads from. You can also change the `target` field which +is where the computed hash is written to. 
+ +[source,yaml] +---- +processors: + - community_id: + fields: + source_ip: my_source_ip + source_port: my_source_port + destination_ip: my_dest_ip + destination_port: my_dest_port + iana_number: my_iana_number + transport: my_transport + icmp_type: my_icmp_type + icmp_code: my_icmp_code + target: network.community_id +---- + +If the necessary fields are not present in the event then the processor will +silently continue without adding the target field. + +The processor also accepts an optional `seed` parameter that must be a 16-bit +unsigned integer. This value gets incorporated into all generated hashes. diff --git a/libbeat/processors/convert/docs/convert.asciidoc b/libbeat/processors/convert/docs/convert.asciidoc new file mode 100644 index 00000000000..7032a9f8518 --- /dev/null +++ b/libbeat/processors/convert/docs/convert.asciidoc @@ -0,0 +1,45 @@ +[[convert]] +=== Convert + +The `convert` processor converts a field in the event to a different type, such +as converting a string to an integer. + +The supported types include: `integer`, `long`, `float`, `double`, `string`, +`boolean`, and `ip`. + +The `ip` type is effectively an alias for `string`, but with an added validation +that the value is an IPv4 or IPv6 address. + +[source,yaml] +---- +processors: + - convert: + fields: + - {from: "src_ip", to: "source.ip", type: "ip"} + - {from: "src_port", to: "source.port", type: "integer"} + ignore_missing: true + fail_on_error: false +---- + +The `convert` processor has the following configuration settings: + +`fields`:: (Required) This is the list of fields to convert. At least one item +must be contained in the list. Each item in the list must have a `from` key that +specifies the source field. The `to` key is optional and specifies where to +assign the converted value. If `to` is omitted then the `from` field is updated +in-place. The `type` key specifies the data type to convert the value to. If +`type` is omitted then the processor copies or renames the field without any +type conversion. + +`ignore_missing`:: (Optional) If `true` the processor continues to the next +field when the `from` key is not found in the event. If false then the processor +returns an error and does not process the remaining fields. Default is `false`. + +`fail_on_error`:: (Optional) If false type conversion failures are ignored and +the processor continues to the next field. Default is `true`. + +`tag`:: (Optional) An identifier for this processor. Useful for debugging. + +`mode`:: (Optional) When both `from` and `to` are defined for a field then +`mode` controls whether to `copy` or `rename` the field when the type conversion +is successful. Default is `copy`. diff --git a/libbeat/processors/decode_csv_fields/docs/decode_csv_fields.asciidoc b/libbeat/processors/decode_csv_fields/docs/decode_csv_fields.asciidoc new file mode 100644 index 00000000000..1e833b4a711 --- /dev/null +++ b/libbeat/processors/decode_csv_fields/docs/decode_csv_fields.asciidoc @@ -0,0 +1,44 @@ +ifdef::has_decode_csv_fields_processor[] +[[decode-csv-fields]] +=== Decode CSV fields + +experimental[] + +The `decode_csv_fields` processor decodes fields containing records in +comma-separated format (CSV). It will output the values as an array of strings. +This processor is available for Filebeat and Journalbeat. 
+ +[source,yaml] +----------------------------------------------------- +processors: + - decode_csv_fields: + fields: + message: decoded.csv + separator: , + ignore_missing: false + overwrite_keys: true + trim_leading_whitespace: false + fail_on_error: true +----------------------------------------------------- + +The `decode_csv_fields` has the following settings: + +`fields`:: This is a mapping from the source field containing the CSV data to + the destination field to which the decoded array will be written. +`separator`:: (Optional) Character to be used as a column separator. + The default is the comma character. For using a TAB character you + must set it to "\t". +`ignore_missing`:: (Optional) Whether to ignore events which lack the source + field. The default is `false`, which will fail processing of + an event if a field is missing. +`overwrite_keys`:: Whether the target field is overwritten if it + already exists. The default is false, which will fail + processing of an event when `target` already exists. +`trim_leading_space`:: Whether extra space after the separator is trimmed from + values. This works even if the separator is also a space. + The default is `false`. +`fail_on_error`:: (Optional) If set to true, in case of an error the changes to +the event are reverted, and the original event is returned. If set to `false`, +processing continues also if an error happens. Default is `true`. + +endif::[] diff --git a/libbeat/processors/dissect/docs/dissect.asciidoc b/libbeat/processors/dissect/docs/dissect.asciidoc new file mode 100644 index 00000000000..5ecac9e8554 --- /dev/null +++ b/libbeat/processors/dissect/docs/dissect.asciidoc @@ -0,0 +1,30 @@ +[[dissect]] +=== Dissect strings + +The dissect processor tokenizes incoming strings using defined patterns. + +[source,yaml] +------- +processors: +- dissect: + tokenizer: "%{key1} %{key2}" + field: "message" + target_prefix: "dissect" +------- + +The `dissect` processor has the following configuration settings: + +`field`:: (Optional) The event field to tokenize. Default is `message`. + +`target_prefix`:: (Optional) The name of the field where the values will be extracted. When an empty +string is defined, the processor will create the keys at the root of the event. Default is +`dissect`. When the target key already exists in the event, the processor won't replace it and log +an error; you need to either drop or rename the key before using dissect. + +For tokenization to be successful, all keys must be found and extracted, if one of them cannot be +found an error will be logged and no modification is done on the original event. + +NOTE: A key can contain any characters except reserved suffix or prefix modifiers: `/`,`&`, `+` +and `?`. + +See <> for a list of supported conditions. diff --git a/libbeat/processors/dns/docs/dns.asciidoc b/libbeat/processors/dns/docs/dns.asciidoc new file mode 100644 index 00000000000..d184ae0069a --- /dev/null +++ b/libbeat/processors/dns/docs/dns.asciidoc @@ -0,0 +1,102 @@ +[[processor-dns]] +=== DNS Reverse Lookup + +The DNS processor performs reverse DNS lookups of IP addresses. It caches the +responses that it receives in accordance to the time-to-live (TTL) value +contained in the response. It also caches failures that occur during lookups. +Each instance of this processor maintains its own independent cache. + +The processor uses its own DNS resolver to send requests to nameservers and does +not use the operating system's resolver. It does not read any values contained +in `/etc/hosts`. 
+ +This processor can significantly slow down your pipeline's throughput if you +have a high latency network or slow upstream nameserver. The cache will help +with performance, but if the addresses being resolved have a high cardinality +then the cache benefits will be diminished due to the high miss ratio. + +By way of example, if each DNS lookup takes 2 milliseconds, the maximum +throughput you can achieve is 500 events per second (1000 milliseconds / 2 +milliseconds). If you have a high cache hit ratio then your throughput can be +higher. + +This is a minimal configuration example that resolves the IP addresses contained +in two fields. + +[source,yaml] +---- +processors: +- dns: + type: reverse + fields: + source.ip: source.hostname + destination.ip: destination.hostname +---- + +Next is a configuration example showing all options. + +[source,yaml] +---- +processors: +- dns: + type: reverse + action: append + fields: + server.ip: server.hostname + client.ip: client.hostname + success_cache: + capacity.initial: 1000 + capacity.max: 10000 + failure_cache: + capacity.initial: 1000 + capacity.max: 10000 + ttl: 1m + nameservers: ['192.0.2.1', '203.0.113.1'] + timeout: 500ms + tag_on_failure: [_dns_reverse_lookup_failed] +---- + +The `dns` processor has the following configuration settings: + +`type`:: The type of DNS lookup to perform. The only supported type is +`reverse` which queries for a PTR record. + +`action`:: This defines the behavior of the processor when the target field +already exists in the event. The options are `append` (default) and `replace`. + +`fields`:: This is a mapping of source field names to target field names. The +value of the source field will be used in the DNS query and result will be +written to the target field. + +`success_cache.capacity.initial`:: The initial number of items that the success +cache will be allocated to hold. When initialized the processor will allocate +the memory for this number of items. Default value is `1000`. + +`success_cache.capacity.max`:: The maximum number of items that the success +cache can hold. When the maximum capacity is reached a random item is evicted. +Default value is `10000`. + +`failure_cache.capacity.initial`:: The initial number of items that the failure +cache will be allocated to hold. When initialized the processor will allocate +the memory for this number of items. Default value is `1000`. + +`failure_cache.capacity.max`:: The maximum number of items that the failure +cache can hold. When the maximum capacity is reached a random item is evicted. +Default value is `10000`. + +`failure_cache.ttl`:: The duration for which failures are cached. Valid time +units are "ns", "us" (or "µs"), "ms", "s", "m", "h". Default value is `1m`. + +`nameservers`:: A list of nameservers to query. If there are multiple servers, +the resolver queries them in the order listed. If none are specified then it +will read the nameservers listed in `/etc/resolv.conf` once at initialization. +On Windows you must always supply at least one nameserver. + +`timeout`:: The duration after which a DNS query will timeout. This is timeout +for each DNS request so if you have 2 nameservers then the total timeout will be +2 times this value. Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", +"h". Default value is `500ms`. + +`tag_on_failure`:: A list of tags to add to the event when any lookup fails. The +tags are only added once even if multiple lookups fail. By default no tags are +added upon failure. 
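+
+To make the behavior concrete, given an event that contains
+`source.ip: 192.0.2.1`, a successful reverse (PTR) lookup with the minimal
+configuration shown earlier would leave the event looking roughly like this
+(the hostname is illustrative):
+
+[source,json]
+----
+{
+  "source": {
+    "ip": "192.0.2.1",
+    "hostname": "host1.example.com"
+  }
+}
+----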
diff --git a/libbeat/processors/extract_array/docs/extract_array.asciidoc b/libbeat/processors/extract_array/docs/extract_array.asciidoc new file mode 100644 index 00000000000..ccdab1e7e6c --- /dev/null +++ b/libbeat/processors/extract_array/docs/extract_array.asciidoc @@ -0,0 +1,42 @@ +[[extract-array]] +=== Extract array + +experimental[] + +The `extract_array` processor populates fields with values read from an array +field. The following example will populate `source.ip` with the first element of +the `my_array` field, `destination.ip` with the second element, and +`network.transport` with the third. + +[source,yaml] +----------------------------------------------------- +processors: + - extract_array: + field: my_array + mappings: + source.ip: 0 + destination.ip: 1 + network.transport: 2 +----------------------------------------------------- + +The following settings are supported: + +`field`:: The array field whose elements are to be extracted. +`mappings`:: Maps each field name to an array index. Use 0 for the first element in + the array. Multiple fields can be mapped to the same array element. +`ignore_missing`:: (Optional) Whether to ignore events where the array field is + missing. The default is `false`, which will fail processing + of an event if the specified field does not exist. Set it to + `true` to ignore this condition. +`overwrite_keys`:: Whether the target fields specified in the mapping are + overwritten if they already exist. The default is `false`, + which will fail processing if a target field already exists. +`fail_on_error`:: (Optional) If set to `true` and an error happens, changes to + the event are reverted, and the original event is returned. If + set to `false`, processing continues despite errors. + Default is `true`. +`omit_empty`:: (Optional) Whether empty values are extracted from the array. If + set to `true`, instead of the target field being set to an + empty value, it is left unset. The empty string (`""`), an + empty array (`[]`) or an empty object (`{}`) are considered + empty values. Default is `false`. diff --git a/libbeat/processors/registered_domain/docs/registered_domain.asciidoc b/libbeat/processors/registered_domain/docs/registered_domain.asciidoc new file mode 100644 index 00000000000..983867d5cd6 --- /dev/null +++ b/libbeat/processors/registered_domain/docs/registered_domain.asciidoc @@ -0,0 +1,33 @@ +[[processor-registered-domain]] +=== Registered Domain + +The `registered_domain` processor reads a field containing a hostname and then +writes the "registered domain" contained in the hostname to the target field. +For example, given `www.google.co.uk` the processor would output `google.co.uk`. +In other words the "registered domain" is the effective top-level domain +(`co.uk`) plus one level (`google`). + +This processor uses the Mozilla Public Suffix list to determine the value. + +[source,yaml] +---- +processors: +- registered_domain: + field: dns.question.name + target_field: dns.question.registered_domain + ignore_missing: true + ignore_failure: true +---- + +The `registered_domain` processor has the following configuration settings: + +.Registered Domain options +[options="header"] +|====== +| Name | Required | Default | Description | +| `field` | yes | | Source field containing a fully qualified domain name (FQDN). | +| `target_field` | yes | | Target field for the registered domain value. | +| `ignore_missing` | no | false | Ignore errors when the source field is missing. 
| +| `ignore_failure` | no | false | Ignore all errors produced by the processor. | +| `id` | no | | An identifier for this processor instance. Useful for debugging. | +|====== diff --git a/libbeat/processors/script/docs/script.asciidoc b/libbeat/processors/script/docs/script.asciidoc new file mode 100644 index 00000000000..ca5c73977b4 --- /dev/null +++ b/libbeat/processors/script/docs/script.asciidoc @@ -0,0 +1,170 @@ +ifdef::has_script_processor[] +[[processor-script]] +=== Script Processor + +experimental[] + +The script processor executes Javascript code to process an event. The processor +uses a pure Go implementation of ECMAScript 5.1 and has no external +dependencies. This can be useful in situations where one of the other processors +doesn't provide the functionality you need to filter events. + +The processor can be configured by embedding Javascript in your configuration +file or by pointing the processor at external file(s). + +[source,yaml] +---- +processors: +- script: + lang: javascript + id: my_filter + source: > + function process(event) { + event.Tag("js"); + } +---- + +This loads `filter.js` from disk. + +[source,yaml] +---- +processors: +- script: + lang: javascript + id: my_filter + file: ${path.config}/filter.js +---- + +Parameters can be passed to the script by adding `params` to the config. +This allows for a script to be made reusable. When using `params` the +code must define a `register(params)` function to receive the parameters. + +[source,yaml] +---- +processors: +- script: + lang: javascript + id: my_filter + params: + threshold: 15 + source: > + var params = {threshold: 42}; + function register(scriptParams) { + params = scriptParams; + } + function process(event) { + if (event.Get("severity") < params.threshold) { + event.Cancel(); + } + } +---- + +If the script defines a `test()` function it will be invoked when the processor +is loaded. Any exceptions thrown will cause the processor to fail to load. This +can be used to make assertions about the behavior of the script. + +[source,javascript] +---- +function process(event) { + if (event.Get("event.code") === 1102) { + event.Put("event.action", "cleared"); + } +} + +function test() { + var event = process(new Event({event: {code: 1102})); + if (event.Get("event.action") !== "cleared") { + throw "expected event.action === cleared"; + } +} +---- + +[float] +==== Configuration options + +The `script` processor has the following configuration settings: + +`lang`:: This field is required and its value must be `javascript`. + +`tag`:: This is an optional identifier that is added to log messages. If defined +it enables metrics logging for this instance of the processor. The metrics +include the number of exceptions and a histogram of the execution times for +the `process` function. + +`source`:: Inline Javascript source code. + +`file`:: Path to a script file to load. Relative paths are interpreted as +relative to the `path.config` directory. Globs are expanded. + +`files`:: List of script files to load. The scripts are concatenated together. +Relative paths are interpreted as relative to the `path.config` directory. +And globs are expanded. + +`params`:: A dictionary of parameters that are passed to the `register` of the +script. + +`tag_on_exception`:: Tag to add to events in case the Javascript code causes an +exception while processing an event. Defaults to `_js_exception`. + +`timeout`:: This sets an execution timeout for the `process` function. 
When +the `process` function takes longer than the `timeout` period the function +is interrupted. You can set this option to prevent a script from running for +too long (like preventing an infinite `while` loop). By default there is no +timeout. + +[float] +==== Event API + +The `Event` object passed to the `process` method has the following API. + +[frame="topbot",options="header"] +|=== +|Method |Description + +|`Get(string)` +|Get a value from the event (either a scalar or an object). If the key does not +exist `null` is returned. If no key is provided then an object containing all +fields is returned. + +*Example*: `var value = event.Get(key);` + +|`Put(string, value)` +|Put a value into the event. If the key was already set then the +previous value is returned. It throws an exception if the key cannot be set +because one of the intermediate values is not an object. + +*Example*: `var old = event.Put(key, value);` + +|`Rename(string, string)` +|Rename a key in the event. The target key must not exist. It +returns true if the source key was successfully renamed to the target key. + +*Example*: `var success = event.Rename("source", "target");` + +|`Delete(string)` +|Delete a field from the event. It returns true on success. + +*Example*: `var deleted = event.Delete("user.email");` + +|`Cancel()` +|Flag the event as cancelled which causes the processor to drop +event. + +*Example*: `event.Cancel(); return;` + +|`Tag(string)` +|Append a tag to the `tags` field if the tag does not already +exist. Throws an exception if `tags` exists and is not a string or a list of +strings. + +*Example*: `event.Tag("user_event");` + +|`AppendTo(string, string)` +|`AppendTo` is a specialized `Put` method that converts the existing value to an +array and appends the value if it does not already exist. If there is an +existing value that's not a string or array of strings then an exception is +thrown. + +*Example*: `event.AppendTo("error.message", "invalid file hash");` +|=== +endif::[] diff --git a/libbeat/processors/timestamp/docs/timestamp.asciidoc b/libbeat/processors/timestamp/docs/timestamp.asciidoc new file mode 100644 index 00000000000..98da765db28 --- /dev/null +++ b/libbeat/processors/timestamp/docs/timestamp.asciidoc @@ -0,0 +1,67 @@ +ifdef::has_timestamp_processor[] +[[processor-timestamp]] +=== Timestamp + +beta[] + +The `timestamp` processor parses a timestamp from a field. By default the +timestamp processor writes the parsed result to the `@timestamp` field. You can +specify a different field by setting the `target_field` parameter. The timestamp +value is parsed according to the `layouts` parameter. Multiple layouts can be +specified and they will be used sequentially to attempt parsing the timestamp +field. + +NOTE: The timestamp layouts used by this processor are different than the + formats supported by date processors in Logstash and Elasticsearch Ingest + Node. + +The `layouts` are described using a reference time that is based on this +specific time: + + Mon Jan 2 15:04:05 MST 2006 + +Since MST is GMT-0700, the reference time is: + + 01/02 03:04:05PM '06 -0700 + +To define your own layout, rewrite the reference time in a format that matches +the timestamps you expect to parse. For more layout examples and details see the +https://godoc.org/time#pkg-constants[Go time package documentation]. + +If a layout does not contain a year then the current year in the specified +`timezone` is added to the time value. 
+ +.Timestamp options +[options="header"] +|====== +| Name | Required | Default | Description | +| `field` | yes | | Source field containing the time to be parsed. | +| `target_field` | no | @timestamp | Target field for the parsed time value. The target value is always written as UTC. | +| `layouts` | yes | | Timestamp layouts that define the expected time value format. In addition layouts, `UNIX` and `UNIX_MS` are accepted. | +| `timezone` | no | UTC | Timezone (e.g. America/New_York) to use when parsing a timestamp not containing a timezone. | +| `ignore_missing` | no | false | Ignore errors when the source field is missing. | +| `ignore_failure` | no | false | Ignore all errors produced by the processor. | +| `test` | no | | A list of timestamps that must parse successfully when loading the processor. | +| `id` | no | | An identifier for this processor instance. Useful for debugging. | +|====== + +Here is an example that parses the `start_time` field and writes the result +to the `@timestamp` field then deletes the `start_time` field. When the +processor is loaded it will immediately validate that the two `test` timestamps +parse with this configuration. + +[source,yaml] +---- +processors: +- timestamp: + field: start_time + layouts: + - '2006-01-02T15:04:05Z' + - '2006-01-02T15:04:05.999Z' + test: + - '2019-06-22T16:33:51Z' + - '2019-11-18T04:59:51.123Z' +- drop_fields: + fields: [start_time] +---- +endif::[] diff --git a/x-pack/filebeat/processors/decode_cef/docs/decode_cef.asciidoc b/x-pack/filebeat/processors/decode_cef/docs/decode_cef.asciidoc new file mode 100644 index 00000000000..796f60b7f55 --- /dev/null +++ b/x-pack/filebeat/processors/decode_cef/docs/decode_cef.asciidoc @@ -0,0 +1,39 @@ +ifdef::has_decode_cef_processor[] +[[processor-decode-cef]] +[role="xpack"] +=== Decode CEF + +beta[] + +The `decode_cef` processor decodes Common Event Format (CEF) messages. This +processor is available in Filebeat. + +Below is an example configuration that decodes the `message` field as CEF after +renaming it to `event.original`. It is best to rename `message` to +`event.original` because the decoded CEF data contains its own `message` field. + +[source,yaml] +---- +processors: +- rename: + fields: + - {from: "message", to: "event.original"} +- decode_cef: + field: event.original +---- + +The `decode_cef` processor has the following configuration settings. + +.Decode CEF options +[options="header"] +|====== +| Name | Required | Default | Description +| `field` | no | message | Source field containing the CEF message to be parsed. | +| `target_field` | no | cef | Target field where the parsed CEF object will be written. | +| `ecs` | no | true | Generate Elastic Common Schema (ECS) fields from the CEF data. + Certain CEF header and extension values will be used to populate ECS fields. | +| `ignore_missing` | no | false | Ignore errors when the source field is missing. | +| `ignore_failure` | no | false | Ignore failures when the source field does not contain a CEF message. | +| `id` | no | | An identifier for this processor instance. Useful for debugging. | +|====== +endif::[]