diff --git a/code/go/ecs/dataset.go b/code/go/ecs/dataset.go
new file mode 100644
index 0000000000..57232e1f3b
--- /dev/null
+++ b/code/go/ecs/dataset.go
@@ -0,0 +1,56 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Code generated by scripts/gocodegen.go - DO NOT EDIT.
+
+package ecs
+
+// The dataset fields are part of the new [indexing
+// strategy](https://github.com/elastic/kibana/blob/master/docs/ingest_manager/index.asciidoc#indexing-strategy-1).
+// These fields are used to determine into which index the data is shipped in
+// Elasticsearch and allow efficient querying of data. Initially these fields
+// are mainly used by data shipped by the Elastic Agent, but any time series
+// data shipper should switch to using data streams and the new indexing
+// strategy with these fields.
+// All three fields are `constant_keyword` fields.
+type Dataset struct {
+	// Type of the dataset.
+	// The type of the dataset can be `logs` or `metrics`. More types may be
+	// added in the future, but no types other than the ones described here
+	// should be used.
+	Type string `ecs:"type"`
+
+	// Dataset name describes the structure of the data.
+	// All data shipped into a single dataset should have the same or very
+	// similar data structure. For example `system.cpu` and `system.disk` are
+	// two different datasets as they have very different fields.
+	// The name of the dataset should be descriptive of the data and it is
+	// encouraged to use `.` to combine multiple words. All characters which
+	// are allowed in index names can be used for the dataset except `-`.
+	// The default for dataset is `generic`.
+	Name string `ecs:"name"`
+
+	// Namespace of the dataset.
+	// This is the namespace used in your index. The namespace is used to
+	// separate the same structure into different Data Streams. For example,
+	// if nginx logs are shipped for testing and production into the same
+	// cluster, two different namespaces can be used. This allows, for
+	// example, assigning different ILM policies.
+	// The default value for a namespace is `default`.
+	Namespace string `ecs:"namespace"`
+}
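Taken together, the three `constant_keyword` fields spell out the backing data stream of the new indexing strategy, `{dataset.type}-{dataset.name}-{dataset.namespace}`. Below is a minimal sketch of how a shipper could derive that name; the `streamName` helper and the local `Dataset` copy are illustrative only, and the fallbacks (`generic`, `default`) are the defaults documented in the field descriptions.

```go
package main

import "fmt"

// Dataset mirrors the generated ecs.Dataset struct, for illustration only.
type Dataset struct {
	Type      string
	Name      string
	Namespace string
}

// streamName composes the target data stream following the
// {dataset.type}-{dataset.name}-{dataset.namespace} convention,
// falling back to the documented defaults when fields are empty.
func streamName(d Dataset) string {
	if d.Name == "" {
		d.Name = "generic" // default dataset name
	}
	if d.Namespace == "" {
		d.Namespace = "default" // default namespace
	}
	return fmt.Sprintf("%s-%s-%s", d.Type, d.Name, d.Namespace)
}

func main() {
	d := Dataset{Type: "logs", Name: "nginx.access"}
	fmt.Println(streamName(d)) // logs-nginx.access-default
}
```

With the example values from the schema, nginx access logs land in `logs-nginx.access-default`, matching the example given in the schema footnote below.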
diff --git a/docs/field-details.asciidoc b/docs/field-details.asciidoc
index ffe4744f88..d9b9a918ec 100644
--- a/docs/field-details.asciidoc
+++ b/docs/field-details.asciidoc
@@ -807,6 +807,76 @@ example: `docker`
 
 |=====
 
+[[ecs-dataset]]
+=== Dataset Fields
+
+The dataset fields are part of the new [indexing strategy](https://github.com/elastic/kibana/blob/master/docs/ingest_manager/index.asciidoc#indexing-strategy-1).
+
+These fields are used to determine into which index the data is shipped in Elasticsearch and allow efficient querying of data.
+Initially these fields are mainly used by data shipped by the Elastic Agent, but any time series data shipper should switch to using data streams and the new indexing strategy with these fields.
+
+All three fields are `constant_keyword` fields.
+
+==== Dataset Field Details
+
+[options="header"]
+|=====
+| Field | Description | Level
+
+// ===============================================================
+
+| dataset.name
+| Dataset name describes the structure of the data.
+
+All data shipped into a single dataset should have the same or very similar data structure. For example `system.cpu` and `system.disk` are two different datasets as they have very different fields.
+
+The name of the dataset should be descriptive of the data and it is encouraged to use `.` to combine multiple words. All characters which are allowed in index names can be used for the dataset except `-`.
+
+The default for dataset is `generic`.
+
+type: constant_keyword
+
+
+
+example: `nginx.access`
+
+| extended
+
+// ===============================================================
+
+| dataset.namespace
+| Namespace of the dataset.
+
+This is the namespace used in your index. The namespace is used to separate the same structure into different Data Streams. For example, if nginx logs are shipped for testing and production into the same cluster, two different namespaces can be used. This allows, for example, assigning different ILM policies.
+
+The default value for a namespace is `default`.
+
+type: constant_keyword
+
+
+
+example: `production`
+
+| extended
+
+// ===============================================================
+
+| dataset.type
+| Type of the dataset.
+
+The type of the dataset can be `logs` or `metrics`. More types may be added in the future, but no types other than the ones described here should be used.
+
+type: constant_keyword
+
+
+
+example: `logs`
+
+| extended
+
+// ===============================================================
+
+|=====
+
 [[ecs-destination]]
 === Destination Fields
 
diff --git a/docs/fields.asciidoc b/docs/fields.asciidoc
index ead1723d98..b45b6fa276 100644
--- a/docs/fields.asciidoc
+++ b/docs/fields.asciidoc
@@ -32,6 +32,8 @@ all fields are defined.
 
 | <<ecs-container,Container>> | Fields describing the container that generated this event.
 
+| <<ecs-dataset,Dataset>> | Fields about the dataset of this document.
+
 | <<ecs-destination,Destination>> | Fields about the destination side of a network connection, used with source.
 
 | <<ecs-dll,DLL>> | These fields contain information about code libraries dynamically loaded into processes.
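The `dataset.name` constraint documented above (any character allowed in index names, except `-`) can be approximated with a simple check. Treat this as a rough sketch rather than the authoritative Elasticsearch index-name validation; `validDatasetName` and its character list are assumptions for illustration.

```go
package main

import (
	"fmt"
	"strings"
)

// invalidDatasetChars lists characters that are not allowed in index names,
// plus `-`, which dataset.name additionally forbids. This only approximates
// the documented constraint.
const invalidDatasetChars = `-\/*?"<>| ,#:`

// validDatasetName roughly checks the dataset.name rules described in the
// field documentation above; it is illustrative, not authoritative.
func validDatasetName(name string) bool {
	if name == "" || strings.ToLower(name) != name {
		return false // index names must be lowercase
	}
	return !strings.ContainsAny(name, invalidDatasetChars)
}

func main() {
	fmt.Println(validDatasetName("nginx.access")) // true
	fmt.Println(validDatasetName("nginx-access")) // false: `-` is reserved as the separator
}
```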
diff --git a/generated/beats/fields.ecs.yml b/generated/beats/fields.ecs.yml
index b527d3192b..7dc98148f3 100644
--- a/generated/beats/fields.ecs.yml
+++ b/generated/beats/fields.ecs.yml
@@ -552,6 +552,61 @@
       ignore_above: 1024
       description: Runtime managing this container.
       example: docker
+  - name: dataset
+    title: Dataset
+    group: 2
+    description: 'The dataset fields are part of the new [indexing strategy](https://github.com/elastic/kibana/blob/master/docs/ingest_manager/index.asciidoc#indexing-strategy-1).
+
+      These fields are used to determine into which index the data is shipped in Elasticsearch
+      and allow efficient querying of data. Initially these fields are mainly used
+      by data shipped by the Elastic Agent, but any time series data shipper should
+      switch to using data streams and the new indexing strategy with these fields.
+
+      All three fields are `constant_keyword` fields.'
+    footnote: 'Examples: The new indexing strategy is `{dataset.type}-{dataset.name}-{dataset.namespace}`.
+      As an example, nginx access logs are shipped into `logs-nginx.access-default`.'
+    type: group
+    fields:
+    - name: name
+      level: extended
+      type: constant_keyword
+      description: 'Dataset name describes the structure of the data.
+
+        All data shipped into a single dataset should have the same or very similar
+        data structure. For example `system.cpu` and `system.disk` are two different
+        datasets as they have very different fields.
+
+        The name of the dataset should be descriptive of the data and it is encouraged
+        to use `.` to combine multiple words. All characters which are allowed in
+        index names can be used for the dataset except `-`.
+
+        The default for dataset is `generic`.'
+      example: nginx.access
+      default_field: false
+    - name: namespace
+      level: extended
+      type: constant_keyword
+      description: 'Namespace of the dataset.
+
+        This is the namespace used in your index. The namespace is used to separate
+        the same structure into different Data Streams. For example, if nginx logs
+        are shipped for testing and production into the same cluster, two different
+        namespaces can be used. This allows, for example, assigning different ILM
+        policies.
+
+        The default value for a namespace is `default`.'
+      example: production
+      default_field: false
+    - name: type
+      level: extended
+      type: constant_keyword
+      description: 'Type of the dataset.
+
+        The type of the dataset can be `logs` or `metrics`. More types may be added
+        in the future, but no types other than the ones described here should be
+        used.'
+      example: logs
+      default_field: false
   - name: destination
     title: Destination
     group: 2
diff --git a/generated/csv/fields.csv b/generated/csv/fields.csv
index 2859067da8..8431cee602 100644
--- a/generated/csv/fields.csv
+++ b/generated/csv/fields.csv
@@ -58,6 +58,9 @@ ECS_Version,Indexed,Field_Set,Field,Type,Level,Normalization,Example,Description
 1.6.0-dev,true,container,container.labels,object,extended,,,Image labels.
 1.6.0-dev,true,container,container.name,keyword,extended,,,Container name.
 1.6.0-dev,true,container,container.runtime,keyword,extended,,docker,Runtime managing this container.
+1.6.0-dev,true,dataset,dataset.name,constant_keyword,extended,,nginx.access,Dataset name describing the structure of the data.
+1.6.0-dev,true,dataset,dataset.namespace,constant_keyword,extended,,production,Namespace of the dataset.
+1.6.0-dev,true,dataset,dataset.type,constant_keyword,extended,,logs,Type of the dataset.
 1.6.0-dev,true,destination,destination.address,keyword,extended,,,Destination network address.
 1.6.0-dev,true,destination,destination.as.number,long,extended,,15169,Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet.
 1.6.0-dev,true,destination,destination.as.organization.name,keyword,extended,,Google LLC,Organization name.
diff --git a/generated/ecs/ecs_flat.yml b/generated/ecs/ecs_flat.yml
index 5f6aa7025b..212ea884e3 100644
--- a/generated/ecs/ecs_flat.yml
+++ b/generated/ecs/ecs_flat.yml
@@ -667,6 +667,57 @@ container.runtime:
   normalize: []
   short: Runtime managing this container.
   type: keyword
+dataset.name:
+  dashed_name: dataset-name
+  description: 'Dataset name describes the structure of the data.
+
+    All data shipped into a single dataset should have the same or very similar data
+    structure. For example `system.cpu` and `system.disk` are two different datasets
+    as they have very different fields.
+
+    The name of the dataset should be descriptive of the data and it is encouraged
+    to use `.` to combine multiple words. All characters which are allowed in index
+    names can be used for the dataset except `-`.
+
+    The default for dataset is `generic`.'
+  example: nginx.access
+  flat_name: dataset.name
+  level: extended
+  name: name
+  normalize: []
+  short: Dataset name describing the structure of the data.
+  type: constant_keyword
+dataset.namespace:
+  dashed_name: dataset-namespace
+  description: 'Namespace of the dataset.
+
+    This is the namespace used in your index. The namespace is used to separate the
+    same structure into different Data Streams. For example, if nginx logs are shipped
+    for testing and production into the same cluster, two different namespaces can
+    be used. This allows, for example, assigning different ILM policies.
+
+    The default value for a namespace is `default`.'
+  example: production
+  flat_name: dataset.namespace
+  level: extended
+  name: namespace
+  normalize: []
+  short: Namespace of the dataset.
+  type: constant_keyword
+dataset.type:
+  dashed_name: dataset-type
+  description: 'Type of the dataset.
+
+    The type of the dataset can be `logs` or `metrics`. More types may be added in
+    the future, but no types other than the ones described here should be used.'
+  example: logs
+  flat_name: dataset.type
+  level: extended
+  name: type
+  normalize: []
+  short: Type of the dataset.
+  type: constant_keyword
 destination.address:
   dashed_name: destination-address
   description: 'Some event destination addresses are defined ambiguously. The event
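Because all three fields are mapped as `constant_keyword`, every document in a given backing index must carry the same value for each of them, so a shipper has to split a mixed batch of events by the `(type, name, namespace)` triple before indexing. A minimal grouping sketch follows; `Event` and `groupByStream` are hypothetical names used only for illustration.

```go
package main

import "fmt"

// Dataset holds the three constant_keyword fields.
type Dataset struct {
	Type, Name, Namespace string
}

// Event stands in for a shipped document; only the dataset fields
// matter for routing.
type Event struct {
	Dataset Dataset
	Message string
}

// groupByStream buckets events per (type, name, namespace) so each bucket
// can be bulk-indexed into its own data stream, as constant_keyword
// mappings require a single value per index.
func groupByStream(events []Event) map[Dataset][]Event {
	buckets := make(map[Dataset][]Event)
	for _, e := range events {
		buckets[e.Dataset] = append(buckets[e.Dataset], e)
	}
	return buckets
}

func main() {
	events := []Event{
		{Dataset: Dataset{"logs", "nginx.access", "production"}, Message: "GET /"},
		{Dataset: Dataset{"logs", "nginx.access", "testing"}, Message: "GET /health"},
		{Dataset: Dataset{"metrics", "system.cpu", "default"}, Message: "cpu sample"},
	}
	for ds, batch := range groupByStream(events) {
		fmt.Printf("%s-%s-%s: %d event(s)\n", ds.Type, ds.Name, ds.Namespace, len(batch))
	}
}
```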
diff --git a/generated/ecs/ecs_nested.yml b/generated/ecs/ecs_nested.yml
index 14cc581f24..fdafdeba96 100644
--- a/generated/ecs/ecs_nested.yml
+++ b/generated/ecs/ecs_nested.yml
@@ -923,6 +923,76 @@ container:
   short: Fields describing the container that generated this event.
   title: Container
   type: group
+dataset:
+  description: 'The dataset fields are part of the new [indexing strategy](https://github.com/elastic/kibana/blob/master/docs/ingest_manager/index.asciidoc#indexing-strategy-1).
+
+    These fields are used to determine into which index the data is shipped in Elasticsearch
+    and allow efficient querying of data. Initially these fields are mainly used by
+    data shipped by the Elastic Agent, but any time series data shipper should switch
+    to using data streams and the new indexing strategy with these fields.
+
+    All three fields are `constant_keyword` fields.'
+  fields:
+    name:
+      dashed_name: dataset-name
+      description: 'Dataset name describes the structure of the data.
+
+        All data shipped into a single dataset should have the same or very similar
+        data structure. For example `system.cpu` and `system.disk` are two different
+        datasets as they have very different fields.
+
+        The name of the dataset should be descriptive of the data and it is encouraged
+        to use `.` to combine multiple words. All characters which are allowed in
+        index names can be used for the dataset except `-`.
+
+        The default for dataset is `generic`.'
+      example: nginx.access
+      flat_name: dataset.name
+      level: extended
+      name: name
+      normalize: []
+      short: Dataset name describing the structure of the data.
+      type: constant_keyword
+    namespace:
+      dashed_name: dataset-namespace
+      description: 'Namespace of the dataset.
+
+        This is the namespace used in your index. The namespace is used to separate
+        the same structure into different Data Streams. For example, if nginx logs
+        are shipped for testing and production into the same cluster, two different
+        namespaces can be used. This allows, for example, assigning different ILM
+        policies.
+
+        The default value for a namespace is `default`.'
+      example: production
+      flat_name: dataset.namespace
+      level: extended
+      name: namespace
+      normalize: []
+      short: Namespace of the dataset.
+      type: constant_keyword
+    type:
+      dashed_name: dataset-type
+      description: 'Type of the dataset.
+
+        The type of the dataset can be `logs` or `metrics`. More types may be added
+        in the future, but no types other than the ones described here should be used.'
+      example: logs
+      flat_name: dataset.type
+      level: extended
+      name: type
+      normalize: []
+      short: Type of the dataset.
+      type: constant_keyword
+  footnote: 'Examples: The new indexing strategy is `{dataset.type}-{dataset.name}-{dataset.namespace}`.
+    As an example, nginx access logs are shipped into `logs-nginx.access-default`.'
+  group: 2
+  name: dataset
+  prefix: dataset.
+  short: Fields about the dataset of this document.
+  title: Dataset
+  type: group
 destination:
   description: 'Destination fields describe details about the destination of a packet/event.
diff --git a/generated/elasticsearch/6/template.json b/generated/elasticsearch/6/template.json
index d5f033b22e..dfd3dbb789 100644
--- a/generated/elasticsearch/6/template.json
+++ b/generated/elasticsearch/6/template.json
@@ -304,6 +304,19 @@
         }
       }
     },
+    "dataset": {
+      "properties": {
+        "name": {
+          "type": "constant_keyword"
+        },
+        "namespace": {
+          "type": "constant_keyword"
+        },
+        "type": {
+          "type": "constant_keyword"
+        }
+      }
+    },
     "destination": {
       "properties": {
         "address": {
diff --git a/generated/elasticsearch/7/template.json b/generated/elasticsearch/7/template.json
index f756237e21..064186bb72 100644
--- a/generated/elasticsearch/7/template.json
+++ b/generated/elasticsearch/7/template.json
@@ -303,6 +303,19 @@
         }
       }
     },
+    "dataset": {
+      "properties": {
+        "name": {
+          "type": "constant_keyword"
+        },
+        "namespace": {
+          "type": "constant_keyword"
+        },
+        "type": {
+          "type": "constant_keyword"
+        }
+      }
+    },
     "destination": {
       "properties": {
         "address": {
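Both templates map the new fields under a `dataset` object, so a conforming document nests them the same way. The struct below is a quick illustration of that document shape with JSON tags; it is not the generated `ecs.Dataset` type, which uses `ecs` struct tags instead.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// doc shows how the dataset fields nest in a document that matches the
// template mappings above; illustrative only.
type doc struct {
	Dataset struct {
		Type      string `json:"type"`
		Name      string `json:"name"`
		Namespace string `json:"namespace"`
	} `json:"dataset"`
	Message string `json:"message"`
}

func main() {
	var d doc
	d.Dataset.Type = "logs"
	d.Dataset.Name = "nginx.access"
	d.Dataset.Namespace = "default"
	d.Message = "GET / 200"

	// Prints the nested dataset object expected by the template mappings.
	out, _ := json.MarshalIndent(d, "", "  ")
	fmt.Println(string(out))
}
```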
diff --git a/schemas/dataset.yml b/schemas/dataset.yml
new file mode 100644
index 0000000000..bf1abe6bd9
--- /dev/null
+++ b/schemas/dataset.yml
@@ -0,0 +1,63 @@
+---
+- name: dataset
+  title: Dataset
+  group: 2
+  short: Fields about the dataset of this document.
+  description: >
+    The dataset fields are part of the new [indexing strategy](https://github.com/elastic/kibana/blob/master/docs/ingest_manager/index.asciidoc#indexing-strategy-1).
+
+    These fields are used to determine into which index the data is shipped in Elasticsearch and
+    allow efficient querying of data. Initially these fields are mainly used by data shipped by
+    the Elastic Agent, but any time series data shipper should switch to using data streams and
+    the new indexing strategy with these fields.
+
+    All three fields are `constant_keyword` fields.
+  footnote: >
+    Examples: The new indexing strategy is `{dataset.type}-{dataset.name}-{dataset.namespace}`.
+    As an example, nginx access logs are shipped into `logs-nginx.access-default`.
+  type: group
+  fields:
+    - name: type
+      level: extended
+      type: constant_keyword
+      short: Type of the dataset.
+      description: >
+        Type of the dataset.
+
+        The type of the dataset can be `logs` or `metrics`. More types may be added in the
+        future, but no types other than the ones described here should be used.
+
+      example: logs
+
+    - name: name
+      level: extended
+      type: constant_keyword
+      short: Dataset name describing the structure of the data.
+      description: >
+        Dataset name describes the structure of the data.
+
+        All data shipped into a single dataset should have the same or very similar data
+        structure. For example `system.cpu` and `system.disk` are two different datasets as
+        they have very different fields.
+
+        The name of the dataset should be descriptive of the data and it is encouraged to use `.`
+        to combine multiple words. All characters which are allowed in index names can be used
+        for the dataset except `-`.
+
+        The default for dataset is `generic`.
+      example: nginx.access
+
+    - name: namespace
+      level: extended
+      type: constant_keyword
+      short: Namespace of the dataset.
+      description: >
+        Namespace of the dataset.
+
+        This is the namespace used in your index. The namespace is used to separate
+        the same structure into different Data Streams. For example, if nginx logs
+        are shipped for testing and production into the same cluster, two different
+        namespaces can be used. This allows, for example, assigning different ILM policies.
+
+        The default value for a namespace is `default`.
+      example: production
diff --git a/scripts/cmd/gocodegen/gocodegen.go b/scripts/cmd/gocodegen/gocodegen.go
index c202691ce0..6802722efa 100644
--- a/scripts/cmd/gocodegen/gocodegen.go
+++ b/scripts/cmd/gocodegen/gocodegen.go
@@ -274,7 +274,7 @@ func goDataType(fieldName, elasticsearchDataType string) string {
 	}
 
 	switch elasticsearchDataType {
-	case "keyword", "text", "ip", "geo_point":
+	case "keyword", "constant_keyword", "text", "ip", "geo_point":
 		return "string"
 	case "long":
 		return "int64"
diff --git a/scripts/generators/es_template.py b/scripts/generators/es_template.py
index 6a04461008..9bbf510d5c 100644
--- a/scripts/generators/es_template.py
+++ b/scripts/generators/es_template.py
@@ -50,6 +50,9 @@ def entry_for(field):
     elif field['type'] == 'text':
         ecs_helpers.dict_copy_existing_keys(field, field_entry, ['norms'])
 
+    if field['type'] == 'constant_keyword':
+        ecs_helpers.dict_copy_existing_keys(field, field_entry, [])
+
     if 'multi_fields' in field:
         field_entry['fields'] = {}
         for mf in field['multi_fields']:
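With `constant_keyword` added to the string case of `goDataType`, the generator emits plain `string` fields for the three dataset fields. A condensed sketch of that mapping is below; `goType` is a simplified stand-in, not the actual generator function, and the default branch elides the other types it handles.

```go
package main

import "fmt"

// goType condenses the mapping performed by goDataType in gocodegen.go:
// constant_keyword now falls into the same bucket as keyword and text.
func goType(esType string) string {
	switch esType {
	case "keyword", "constant_keyword", "text", "ip", "geo_point":
		return "string"
	case "long":
		return "int64"
	default:
		return "interface{}" // other Elasticsearch types elided in this sketch
	}
}

func main() {
	fmt.Println(goType("constant_keyword")) // string
}
```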