[Meta] Add ECS Dataset fields #845

Closed · wants to merge 5 commits
56 changes: 56 additions & 0 deletions code/go/ecs/dataset.go


70 changes: 70 additions & 0 deletions docs/field-details.asciidoc
@@ -807,6 +807,76 @@ example: `docker`

|=====

[[ecs-dataset]]
=== Dataset Fields

The dataset fields are part of the new [indexing strategy](https://github.com/elastic/kibana/blob/master/docs/ingest_manager/index.asciidoc#indexing-strategy-1).
Review comment (Member): This link is broken.

Review comment (Contributor, author): Will update it, but I also need to find a more stable place. I wonder if I should link to a specific version instead.

Review comment (Contributor): Perhaps we could have this documentation page live right in ECS? I think this would be a good place to describe the new indexing strategy. In addition, it could also address using `keyword` instead of `constant_keyword` when users want to adopt these fields in indices that contain more than one data source.


These fields determine into which index the data is shipped in Elasticsearch and allow efficient querying of data. Initially these fields are mainly used by data shipped via the Elastic Agent, but any time series data shipper should switch to using data streams and the new indexing strategy with these fields.

All three fields are `constant_keyword` fields.
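Under this strategy, the three fields combine into the backing data stream name. The sketch below is illustrative only (the helper name, defaults, and checks are not part of ECS or this PR); it shows how `{dataset.type}-{dataset.name}-{dataset.namespace}` composes, using the defaults named in the field details.

```python
# Illustrative sketch: compose a data stream name from the three dataset
# fields. Defaults follow the documented ones ("generic", "default");
# the function itself is hypothetical, not an ECS utility.
def data_stream_name(type_="logs", name="generic", namespace="default"):
    """Compose the backing data stream name for an event's dataset fields."""
    if type_ not in ("logs", "metrics"):
        raise ValueError("dataset.type must be 'logs' or 'metrics'")
    if "-" in name:
        raise ValueError("dataset.name must not contain '-'")
    return f"{type_}-{name}-{namespace}"

print(data_stream_name("logs", "nginx.access"))  # logs-nginx.access-default
```

For example, nginx access logs shipped with the default namespace land in `logs-nginx.access-default`.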

==== Dataset Field Details

[options="header"]
|=====
| Field | Description | Level

// ===============================================================

| dataset.name
| Dataset name describes the structure of the data.

The dataset name describes the structure of the data. All data shipped into a single dataset should have the same or very similar data structure. For example `system.cpu` and `system.disk` are two different datasets as they have very different fields.

The name of the dataset should be descriptive of the data, and it is encouraged to use `.` to combine multiple words. All characters which are allowed in index names can be used for the dataset except `-`.

The default for dataset is `generic`.

type: constant_keyword



example: `nginx.access`

| extended

// ===============================================================

| dataset.namespace
| Namespace of the dataset.

This is the namespace used in your index. The namespace is used to separate data with the same structure into different Data Streams. For example, if nginx logs are shipped for both testing and production into the same cluster, two different namespaces can be used. This allows, for example, assigning different ILM policies.

The default value for a namespace is `default`.

type: constant_keyword



example: `production`

| extended

// ===============================================================

| dataset.type
| Type of the dataset.

The type of the dataset can be `logs` or `metrics`. More types can be added in the future, but no types other than the ones described here should be used.

type: constant_keyword



example: `logs`

| extended

// ===============================================================

|=====
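The naming rule in the table above ("all characters allowed in index names except `-`") can be sketched as a validator. This is a hypothetical, simplified check, not an official ECS utility: it combines the dataset rule with a subset of Elasticsearch's documented index-name restrictions (lowercase only, no `\`, `/`, `*`, `?`, `"`, `<`, `>`, `|`, space, comma, `#`, or `:`).

```python
# Hypothetical validator for dataset.name. Simplified: real Elasticsearch
# index names have further rules (length limits, no leading '_', etc.).
_FORBIDDEN = set('\\/*?"<>| ,#:-')  # '-' added per the dataset rule

def is_valid_dataset_name(name: str) -> bool:
    if not name or name != name.lower():
        return False
    return not any(c in _FORBIDDEN for c in name)

print(is_valid_dataset_name("nginx.access"))  # True
print(is_valid_dataset_name("nginx-access"))  # False: '-' is the separator
```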

[[ecs-destination]]
=== Destination Fields

2 changes: 2 additions & 0 deletions docs/fields.asciidoc
@@ -32,6 +32,8 @@ all fields are defined.

| <<ecs-container,Container>> | Fields describing the container that generated this event.

| <<ecs-dataset,Dataset>> | Fields about the dataset of this document.

| <<ecs-destination,Destination>> | Fields about the destination side of a network connection, used with source.

| <<ecs-dll,DLL>> | These fields contain information about code libraries dynamically loaded into processes.
55 changes: 55 additions & 0 deletions generated/beats/fields.ecs.yml
@@ -552,6 +552,61 @@
ignore_above: 1024
description: Runtime managing this container.
example: docker
- name: dataset
title: Dataset
group: 2
description: 'The dataset fields are part of the new [indexing strategy](https://github.com/elastic/kibana/blob/master/docs/ingest_manager/index.asciidoc#indexing-strategy-1).

These fields determine into which index the data is shipped in Elasticsearch
and allow efficient querying of data. Initially these fields are mainly used
by data shipped via the Elastic Agent, but any time series data shipper should
switch to using data streams and the new indexing strategy with these fields.

All three fields are `constant_keyword` fields.'
footnote: 'Examples: The new indexing strategy is `{dataset.type}-{dataset.name}-{dataset.namespace}`.
As an example, nginx access logs are shipped into `logs-nginx.access-default`.'
type: group
fields:
- name: name
level: extended
type: constant_keyword
description: 'Dataset name describes the structure of the data.

The dataset name describes the structure of the data. All data shipped into
a single dataset should have the same or very similar data structure. For
example `system.cpu` and `system.disk` are two different datasets as they
have very different fields.

The name of the dataset should be descriptive of the data, and it is encouraged
to use `.` to combine multiple words. All characters which are allowed in
index names can be used for the dataset except `-`.

The default for dataset is `generic`.'
example: nginx.access
default_field: false
- name: namespace
level: extended
type: constant_keyword
description: 'Namespace of the dataset.

This is the namespace used in your index. The namespace is used to separate
data with the same structure into different Data Streams. For example, if nginx
logs are shipped for both testing and production into the same cluster, two
different namespaces can be used. This allows, for example, assigning different
ILM policies.

The default value for a namespace is `default`.'
example: production
default_field: false
- name: type
level: extended
type: constant_keyword
description: 'Type of the dataset.

The type of the dataset can be `logs` or `metrics`. More types can be added
in the future, but no types other than the ones described here should be used.'
example: logs
default_field: false
- name: destination
title: Destination
group: 2
3 changes: 3 additions & 0 deletions generated/csv/fields.csv
@@ -58,6 +58,9 @@ ECS_Version,Indexed,Field_Set,Field,Type,Level,Normalization,Example,Description
1.6.0-dev,true,container,container.labels,object,extended,,,Image labels.
1.6.0-dev,true,container,container.name,keyword,extended,,,Container name.
1.6.0-dev,true,container,container.runtime,keyword,extended,,docker,Runtime managing this container.
1.6.0-dev,true,dataset,dataset.name,constant_keyword,extended,,nginx.access,Dataset name describing the structure of the data.
1.6.0-dev,true,dataset,dataset.namespace,constant_keyword,extended,,production,Namespace of the dataset.
1.6.0-dev,true,dataset,dataset.type,constant_keyword,extended,,logs,Type of the dataset.
1.6.0-dev,true,destination,destination.address,keyword,extended,,,Destination network address.
1.6.0-dev,true,destination,destination.as.number,long,extended,,15169,Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet.
1.6.0-dev,true,destination,destination.as.organization.name,keyword,extended,,Google LLC,Organization name.
51 changes: 51 additions & 0 deletions generated/ecs/ecs_flat.yml
@@ -667,6 +667,57 @@ container.runtime:
normalize: []
short: Runtime managing this container.
type: keyword
dataset.name:
dashed_name: dataset-name
description: 'Dataset name describes the structure of the data.

The dataset name describes the structure of the data. All data shipped into a
single dataset should have the same or very similar data structure. For example
`system.cpu` and `system.disk` are two different datasets as they have very different
fields.

The name of the dataset should be descriptive of the data, and it is encouraged
to use `.` to combine multiple words. All characters which are allowed in index
names can be used for the dataset except `-`.

The default for dataset is `generic`.'
example: nginx.access
flat_name: dataset.name
level: extended
name: name
normalize: []
short: Dataset name describing the structure of the data.
type: constant_keyword
dataset.namespace:
dashed_name: dataset-namespace
description: 'Namespace of the dataset.

This is the namespace used in your index. The namespace is used to separate data
with the same structure into different Data Streams. For example, if nginx logs are
shipped for both testing and production into the same cluster, two different
namespaces can be used. This allows, for example, assigning different ILM policies.

The default value for a namespace is `default`.'
example: production
flat_name: dataset.namespace
level: extended
name: namespace
normalize: []
short: Namespace of the dataset.
type: constant_keyword
dataset.type:
dashed_name: dataset-type
description: 'Type of the dataset.

The type of the dataset can be `logs` or `metrics`. More types can be added in
the future, but no types other than the ones described here should be used.'
example: logs
flat_name: dataset.type
level: extended
name: type
normalize: []
short: Type of the dataset.
type: constant_keyword
destination.address:
dashed_name: destination-address
description: 'Some event destination addresses are defined ambiguously. The event
70 changes: 70 additions & 0 deletions generated/ecs/ecs_nested.yml
@@ -923,6 +923,76 @@ container:
short: Fields describing the container that generated this event.
title: Container
type: group
dataset:
description: 'The dataset fields are part of the new [indexing strategy](https://github.com/elastic/kibana/blob/master/docs/ingest_manager/index.asciidoc#indexing-strategy-1).

These fields determine into which index the data is shipped in Elasticsearch
and allow efficient querying of data. Initially these fields are mainly used by
data shipped via the Elastic Agent, but any time series data shipper should switch
to using data streams and the new indexing strategy with these fields.

All three fields are `constant_keyword` fields.'
fields:
name:
dashed_name: dataset-name
description: 'Dataset name describes the structure of the data.

The dataset name describes the structure of the data. All data shipped into
a single dataset should have the same or very similar data structure. For
example `system.cpu` and `system.disk` are two different datasets as they
have very different fields.

The name of the dataset should be descriptive of the data, and it is encouraged
to use `.` to combine multiple words. All characters which are allowed in
index names can be used for the dataset except `-`.

The default for dataset is `generic`.'
example: nginx.access
flat_name: dataset.name
level: extended
name: name
normalize: []
short: Dataset name describing the structure of the data.
type: constant_keyword
namespace:
dashed_name: dataset-namespace
description: 'Namespace of the dataset.

This is the namespace used in your index. The namespace is used to separate
data with the same structure into different Data Streams. For example, if nginx
logs are shipped for both testing and production into the same cluster, two
different namespaces can be used. This allows, for example, assigning different
ILM policies.

The default value for a namespace is `default`.'
example: production
flat_name: dataset.namespace
level: extended
name: namespace
normalize: []
short: Namespace of the dataset.
type: constant_keyword
type:
dashed_name: dataset-type
description: 'Type of the dataset.

The type of the dataset can be `logs` or `metrics`. More types can be added
in the future, but no types other than the ones described here should be used.'
example: logs
flat_name: dataset.type
level: extended
name: type
normalize: []
short: Type of the dataset.
type: constant_keyword
footnote: 'Examples: The new indexing strategy is `{dataset.type}-{dataset.name}-{dataset.namespace}`.
As an example, nginx access logs are shipped into `logs-nginx.access-default`.'
group: 2
name: dataset
prefix: dataset.
short: Fields about the dataset of this document.
title: Dataset
type: group
destination:
description: 'Destination fields describe details about the destination of a packet/event.

13 changes: 13 additions & 0 deletions generated/elasticsearch/6/template.json
@@ -304,6 +304,13 @@
}
}
},
"dataset": {
"properties": {
"name": {
"type": "constant_keyword"
},
"namespace": {
"type": "constant_keyword"
},
"type": {
"type": "constant_keyword"
}
}
},
"destination": {
"properties": {
"address": {
13 changes: 13 additions & 0 deletions generated/elasticsearch/7/template.json
@@ -303,6 +303,13 @@
}
}
},
"dataset": {
"properties": {
"name": {
"type": "constant_keyword"
},
"namespace": {
"type": "constant_keyword"
},
"type": {
"type": "constant_keyword"
}
}
},
"destination": {
"properties": {
"address": {
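The dataset mapping fragment added to both template versions (Elasticsearch 6 and 7) is small enough to sanity-check by parsing it. The snippet below is a hypothetical check, not part of the PR; the fragment is inlined here for illustration, while in the repo it lives in `generated/elasticsearch/{6,7}/template.json`.

```python
import json

# Parse the dataset mapping fragment and confirm every subfield
# is mapped as constant_keyword, as the field details require.
fragment = json.loads("""
{
  "dataset": {
    "properties": {
      "name": {"type": "constant_keyword"},
      "namespace": {"type": "constant_keyword"},
      "type": {"type": "constant_keyword"}
    }
  }
}
""")

types = {field: m["type"] for field, m in fragment["dataset"]["properties"].items()}
assert set(types) == {"name", "namespace", "type"}
assert all(t == "constant_keyword" for t in types.values())
```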