Add missing hash fields to process.parent (elastic#739)
This change also adds flexibility to the reuse mechanism, allowing a fieldset to be reused at a nested destination (e.g. `process.parent`) rather than only at the top level of the destination fieldset.
marshallmain authored and dcode committed Apr 15, 2020
1 parent 7700c88 commit df06404
Showing 11 changed files with 283 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.md
@@ -35,6 +35,7 @@ Thanks, you're awesome :-) -->

* ECS scripts now use Python 3.6+. #674
* schema_reader.py now reliably supports chaining reusable fieldsets together. #722
* Add support for reusing fields in places other than the top level of the destination fieldset. #739
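The new capability can be sketched with a schema snippet in the style of `schemas/hash.yml` (illustrative only; the authoritative definition is in this commit's `schemas/hash.yml` change):

```yaml
# Reusable fieldsets may now list dot-notation destinations in
# `reusable.expected`. Here `process.parent` nests the hash fields
# at process.parent.hash.*, one level below the fieldset root.
- name: hash
  reusable:
    top_level: false
    expected:
      - file
      - process
      - process.parent
```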

#### Deprecated

2 changes: 1 addition & 1 deletion docs/field-details.asciidoc
@@ -1967,7 +1967,7 @@ type: keyword

==== Field Reuse

- The `hash` fields are expected to be nested at: `file.hash`, `process.hash`.
+ The `hash` fields are expected to be nested at: `file.hash`, `process.hash`, `process.parent.hash`.

Note also that the `hash` fields are not expected to be used directly at the top level.

24 changes: 24 additions & 0 deletions generated/beats/fields.ecs.yml
@@ -2505,6 +2505,30 @@
start).'
example: 137
default_field: false
- name: parent.hash.md5
level: extended
type: keyword
ignore_above: 1024
description: MD5 hash.
default_field: false
- name: parent.hash.sha1
level: extended
type: keyword
ignore_above: 1024
description: SHA1 hash.
default_field: false
- name: parent.hash.sha256
level: extended
type: keyword
ignore_above: 1024
description: SHA256 hash.
default_field: false
- name: parent.hash.sha512
level: extended
type: keyword
ignore_above: 1024
description: SHA512 hash.
default_field: false
- name: parent.name
level: extended
type: keyword
4 changes: 4 additions & 0 deletions generated/csv/fields.csv
@@ -318,6 +318,10 @@ ECS_Version,Indexed,Field_Set,Field,Type,Level,Example,Description
1.5.0-dev,true,process,process.parent.executable,keyword,extended,/usr/bin/ssh,Absolute path to the process executable.
1.5.0-dev,true,process,process.parent.executable.text,text,extended,/usr/bin/ssh,Absolute path to the process executable.
1.5.0-dev,true,process,process.parent.exit_code,long,extended,137,The exit code of the process.
1.5.0-dev,true,process,process.parent.hash.md5,keyword,extended,,MD5 hash.
1.5.0-dev,true,process,process.parent.hash.sha1,keyword,extended,,SHA1 hash.
1.5.0-dev,true,process,process.parent.hash.sha256,keyword,extended,,SHA256 hash.
1.5.0-dev,true,process,process.parent.hash.sha512,keyword,extended,,SHA512 hash.
1.5.0-dev,true,process,process.parent.name,keyword,extended,ssh,Process name.
1.5.0-dev,true,process,process.parent.name.text,text,extended,ssh,Process name.
1.5.0-dev,true,process,process.parent.pgid,long,extended,,Identifier of the group of processes the process belongs to.
44 changes: 44 additions & 0 deletions generated/ecs/ecs_flat.yml
@@ -4064,6 +4064,50 @@ process.parent.exit_code:
order: 29
short: The exit code of the process.
type: long
process.parent.hash.md5:
dashed_name: process-parent-hash-md5
description: MD5 hash.
flat_name: process.parent.hash.md5
ignore_above: 1024
level: extended
name: md5
order: 0
original_fieldset: hash
short: MD5 hash.
type: keyword
process.parent.hash.sha1:
dashed_name: process-parent-hash-sha1
description: SHA1 hash.
flat_name: process.parent.hash.sha1
ignore_above: 1024
level: extended
name: sha1
order: 1
original_fieldset: hash
short: SHA1 hash.
type: keyword
process.parent.hash.sha256:
dashed_name: process-parent-hash-sha256
description: SHA256 hash.
flat_name: process.parent.hash.sha256
ignore_above: 1024
level: extended
name: sha256
order: 2
original_fieldset: hash
short: SHA256 hash.
type: keyword
process.parent.hash.sha512:
dashed_name: process-parent-hash-sha512
description: SHA512 hash.
flat_name: process.parent.hash.sha512
ignore_above: 1024
level: extended
name: sha512
order: 3
original_fieldset: hash
short: SHA512 hash.
type: keyword
process.parent.name:
dashed_name: process-parent-name
description: 'Process name.
45 changes: 45 additions & 0 deletions generated/ecs/ecs_nested.yml
@@ -2698,6 +2698,7 @@ hash:
expected:
- file
- process
- process.parent
top_level: false
short: Hashes, usually file hashes.
title: Hash
@@ -4440,6 +4441,50 @@ process:
order: 29
short: The exit code of the process.
type: long
parent.hash.md5:
dashed_name: process-parent-hash-md5
description: MD5 hash.
flat_name: process.parent.hash.md5
ignore_above: 1024
level: extended
name: md5
order: 0
original_fieldset: hash
short: MD5 hash.
type: keyword
parent.hash.sha1:
dashed_name: process-parent-hash-sha1
description: SHA1 hash.
flat_name: process.parent.hash.sha1
ignore_above: 1024
level: extended
name: sha1
order: 1
original_fieldset: hash
short: SHA1 hash.
type: keyword
parent.hash.sha256:
dashed_name: process-parent-hash-sha256
description: SHA256 hash.
flat_name: process.parent.hash.sha256
ignore_above: 1024
level: extended
name: sha256
order: 2
original_fieldset: hash
short: SHA256 hash.
type: keyword
parent.hash.sha512:
dashed_name: process-parent-hash-sha512
description: SHA512 hash.
flat_name: process.parent.hash.sha512
ignore_above: 1024
level: extended
name: sha512
order: 3
original_fieldset: hash
short: SHA512 hash.
type: keyword
parent.name:
dashed_name: process-parent-name
description: 'Process name.
20 changes: 20 additions & 0 deletions generated/elasticsearch/6/template.json
@@ -1520,6 +1520,26 @@
"exit_code": {
"type": "long"
},
"hash": {
"properties": {
"md5": {
"ignore_above": 1024,
"type": "keyword"
},
"sha1": {
"ignore_above": 1024,
"type": "keyword"
},
"sha256": {
"ignore_above": 1024,
"type": "keyword"
},
"sha512": {
"ignore_above": 1024,
"type": "keyword"
}
}
},
"name": {
"fields": {
"text": {
20 changes: 20 additions & 0 deletions generated/elasticsearch/7/template.json
@@ -1519,6 +1519,26 @@
"exit_code": {
"type": "long"
},
"hash": {
"properties": {
"md5": {
"ignore_above": 1024,
"type": "keyword"
},
"sha1": {
"ignore_above": 1024,
"type": "keyword"
},
"sha256": {
"ignore_above": 1024,
"type": "keyword"
},
"sha512": {
"ignore_above": 1024,
"type": "keyword"
}
}
},
"name": {
"fields": {
"text": {
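Once indexed with this template, the new fields behave like any other `keyword` field. A hypothetical search body (the hash value and usage are made up, not part of this commit) targeting `process.parent.hash.sha256`:

```python
import json

# Build an Elasticsearch term query on the newly mapped
# process.parent.hash.sha256 keyword field. The hash below is
# an arbitrary example value.
sha256 = '50d858e0985ecc7f60418aaf0cc5ab587f42c2570a884095a9e8ccacd0f6545c'
query = {
    'query': {
        'term': {
            'process.parent.hash.sha256': sha256
        }
    }
}
print(json.dumps(query, indent=2))
```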
1 change: 1 addition & 0 deletions schemas/hash.yml
@@ -16,6 +16,7 @@
expected:
- file
- process
- process.parent

fields:

18 changes: 15 additions & 3 deletions scripts/schema_reader.py
@@ -128,12 +128,24 @@ def duplicate_reusable_fieldsets(schema, fields_nested):
     # which is in turn reusable in a few places.
     if 'reusable' in schema:
         for new_nesting in schema['reusable']['expected']:
+            split_flat_name = new_nesting.split('.')
+            top_level = split_flat_name[0]
             # List field set names expected under another field set.
             # E.g. host.nestings = [ 'geo', 'os', 'user' ]
-            nestings = fields_nested[new_nesting].setdefault('nestings', [])
-            nestings.append(schema['name'])
+            nestings = fields_nested[top_level].setdefault('nestings', [])
+            if schema['name'] not in nestings:
+                nestings.append(schema['name'])
             nestings.sort()
-            fields_nested[new_nesting]['fields'][schema['name']] = schema
+            nested_schema = fields_nested[top_level]['fields']
+            for level in split_flat_name[1:]:
+                nested_schema = nested_schema.get(level, None)
+                if not nested_schema:
+                    raise ValueError('Field {} in path {} not found in schema'.format(level, new_nesting))
+                if nested_schema.get('reusable', None):
+                    raise ValueError(
+                        'Reusable fields cannot be put inside other reusable fields except when the destination reusable is at the top level')
+                nested_schema = nested_schema.setdefault('fields', {})
+            nested_schema[schema['name']] = schema
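The path-walking logic can be exercised in isolation. A minimal standalone sketch of the same algorithm; the helper name `nest_reusable_at` and the sample fieldsets are hypothetical, the real implementation being `duplicate_reusable_fieldsets` in `scripts/schema_reader.py`:

```python
def nest_reusable_at(fields_nested, schema, destination):
    """Attach `schema` under the dot-notation `destination`.

    Walks each path segment after the top-level fieldset, refusing to
    nest inside a field that is itself a reusable destination.
    """
    parts = destination.split('.')
    top_level = parts[0]
    nested = fields_nested[top_level]['fields']
    for level in parts[1:]:
        nested = nested.get(level)
        if not nested:
            raise ValueError('Field {} in path {} not found'.format(level, destination))
        if nested.get('reusable'):
            raise ValueError('Cannot reuse inside another reusable destination')
        nested = nested.setdefault('fields', {})
    nested[schema['name']] = schema


# Hypothetical minimal schema tree: a `process` fieldset containing `parent`.
fields_nested = {
    'process': {
        'name': 'process',
        'fields': {'parent': {'fields': {}}},
    }
}
hash_schema = {'name': 'hash', 'fields': {'md5': {}}}
nest_reusable_at(fields_nested, hash_schema, 'process.parent')
# The hash fieldset is now nested at process.parent.hash
assert fields_nested['process']['fields']['parent']['fields']['hash'] is hash_schema
```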

# Main

108 changes: 108 additions & 0 deletions scripts/tests/test_schema_reader.py
@@ -250,6 +250,114 @@ def test_cleanup_fields_recursive(self):
}
self.assertEqual(fields, expected)

def test_reusable_dot_notation(self):
fieldset = {
'reusable_fieldset1': {
'name': 'reusable_fieldset1',
'reusable': {
'top_level': False,
'expected': [
'test_fieldset.sub_field'
]
},
'fields': {
'reusable_field': {
'field_details': {
'name': 'reusable_field',
'type': 'keyword',
'description': 'A test field'
}
}
}
},
'test_fieldset': {
'name': 'test_fieldset',
'fields': {
'sub_field': {
'fields': {}
}
}
}
}
expected = {
'sub_field': {
'fields': {
'reusable_fieldset1': {
'name': 'reusable_fieldset1',
'reusable': {
'top_level': False,
'expected': [
'test_fieldset.sub_field'
]
},
'fields': {
'reusable_field': {
'field_details': {
'name': 'reusable_field',
'type': 'keyword',
'description': 'A test field'
}
}
}
}
}
}
}
schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset1'], fieldset)
self.assertEqual(fieldset['test_fieldset']['fields'], expected)

def test_improper_reusable_fails(self):
fieldset = {
'reusable_fieldset1': {
'name': 'reusable_fieldset1',
'reusable': {
'top_level': False,
'expected': [
'test_fieldset'
]
},
'fields': {
'reusable_field': {
'field_details': {
'name': 'reusable_field',
'type': 'keyword',
'description': 'A test field'
}
}
}
},
'reusable_fieldset2': {
'name': 'reusable_fieldset2',
'reusable': {
'top_level': False,
'expected': [
'test_fieldset.reusable_fieldset1'
]
},
'fields': {
'reusable_field': {
'field_details': {
'name': 'reusable_field',
'type': 'keyword',
'description': 'A test field'
}
}
}
},
'test_fieldset': {
'name': 'test_fieldset',
'fields': {}
}
}
# This should fail because test_fieldset.reusable_fieldset1 doesn't exist yet
with self.assertRaises(ValueError):
schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset2'], fieldset)
schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset1'], fieldset)
# Then this should fail because even though test_fieldset.reusable_fieldset1 now exists, test_fieldset.reusable_fieldset1 is not
# an allowed reusable location (it's the destination of another reusable)
with self.assertRaises(ValueError):
schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset2'], fieldset)


if __name__ == '__main__':
unittest.main()
