diff --git a/CHANGELOG.next.md b/CHANGELOG.next.md index 161fe3144b..196058e144 100644 --- a/CHANGELOG.next.md +++ b/CHANGELOG.next.md @@ -35,6 +35,7 @@ Thanks, you're awesome :-) --> * ECS scripts now use Python 3.6+. #674 * schema_reader.py now reliably supports chaining reusable fieldsets together. #722 +* Add support for reusing fields in places other than the top level of the destination fieldset. #739 #### Deprecated diff --git a/docs/field-details.asciidoc b/docs/field-details.asciidoc index 6cdbbb963e..cd21b3ea4e 100644 --- a/docs/field-details.asciidoc +++ b/docs/field-details.asciidoc @@ -1967,7 +1967,7 @@ type: keyword ==== Field Reuse -The `hash` fields are expected to be nested at: `file.hash`, `process.hash`. +The `hash` fields are expected to be nested at: `file.hash`, `process.hash`, `process.parent.hash`. Note also that the `hash` fields are not expected to be used directly at the top level. diff --git a/generated/beats/fields.ecs.yml b/generated/beats/fields.ecs.yml index 98257e129d..c692103f80 100644 --- a/generated/beats/fields.ecs.yml +++ b/generated/beats/fields.ecs.yml @@ -2505,6 +2505,30 @@ start).' example: 137 default_field: false + - name: parent.hash.md5 + level: extended + type: keyword + ignore_above: 1024 + description: MD5 hash. + default_field: false + - name: parent.hash.sha1 + level: extended + type: keyword + ignore_above: 1024 + description: SHA1 hash. + default_field: false + - name: parent.hash.sha256 + level: extended + type: keyword + ignore_above: 1024 + description: SHA256 hash. + default_field: false + - name: parent.hash.sha512 + level: extended + type: keyword + ignore_above: 1024 + description: SHA512 hash. + default_field: false - name: parent.name level: extended type: keyword diff --git a/generated/csv/fields.csv b/generated/csv/fields.csv index 419079ffb6..7386d21b88 100644 --- a/generated/csv/fields.csv +++ b/generated/csv/fields.csv @@ -318,6 +318,10 @@ ECS_Version,Indexed,Field_Set,Field,Type,Level,Example,Description 1.5.0-dev,true,process,process.parent.executable,keyword,extended,/usr/bin/ssh,Absolute path to the process executable. 1.5.0-dev,true,process,process.parent.executable.text,text,extended,/usr/bin/ssh,Absolute path to the process executable. 1.5.0-dev,true,process,process.parent.exit_code,long,extended,137,The exit code of the process. +1.5.0-dev,true,process,process.parent.hash.md5,keyword,extended,,MD5 hash. +1.5.0-dev,true,process,process.parent.hash.sha1,keyword,extended,,SHA1 hash. +1.5.0-dev,true,process,process.parent.hash.sha256,keyword,extended,,SHA256 hash. +1.5.0-dev,true,process,process.parent.hash.sha512,keyword,extended,,SHA512 hash. 1.5.0-dev,true,process,process.parent.name,keyword,extended,ssh,Process name. 1.5.0-dev,true,process,process.parent.name.text,text,extended,ssh,Process name. 1.5.0-dev,true,process,process.parent.pgid,long,extended,,Identifier of the group of processes the process belongs to. diff --git a/generated/ecs/ecs_flat.yml b/generated/ecs/ecs_flat.yml index 712aa5e6c0..1fb6cbe035 100644 --- a/generated/ecs/ecs_flat.yml +++ b/generated/ecs/ecs_flat.yml @@ -4064,6 +4064,50 @@ process.parent.exit_code: order: 29 short: The exit code of the process. type: long +process.parent.hash.md5: + dashed_name: process-parent-hash-md5 + description: MD5 hash. + flat_name: process.parent.hash.md5 + ignore_above: 1024 + level: extended + name: md5 + order: 0 + original_fieldset: hash + short: MD5 hash. + type: keyword +process.parent.hash.sha1: + dashed_name: process-parent-hash-sha1 + description: SHA1 hash. + flat_name: process.parent.hash.sha1 + ignore_above: 1024 + level: extended + name: sha1 + order: 1 + original_fieldset: hash + short: SHA1 hash. + type: keyword +process.parent.hash.sha256: + dashed_name: process-parent-hash-sha256 + description: SHA256 hash. + flat_name: process.parent.hash.sha256 + ignore_above: 1024 + level: extended + name: sha256 + order: 2 + original_fieldset: hash + short: SHA256 hash. + type: keyword +process.parent.hash.sha512: + dashed_name: process-parent-hash-sha512 + description: SHA512 hash. + flat_name: process.parent.hash.sha512 + ignore_above: 1024 + level: extended + name: sha512 + order: 3 + original_fieldset: hash + short: SHA512 hash. + type: keyword process.parent.name: dashed_name: process-parent-name description: 'Process name. diff --git a/generated/ecs/ecs_nested.yml b/generated/ecs/ecs_nested.yml index 3403d3cb49..edfa26a4eb 100644 --- a/generated/ecs/ecs_nested.yml +++ b/generated/ecs/ecs_nested.yml @@ -2698,6 +2698,7 @@ hash: expected: - file - process + - process.parent top_level: false short: Hashes, usually file hashes. title: Hash @@ -4440,6 +4441,50 @@ process: order: 29 short: The exit code of the process. type: long + parent.hash.md5: + dashed_name: process-parent-hash-md5 + description: MD5 hash. + flat_name: process.parent.hash.md5 + ignore_above: 1024 + level: extended + name: md5 + order: 0 + original_fieldset: hash + short: MD5 hash. + type: keyword + parent.hash.sha1: + dashed_name: process-parent-hash-sha1 + description: SHA1 hash. + flat_name: process.parent.hash.sha1 + ignore_above: 1024 + level: extended + name: sha1 + order: 1 + original_fieldset: hash + short: SHA1 hash. + type: keyword + parent.hash.sha256: + dashed_name: process-parent-hash-sha256 + description: SHA256 hash. + flat_name: process.parent.hash.sha256 + ignore_above: 1024 + level: extended + name: sha256 + order: 2 + original_fieldset: hash + short: SHA256 hash. + type: keyword + parent.hash.sha512: + dashed_name: process-parent-hash-sha512 + description: SHA512 hash. + flat_name: process.parent.hash.sha512 + ignore_above: 1024 + level: extended + name: sha512 + order: 3 + original_fieldset: hash + short: SHA512 hash. + type: keyword parent.name: dashed_name: process-parent-name description: 'Process name. diff --git a/generated/elasticsearch/6/template.json b/generated/elasticsearch/6/template.json index 73913739e4..c89f9b6948 100644 --- a/generated/elasticsearch/6/template.json +++ b/generated/elasticsearch/6/template.json @@ -1520,6 +1520,26 @@ "exit_code": { "type": "long" }, + "hash": { + "properties": { + "md5": { + "ignore_above": 1024, + "type": "keyword" + }, + "sha1": { + "ignore_above": 1024, + "type": "keyword" + }, + "sha256": { + "ignore_above": 1024, + "type": "keyword" + }, + "sha512": { + "ignore_above": 1024, + "type": "keyword" + } + } + }, "name": { "fields": { "text": { diff --git a/generated/elasticsearch/7/template.json b/generated/elasticsearch/7/template.json index a509ae3619..70375f4c9a 100644 --- a/generated/elasticsearch/7/template.json +++ b/generated/elasticsearch/7/template.json @@ -1519,6 +1519,26 @@ "exit_code": { "type": "long" }, + "hash": { + "properties": { + "md5": { + "ignore_above": 1024, + "type": "keyword" + }, + "sha1": { + "ignore_above": 1024, + "type": "keyword" + }, + "sha256": { + "ignore_above": 1024, + "type": "keyword" + }, + "sha512": { + "ignore_above": 1024, + "type": "keyword" + } + } + }, "name": { "fields": { "text": { diff --git a/schemas/hash.yml b/schemas/hash.yml index 7a3f815613..1c6f21c131 100644 --- a/schemas/hash.yml +++ b/schemas/hash.yml @@ -16,6 +16,7 @@ expected: - file - process + - process.parent fields: diff --git a/scripts/schema_reader.py b/scripts/schema_reader.py index e97f42a0ee..7b6eb08cc1 100644 --- a/scripts/schema_reader.py +++ b/scripts/schema_reader.py @@ -128,12 +128,24 @@ def duplicate_reusable_fieldsets(schema, fields_nested): # which is in turn reusable in a few places. if 'reusable' in schema: for new_nesting in schema['reusable']['expected']: + split_flat_name = new_nesting.split('.') + top_level = split_flat_name[0] # List field set names expected under another field set. # E.g. host.nestings = [ 'geo', 'os', 'user' ] - nestings = fields_nested[new_nesting].setdefault('nestings', []) - nestings.append(schema['name']) + nestings = fields_nested[top_level].setdefault('nestings', []) + if schema['name'] not in nestings: + nestings.append(schema['name']) nestings.sort() - fields_nested[new_nesting]['fields'][schema['name']] = schema + nested_schema = fields_nested[top_level]['fields'] + for level in split_flat_name[1:]: + nested_schema = nested_schema.get(level, None) + if not nested_schema: + raise ValueError('Field {} in path {} not found in schema'.format(level, new_nesting)) + if nested_schema.get('reusable', None): + raise ValueError( + 'Reusable fields cannot be put inside other reusable fields except when the destination reusable is at the top level') + nested_schema = nested_schema.setdefault('fields', {}) + nested_schema[schema['name']] = schema # Main diff --git a/scripts/tests/test_schema_reader.py b/scripts/tests/test_schema_reader.py index c93b61c910..13464ac652 100644 --- a/scripts/tests/test_schema_reader.py +++ b/scripts/tests/test_schema_reader.py @@ -250,6 +250,114 @@ def test_cleanup_fields_recursive(self): } self.assertEqual(fields, expected) + def test_reusable_dot_notation(self): + fieldset = { + 'reusable_fieldset1': { + 'name': 'reusable_fieldset1', + 'reusable': { + 'top_level': False, + 'expected': [ + 'test_fieldset.sub_field' + ] + }, + 'fields': { + 'reusable_field': { + 'field_details': { + 'name': 'reusable_field', + 'type': 'keyword', + 'description': 'A test field' + } + } + } + }, + 'test_fieldset': { + 'name': 'test_fieldset', + 'fields': { + 'sub_field': { + 'fields': {} + } + } + } + } + expected = { + 'sub_field': { + 'fields': { + 'reusable_fieldset1': { + 'name': 'reusable_fieldset1', + 'reusable': { + 'top_level': False, + 'expected': [ + 'test_fieldset.sub_field' + ] + }, + 'fields': { + 'reusable_field': { + 'field_details': { + 'name': 'reusable_field', + 'type': 'keyword', + 'description': 'A test field' + } + } + } + } + } + } + } + schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset1'], fieldset) + self.assertEqual(fieldset['test_fieldset']['fields'], expected) + + def test_improper_reusable_fails(self): + fieldset = { + 'reusable_fieldset1': { + 'name': 'reusable_fieldset1', + 'reusable': { + 'top_level': False, + 'expected': [ + 'test_fieldset' + ] + }, + 'fields': { + 'reusable_field': { + 'field_details': { + 'name': 'reusable_field', + 'type': 'keyword', + 'description': 'A test field' + } + } + } + }, + 'reusable_fieldset2': { + 'name': 'reusable_fieldset2', + 'reusable': { + 'top_level': False, + 'expected': [ + 'test_fieldset.reusable_fieldset1' + ] + }, + 'fields': { + 'reusable_field': { + 'field_details': { + 'name': 'reusable_field', + 'type': 'keyword', + 'description': 'A test field' + } + } + } + }, + 'test_fieldset': { + 'name': 'test_fieldset', + 'fields': {} + } + } + # This should fail because test_fieldset.reusable_fieldset1 doesn't exist yet + with self.assertRaises(ValueError): + schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset2'], fieldset) + schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset1'], fieldset) + # Then this should fail because even though test_fieldset.reusable_fieldset1 now exists, test_fieldset.reusable_fieldset1 is not + # an allowed reusable location (it's the destination of another reusable) + with self.assertRaises(ValueError): + schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset2'], fieldset) + if __name__ == '__main__': unittest.main()