From 6ee7433f0ce0fbb384f72b039ea264afa383f3b2 Mon Sep 17 00:00:00 2001 From: Marshall Main Date: Wed, 25 Mar 2020 16:19:39 -0400 Subject: [PATCH 1/7] handle nestings better and refactor asciidoc generation --- generated/ecs/ecs_flat.yml | 44 ------------------ generated/ecs/ecs_nested.yml | 67 ++++++++++++++------------- scripts/generator.py | 2 +- scripts/generators/asciidoc_fields.py | 63 +++++++++++++------------ scripts/generators/ecs_helpers.py | 8 ++++ scripts/schema_reader.py | 19 ++++---- scripts/tests/test_ecs_helpers.py | 32 +++++++++++++ scripts/tests/test_schema_reader.py | 40 +--------------- 8 files changed, 120 insertions(+), 155 deletions(-) diff --git a/generated/ecs/ecs_flat.yml b/generated/ecs/ecs_flat.yml index c208c747ca..bfaf747f41 100644 --- a/generated/ecs/ecs_flat.yml +++ b/generated/ecs/ecs_flat.yml @@ -102,7 +102,6 @@ as.number: name: number normalize: [] order: 0 - original_fieldset: as short: Unique number allocated to the autonomous system. The autonomous system number (ASN) uniquely identifies each network on the Internet. type: long @@ -121,7 +120,6 @@ as.organization.name: name: organization.name normalize: [] order: 1 - original_fieldset: as short: Organization name. type: keyword client.address: @@ -636,7 +634,6 @@ code_signature.exists: name: exists normalize: [] order: 0 - original_fieldset: code_signature short: Boolean to capture if a signature is present. type: boolean code_signature.status: @@ -653,7 +650,6 @@ code_signature.status: name: status normalize: [] order: 4 - original_fieldset: code_signature short: Additional information about the certificate status. type: keyword code_signature.subject_name: @@ -666,7 +662,6 @@ code_signature.subject_name: name: subject_name normalize: [] order: 1 - original_fieldset: code_signature short: Subject name of the code signer type: keyword code_signature.trusted: @@ -681,7 +676,6 @@ code_signature.trusted: name: trusted normalize: [] order: 3 - original_fieldset: code_signature short: Stores the trust status of the certificate chain. type: boolean code_signature.valid: @@ -696,7 +690,6 @@ code_signature.valid: name: valid normalize: [] order: 2 - original_fieldset: code_signature short: Boolean to capture if the digital signature is verified against the binary content. type: boolean @@ -3021,7 +3014,6 @@ geo.city_name: name: city_name normalize: [] order: 4 - original_fieldset: geo short: City name. type: keyword geo.continent_name: @@ -3034,7 +3026,6 @@ geo.continent_name: name: continent_name normalize: [] order: 1 - original_fieldset: geo short: Name of the continent. type: keyword geo.country_iso_code: @@ -3047,7 +3038,6 @@ geo.country_iso_code: name: country_iso_code normalize: [] order: 5 - original_fieldset: geo short: Country ISO code. type: keyword geo.country_name: @@ -3060,7 +3050,6 @@ geo.country_name: name: country_name normalize: [] order: 2 - original_fieldset: geo short: Country name. type: keyword geo.location: @@ -3072,7 +3061,6 @@ geo.location: name: location normalize: [] order: 0 - original_fieldset: geo short: Longitude and latitude. type: geo_point geo.name: @@ -3091,7 +3079,6 @@ geo.name: name: name normalize: [] order: 7 - original_fieldset: geo short: User-defined description of a location. type: keyword geo.region_iso_code: @@ -3104,7 +3091,6 @@ geo.region_iso_code: name: region_iso_code normalize: [] order: 6 - original_fieldset: geo short: Region ISO code. type: keyword geo.region_name: @@ -3117,7 +3103,6 @@ geo.region_name: name: region_name normalize: [] order: 3 - original_fieldset: geo short: Region name. type: keyword group.domain: @@ -3131,7 +3116,6 @@ group.domain: name: domain normalize: [] order: 2 - original_fieldset: group short: Name of the directory the group is a member of. type: keyword group.id: @@ -3143,7 +3127,6 @@ group.id: name: id normalize: [] order: 0 - original_fieldset: group short: Unique identifier for the group on the system/platform. type: keyword group.name: @@ -3155,7 +3138,6 @@ group.name: name: name normalize: [] order: 1 - original_fieldset: group short: Name of the group. type: keyword hash.md5: @@ -3167,7 +3149,6 @@ hash.md5: name: md5 normalize: [] order: 0 - original_fieldset: hash short: MD5 hash. type: keyword hash.sha1: @@ -3179,7 +3160,6 @@ hash.sha1: name: sha1 normalize: [] order: 1 - original_fieldset: hash short: SHA1 hash. type: keyword hash.sha256: @@ -3191,7 +3171,6 @@ hash.sha256: name: sha256 normalize: [] order: 2 - original_fieldset: hash short: SHA256 hash. type: keyword hash.sha512: @@ -3203,7 +3182,6 @@ hash.sha512: name: sha512 normalize: [] order: 3 - original_fieldset: hash short: SHA512 hash. type: keyword host.architecture: @@ -3792,7 +3770,6 @@ interface.alias: name: alias normalize: [] order: 2 - original_fieldset: interface short: Interface alias type: keyword interface.id: @@ -3805,7 +3782,6 @@ interface.id: name: id normalize: [] order: 0 - original_fieldset: interface short: Interface ID type: keyword interface.name: @@ -3818,7 +3794,6 @@ interface.name: name: name normalize: [] order: 1 - original_fieldset: interface short: Interface name type: keyword labels: @@ -4797,7 +4772,6 @@ os.family: name: family normalize: [] order: 3 - original_fieldset: os short: OS family (such as redhat, debian, freebsd, windows). type: keyword os.full: @@ -4815,7 +4789,6 @@ os.full: name: full normalize: [] order: 2 - original_fieldset: os short: Operating system name, including the version or code name. type: keyword os.kernel: @@ -4828,7 +4801,6 @@ os.kernel: name: kernel normalize: [] order: 5 - original_fieldset: os short: Operating system kernel version as a raw string. type: keyword os.name: @@ -4846,7 +4818,6 @@ os.name: name: name normalize: [] order: 1 - original_fieldset: os short: Operating system name, without the version. type: keyword os.platform: @@ -4859,7 +4830,6 @@ os.platform: name: platform normalize: [] order: 0 - original_fieldset: os short: Operating system platform (such centos, ubuntu, windows). type: keyword os.version: @@ -4872,7 +4842,6 @@ os.version: name: version normalize: [] order: 4 - original_fieldset: os short: Operating system version as a raw string. type: keyword package.architecture: @@ -5047,7 +5016,6 @@ pe.company: name: company normalize: [] order: 4 - original_fieldset: pe short: Internal company name of the file, provided at compile-time. type: keyword pe.description: @@ -5060,7 +5028,6 @@ pe.description: name: description normalize: [] order: 2 - original_fieldset: pe short: Internal description of the file, provided at compile-time. type: keyword pe.file_version: @@ -5073,7 +5040,6 @@ pe.file_version: name: file_version normalize: [] order: 1 - original_fieldset: pe short: Process name. type: keyword pe.original_file_name: @@ -5086,7 +5052,6 @@ pe.original_file_name: name: original_file_name normalize: [] order: 0 - original_fieldset: pe short: Internal name of the file, provided at compile-time. type: keyword pe.product: @@ -5099,7 +5064,6 @@ pe.product: name: product normalize: [] order: 3 - original_fieldset: pe short: Internal product name of the file, provided at compile-time. type: keyword process.args: @@ -7963,7 +7927,6 @@ user.domain: name: domain normalize: [] order: 5 - original_fieldset: user short: Name of the directory the user is a member of. type: keyword user.email: @@ -7975,7 +7938,6 @@ user.email: name: email normalize: [] order: 3 - original_fieldset: user short: User email address. type: keyword user.full_name: @@ -7993,7 +7955,6 @@ user.full_name: name: full_name normalize: [] order: 2 - original_fieldset: user short: User's full name, if available. type: keyword user.group.domain: @@ -8047,7 +8008,6 @@ user.hash: name: hash normalize: [] order: 4 - original_fieldset: user short: Unique user hash to correlate information for a user in anonymized form. type: keyword user.id: @@ -8059,7 +8019,6 @@ user.id: name: id normalize: [] order: 0 - original_fieldset: user short: Unique identifiers of the user. type: keyword user.name: @@ -8077,7 +8036,6 @@ user.name: name: name normalize: [] order: 1 - original_fieldset: user short: Short name or login of the user. type: keyword user_agent.device.name: @@ -8232,7 +8190,6 @@ vlan.id: name: id normalize: [] order: 0 - original_fieldset: vlan short: VLAN ID as reported by the observer. type: keyword vlan.name: @@ -8245,7 +8202,6 @@ vlan.name: name: name normalize: [] order: 1 - original_fieldset: vlan short: Optional VLAN name as reported by the observer. type: keyword vulnerability.category: diff --git a/generated/ecs/ecs_nested.yml b/generated/ecs/ecs_nested.yml index 4a265b9433..c3dbb910ff 100644 --- a/generated/ecs/ecs_nested.yml +++ b/generated/ecs/ecs_nested.yml @@ -655,9 +655,9 @@ client: group: 2 name: client nestings: - - as - - geo - - user + - client.as + - client.geo + - client.user prefix: client. short: Fields about the client side of a network connection, used with server. title: Client @@ -1351,9 +1351,9 @@ destination: group: 2 name: destination nestings: - - as - - geo - - user + - destination.as + - destination.geo + - destination.user prefix: destination. short: Fields about the destination side of a network connection, used with source. title: Destination @@ -1587,9 +1587,9 @@ dll: group: 2 name: dll nestings: - - code_signature - - hash - - pe + - dll.code_signature + - dll.hash + - dll.pe prefix: dll. short: These fields contain information about code libraries dynamically loaded into processes. @@ -3296,9 +3296,9 @@ file: group: 2 name: file nestings: - - code_signature - - hash - - pe + - file.code_signature + - file.hash + - file.pe prefix: file. short: Fields describing files. title: File @@ -3994,9 +3994,9 @@ host: group: 2 name: host nestings: - - geo - - os - - user + - host.geo + - host.os + - host.user prefix: host. short: Fields describing the relevant computing instance. title: Host @@ -4637,7 +4637,8 @@ network: group: 2 name: network nestings: - - vlan + - network.inner.vlan + - network.vlan prefix: network. short: Fields describing the communication path over which the event happened. title: Network @@ -5158,10 +5159,12 @@ observer: group: 2 name: observer nestings: - - geo - - interface - - os - - vlan + - observer.egress.interface + - observer.egress.vlan + - observer.geo + - observer.ingress.interface + - observer.ingress.vlan + - observer.os prefix: observer. short: Fields describing an entity observing the event from outside the host. title: Observer @@ -6354,9 +6357,11 @@ process: group: 2 name: process nestings: - - code_signature - - hash - - pe + - process.code_signature + - process.hash + - process.parent.code_signature + - process.parent.hash + - process.pe prefix: process. short: These fields contain information about a process. title: Process @@ -7207,9 +7212,9 @@ server: group: 2 name: server nestings: - - as - - geo - - user + - server.as + - server.geo + - server.user prefix: server. short: Fields about the server side of a network connection, used with client. title: Server @@ -7770,9 +7775,9 @@ source: group: 2 name: source nestings: - - as - - geo - - user + - source.as + - source.geo + - source.user prefix: source. short: Fields about the source side of a network connection, used with destination. title: Source @@ -8713,7 +8718,7 @@ user: group: 2 name: user nestings: - - group + - user.group prefix: user. reusable: expected: @@ -8876,7 +8881,7 @@ user_agent: group: 2 name: user_agent nestings: - - os + - user_agent.os prefix: user_agent. short: Fields to describe a browser user_agent string. title: User agent diff --git a/scripts/generator.py b/scripts/generator.py index b232197d38..2cafd9dc22 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -58,7 +58,7 @@ def main(): csv_generator.generate(flat, ecs_version, out_dir) es_template.generate(flat, ecs_version, out_dir) beats.generate(nested, ecs_version, out_dir) - asciidoc_fields.generate(nested, flat, ecs_version, docs_dir) + asciidoc_fields.generate(intermediate_fields, ecs_version, docs_dir) def argument_parser(): diff --git a/scripts/generators/asciidoc_fields.py b/scripts/generators/asciidoc_fields.py index df5931bce8..de82d1a7ef 100644 --- a/scripts/generators/asciidoc_fields.py +++ b/scripts/generators/asciidoc_fields.py @@ -2,10 +2,10 @@ from generators import ecs_helpers -def generate(ecs_nested, ecs_flat, ecs_version, out_dir): - save_asciidoc(join(out_dir, 'fields.asciidoc'), page_field_index(ecs_nested, ecs_version)) - save_asciidoc(join(out_dir, 'field-details.asciidoc'), page_field_details(ecs_nested)) - save_asciidoc(join(out_dir, 'field-values.asciidoc'), page_field_values(ecs_flat)) +def generate(intermediate_nested, ecs_version, out_dir): + save_asciidoc(join(out_dir, 'fields.asciidoc'), page_field_index(intermediate_nested, ecs_version)) + save_asciidoc(join(out_dir, 'field-details.asciidoc'), page_field_details(intermediate_nested)) + save_asciidoc(join(out_dir, 'field-values.asciidoc'), page_field_values(intermediate_nested)) # Helpers @@ -20,9 +20,9 @@ def save_asciidoc(file, text): # Field Index -def page_field_index(ecs_nested, ecs_version): +def page_field_index(intermediate_nested, ecs_version): page_text = index_header(ecs_version) - for fieldset in ecs_helpers.dict_sorted_by_keys(ecs_nested, ['group', 'name']): + for fieldset in ecs_helpers.dict_sorted_by_keys(intermediate_nested, ['group', 'name']): page_text += render_field_index_row(fieldset) page_text += table_footer() page_text += index_footer() @@ -39,32 +39,41 @@ def render_field_index_row(fieldset): # Field Details Page -def page_field_details(ecs_nested): +def page_field_details(intermediate_nested): page_text = '' - for fieldset in ecs_helpers.dict_sorted_by_keys(ecs_nested, ['group', 'name']): - page_text += render_fieldset(fieldset, ecs_nested) + for fieldset in ecs_helpers.dict_sorted_by_keys(intermediate_nested, ['group', 'name']): + page_text += render_fieldset(fieldset, intermediate_nested) return page_text -def render_fieldset(fieldset, ecs_nested): +def render_fieldset(fieldset, intermediate_nested): text = field_details_table_header().format( fieldset_title=fieldset['title'], fieldset_name=fieldset['name'], fieldset_description=render_asciidoc_paragraphs(fieldset['description']) ) - for field in ecs_helpers.dict_sorted_by_keys(fieldset['fields'], 'flat_name'): - # Skip fields nested in this field set - if 'original_fieldset' not in field: - text += render_field_details_row(field) + text += render_fields(fieldset['fields']) text += table_footer() - text += render_fieldset_reuse_section(fieldset, ecs_nested) + text += render_fieldset_reuse_section(fieldset, intermediate_nested) return text +def render_fields(fields): + text = '' + for field_name, field in sorted(fields.items()): + # Skip fields nested in this field set + if 'field_details' in field and 'original_fieldset' not in field['field_details']: + text += render_field_details_row(field['field_details']) + if 'fields' in field: + text += render_fields(field['fields']) + return text + + + def render_asciidoc_paragraphs(string): '''Simply double the \n''' return string.replace("\n", "\n\n") @@ -109,7 +118,7 @@ def render_field_details_row(field): return text -def render_fieldset_reuse_section(fieldset, ecs_nested): +def render_fieldset_reuse_section(fieldset, intermediate_nested): '''Render the section on where field set can be nested, and which field sets can be nested here''' if not ('nestings' in fieldset or 'reusable' in fieldset): return '' @@ -124,18 +133,12 @@ def render_fieldset_reuse_section(fieldset, ecs_nested): ) rows = [] for nested_fs_name in fieldset['nestings']: - ecs = ecs_nested[nested_fs_name] - if 'reusable' in ecs: - target_fields = filter(lambda x: x == fieldset['name'] or x.startswith( - fieldset['name'] + '.'), ecs['reusable']['expected']) - else: - target_fields = [fieldset['name']] - for field in target_fields: - rows.append({ - 'flat_nesting': "{}.{}.*".format(field, nested_fs_name), - 'name': nested_fs_name, - 'short': ecs['short'] - }) + ecs = ecs_helpers.get_nested_field(nested_fs_name, intermediate_nested) + rows.append({ + 'flat_nesting': "{}.*".format(nested_fs_name), + 'name': nested_fs_name.split('.')[-1], + 'short': ecs['short'] + }) for row in sorted(rows, key=lambda x: x['flat_nesting']): text += render_nesting_row(row) text += table_footer() @@ -309,11 +312,11 @@ def nestings_row(): # Allowed values section -def page_field_values(ecs_flat): +def page_field_values(intermediate_nested): section_text = values_section_header() category_fields = ['event.kind', 'event.category', 'event.type', 'event.outcome'] for cat_field in category_fields: - section_text += render_field_values_page(ecs_flat[cat_field]) + section_text += render_field_values_page(ecs_helpers.get_nested_field(cat_field, intermediate_nested)['field_details']) return section_text diff --git a/scripts/generators/ecs_helpers.py b/scripts/generators/ecs_helpers.py index cce9792e24..15f8b05e6b 100644 --- a/scripts/generators/ecs_helpers.py +++ b/scripts/generators/ecs_helpers.py @@ -133,3 +133,11 @@ def list_split_by(lst, size): for i in range(0, len(lst), size): acc.append(lst[i:i + size]) return acc + + +def get_nested_field(fieldname, field_dict): + fields = fieldname.split('.') + nested_field = field_dict[fields[0]] + for field in fields[1:]: + nested_field = nested_field['fields'][field] + return nested_field \ No newline at end of file diff --git a/scripts/schema_reader.py b/scripts/schema_reader.py index 806efcc54f..3e996d55ae 100644 --- a/scripts/schema_reader.py +++ b/scripts/schema_reader.py @@ -161,8 +161,7 @@ def duplicate_reusable_fieldsets(schema, fields_nested): # List field set names expected under another field set. # E.g. host.nestings = [ 'geo', 'os', 'user' ] nestings = fields_nested[top_level].setdefault('nestings', []) - if schema['name'] not in nestings: - nestings.append(schema['name']) + nestings.append(new_nesting + "." + schema['name']) nestings.sort() nested_schema = fields_nested[top_level]['fields'] for level in split_flat_name[1:]: @@ -194,22 +193,17 @@ def assemble_reusables(fields_nested): duplicate_reusable_fieldsets(schema, fields_nested) -def flatten_fields(fields, key_prefix, original_fieldset=None): +def flatten_fields(fields, key_prefix): flat_fields = {} for (name, field) in fields.items(): new_key = key_prefix + name - temp_original_fieldset = original_fieldset - if 'reusable' in field: - temp_original_fieldset = name if 'field_details' in field: flat_fields[new_key] = field['field_details'].copy() - if temp_original_fieldset: - flat_fields[new_key]['original_fieldset'] = temp_original_fieldset if 'fields' in field: new_prefix = new_key + "." if 'root' in field and field['root']: new_prefix = "" - flat_fields.update(flatten_fields(field['fields'], new_prefix, temp_original_fieldset)) + flat_fields.update(flatten_fields(field['fields'], new_prefix)) return flat_fields @@ -227,17 +221,20 @@ def generate_fully_flattened_fields(fields_nested): return flatten_fields(fields_nested, "") -def cleanup_fields_recursive(fields, prefix): +def cleanup_fields_recursive(fields, prefix, original_fieldset=None): for (name, field) in fields.items(): # Copy field here so reusable field sets become unique copies instead of references to the original set field = field.copy() fields[name] = field + temp_original_fieldset = name if ('reusable' in field and prefix != "") else original_fieldset if 'field_details' in field: # Deep copy the field details so we can insert different flat names for each reusable fieldset field_details = copy.deepcopy(field['field_details']) new_flat_name = prefix + name field_details['flat_name'] = new_flat_name field_details['dashed_name'] = new_flat_name.replace('.', '-').replace('_', '-') + if temp_original_fieldset: + field_details['original_fieldset'] = temp_original_fieldset dict_clean_string_values(field_details) field_set_defaults(field_details) field['field_details'] = field_details @@ -246,7 +243,7 @@ def cleanup_fields_recursive(fields, prefix): new_prefix = prefix + name + "." if 'root' in field and field['root']: new_prefix = "" - cleanup_fields_recursive(field['fields'], new_prefix) + cleanup_fields_recursive(field['fields'], new_prefix, temp_original_fieldset) def load_schemas(files=ecs_files()): diff --git a/scripts/tests/test_ecs_helpers.py b/scripts/tests/test_ecs_helpers.py index 2c58c3a09f..fc6a12ad6c 100644 --- a/scripts/tests/test_ecs_helpers.py +++ b/scripts/tests/test_ecs_helpers.py @@ -198,6 +198,38 @@ def test_fields_subset(self): actual = ecs_helpers.fields_subset(subset, fields) self.assertEqual(actual, expected) + def test_get_nested_field(self): + fields = { + 'test_fieldset': { + 'name': 'test_fieldset', + 'fields': { + 'test_field1': { + 'field_details': { + 'name': 'test_field1', + 'type': 'keyword', + 'description': 'A test field' + } + }, + 'test_field2': { + 'field_details': { + 'name': 'test_field2', + 'type': 'keyword', + 'description': 'Another test field' + } + } + } + } + } + nested_field_name = 'test_fieldset.test_field1' + expected = { + 'field_details': { + 'name': 'test_field1', + 'type': 'keyword', + 'description': 'A test field' + } + } + actual = ecs_helpers.get_nested_field(nested_field_name, fields) + self.assertEqual(actual, expected) if __name__ == '__main__': unittest.main() diff --git a/scripts/tests/test_schema_reader.py b/scripts/tests/test_schema_reader.py index 7cc6e2729f..c33a13f7a4 100644 --- a/scripts/tests/test_schema_reader.py +++ b/scripts/tests/test_schema_reader.py @@ -120,43 +120,6 @@ def test_flatten_fields(self): } self.assertEqual(flat_fields, expected) - def test_flatten_fields_reusable(self): - fields = { - 'top_level': { - 'field_details': { - 'name': 'top_level' - }, - 'fields': { - 'nested_field': { - 'reusable': { - 'top_level': False, - 'expected': [ - 'top_level' - ] - }, - 'fields': { - 'double_nested_field': { - 'field_details': { - 'name': 'double_nested_field' - } - } - } - } - } - } - } - flat_fields = schema_reader.flatten_fields(fields, "") - expected = { - 'top_level': { - 'name': 'top_level' - }, - 'top_level.nested_field.double_nested_field': { - 'name': 'double_nested_field', - 'original_fieldset': 'nested_field' - } - } - self.assertEqual(flat_fields, expected) - def test_cleanup_fields_recursive(self): """Reuse a fieldset under two other fieldsets and check that the flat names are properly generated.""" reusable = { @@ -215,6 +178,7 @@ def test_cleanup_fields_recursive(self): 'ignore_above': 1024, 'short': 'A test field', 'normalize': [], + 'original_fieldset': 'reusable_fieldset' } } } @@ -243,7 +207,7 @@ def test_cleanup_fields_recursive(self): 'ignore_above': 1024, 'short': 'A test field', 'normalize': [], - + 'original_fieldset': 'reusable_fieldset' } } } From 4476165c8b5e8d157e24edbfd6414caf13a857db Mon Sep 17 00:00:00 2001 From: Marshall Main Date: Wed, 25 Mar 2020 16:45:50 -0400 Subject: [PATCH 2/7] update changelog --- CHANGELOG.next.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.next.md b/CHANGELOG.next.md index d04029f171..6a66b8bd66 100644 --- a/CHANGELOG.next.md +++ b/CHANGELOG.next.md @@ -36,6 +36,8 @@ Thanks, you're awesome :-) --> #### Improvements +* Add full path names to reused fieldsets in `nestings` array in ecs_nested.yml. #803 + #### Deprecated From 1f0130b4ddbebe5a7cb124b6186789cc7a046961 Mon Sep 17 00:00:00 2001 From: Marshall Main Date: Wed, 25 Mar 2020 16:54:23 -0400 Subject: [PATCH 3/7] linting --- scripts/generators/asciidoc_fields.py | 4 ++-- scripts/tests/test_ecs_helpers.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/generators/asciidoc_fields.py b/scripts/generators/asciidoc_fields.py index de82d1a7ef..84096b5332 100644 --- a/scripts/generators/asciidoc_fields.py +++ b/scripts/generators/asciidoc_fields.py @@ -73,7 +73,6 @@ def render_fields(fields): return text - def render_asciidoc_paragraphs(string): '''Simply double the \n''' return string.replace("\n", "\n\n") @@ -316,7 +315,8 @@ def page_field_values(intermediate_nested): section_text = values_section_header() category_fields = ['event.kind', 'event.category', 'event.type', 'event.outcome'] for cat_field in category_fields: - section_text += render_field_values_page(ecs_helpers.get_nested_field(cat_field, intermediate_nested)['field_details']) + section_text += render_field_values_page(ecs_helpers.get_nested_field(cat_field, + intermediate_nested)['field_details']) return section_text diff --git a/scripts/tests/test_ecs_helpers.py b/scripts/tests/test_ecs_helpers.py index fc6a12ad6c..915cc20496 100644 --- a/scripts/tests/test_ecs_helpers.py +++ b/scripts/tests/test_ecs_helpers.py @@ -231,5 +231,6 @@ def test_get_nested_field(self): actual = ecs_helpers.get_nested_field(nested_field_name, fields) self.assertEqual(actual, expected) + if __name__ == '__main__': unittest.main() From f1432df3150b097fc657e6102ab1b8ae6b716ffe Mon Sep 17 00:00:00 2001 From: Marshall Main Date: Fri, 27 Mar 2020 12:04:35 -0400 Subject: [PATCH 4/7] move nesting logic to separate function and find re-nested fields --- docs/field-details.asciidoc | 30 +++++++++++++++++++++++ generated/ecs/ecs_nested.yml | 5 ++++ scripts/schema_reader.py | 18 +++++++++++--- scripts/tests/test_schema_reader.py | 38 +++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 3 deletions(-) diff --git a/docs/field-details.asciidoc b/docs/field-details.asciidoc index a04ef0a46f..59128b2b0a 100644 --- a/docs/field-details.asciidoc +++ b/docs/field-details.asciidoc @@ -440,6 +440,12 @@ example: `co.uk` // =============================================================== +| <> +| User's group relevant to the event. + +// =============================================================== + + |===== [[ecs-cloud]] @@ -950,6 +956,12 @@ example: `co.uk` // =============================================================== +| <> +| User's group relevant to the event. + +// =============================================================== + + |===== [[ecs-dll]] @@ -2681,6 +2693,12 @@ example: `1325` // =============================================================== +| <> +| User's group relevant to the event. + +// =============================================================== + + |===== [[ecs-http]] @@ -5235,6 +5253,12 @@ example: `co.uk` // =============================================================== +| <> +| User's group relevant to the event. + +// =============================================================== + + |===== [[ecs-service]] @@ -5572,6 +5596,12 @@ example: `co.uk` // =============================================================== +| <> +| User's group relevant to the event. + +// =============================================================== + + |===== [[ecs-threat]] diff --git a/generated/ecs/ecs_nested.yml b/generated/ecs/ecs_nested.yml index c3dbb910ff..43cc9c05fb 100644 --- a/generated/ecs/ecs_nested.yml +++ b/generated/ecs/ecs_nested.yml @@ -658,6 +658,7 @@ client: - client.as - client.geo - client.user + - client.user.group prefix: client. short: Fields about the client side of a network connection, used with server. title: Client @@ -1354,6 +1355,7 @@ destination: - destination.as - destination.geo - destination.user + - destination.user.group prefix: destination. short: Fields about the destination side of a network connection, used with source. title: Destination @@ -3997,6 +3999,7 @@ host: - host.geo - host.os - host.user + - host.user.group prefix: host. short: Fields describing the relevant computing instance. title: Host @@ -7215,6 +7218,7 @@ server: - server.as - server.geo - server.user + - server.user.group prefix: server. short: Fields about the server side of a network connection, used with client. title: Server @@ -7778,6 +7782,7 @@ source: - source.as - source.geo - source.user + - source.user.group prefix: source. short: Fields about the source side of a network connection, used with destination. title: Source diff --git a/scripts/schema_reader.py b/scripts/schema_reader.py index 3e996d55ae..73c4b2acf5 100644 --- a/scripts/schema_reader.py +++ b/scripts/schema_reader.py @@ -160,9 +160,6 @@ def duplicate_reusable_fieldsets(schema, fields_nested): top_level = split_flat_name[0] # List field set names expected under another field set. # E.g. host.nestings = [ 'geo', 'os', 'user' ] - nestings = fields_nested[top_level].setdefault('nestings', []) - nestings.append(new_nesting + "." + schema['name']) - nestings.sort() nested_schema = fields_nested[top_level]['fields'] for level in split_flat_name[1:]: nested_schema = nested_schema.get(level, None) @@ -174,6 +171,16 @@ def duplicate_reusable_fieldsets(schema, fields_nested): nested_schema = nested_schema.setdefault('fields', {}) nested_schema[schema['name']] = schema + +def find_nestings(fields_nested, prefix): + nestings = [] + for field_name, field in fields_nested.items(): + if 'reusable' in field: + nestings.append(prefix + field_name) + if 'fields' in field: + nestings.extend(find_nestings(field['fields'], prefix + field_name + '.')) + return nestings + # Main @@ -256,6 +263,11 @@ def load_schemas(files=ecs_files()): def generate_nested_flat(fields_intermediate): assemble_reusables(fields_intermediate) cleanup_fields_recursive(fields_intermediate, "") + for field_name, field in fields_intermediate.items(): + nestings = find_nestings(field['fields'], field_name + ".") + nestings.sort() + if len(nestings) > 0: + field['nestings'] = nestings fields_nested = generate_partially_flattened_fields(fields_intermediate) fields_flat = generate_fully_flattened_fields(fields_intermediate) return (fields_nested, fields_flat) diff --git a/scripts/tests/test_schema_reader.py b/scripts/tests/test_schema_reader.py index c33a13f7a4..037b4860ea 100644 --- a/scripts/tests/test_schema_reader.py +++ b/scripts/tests/test_schema_reader.py @@ -431,6 +431,44 @@ def test_improper_reusable_fails(self): with self.assertRaises(ValueError): schema_reader.duplicate_reusable_fieldsets(fieldset['reusable_fieldset2'], fieldset) + def test_find_nestings(self): + field = { + 'sub_field': { + 'reusable': { + 'top_level': True, + 'expected': [ + 'some_other_field' + ] + }, + 'fields': { + 'reusable_fieldset1': { + 'name': 'reusable_fieldset1', + 'reusable': { + 'top_level': False, + 'expected': [ + 'sub_field' + ] + }, + 'fields': { + 'nested_reusable_field': { + 'reusable': { + 'top_level': False, + 'expected': 'sub_field.nested_reusable_field' + }, + 'field_details': { + 'name': 'reusable_field', + 'type': 'keyword', + 'description': 'A test field' + } + } + } + } + } + } + } + expected = ['sub_field.reusable_fieldset1', 'sub_field.reusable_fieldset1.nested_reusable_field'] + self.assertEqual(schema_reader.find_nestings(field['sub_field']['fields'], 'sub_field.'), expected) + if __name__ == '__main__': unittest.main() From 80f3b1c57f237023e5df981032f00e3a24560621 Mon Sep 17 00:00:00 2001 From: Marshall Main Date: Fri, 27 Mar 2020 12:06:02 -0400 Subject: [PATCH 5/7] better naming --- scripts/schema_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/schema_reader.py b/scripts/schema_reader.py index 73c4b2acf5..b62eee9627 100644 --- a/scripts/schema_reader.py +++ b/scripts/schema_reader.py @@ -172,9 +172,9 @@ def duplicate_reusable_fieldsets(schema, fields_nested): nested_schema[schema['name']] = schema -def find_nestings(fields_nested, prefix): +def find_nestings(fields, prefix): nestings = [] - for field_name, field in fields_nested.items(): + for field_name, field in fields.items(): if 'reusable' in field: nestings.append(prefix + field_name) if 'fields' in field: From 49862bb834ca87cf3291ed8e3423e16571843eb6 Mon Sep 17 00:00:00 2001 From: Marshall Main Date: Mon, 30 Mar 2020 16:22:11 -0400 Subject: [PATCH 6/7] dont make asciidocs with subset or include options --- scripts/generator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/generator.py b/scripts/generator.py index 2cafd9dc22..12a392c31a 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -57,6 +57,8 @@ def main(): csv_generator.generate(flat, ecs_version, out_dir) es_template.generate(flat, ecs_version, out_dir) + if args.include or args.subset: + exit() beats.generate(nested, ecs_version, out_dir) asciidoc_fields.generate(intermediate_fields, ecs_version, docs_dir) From 0ce2e5500a5851673059b33f0dba8983ec50304e Mon Sep 17 00:00:00 2001 From: Marshall Main Date: Tue, 7 Apr 2020 15:11:48 -0400 Subject: [PATCH 7/7] add docstrings --- scripts/generators/ecs_helpers.py | 1 + scripts/schema_reader.py | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/generators/ecs_helpers.py b/scripts/generators/ecs_helpers.py index 15f8b05e6b..4d58256055 100644 --- a/scripts/generators/ecs_helpers.py +++ b/scripts/generators/ecs_helpers.py @@ -136,6 +136,7 @@ def list_split_by(lst, size): def get_nested_field(fieldname, field_dict): + """Takes a field name in dot notation and a dictionary of fields and finds the field in the dictionary""" fields = fieldname.split('.') nested_field = field_dict[fields[0]] for field in fields[1:]: diff --git a/scripts/schema_reader.py b/scripts/schema_reader.py index b62eee9627..7b5064b5cd 100644 --- a/scripts/schema_reader.py +++ b/scripts/schema_reader.py @@ -173,6 +173,7 @@ def duplicate_reusable_fieldsets(schema, fields_nested): def find_nestings(fields, prefix): + """Recursively finds all reusable fields in the fields dictionary.""" nestings = [] for field_name, field in fields.items(): if 'reusable' in field: