From 76ab51e639b70b10bc7152d0898b754a4acc2ef2 Mon Sep 17 00:00:00 2001 From: djptek Date: Wed, 12 May 2021 13:54:17 +0200 Subject: [PATCH 01/38] commit with example test files, to be removed at a later date --- rfcs/text/0017/exclude-set-fail1.yml | 5 +++ rfcs/text/0017/exclude-set-fail2.yml | 5 +++ rfcs/text/0017/exclude-set-fail3.yml | 5 +++ rfcs/text/0017/exclude-set-pass2.yml | 8 ++++ rfcs/text/0017/exclude-set.yml | 4 ++ scripts/generator.py | 10 ++--- scripts/schema/subset_filter.py | 63 +++++++++++++++++++++++++--- 7 files changed, 88 insertions(+), 12 deletions(-) create mode 100644 rfcs/text/0017/exclude-set-fail1.yml create mode 100644 rfcs/text/0017/exclude-set-fail2.yml create mode 100644 rfcs/text/0017/exclude-set-fail3.yml create mode 100644 rfcs/text/0017/exclude-set-pass2.yml create mode 100644 rfcs/text/0017/exclude-set.yml diff --git a/rfcs/text/0017/exclude-set-fail1.yml b/rfcs/text/0017/exclude-set-fail1.yml new file mode 100644 index 0000000000..abaf9ad798 --- /dev/null +++ b/rfcs/text/0017/exclude-set-fail1.yml @@ -0,0 +1,5 @@ +- name: test_fail1_nonexistent_root + fields: + - name: file + fields: + - name: path \ No newline at end of file diff --git a/rfcs/text/0017/exclude-set-fail2.yml b/rfcs/text/0017/exclude-set-fail2.yml new file mode 100644 index 0000000000..db8da4fd23 --- /dev/null +++ b/rfcs/text/0017/exclude-set-fail2.yml @@ -0,0 +1,5 @@ + - name: log + fields: + - name: test_fail2_nonexistent_node + fields: + - name: path diff --git a/rfcs/text/0017/exclude-set-fail3.yml b/rfcs/text/0017/exclude-set-fail3.yml new file mode 100644 index 0000000000..5e48dbeca5 --- /dev/null +++ b/rfcs/text/0017/exclude-set-fail3.yml @@ -0,0 +1,5 @@ + - name: log + fields: + - name: path + fields: + - name: test_fail3_nonexistent_leaf diff --git a/rfcs/text/0017/exclude-set-pass2.yml b/rfcs/text/0017/exclude-set-pass2.yml new file mode 100644 index 0000000000..c82b0b7860 --- /dev/null +++ b/rfcs/text/0017/exclude-set-pass2.yml @@ -0,0 +1,8 @@ +--- +- name: log + fields: + - name: original + + - name: file + fields: + - name: path \ No newline at end of file diff --git a/rfcs/text/0017/exclude-set.yml b/rfcs/text/0017/exclude-set.yml new file mode 100644 index 0000000000..09dd1dce15 --- /dev/null +++ b/rfcs/text/0017/exclude-set.yml @@ -0,0 +1,4 @@ +--- +- name: log + fields: + - name: original \ No newline at end of file diff --git a/scripts/generator.py b/scripts/generator.py index c099ee54ff..768e3108ec 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -17,7 +17,6 @@ from schema import finalizer from schema import subset_filter - def main(): args = argument_parser() @@ -26,11 +25,9 @@ def main(): # default location to save files out_dir = 'generated' - docs_dir = 'docs' if args.out: default_dirs = False out_dir = os.path.join(args.out, out_dir) - docs_dir = os.path.join(args.out, docs_dir) else: default_dirs = True @@ -50,7 +47,7 @@ def main(): oss.fallback(fields) cleaner.clean(fields, strict=args.strict) finalizer.finalize(fields) - fields = subset_filter.filter(fields, args.subset, out_dir) + fields = subset_filter.filter(fields, args.subset, args.exclude, out_dir) nested, flat = intermediate_files.generate(fields, os.path.join(out_dir, 'ecs'), default_dirs) if args.intermediate_only: @@ -63,9 +60,6 @@ def main(): if args.include or args.subset: exit() - ecs_helpers.make_dirs(docs_dir) - asciidoc_fields.generate(nested, ecs_generated_version, docs_dir) - def argument_parser(): parser = argparse.ArgumentParser() @@ -75,6 +69,8 @@ def argument_parser(): 
help='include user specified directory of custom field definitions') parser.add_argument('--subset', nargs='+', help='render a subset of the schema') + parser.add_argument('--exclude', nargs='+', + help='exclude user specified subset of the schema') parser.add_argument('--out', action='store', help='directory to output the generated files') parser.add_argument('--template-settings', action='store', help='index template settings to use when generating elasticsearch template') diff --git a/scripts/schema/subset_filter.py b/scripts/schema/subset_filter.py index 791d6d3cc7..962e90907f 100644 --- a/scripts/schema/subset_filter.py +++ b/scripts/schema/subset_filter.py @@ -1,4 +1,5 @@ import glob +from os.path import join import yaml import os from generators import intermediate_files @@ -8,8 +9,9 @@ # filter out the ones they don't need. -def filter(fields, subset_file_globs, out_dir): +def filter(fields, subset_file_globs, exclude_file_globs, out_dir): subsets = load_subset_definitions(subset_file_globs) + excludes = load_exclude_definitions(exclude_file_globs) for subset in subsets: subfields = extract_matching_fields(fields, subset['fields']) intermediate_files.generate(subfields, os.path.join(out_dir, 'ecs', 'subset', subset['name']), False) @@ -18,9 +20,49 @@ def filter(fields, subset_file_globs, out_dir): if merged_subset: fields = extract_matching_fields(fields, merged_subset) + if excludes: + fields = exclude_fields(fields, excludes) + return fields +# pops a field from yaml derived dict using path derived from ordered list of nodes +def pop_field(fields, path): + node_path = path.copy() + if node_path[0] in fields: + if len(node_path) == 1: + b4 = fields.copy() + fields.pop(node_path[0]) + print("removed field:", (set(b4.keys() ^ set(fields.keys()))).pop()) + else: + inner_field = node_path.pop(0) + print("prefix:", inner_field) + pop_field(fields[inner_field]["fields"], node_path) + else: + print("No match for exclusion:", ".".join([e for e in path])) + + +# traverses paths to one or more nodes in a yaml derived dict +def exclude_trace_path(fields, item, path): + for list_item in item: + node_path = path.copy() + node_path.append(list_item["name"]) + if not "fields" in list_item: + pop_field(fields, node_path) + else: + exclude_trace_path(fields, list_item["fields"], node_path) + + +# excludes one or more fields from a yaml derived dict according to an exclude list +def exclude_fields(fields, excludes): + '''Traverses subset and eliminates any field which matches the excludes''' + if excludes: + for ex_list in excludes: + for item in ex_list: + exclude_trace_path(fields, item["fields"], [item["name"]]) + return fields + + def combine_all_subsets(subsets): '''Merges N subsets into one. 
Strips top level 'name' and 'fields' keys as well as non-ECS field options since we can't know how to merge those.''' merged_subset = {} @@ -30,17 +72,28 @@ def combine_all_subsets(subsets): return merged_subset +def load_definitions(file_globs): + sets = [] + for f in eval_globs(file_globs): + raw = load_yaml_file(f) + sets.append(raw) + return sets + def load_subset_definitions(file_globs): if not file_globs: return [] - subsets = [] - for f in eval_globs(file_globs): - raw = load_yaml_file(f) - subsets.append(raw) + subsets = load_definitions(file_globs) if not subsets: raise ValueError('--subset specified, but no subsets found in {}'.format(file_globs)) return subsets +def load_exclude_definitions(file_globs): + if not file_globs: + return [] + excludes = load_definitions(file_globs) + if not excludes: + raise ValueError('--exclude specified, but no exclusions found in {}'.format(file_globs)) + return excludes def load_yaml_file(file_name): with open(file_name) as f: From 2cc6523c0894a41baf0a3f0867e6b200c9ec9b97 Mon Sep 17 00:00:00 2001 From: djptek Date: Wed, 12 May 2021 13:56:14 +0200 Subject: [PATCH 02/38] tbc changelog message --- CHANGELOG.next.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.next.md b/CHANGELOG.next.md index be936302d8..04059b1dbe 100644 --- a/CHANGELOG.next.md +++ b/CHANGELOG.next.md @@ -19,6 +19,7 @@ Thanks, you're awesome :-) --> #### Improvements * Fix ecs GitHub repo link source branch #1393 +* Add --exclude flag to Generetor to support field removal testing #### Deprecated From b530f5b06e121602470b6e790db0e78062702cd5 Mon Sep 17 00:00:00 2001 From: djptek Date: Wed, 12 May 2021 13:59:44 +0200 Subject: [PATCH 03/38] update PR --- CHANGELOG.next.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.next.md b/CHANGELOG.next.md index 04059b1dbe..8819b4b557 100644 --- a/CHANGELOG.next.md +++ b/CHANGELOG.next.md @@ -19,7 +19,7 @@ Thanks, you're awesome :-) --> #### Improvements * Fix ecs GitHub repo link source branch #1393 -* Add --exclude flag to Generetor to support field removal testing +* Add --exclude flag to Generator to support field removal testing #1411 #### Deprecated From 071bb636867c9206a1c2dda681a839d255303eba Mon Sep 17 00:00:00 2001 From: djptek Date: Wed, 12 May 2021 14:12:32 +0200 Subject: [PATCH 04/38] fixed regression --- scripts/generator.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/generator.py b/scripts/generator.py index 768e3108ec..c50d35c942 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -17,6 +17,7 @@ from schema import finalizer from schema import subset_filter + def main(): args = argument_parser() @@ -25,9 +26,11 @@ def main(): # default location to save files out_dir = 'generated' + docs_dir = 'docs' if args.out: default_dirs = False out_dir = os.path.join(args.out, out_dir) + docs_dir = os.path.join(args.out, docs_dir) else: default_dirs = True @@ -60,6 +63,9 @@ def main(): if args.include or args.subset: exit() + ecs_helpers.make_dirs(docs_dir) + asciidoc_fields.generate(nested, ecs_generated_version, docs_dir) + def argument_parser(): parser = argparse.ArgumentParser() @@ -67,10 +73,10 @@ def argument_parser(): Note that "--include experimental/schemas" will also respect this git ref.') parser.add_argument('--include', nargs='+', help='include user specified directory of custom field definitions') - parser.add_argument('--subset', nargs='+', - help='render a subset of the schema') parser.add_argument('--exclude', nargs='+', 
help='exclude user specified subset of the schema') + parser.add_argument('--subset', nargs='+', + help='render a subset of the schema') parser.add_argument('--out', action='store', help='directory to output the generated files') parser.add_argument('--template-settings', action='store', help='index template settings to use when generating elasticsearch template') From b3f5bf76698854f0bf3b7b2facf6f27583b1e355 Mon Sep 17 00:00:00 2001 From: djptek Date: Wed, 12 May 2021 14:13:52 +0200 Subject: [PATCH 05/38] reorder imports --- scripts/schema/subset_filter.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/scripts/schema/subset_filter.py b/scripts/schema/subset_filter.py index 962e90907f..dc80ad3ca8 100644 --- a/scripts/schema/subset_filter.py +++ b/scripts/schema/subset_filter.py @@ -1,8 +1,8 @@ import glob -from os.path import join import yaml import os from generators import intermediate_files +from os.path import join from schema import cleaner # This script takes all ECS and custom fields already loaded, and lets users @@ -36,8 +36,8 @@ def pop_field(fields, path): print("removed field:", (set(b4.keys() ^ set(fields.keys()))).pop()) else: inner_field = node_path.pop(0) - print("prefix:", inner_field) - pop_field(fields[inner_field]["fields"], node_path) + print("prefix:", inner_field) + pop_field(fields[inner_field]["fields"], node_path) else: print("No match for exclusion:", ".".join([e for e in path])) @@ -51,9 +51,9 @@ def exclude_trace_path(fields, item, path): pop_field(fields, node_path) else: exclude_trace_path(fields, list_item["fields"], node_path) - -# excludes one or more fields from a yaml derived dict according to an exclude list + +# excludes one or more fields from a yaml derived dict according to an exclude list def exclude_fields(fields, excludes): '''Traverses subset and eliminates any field which matches the excludes''' if excludes: @@ -61,7 +61,7 @@ def exclude_fields(fields, excludes): for item in ex_list: exclude_trace_path(fields, item["fields"], [item["name"]]) return fields - + def combine_all_subsets(subsets): '''Merges N subsets into one. 
Strips top level 'name' and 'fields' keys as well as non-ECS field options since we can't know how to merge those.''' @@ -79,6 +79,7 @@ def load_definitions(file_globs): sets.append(raw) return sets + def load_subset_definitions(file_globs): if not file_globs: return [] @@ -87,6 +88,7 @@ def load_subset_definitions(file_globs): raise ValueError('--subset specified, but no subsets found in {}'.format(file_globs)) return subsets + def load_exclude_definitions(file_globs): if not file_globs: return [] @@ -95,6 +97,7 @@ def load_exclude_definitions(file_globs): raise ValueError('--exclude specified, but no exclusions found in {}'.format(file_globs)) return excludes + def load_yaml_file(file_name): with open(file_name) as f: return yaml.safe_load(f.read()) From c23266c01745af90cb15c601e6c54c27ac97de98 Mon Sep 17 00:00:00 2001 From: djptek Date: Wed, 12 May 2021 14:17:59 +0200 Subject: [PATCH 06/38] make comments more consistent --- scripts/schema/subset_filter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/schema/subset_filter.py b/scripts/schema/subset_filter.py index dc80ad3ca8..4520008eb6 100644 --- a/scripts/schema/subset_filter.py +++ b/scripts/schema/subset_filter.py @@ -26,8 +26,8 @@ def filter(fields, subset_file_globs, exclude_file_globs, out_dir): return fields -# pops a field from yaml derived dict using path derived from ordered list of nodes def pop_field(fields, path): + '''pops a field from yaml derived dict using path derived from ordered list of nodes''' node_path = path.copy() if node_path[0] in fields: if len(node_path) == 1: @@ -42,8 +42,8 @@ def pop_field(fields, path): print("No match for exclusion:", ".".join([e for e in path])) -# traverses paths to one or more nodes in a yaml derived dict def exclude_trace_path(fields, item, path): + '''traverses paths to one or more nodes in a yaml derived dict''' for list_item in item: node_path = path.copy() node_path.append(list_item["name"]) @@ -53,7 +53,6 @@ def exclude_trace_path(fields, item, path): exclude_trace_path(fields, list_item["fields"], node_path) -# excludes one or more fields from a yaml derived dict according to an exclude list def exclude_fields(fields, excludes): '''Traverses subset and eliminates any field which matches the excludes''' if excludes: From 117481319c469db17a81f413c5c429006bd30e9d Mon Sep 17 00:00:00 2001 From: djptek Date: Wed, 12 May 2021 14:23:31 +0200 Subject: [PATCH 07/38] make comments more consistent --- scripts/schema/subset_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/schema/subset_filter.py b/scripts/schema/subset_filter.py index 4520008eb6..d40c068f26 100644 --- a/scripts/schema/subset_filter.py +++ b/scripts/schema/subset_filter.py @@ -54,7 +54,7 @@ def exclude_trace_path(fields, item, path): def exclude_fields(fields, excludes): - '''Traverses subset and eliminates any field which matches the excludes''' + '''Traverses fields and eliminates any field which matches the excludes''' if excludes: for ex_list in excludes: for item in ex_list: From 8c9eb530f0061c4d7aaf0f34f76b529fc27261bc Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 13 May 2021 18:42:05 +0200 Subject: [PATCH 08/38] refactor add exclude_filter.py --- scripts/generator.py | 5 ++- scripts/schema/exclude_filter.py | 63 ++++++++++++++++++++++++++++++++ scripts/schema/subset_filter.py | 51 +------------------------- 3 files changed, 68 insertions(+), 51 deletions(-) create mode 100644 scripts/schema/exclude_filter.py diff --git a/scripts/generator.py 
b/scripts/generator.py index c50d35c942..8a6a9c68cb 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -16,6 +16,7 @@ from schema import cleaner from schema import finalizer from schema import subset_filter +from schema import exclude_filter def main(): @@ -50,7 +51,9 @@ def main(): oss.fallback(fields) cleaner.clean(fields, strict=args.strict) finalizer.finalize(fields) - fields = subset_filter.filter(fields, args.subset, args.exclude, out_dir) + fields = exclude_filter.exclude( + subset_filter.filter(fields, args.subset, out_dir), + args.exclude, out_dir) nested, flat = intermediate_files.generate(fields, os.path.join(out_dir, 'ecs'), default_dirs) if args.intermediate_only: diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py new file mode 100644 index 0000000000..125a8778d7 --- /dev/null +++ b/scripts/schema/exclude_filter.py @@ -0,0 +1,63 @@ +import glob +import yaml +import os +from schema import subset_filter + +# This script should be run downstream of the subset filters - it takes +# all ECS and custom fields already loaded by the latter and explicitly +# removes a subset, for example, to simulate impact of future removals + + +def exclude(fields, exclude_file_globs, out_dir): + excludes = load_exclude_definitions(exclude_file_globs) + + if excludes: + fields = exclude_fields(fields, excludes) + + return fields + +def pop_field(fields, path): + '''pops a field from yaml derived dict using path derived from ordered list of nodes''' + node_path = path.copy() + if node_path[0] in fields: + if len(node_path) == 1: + b4 = fields.copy() + fields.pop(node_path[0]) + print("removed field:", (set(b4.keys() ^ set(fields.keys()))).pop()) + else: + inner_field = node_path.pop(0) + print("prefix:", inner_field) + pop_field(fields[inner_field]["fields"], node_path) + else: + print("No match for exclusion:", ".".join([e for e in path])) + + +def exclude_trace_path(fields, item, path): + '''traverses paths to one or more nodes in a yaml derived dict''' + for list_item in item: + node_path = path.copy() + node_path.append(list_item["name"]) + if not "fields" in list_item: + pop_field(fields, node_path) + else: + exclude_trace_path(fields, list_item["fields"], node_path) + + +def exclude_fields(fields, excludes): + '''Traverses fields and eliminates any field which matches the excludes''' + if excludes: + for ex_list in excludes: + for item in ex_list: + exclude_trace_path(fields, item["fields"], [item["name"]]) + return fields + + +def load_exclude_definitions(file_globs): + if not file_globs: + return [] + excludes = subset_filter.load_definitions(file_globs) + if not excludes: + raise ValueError('--exclude specified, but no exclusions found in {}'.format(file_globs)) + return excludes + + diff --git a/scripts/schema/subset_filter.py b/scripts/schema/subset_filter.py index d40c068f26..3fa573c9cf 100644 --- a/scripts/schema/subset_filter.py +++ b/scripts/schema/subset_filter.py @@ -9,9 +9,8 @@ # filter out the ones they don't need. 
-def filter(fields, subset_file_globs, exclude_file_globs, out_dir): +def filter(fields, subset_file_globs, out_dir): subsets = load_subset_definitions(subset_file_globs) - excludes = load_exclude_definitions(exclude_file_globs) for subset in subsets: subfields = extract_matching_fields(fields, subset['fields']) intermediate_files.generate(subfields, os.path.join(out_dir, 'ecs', 'subset', subset['name']), False) @@ -20,45 +19,6 @@ def filter(fields, subset_file_globs, exclude_file_globs, out_dir): if merged_subset: fields = extract_matching_fields(fields, merged_subset) - if excludes: - fields = exclude_fields(fields, excludes) - - return fields - - -def pop_field(fields, path): - '''pops a field from yaml derived dict using path derived from ordered list of nodes''' - node_path = path.copy() - if node_path[0] in fields: - if len(node_path) == 1: - b4 = fields.copy() - fields.pop(node_path[0]) - print("removed field:", (set(b4.keys() ^ set(fields.keys()))).pop()) - else: - inner_field = node_path.pop(0) - print("prefix:", inner_field) - pop_field(fields[inner_field]["fields"], node_path) - else: - print("No match for exclusion:", ".".join([e for e in path])) - - -def exclude_trace_path(fields, item, path): - '''traverses paths to one or more nodes in a yaml derived dict''' - for list_item in item: - node_path = path.copy() - node_path.append(list_item["name"]) - if not "fields" in list_item: - pop_field(fields, node_path) - else: - exclude_trace_path(fields, list_item["fields"], node_path) - - -def exclude_fields(fields, excludes): - '''Traverses fields and eliminates any field which matches the excludes''' - if excludes: - for ex_list in excludes: - for item in ex_list: - exclude_trace_path(fields, item["fields"], [item["name"]]) return fields @@ -88,15 +48,6 @@ def load_subset_definitions(file_globs): return subsets -def load_exclude_definitions(file_globs): - if not file_globs: - return [] - excludes = load_definitions(file_globs) - if not excludes: - raise ValueError('--exclude specified, but no exclusions found in {}'.format(file_globs)) - return excludes - - def load_yaml_file(file_name): with open(file_name) as f: return yaml.safe_load(f.read()) From 7305ea2af756abf8035b7c646ba8515975d87f52 Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 13 May 2021 19:41:54 +0200 Subject: [PATCH 09/38] simplified console messages --- scripts/schema/exclude_filter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index 125a8778d7..f72fcd3c01 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -22,11 +22,9 @@ def pop_field(fields, path): if node_path[0] in fields: if len(node_path) == 1: b4 = fields.copy() - fields.pop(node_path[0]) - print("removed field:", (set(b4.keys() ^ set(fields.keys()))).pop()) + print("Removed field {0}".format(str(fields.pop(node_path[0]).get("field_details").get("flat_name")))) else: inner_field = node_path.pop(0) - print("prefix:", inner_field) pop_field(fields[inner_field]["fields"], node_path) else: print("No match for exclusion:", ".".join([e for e in path])) @@ -48,6 +46,7 @@ def exclude_fields(fields, excludes): if excludes: for ex_list in excludes: for item in ex_list: + #print("Removing: ", end='') exclude_trace_path(fields, item["fields"], [item["name"]]) return fields From 853b0c39500a50b0cd7a2e24226d1ce34780c102 Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 13 May 2021 19:48:13 +0200 Subject: [PATCH 10/38] simplify console messages 
--- scripts/schema/exclude_filter.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index f72fcd3c01..a8c308920d 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -4,18 +4,19 @@ from schema import subset_filter # This script should be run downstream of the subset filters - it takes -# all ECS and custom fields already loaded by the latter and explicitly +# all ECS and custom fields already loaded by the latter and explicitly # removes a subset, for example, to simulate impact of future removals def exclude(fields, exclude_file_globs, out_dir): excludes = load_exclude_definitions(exclude_file_globs) - + if excludes: fields = exclude_fields(fields, excludes) return fields + def pop_field(fields, path): '''pops a field from yaml derived dict using path derived from ordered list of nodes''' node_path = path.copy() @@ -58,5 +59,3 @@ def load_exclude_definitions(file_globs): if not excludes: raise ValueError('--exclude specified, but no exclusions found in {}'.format(file_globs)) return excludes - - From 8d9161678fd259dcc522b980bc6d644e67fbf020 Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 13 May 2021 20:03:59 +0200 Subject: [PATCH 11/38] separated calls to subsetfulter exclude_filter for clarity --- scripts/generator.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/generator.py b/scripts/generator.py index ad28f23f5f..5123ec2dd5 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -48,9 +48,8 @@ def main(): fields = loader.load_schemas(ref=args.ref, included_files=args.include) cleaner.clean(fields, strict=args.strict) finalizer.finalize(fields) - fields = exclude_filter.exclude( - subset_filter.filter(fields, args.subset, out_dir), - args.exclude, out_dir) + fields = subset_filter.filter(fields, args.subset, out_dir) + fields = exclude_filter.exclude(fields, args.exclude, out_dir) nested, flat = intermediate_files.generate(fields, os.path.join(out_dir, 'ecs'), default_dirs) if args.intermediate_only: From 14e05a7cb3e7300459d3dd266cfeaca71e4419d5 Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 13 May 2021 20:07:59 +0200 Subject: [PATCH 12/38] added trailing newlines to test files --- rfcs/text/0017/exclude-set-fail1.yml | 3 ++- rfcs/text/0017/exclude-set-pass2.yml | 3 ++- rfcs/text/0017/exclude-set.yml | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/rfcs/text/0017/exclude-set-fail1.yml b/rfcs/text/0017/exclude-set-fail1.yml index abaf9ad798..4315b814ad 100644 --- a/rfcs/text/0017/exclude-set-fail1.yml +++ b/rfcs/text/0017/exclude-set-fail1.yml @@ -2,4 +2,5 @@ fields: - name: file fields: - - name: path \ No newline at end of file + - name: path + \ No newline at end of file diff --git a/rfcs/text/0017/exclude-set-pass2.yml b/rfcs/text/0017/exclude-set-pass2.yml index c82b0b7860..4a7a2021ba 100644 --- a/rfcs/text/0017/exclude-set-pass2.yml +++ b/rfcs/text/0017/exclude-set-pass2.yml @@ -5,4 +5,5 @@ - name: file fields: - - name: path \ No newline at end of file + - name: path + \ No newline at end of file diff --git a/rfcs/text/0017/exclude-set.yml b/rfcs/text/0017/exclude-set.yml index 09dd1dce15..55b72a213f 100644 --- a/rfcs/text/0017/exclude-set.yml +++ b/rfcs/text/0017/exclude-set.yml @@ -1,4 +1,5 @@ --- - name: log fields: - - name: original \ No newline at end of file + - name: original + \ No newline at end of file From 6cdbcd025056a806b61674097a032038e2ce895c Mon Sep 17 00:00:00 2001 From: 
djptek Date: Thu, 13 May 2021 20:21:57 +0200 Subject: [PATCH 13/38] removed fake test files --- rfcs/text/0017/exclude-set-fail1.yml | 6 ------ rfcs/text/0017/exclude-set-fail2.yml | 5 ----- rfcs/text/0017/exclude-set-fail3.yml | 5 ----- rfcs/text/0017/exclude-set-pass2.yml | 9 --------- 4 files changed, 25 deletions(-) delete mode 100644 rfcs/text/0017/exclude-set-fail1.yml delete mode 100644 rfcs/text/0017/exclude-set-fail2.yml delete mode 100644 rfcs/text/0017/exclude-set-fail3.yml delete mode 100644 rfcs/text/0017/exclude-set-pass2.yml diff --git a/rfcs/text/0017/exclude-set-fail1.yml b/rfcs/text/0017/exclude-set-fail1.yml deleted file mode 100644 index 4315b814ad..0000000000 --- a/rfcs/text/0017/exclude-set-fail1.yml +++ /dev/null @@ -1,6 +0,0 @@ -- name: test_fail1_nonexistent_root - fields: - - name: file - fields: - - name: path - \ No newline at end of file diff --git a/rfcs/text/0017/exclude-set-fail2.yml b/rfcs/text/0017/exclude-set-fail2.yml deleted file mode 100644 index db8da4fd23..0000000000 --- a/rfcs/text/0017/exclude-set-fail2.yml +++ /dev/null @@ -1,5 +0,0 @@ - - name: log - fields: - - name: test_fail2_nonexistent_node - fields: - - name: path diff --git a/rfcs/text/0017/exclude-set-fail3.yml b/rfcs/text/0017/exclude-set-fail3.yml deleted file mode 100644 index 5e48dbeca5..0000000000 --- a/rfcs/text/0017/exclude-set-fail3.yml +++ /dev/null @@ -1,5 +0,0 @@ - - name: log - fields: - - name: path - fields: - - name: test_fail3_nonexistent_leaf diff --git a/rfcs/text/0017/exclude-set-pass2.yml b/rfcs/text/0017/exclude-set-pass2.yml deleted file mode 100644 index 4a7a2021ba..0000000000 --- a/rfcs/text/0017/exclude-set-pass2.yml +++ /dev/null @@ -1,9 +0,0 @@ ---- -- name: log - fields: - - name: original - - - name: file - fields: - - name: path - \ No newline at end of file From fead411dc678219e09217b7b71cec085e4a7d745 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 11:14:55 +0200 Subject: [PATCH 14/38] refactor - move shared defs from subset to loader --- scripts/schema/exclude_filter.py | 26 +++++++++++++------------- scripts/schema/loader.py | 27 +++++++++++++++++++++++++++ scripts/schema/subset_filter.py | 30 ++---------------------------- 3 files changed, 42 insertions(+), 41 deletions(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index a8c308920d..0e41625bf7 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -1,7 +1,7 @@ import glob import yaml import os -from schema import subset_filter +from schema import loader # This script should be run downstream of the subset filters - it takes # all ECS and custom fields already loaded by the latter and explicitly @@ -18,44 +18,44 @@ def exclude(fields, exclude_file_globs, out_dir): def pop_field(fields, path): - '''pops a field from yaml derived dict using path derived from ordered list of nodes''' + """pops a field from yaml derived dict using path derived from ordered list of nodes""" node_path = path.copy() if node_path[0] in fields: if len(node_path) == 1: b4 = fields.copy() - print("Removed field {0}".format(str(fields.pop(node_path[0]).get("field_details").get("flat_name")))) + print('Removed field {0}'.format(str(fields.pop(node_path[0]).get('field_details').get('flat_name')))) else: inner_field = node_path.pop(0) - pop_field(fields[inner_field]["fields"], node_path) + pop_field(fields[inner_field]['fields'], node_path) else: - print("No match for exclusion:", ".".join([e for e in path])) + print('No match for exclusion:', 
'.'.join([e for e in path])) def exclude_trace_path(fields, item, path): - '''traverses paths to one or more nodes in a yaml derived dict''' + """traverses paths to one or more nodes in a yaml derived dict""" for list_item in item: node_path = path.copy() - node_path.append(list_item["name"]) - if not "fields" in list_item: + node_path.append(list_item['name']) + if not 'fields' in list_item: pop_field(fields, node_path) else: - exclude_trace_path(fields, list_item["fields"], node_path) + exclude_trace_path(fields, list_item['fields'], node_path) def exclude_fields(fields, excludes): - '''Traverses fields and eliminates any field which matches the excludes''' + """Traverses fields and eliminates any field which matches the excludes""" if excludes: for ex_list in excludes: for item in ex_list: - #print("Removing: ", end='') - exclude_trace_path(fields, item["fields"], [item["name"]]) + #print('Removing: ', end='') + exclude_trace_path(fields, item['fields'], [item['name']]) return fields def load_exclude_definitions(file_globs): if not file_globs: return [] - excludes = subset_filter.load_definitions(file_globs) + excludes = loader.load_definitions(file_globs) if not excludes: raise ValueError('--exclude specified, but no exclusions found in {}'.format(file_globs)) return excludes diff --git a/scripts/schema/loader.py b/scripts/schema/loader.py index 04f3218ae4..7d9546f761 100644 --- a/scripts/schema/loader.py +++ b/scripts/schema/loader.py @@ -251,3 +251,30 @@ def merge_fields(a, b): a[key].setdefault('fields', {}) a[key]['fields'] = merge_fields(a[key]['fields'], b[key]['fields']) return a + + +def load_yaml_file(file_name): + with open(file_name) as f: + return yaml.safe_load(f.read()) + + +def eval_globs(globs): + '''Accepts an array of glob patterns or file names, returns the array of actual files''' + all_files = [] + for g in globs: + new_files = glob.glob(g) + if len(new_files) == 0: + warn("{} did not match any files".format(g)) + else: + all_files.extend(new_files) + return all_files + + +def load_definitions(file_globs): + sets = [] + for f in eval_globs(file_globs): + raw = load_yaml_file(f) + sets.append(raw) + return sets + + diff --git a/scripts/schema/subset_filter.py b/scripts/schema/subset_filter.py index 3fa573c9cf..ab543c3e17 100644 --- a/scripts/schema/subset_filter.py +++ b/scripts/schema/subset_filter.py @@ -2,8 +2,7 @@ import yaml import os from generators import intermediate_files -from os.path import join -from schema import cleaner +from schema import cleaner, loader # This script takes all ECS and custom fields already loaded, and lets users # filter out the ones they don't need. 
@@ -31,40 +30,15 @@ def combine_all_subsets(subsets): return merged_subset -def load_definitions(file_globs): - sets = [] - for f in eval_globs(file_globs): - raw = load_yaml_file(f) - sets.append(raw) - return sets - - def load_subset_definitions(file_globs): if not file_globs: return [] - subsets = load_definitions(file_globs) + subsets = loader.load_definitions(file_globs) if not subsets: raise ValueError('--subset specified, but no subsets found in {}'.format(file_globs)) return subsets -def load_yaml_file(file_name): - with open(file_name) as f: - return yaml.safe_load(f.read()) - - -def eval_globs(globs): - '''Accepts an array of glob patterns or file names, returns the array of actual files''' - all_files = [] - for g in globs: - new_files = glob.glob(g) - if len(new_files) == 0: - warn("{} did not match any files".format(g)) - else: - all_files.extend(new_files) - return all_files - - # You know, for silent tests def warn(message): print(message) From cbd45c30e705eb4569615af80c47203045659580 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 11:16:34 +0200 Subject: [PATCH 15/38] refactor - move warn to loader --- scripts/schema/loader.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/schema/loader.py b/scripts/schema/loader.py index 7d9546f761..1c82bdeb3a 100644 --- a/scripts/schema/loader.py +++ b/scripts/schema/loader.py @@ -258,6 +258,11 @@ def load_yaml_file(file_name): return yaml.safe_load(f.read()) +# You know, for silent tests +def warn(message): + print(message) + + def eval_globs(globs): '''Accepts an array of glob patterns or file names, returns the array of actual files''' all_files = [] From 30f88b5cc3e394fd46f77e867585f83de8cf6936 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 11:23:31 +0200 Subject: [PATCH 16/38] moved test_eval_globs to loader unit tests --- scripts/tests/unit/test_schema_loader.py | 9 +++++++++ scripts/tests/unit/test_schema_subset_filter.py | 8 -------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/scripts/tests/unit/test_schema_loader.py b/scripts/tests/unit/test_schema_loader.py index fde33e0a1c..c6838cb40d 100644 --- a/scripts/tests/unit/test_schema_loader.py +++ b/scripts/tests/unit/test_schema_loader.py @@ -14,6 +14,15 @@ class TestSchemaLoader(unittest.TestCase): def setUp(self): self.maxDiff = None + @mock.patch('schema.loader.warn') + def test_eval_globs(self, mock_warn): + files = loader.eval_globs(['schemas/*.yml', 'missing*']) + self.assertTrue(mock_warn.called, "a warning should have been printed for missing*") + self.assertIn('schemas/base.yml', files) + self.assertEqual(list(filter(lambda f: f.startswith('missing'), files)), [], + "The 'missing*' pattern should not show up in the resulting files") + + # Pseudo-fixtures def schema_base(self): diff --git a/scripts/tests/unit/test_schema_subset_filter.py b/scripts/tests/unit/test_schema_subset_filter.py index f108dba4bc..f610fcbddc 100644 --- a/scripts/tests/unit/test_schema_subset_filter.py +++ b/scripts/tests/unit/test_schema_subset_filter.py @@ -14,14 +14,6 @@ class TestSchemaSubsetFilter(unittest.TestCase): def setUp(self): self.maxDiff = None - @mock.patch('schema.subset_filter.warn') - def test_eval_globs(self, mock_warn): - files = subset_filter.eval_globs(['schemas/*.yml', 'missing*']) - self.assertTrue(mock_warn.called, "a warning should have been printed for missing*") - self.assertIn('schemas/base.yml', files) - self.assertEqual(list(filter(lambda f: f.startswith('missing'), files)), [], - "The 'missing*' pattern should 
not show up in the resulting files") - @mock.patch('schema.subset_filter.warn') def test_load_subset_definitions_raises_when_no_subset_found(self, mock_warn): with self.assertRaisesRegex(ValueError, From d17e7b1df765ab23f7fceed00a9a5640773f49dc Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 11:24:44 +0200 Subject: [PATCH 17/38] moved test_eval_globs to loader unit tests --- scripts/schema/loader.py | 2 -- scripts/tests/unit/test_schema_loader.py | 1 - 2 files changed, 3 deletions(-) diff --git a/scripts/schema/loader.py b/scripts/schema/loader.py index 1c82bdeb3a..63097ed6f0 100644 --- a/scripts/schema/loader.py +++ b/scripts/schema/loader.py @@ -281,5 +281,3 @@ def load_definitions(file_globs): raw = load_yaml_file(f) sets.append(raw) return sets - - diff --git a/scripts/tests/unit/test_schema_loader.py b/scripts/tests/unit/test_schema_loader.py index c6838cb40d..b9b263f5df 100644 --- a/scripts/tests/unit/test_schema_loader.py +++ b/scripts/tests/unit/test_schema_loader.py @@ -22,7 +22,6 @@ def test_eval_globs(self, mock_warn): self.assertEqual(list(filter(lambda f: f.startswith('missing'), files)), [], "The 'missing*' pattern should not show up in the resulting files") - # Pseudo-fixtures def schema_base(self): From 20bc1a5db251c5b6e12fefacd6a2d5633ddcf363 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 11:30:20 +0200 Subject: [PATCH 18/38] add test_schema_exclude_filter --- .../tests/unit/test_schema_exclude_filter.py | 267 ++++++++++++++++++ 1 file changed, 267 insertions(+) create mode 100644 scripts/tests/unit/test_schema_exclude_filter.py diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py new file mode 100644 index 0000000000..35f9f00ca4 --- /dev/null +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -0,0 +1,267 @@ +import mock +import os +import pprint +import sys +import unittest + +sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) + +from schema import exclude_filter + + +class TestSchemaExcludeFilter(unittest.TestCase): + + def setUp(self): + self.maxDiff = None + + @mock.patch('schema.exclude_filter.warn') + def test_load_exclude_definitions_raises_when_no_exclude_found(self, mock_warn): + with self.assertRaisesRegex(ValueError, + "--exclude specified, but no excludes found in \['foo\*.yml'\]"): + exclude_filter.load_exclude_definitions(['foo*.yml']) +''' + def test_basic_merging(self): + basics = {'base': {'fields': '*'}, 'event': {}} + network = {'network': {'fields': '*'}} + excludes = {} + exclude_filter.merge_excludes(excludes, basics) + exclude_filter.merge_excludes(excludes, network) + expected_excludes = {**basics, **network} + self.assertEqual(excludes, expected_excludes) + + def test_merging_superset(self): + # 'log' is used to test superset with the explicit '{'fields': '*'}' notation + supersets = {'log': {'fields': '*'}, 'process': {'fields': '*'}} + supserseded = { + 'log': {'fields': {'syslog': {'fields': '*'}}}, + 'process': {'fields': {'parent': {'fields': '*'}}}, + } + excludes = {} + exclude_filter.merge_excludes(excludes, supersets) + exclude_filter.merge_excludes(excludes, supserseded) + self.assertEqual(excludes, supersets) + # reverse order + excludes = {} + exclude_filter.merge_excludes(excludes, supserseded) + exclude_filter.merge_excludes(excludes, supersets) + self.assertEqual(excludes, supersets) + + def test_exclude_option_merging(self): + exclude1 = { + 'log': {'enabled': False}, + 'network': {'enabled': False, 'fields': '*'}, + 'base': 
{'fields': {'message': {'index': False}}}, + } + exclude2 = { + 'log': {'enabled': False}, + 'network': {'fields': '*'}, + 'base': {'fields': {'message': {}}}, + } + expected = { + 'log': {'enabled': False}, + 'network': {'fields': '*'}, + 'base': {'fields': {'message': {}}}, + } + merged = {} + exclude_filter.merge_excludes(merged, exclude1) + exclude_filter.merge_excludes(merged, exclude2) + self.assertEqual(merged, expected) + + def test_strip_non_ecs_options(self): + exclude = { + 'log': { + 'custom_option': True, + 'enabled': False, + 'fields': { + 'syslog': { + 'custom_option': True + } + } + } + } + expected = { + 'log': { + 'enabled': False, + 'fields': { + 'syslog': {} + } + } + } + exclude_filter.strip_non_ecs_options(exclude) + self.assertEqual(exclude, expected) + + def schema_log(self): + return { + 'log': { + 'schema_details': {'root': False}, + 'field_details': { + 'name': 'log', + 'type': 'group' + }, + 'fields': { + 'level': { + 'field_details': { + 'name': 'level', + 'type': 'keyword' + } + }, + 'origin': { + 'field_details': { + 'name': 'origin', + 'intermediate': True, + 'type': 'object' + }, + 'fields': { + 'function': { + 'field_details': { + 'name': 'function', + 'type': 'keyword' + } + }, + 'foo': { + 'field_details': { + 'name': 'foo', + 'type': 'keyword' + } + }, + } + } + } + } + } + + def test_extract_matching_fields_explicit_all_fields_notation(self): + exclude = {'log': {'fields': '*'}} + filtered_fields = exclude_filter.extract_matching_fields(self.schema_log(), exclude) + self.assertEqual(filtered_fields, self.schema_log()) + + def test_extract_matching_fields_subfields_only_notation(self): + exclude = {'log': {'fields': {'origin': {'fields': '*'}}}} + filtered_fields = exclude_filter.extract_matching_fields(self.schema_log(), exclude) + expected_fields = { + 'log': { + 'schema_details': {'root': False}, + 'field_details': { + 'name': 'log', + 'type': 'group' + }, + 'fields': { + 'origin': { + 'field_details': { + 'name': 'origin', + 'intermediate': True, + 'type': 'object' + }, + 'fields': { + 'function': { + 'field_details': { + 'name': 'function', + 'type': 'keyword' + } + }, + 'foo': { + 'field_details': { + 'name': 'foo', + 'type': 'keyword' + } + }, + } + } + } + } + } + self.assertEqual(filtered_fields, expected_fields) + + def test_extract_matching_individual_field(self): + exclude = {'log': {'fields': {'origin': {'fields': {'function': {}}}}}} + filtered_fields = exclude_filter.extract_matching_fields(self.schema_log(), exclude) + expected_fields = { + 'log': { + 'schema_details': {'root': False}, + 'field_details': { + 'name': 'log', + 'type': 'group' + }, + 'fields': { + 'origin': { + 'field_details': { + 'name': 'origin', + 'intermediate': True, + 'type': 'object' + }, + 'fields': { + 'function': { + 'field_details': { + 'name': 'function', + 'type': 'keyword' + } + }, + } + } + } + } + } + self.assertEqual(filtered_fields, expected_fields) + + def test_extract_field_with_options(self): + exclude = { + 'log': { + 'enabled': False, + 'fields': { + 'level': { + 'custom_option': True + }, + 'origin': { + 'custom_option': False, + 'fields': { + 'function': {} + } + } + } + } + } + filtered_fields = exclude_filter.extract_matching_fields(self.schema_log(), exclude) + expected_fields = { + 'log': { + 'schema_details': {'root': False}, + 'field_details': { + 'name': 'log', + 'type': 'group', + 'enabled': False + }, + 'fields': { + 'level': { + 'field_details': { + 'name': 'level', + 'type': 'keyword', + 'custom_option': True + } + }, + 'origin': { + 
'field_details': { + # This field is changed by the exclude_filter from an intermediate field to non-intermediate by adding + # a custom option, so the exclude_filter is responsible for filling in more field_detail attributes + 'name': 'origin', + 'intermediate': False, + 'custom_option': False, + 'description': 'Intermediate field included by adding option with exclude', + 'level': 'custom', + 'type': 'object', + 'short': 'Intermediate field included by adding option with exclude', + 'normalize': [] + }, + 'fields': { + 'function': { + 'field_details': { + 'name': 'function', + 'type': 'keyword' + } + }, + } + } + } + } + } + self.assertEqual(filtered_fields, expected_fields) + +''' \ No newline at end of file From 8764112f8f5eb2b061908fec873d9ef61e128412 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 11:44:59 +0200 Subject: [PATCH 19/38] test_load_exclude_definitions_raises_when_no_exclude_found --- scripts/tests/unit/test_schema_exclude_filter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index 35f9f00ca4..b20670f99c 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -14,11 +14,12 @@ class TestSchemaExcludeFilter(unittest.TestCase): def setUp(self): self.maxDiff = None - @mock.patch('schema.exclude_filter.warn') + @mock.patch('schema.exclude_filter.load_exclude_definitions') def test_load_exclude_definitions_raises_when_no_exclude_found(self, mock_warn): with self.assertRaisesRegex(ValueError, - "--exclude specified, but no excludes found in \['foo\*.yml'\]"): + "--exclude specified, but no exclusions found in \['foo\*.yml'\]"): exclude_filter.load_exclude_definitions(['foo*.yml']) + ''' def test_basic_merging(self): basics = {'base': {'fields': '*'}, 'event': {}} From 29b56b677f0bf5bc414a4e6e1ca96304ae0d73ad Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 12:35:59 +0200 Subject: [PATCH 20/38] added test_exclude_field --- scripts/schema/exclude_filter.py | 2 +- scripts/schema/subset_filter.py | 5 ---- .../tests/unit/test_schema_exclude_filter.py | 26 ++++++++++--------- .../tests/unit/test_schema_subset_filter.py | 2 +- 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index 0e41625bf7..fb0f4f746e 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -44,10 +44,10 @@ def exclude_trace_path(fields, item, path): def exclude_fields(fields, excludes): """Traverses fields and eliminates any field which matches the excludes""" + print(str(excludes)) if excludes: for ex_list in excludes: for item in ex_list: - #print('Removing: ', end='') exclude_trace_path(fields, item['fields'], [item['name']]) return fields diff --git a/scripts/schema/subset_filter.py b/scripts/schema/subset_filter.py index ab543c3e17..e2e14e5aff 100644 --- a/scripts/schema/subset_filter.py +++ b/scripts/schema/subset_filter.py @@ -39,11 +39,6 @@ def load_subset_definitions(file_globs): return subsets -# You know, for silent tests -def warn(message): - print(message) - - ecs_options = ['fields', 'enabled', 'index'] diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index b20670f99c..1e7939f8fb 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -14,22 +14,24 @@ 
class TestSchemaExcludeFilter(unittest.TestCase): def setUp(self): self.maxDiff = None - @mock.patch('schema.exclude_filter.load_exclude_definitions') + @mock.patch('schema.loader.warn') def test_load_exclude_definitions_raises_when_no_exclude_found(self, mock_warn): with self.assertRaisesRegex(ValueError, "--exclude specified, but no exclusions found in \['foo\*.yml'\]"): exclude_filter.load_exclude_definitions(['foo*.yml']) - -''' - def test_basic_merging(self): - basics = {'base': {'fields': '*'}, 'event': {}} - network = {'network': {'fields': '*'}} - excludes = {} - exclude_filter.merge_excludes(excludes, basics) - exclude_filter.merge_excludes(excludes, network) - expected_excludes = {**basics, **network} - self.assertEqual(excludes, expected_excludes) + def test_exclude_field(self): + fields = {'my_field_set': {'fields': { + 'my_field_exclude': {'field_details': {'flat_name': 'my_field_set.my_field_exclude'}}, + 'my_field_persist': {'field_details': {'flat_name': 'my_field_set.my_field_persist'}}}}} + excludes = [[{'name': 'my_field_set', 'fields': [{'name': 'my_field_exclude'}]}]] + fields = exclude_filter.exclude_fields(fields, excludes) + expect_persisted = {'my_field_set': {'fields': { + 'my_field_persist': {'field_details': {'flat_name': 'my_field_set.my_field_persist'}}}}} + self.assertEqual(fields, expect_persisted) + + +''' def test_merging_superset(self): # 'log' is used to test superset with the explicit '{'fields': '*'}' notation supersets = {'log': {'fields': '*'}, 'process': {'fields': '*'}} @@ -265,4 +267,4 @@ def test_extract_field_with_options(self): } self.assertEqual(filtered_fields, expected_fields) -''' \ No newline at end of file +''' diff --git a/scripts/tests/unit/test_schema_subset_filter.py b/scripts/tests/unit/test_schema_subset_filter.py index f610fcbddc..e7ae5fd211 100644 --- a/scripts/tests/unit/test_schema_subset_filter.py +++ b/scripts/tests/unit/test_schema_subset_filter.py @@ -14,7 +14,7 @@ class TestSchemaSubsetFilter(unittest.TestCase): def setUp(self): self.maxDiff = None - @mock.patch('schema.subset_filter.warn') + @mock.patch('schema.loader.warn') def test_load_subset_definitions_raises_when_no_subset_found(self, mock_warn): with self.assertRaisesRegex(ValueError, "--subset specified, but no subsets found in \['foo\*.yml'\]"): From f32b83cc7ceb9b321b060c8203d4fbccfc6c46b9 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 12:40:13 +0200 Subject: [PATCH 21/38] added test_exclude_fields --- scripts/tests/unit/test_schema_exclude_filter.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index 1e7939f8fb..f6d3f1638b 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -30,6 +30,16 @@ def test_exclude_field(self): 'my_field_persist': {'field_details': {'flat_name': 'my_field_set.my_field_persist'}}}}} self.assertEqual(fields, expect_persisted) + def test_exclude_fields(self): + fields = {'my_field_set': {'fields': { + 'my_field_exclude_1': {'field_details': {'flat_name': 'my_field_set.my_field_exclude_1'}}, + 'my_field_exclude_2': {'field_details': {'flat_name': 'my_field_set.my_field_exclude_2'}}}}} + excludes = [[{'name': 'my_field_set', 'fields': [ + {'name': 'my_field_exclude_1'}, {'name': 'my_field_exclude_2'}]}]] + fields = exclude_filter.exclude_fields(fields, excludes) + expect_persisted = {'my_field_set': {'fields': {}}} + self.assertEqual(fields, 
expect_persisted) + ''' def test_merging_superset(self): From 17c62ef4914319547b605f0593715eb6c67b1895 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 12:51:05 +0200 Subject: [PATCH 22/38] added test_exclude_non_existing_field_set --- scripts/schema/exclude_filter.py | 3 +-- scripts/tests/unit/test_schema_exclude_filter.py | 9 +++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index fb0f4f746e..990aa0692e 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -28,7 +28,7 @@ def pop_field(fields, path): inner_field = node_path.pop(0) pop_field(fields[inner_field]['fields'], node_path) else: - print('No match for exclusion:', '.'.join([e for e in path])) + raise ValueError('--exclude specified, but no field {} found'.format('.'.join([e for e in path]))) def exclude_trace_path(fields, item, path): @@ -44,7 +44,6 @@ def exclude_trace_path(fields, item, path): def exclude_fields(fields, excludes): """Traverses fields and eliminates any field which matches the excludes""" - print(str(excludes)) if excludes: for ex_list in excludes: for item in ex_list: diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index f6d3f1638b..7656ca5184 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -40,6 +40,15 @@ def test_exclude_fields(self): expect_persisted = {'my_field_set': {'fields': {}}} self.assertEqual(fields, expect_persisted) + def test_exclude_non_existing_field_set(self): + fields = {'my_field_set': {'fields': { + 'my_field_exclude': {'field_details': {'flat_name': 'my_field_set.my_field_exclude'}}}}} + excludes = [[{'name': 'my_non_existing_field_set', 'fields': [ + {'name': 'my_field_exclude_1'}]}]] + with self.assertRaisesRegex(ValueError, + "--exclude specified, but no field my_non_existing_field_set.my_field_exclude_1 found"): + exclude_filter.exclude_fields(fields, excludes) + ''' def test_merging_superset(self): From c8c1c70ffb1d0ecfb4b053b7b8375a1234009a26 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 13:20:40 +0200 Subject: [PATCH 23/38] added test_exclude_non_existing_field --- scripts/schema/exclude_filter.py | 8 +++----- scripts/tests/unit/test_schema_exclude_filter.py | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index 990aa0692e..f076c882db 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -17,16 +17,14 @@ def exclude(fields, exclude_file_globs, out_dir): return fields -def pop_field(fields, path): +def pop_field(fields, node_path, path): """pops a field from yaml derived dict using path derived from ordered list of nodes""" - node_path = path.copy() if node_path[0] in fields: if len(node_path) == 1: - b4 = fields.copy() print('Removed field {0}'.format(str(fields.pop(node_path[0]).get('field_details').get('flat_name')))) else: inner_field = node_path.pop(0) - pop_field(fields[inner_field]['fields'], node_path) + pop_field(fields[inner_field]['fields'], node_path, path) else: raise ValueError('--exclude specified, but no field {} found'.format('.'.join([e for e in path]))) @@ -37,7 +35,7 @@ def exclude_trace_path(fields, item, path): node_path = path.copy() node_path.append(list_item['name']) if not 'fields' in list_item: - pop_field(fields, node_path) + 
pop_field(fields, node_path, node_path.copy()) else: exclude_trace_path(fields, list_item['fields'], node_path) diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index 7656ca5184..70824b6944 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -42,11 +42,20 @@ def test_exclude_fields(self): def test_exclude_non_existing_field_set(self): fields = {'my_field_set': {'fields': { - 'my_field_exclude': {'field_details': {'flat_name': 'my_field_set.my_field_exclude'}}}}} + 'my_field': {'field_details': {'flat_name': 'my_field_set.my_field'}}}}} excludes = [[{'name': 'my_non_existing_field_set', 'fields': [ - {'name': 'my_field_exclude_1'}]}]] + {'name': 'my_field_exclude'}]}]] with self.assertRaisesRegex(ValueError, - "--exclude specified, but no field my_non_existing_field_set.my_field_exclude_1 found"): + "--exclude specified, but no field my_non_existing_field_set.my_field_exclude found"): + exclude_filter.exclude_fields(fields, excludes) + + def test_exclude_non_existing_field(self): + fields = {'my_field_set': {'fields': { + 'my_field': {'field_details': {'flat_name': 'my_field_set.my_field'}}}}} + excludes = [[{'name': 'my_field_set', 'fields': [ + {'name': 'my_non_existing_field'}]}]] + with self.assertRaisesRegex(ValueError, + "--exclude specified, but no field my_field_set.my_non_existing_field found"): exclude_filter.exclude_fields(fields, excludes) From 90f0b5fba7ecd09c92f05811f54f30d491b5d569 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 13:33:57 +0200 Subject: [PATCH 24/38] added test_exclude_field_deep_path --- scripts/tests/unit/test_schema_exclude_filter.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index 70824b6944..0434c4f677 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -30,6 +30,21 @@ def test_exclude_field(self): 'my_field_persist': {'field_details': {'flat_name': 'my_field_set.my_field_persist'}}}}} self.assertEqual(fields, expect_persisted) + def test_exclude_field_deep_path(self): + fields = {'d0': {'fields': { + 'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': { + 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': { + 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}}}}}}}}} + excludes = [[{'name': 'd0', 'fields': [{ + 'name': 'd1', 'fields': [{ + 'name': 'd2', 'fields': [{ + 'name': 'd3'}]}]}]}]] + fields = exclude_filter.exclude_fields(fields, excludes) + expect_persisted = {'d0': {'fields': { + 'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': { + 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': {}}}}}}} + self.assertEqual(fields, expect_persisted) + def test_exclude_fields(self): fields = {'my_field_set': {'fields': { 'my_field_exclude_1': {'field_details': {'flat_name': 'my_field_set.my_field_exclude_1'}}, From c5a7a2aa22de8edf3fcb3a63dc35cb29aa59cc16 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 14:13:28 +0200 Subject: [PATCH 25/38] adding test_exclude_non_existing_field_deep_path --- scripts/schema/exclude_filter.py | 5 +- .../tests/unit/test_schema_exclude_filter.py | 261 ++---------------- 2 files changed, 24 insertions(+), 242 deletions(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index f076c882db..7d9be2ae64 100644 --- 
a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -24,7 +24,10 @@ def pop_field(fields, node_path, path): print('Removed field {0}'.format(str(fields.pop(node_path[0]).get('field_details').get('flat_name')))) else: inner_field = node_path.pop(0) - pop_field(fields[inner_field]['fields'], node_path, path) + if 'fields' in fields[inner_field]: + pop_field(fields[inner_field]['fields'], node_path, path) + else: + raise ValueError('--exclude specified, but no path to field {} found'.format('.'.join([e for e in path]))) else: raise ValueError('--exclude specified, but no field {} found'.format('.'.join([e for e in path]))) diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index 0434c4f677..c9f77db246 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -32,17 +32,17 @@ def test_exclude_field(self): def test_exclude_field_deep_path(self): fields = {'d0': {'fields': { - 'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': { - 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': { - 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}}}}}}}}} + 'd1': {'field_details': {'flat_name': 'd0.d1'}}, 'fields': { + 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}}, 'fields': { + 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}}}}}}} excludes = [[{'name': 'd0', 'fields': [{ 'name': 'd1', 'fields': [{ 'name': 'd2', 'fields': [{ 'name': 'd3'}]}]}]}]] fields = exclude_filter.exclude_fields(fields, excludes) expect_persisted = {'d0': {'fields': { - 'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': { - 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': {}}}}}}} + 'd1': {'field_details': {'flat_name': 'd0.d1'}}, 'fields': { + 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}}, 'fields': {}}}}} self.assertEqual(fields, expect_persisted) def test_exclude_fields(self): @@ -74,240 +74,19 @@ def test_exclude_non_existing_field(self): exclude_filter.exclude_fields(fields, excludes) -''' - def test_merging_superset(self): - # 'log' is used to test superset with the explicit '{'fields': '*'}' notation - supersets = {'log': {'fields': '*'}, 'process': {'fields': '*'}} - supserseded = { - 'log': {'fields': {'syslog': {'fields': '*'}}}, - 'process': {'fields': {'parent': {'fields': '*'}}}, - } - excludes = {} - exclude_filter.merge_excludes(excludes, supersets) - exclude_filter.merge_excludes(excludes, supserseded) - self.assertEqual(excludes, supersets) - # reverse order - excludes = {} - exclude_filter.merge_excludes(excludes, supserseded) - exclude_filter.merge_excludes(excludes, supersets) - self.assertEqual(excludes, supersets) - - def test_exclude_option_merging(self): - exclude1 = { - 'log': {'enabled': False}, - 'network': {'enabled': False, 'fields': '*'}, - 'base': {'fields': {'message': {'index': False}}}, - } - exclude2 = { - 'log': {'enabled': False}, - 'network': {'fields': '*'}, - 'base': {'fields': {'message': {}}}, - } - expected = { - 'log': {'enabled': False}, - 'network': {'fields': '*'}, - 'base': {'fields': {'message': {}}}, - } - merged = {} - exclude_filter.merge_excludes(merged, exclude1) - exclude_filter.merge_excludes(merged, exclude2) - self.assertEqual(merged, expected) - - def test_strip_non_ecs_options(self): - exclude = { - 'log': { - 'custom_option': True, - 'enabled': False, - 'fields': { - 'syslog': { - 'custom_option': True - } - } - } - } - expected = { - 'log': { - 'enabled': False, - 'fields': { 
- 'syslog': {} - } - } - } - exclude_filter.strip_non_ecs_options(exclude) - self.assertEqual(exclude, expected) - - def schema_log(self): - return { - 'log': { - 'schema_details': {'root': False}, - 'field_details': { - 'name': 'log', - 'type': 'group' - }, - 'fields': { - 'level': { - 'field_details': { - 'name': 'level', - 'type': 'keyword' - } - }, - 'origin': { - 'field_details': { - 'name': 'origin', - 'intermediate': True, - 'type': 'object' - }, - 'fields': { - 'function': { - 'field_details': { - 'name': 'function', - 'type': 'keyword' - } - }, - 'foo': { - 'field_details': { - 'name': 'foo', - 'type': 'keyword' - } - }, - } - } - } - } - } - - def test_extract_matching_fields_explicit_all_fields_notation(self): - exclude = {'log': {'fields': '*'}} - filtered_fields = exclude_filter.extract_matching_fields(self.schema_log(), exclude) - self.assertEqual(filtered_fields, self.schema_log()) - - def test_extract_matching_fields_subfields_only_notation(self): - exclude = {'log': {'fields': {'origin': {'fields': '*'}}}} - filtered_fields = exclude_filter.extract_matching_fields(self.schema_log(), exclude) - expected_fields = { - 'log': { - 'schema_details': {'root': False}, - 'field_details': { - 'name': 'log', - 'type': 'group' - }, - 'fields': { - 'origin': { - 'field_details': { - 'name': 'origin', - 'intermediate': True, - 'type': 'object' - }, - 'fields': { - 'function': { - 'field_details': { - 'name': 'function', - 'type': 'keyword' - } - }, - 'foo': { - 'field_details': { - 'name': 'foo', - 'type': 'keyword' - } - }, - } - } - } - } - } - self.assertEqual(filtered_fields, expected_fields) - - def test_extract_matching_individual_field(self): - exclude = {'log': {'fields': {'origin': {'fields': {'function': {}}}}}} - filtered_fields = exclude_filter.extract_matching_fields(self.schema_log(), exclude) - expected_fields = { - 'log': { - 'schema_details': {'root': False}, - 'field_details': { - 'name': 'log', - 'type': 'group' - }, - 'fields': { - 'origin': { - 'field_details': { - 'name': 'origin', - 'intermediate': True, - 'type': 'object' - }, - 'fields': { - 'function': { - 'field_details': { - 'name': 'function', - 'type': 'keyword' - } - }, - } - } - } - } - } - self.assertEqual(filtered_fields, expected_fields) - - def test_extract_field_with_options(self): - exclude = { - 'log': { - 'enabled': False, - 'fields': { - 'level': { - 'custom_option': True - }, - 'origin': { - 'custom_option': False, - 'fields': { - 'function': {} - } - } - } - } - } - filtered_fields = exclude_filter.extract_matching_fields(self.schema_log(), exclude) - expected_fields = { - 'log': { - 'schema_details': {'root': False}, - 'field_details': { - 'name': 'log', - 'type': 'group', - 'enabled': False - }, - 'fields': { - 'level': { - 'field_details': { - 'name': 'level', - 'type': 'keyword', - 'custom_option': True - } - }, - 'origin': { - 'field_details': { - # This field is changed by the exclude_filter from an intermediate field to non-intermediate by adding - # a custom option, so the exclude_filter is responsible for filling in more field_detail attributes - 'name': 'origin', - 'intermediate': False, - 'custom_option': False, - 'description': 'Intermediate field included by adding option with exclude', - 'level': 'custom', - 'type': 'object', - 'short': 'Intermediate field included by adding option with exclude', - 'normalize': [] - }, - 'fields': { - 'function': { - 'field_details': { - 'name': 'function', - 'type': 'keyword' - } - }, - } - } - } - } - } - self.assertEqual(filtered_fields, 
expected_fields) + def test_exclude_non_existing_field_deep_path(self): + fields = {'d0': {'fields': { + 'd1': {'field_details': {'flat_name': 'd0.d1'}}, 'fields': { + 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}}, 'fields': { + 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}}}}}}} + excludes = [[{'name': 'd0', 'fields': [{ + 'name': 'd1', 'fields': [{ + 'name': 'd2', 'fields': [{ + 'name': 'd3', 'fields': [{ + 'name': 'd4', 'fields': [{ + 'name': 'd5'}]}]}]}]}]}]] + with self.assertRaisesRegex(ValueError, + "--exclude specified, but no path to field d0.d1.d2.d3.d4.d5 found"): + exclude_filter.exclude_fields(fields, excludes) -''' + \ No newline at end of file From a196562913fc5ab600ba29d46d6af7f542673632 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 14:48:05 +0200 Subject: [PATCH 26/38] WIP - test_exclude_field_deep_path is failing --- scripts/schema/exclude_filter.py | 4 +++- .../tests/unit/test_schema_exclude_filter.py | 23 +++++++++++-------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index 7d9be2ae64..e97d8e5e13 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -27,7 +27,8 @@ def pop_field(fields, node_path, path): if 'fields' in fields[inner_field]: pop_field(fields[inner_field]['fields'], node_path, path) else: - raise ValueError('--exclude specified, but no path to field {} found'.format('.'.join([e for e in path]))) + raise ValueError( + '--exclude specified, but no path to field {} found'.format('.'.join([e for e in path]))) else: raise ValueError('--exclude specified, but no field {} found'.format('.'.join([e for e in path]))) @@ -44,6 +45,7 @@ def exclude_trace_path(fields, item, path): def exclude_fields(fields, excludes): + print(fields, excludes) """Traverses fields and eliminates any field which matches the excludes""" if excludes: for ex_list in excludes: diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index c9f77db246..243d9e7cff 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -1,3 +1,4 @@ +from schema import exclude_filter import mock import os import pprint @@ -6,8 +7,6 @@ sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) -from schema import exclude_filter - class TestSchemaExcludeFilter(unittest.TestCase): @@ -24,26 +23,35 @@ def test_exclude_field(self): fields = {'my_field_set': {'fields': { 'my_field_exclude': {'field_details': {'flat_name': 'my_field_set.my_field_exclude'}}, 'my_field_persist': {'field_details': {'flat_name': 'my_field_set.my_field_persist'}}}}} - excludes = [[{'name': 'my_field_set', 'fields': [{'name': 'my_field_exclude'}]}]] + excludes = [ + [{'name': 'my_field_set', 'fields': [{'name': 'my_field_exclude'}]}]] fields = exclude_filter.exclude_fields(fields, excludes) expect_persisted = {'my_field_set': {'fields': { 'my_field_persist': {'field_details': {'flat_name': 'my_field_set.my_field_persist'}}}}} self.assertEqual(fields, expect_persisted) + """ def test_exclude_field_deep_path(self): fields = {'d0': {'fields': { 'd1': {'field_details': {'flat_name': 'd0.d1'}}, 'fields': { 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}}, 'fields': { - 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}}}}}}} + 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}}, 'fields': { + 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}}, 'fields': { + 'd5': 
{'field_details': {'flat_name': 'd0.d1.d2.d3.d3.d4.d5'}}}}}}}}} excludes = [[{'name': 'd0', 'fields': [{ 'name': 'd1', 'fields': [{ 'name': 'd2', 'fields': [{ - 'name': 'd3'}]}]}]}]] + 'name': 'd3', 'fields': [{ + 'name': 'd4', 'fields': [{ + 'name': 'd5'}]}]}]}]}]}]] fields = exclude_filter.exclude_fields(fields, excludes) expect_persisted = {'d0': {'fields': { 'd1': {'field_details': {'flat_name': 'd0.d1'}}, 'fields': { - 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}}, 'fields': {}}}}} + 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}}, 'fields': { + 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}}, 'fields': { + 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}}, 'fields': {}}}}}}} self.assertEqual(fields, expect_persisted) + """ def test_exclude_fields(self): fields = {'my_field_set': {'fields': { @@ -73,7 +81,6 @@ def test_exclude_non_existing_field(self): "--exclude specified, but no field my_field_set.my_non_existing_field found"): exclude_filter.exclude_fields(fields, excludes) - def test_exclude_non_existing_field_deep_path(self): fields = {'d0': {'fields': { 'd1': {'field_details': {'flat_name': 'd0.d1'}}, 'fields': { @@ -88,5 +95,3 @@ def test_exclude_non_existing_field_deep_path(self): with self.assertRaisesRegex(ValueError, "--exclude specified, but no path to field d0.d1.d2.d3.d4.d5 found"): exclude_filter.exclude_fields(fields, excludes) - - \ No newline at end of file From 9128f6e89069894e54a4c7b97b5e74f1aef7d18d Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 17:37:08 +0200 Subject: [PATCH 27/38] resolved test_exclude_field_deep_path --- scripts/schema/exclude_filter.py | 1 - .../tests/unit/test_schema_exclude_filter.py | 22 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index e97d8e5e13..085f9ca13b 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -45,7 +45,6 @@ def exclude_trace_path(fields, item, path): def exclude_fields(fields, excludes): - print(fields, excludes) """Traverses fields and eliminates any field which matches the excludes""" if excludes: for ex_list in excludes: diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index 243d9e7cff..c3ab85a8a9 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -1,3 +1,5 @@ +from unittest import result +from unittest.case import TestCase from schema import exclude_filter import mock import os @@ -30,14 +32,13 @@ def test_exclude_field(self): 'my_field_persist': {'field_details': {'flat_name': 'my_field_set.my_field_persist'}}}}} self.assertEqual(fields, expect_persisted) - """ def test_exclude_field_deep_path(self): fields = {'d0': {'fields': { - 'd1': {'field_details': {'flat_name': 'd0.d1'}}, 'fields': { - 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}}, 'fields': { - 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}}, 'fields': { - 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}}, 'fields': { - 'd5': {'field_details': {'flat_name': 'd0.d1.d2.d3.d3.d4.d5'}}}}}}}}} + 'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': { + 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': { + 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}, 'fields': { + 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}, 'fields': { + 'd5': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4.d5'}}}}}}}}}}}}} excludes = 
[[{'name': 'd0', 'fields': [{ 'name': 'd1', 'fields': [{ 'name': 'd2', 'fields': [{ @@ -46,12 +47,11 @@ def test_exclude_field_deep_path(self): 'name': 'd5'}]}]}]}]}]}]] fields = exclude_filter.exclude_fields(fields, excludes) expect_persisted = {'d0': {'fields': { - 'd1': {'field_details': {'flat_name': 'd0.d1'}}, 'fields': { - 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}}, 'fields': { - 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}}, 'fields': { - 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}}, 'fields': {}}}}}}} + 'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': { + 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': { + 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}, 'fields': { + 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}, 'fields': {}}}}}}}}}}} self.assertEqual(fields, expect_persisted) - """ def test_exclude_fields(self): fields = {'my_field_set': {'fields': { From 2dab2af7d73fbbe966273008ca41c4d9200ba185 Mon Sep 17 00:00:00 2001 From: djptek Date: Fri, 14 May 2021 18:29:59 +0200 Subject: [PATCH 28/38] remove print statement --- scripts/schema/exclude_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index 085f9ca13b..527bed64d5 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -21,7 +21,7 @@ def pop_field(fields, node_path, path): """pops a field from yaml derived dict using path derived from ordered list of nodes""" if node_path[0] in fields: if len(node_path) == 1: - print('Removed field {0}'.format(str(fields.pop(node_path[0]).get('field_details').get('flat_name')))) + fields.pop(node_path[0]) else: inner_field = node_path.pop(0) if 'fields' in fields[inner_field]: From 2e5abb050f7fd49f77a3239c786f11ab635d1fc6 Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 20 May 2021 10:52:43 +0200 Subject: [PATCH 29/38] normalize use of 3xdouble quote in comments --- scripts/generators/beats.py | 4 ++-- scripts/generators/ecs_helpers.py | 4 ++-- scripts/generators/es_template.py | 4 ++-- scripts/generators/intermediate_files.py | 12 ++++++------ scripts/schema/cleaner.py | 10 +++++----- scripts/schema/finalizer.py | 20 ++++++++++---------- scripts/schema/loader.py | 6 +++--- scripts/schema/subset_filter.py | 6 +++--- scripts/schema/visitor.py | 12 ++++++------ scripts/tests/unit/test_schema_cleaner.py | 2 +- 10 files changed, 40 insertions(+), 40 deletions(-) diff --git a/scripts/generators/beats.py b/scripts/generators/beats.py index 6f70921f67..5708a03555 100644 --- a/scripts/generators/beats.py +++ b/scripts/generators/beats.py @@ -83,9 +83,9 @@ def write_beats_yaml(beats_file, ecs_version, out_dir): def file_header(): - return ''' + return """ # WARNING! Do not edit this file directly, it was generated by the ECS project, # based on ECS version {version}. # Please visit https://github.com/elastic/ecs to suggest changes to ECS fields. -'''.lstrip() +""".lstrip() diff --git a/scripts/generators/ecs_helpers.py b/scripts/generators/ecs_helpers.py index 086f4d592d..fbf7f4a2a1 100644 --- a/scripts/generators/ecs_helpers.py +++ b/scripts/generators/ecs_helpers.py @@ -159,7 +159,7 @@ def yaml_load(filename): def list_subtract(original, subtracted): - '''Subtract two lists. original = subtracted''' + """Subtract two lists. 
original = subtracted""" return [item for item in original if item not in subtracted] @@ -175,7 +175,7 @@ def list_extract_keys(lst, key_name): def is_intermediate(field): - '''Encapsulates the check to see if a field is an intermediate field or a "real" field.''' + """Encapsulates the check to see if a field is an intermediate field or a "real" field.""" return ('intermediate' in field['field_details'] and field['field_details']['intermediate']) diff --git a/scripts/generators/es_template.py b/scripts/generators/es_template.py index c555c188c7..46d529963e 100644 --- a/scripts/generators/es_template.py +++ b/scripts/generators/es_template.py @@ -269,13 +269,13 @@ def default_mapping_settings(): def es6_type_fallback(mappings): - ''' + """ Visits each leaf in mappings object and fallback to an Elasticsearch 6.x supported type. Since a field like `wildcard` won't have the same defaults as a `keyword` field, we must add any missing defaults. - ''' + """ for (name, details) in mappings.items(): if 'type' in details: diff --git a/scripts/generators/intermediate_files.py b/scripts/generators/intermediate_files.py index d21800936f..c085039b62 100644 --- a/scripts/generators/intermediate_files.py +++ b/scripts/generators/intermediate_files.py @@ -20,7 +20,7 @@ def generate(fields, out_dir, default_dirs): def generate_flat_fields(fields): - '''Generate ecs_flat.yml''' + """Generate ecs_flat.yml""" filtered = remove_non_root_reusables(fields) flattened = {} visitor.visit_fields_with_memo(filtered, accumulate_field, flattened) @@ -28,7 +28,7 @@ def generate_flat_fields(fields): def accumulate_field(details, memo): - '''Visitor function that accumulates all field details in the memo dict''' + """Visitor function that accumulates all field details in the memo dict""" if 'schema_details' in details or ecs_helpers.is_intermediate(details): return field_details = copy.deepcopy(details['field_details']) @@ -39,7 +39,7 @@ def accumulate_field(details, memo): def generate_nested_fields(fields): - '''Generate ecs_nested.yml''' + """Generate ecs_nested.yml""" nested = {} # Flatten each field set, but keep all resulting fields nested under their # parent/host field set. @@ -71,13 +71,13 @@ def generate_nested_fields(fields): def remove_internal_attributes(field_details): - '''Remove attributes only relevant to the deeply nested structure, but not to ecs_flat/nested.yml.''' + """Remove attributes only relevant to the deeply nested structure, but not to ecs_flat/nested.yml.""" field_details.pop('node_name', None) field_details.pop('intermediate', None) def remove_non_root_reusables(fields_nested): - ''' + """ Remove field sets that have top_level=false from the root of the field definitions. This attribute means they're only meant to be in the "reusable/expected" locations @@ -87,7 +87,7 @@ def remove_non_root_reusables(fields_nested): still needs to keep all field sets at the root of the YAML file, as it the official information about each field set. It's the responsibility of users consuming ecs_nested.yml to skip the field sets with top_level=false. 
- ''' + """ fields = {} for (name, field) in fields_nested.items(): if 'reusable' not in field['schema_details'] or field['schema_details']['reusable']['top_level']: diff --git a/scripts/schema/cleaner.py b/scripts/schema/cleaner.py index b316ef7298..efcc1d08a2 100644 --- a/scripts/schema/cleaner.py +++ b/scripts/schema/cleaner.py @@ -56,7 +56,7 @@ def schema_cleanup(schema): def schema_mandatory_attributes(schema): - '''Ensures for the presence of the mandatory schema attributes and raises if any are missing''' + """Ensures for the presence of the mandatory schema attributes and raises if any are missing""" current_schema_attributes = sorted(list(schema['field_details'].keys()) + list(schema['schema_details'].keys())) missing_attributes = ecs_helpers.list_subtract(SCHEMA_MANDATORY_ATTRIBUTES, current_schema_attributes) @@ -74,7 +74,7 @@ def schema_mandatory_attributes(schema): def schema_assertions_and_warnings(schema): - '''Additional checks on a fleshed out schema''' + """Additional checks on a fleshed out schema""" single_line_short_description(schema, strict=strict_mode) if 'beta' in schema['field_details']: single_line_beta_description(schema, strict=strict_mode) @@ -143,7 +143,7 @@ def field_defaults(field): def field_or_multi_field_datatype_defaults(field_details): - '''Sets datatype-related defaults on a canonical field or multi-field entries.''' + """Sets datatype-related defaults on a canonical field or multi-field entries.""" if field_details['type'] == 'keyword': field_details.setdefault('ignore_above', 1024) if field_details['type'] == 'text': @@ -160,7 +160,7 @@ def field_or_multi_field_datatype_defaults(field_details): def field_mandatory_attributes(field): - '''Ensures for the presence of the mandatory field attributes and raises if any are missing''' + """Ensures for the presence of the mandatory field attributes and raises if any are missing""" if ecs_helpers.is_intermediate(field): return current_field_attributes = sorted(field['field_details'].keys()) @@ -180,7 +180,7 @@ def field_mandatory_attributes(field): def field_assertions_and_warnings(field): - '''Additional checks on a fleshed out field''' + """Additional checks on a fleshed out field""" if not ecs_helpers.is_intermediate(field): # check short description length if in strict mode single_line_short_description(field, strict=strict_mode) diff --git a/scripts/schema/finalizer.py b/scripts/schema/finalizer.py index 84b4c21062..648349b8ef 100644 --- a/scripts/schema/finalizer.py +++ b/scripts/schema/finalizer.py @@ -19,7 +19,7 @@ def finalize(fields): - '''Intended entrypoint of the finalizer.''' + """Intended entrypoint of the finalizer.""" perform_reuse(fields) calculate_final_values(fields) @@ -46,7 +46,7 @@ def order_reuses(fields): def perform_reuse(fields): - '''Performs field reuse in two phases''' + """Performs field reuse in two phases""" foreign_reuses, self_nestings = order_reuses(fields) # Phase 1: foreign reuse @@ -99,11 +99,11 @@ def perform_reuse(fields): def ensure_valid_reuse(reused_schema, destination_schema=None): - ''' + """ Raise if either the reused schema or destination schema have root=true. Second param is optional, if testing for a self-nesting (where source=destination). 
- ''' + """ if reused_schema['schema_details']['root']: msg = "Schema {} has attribute root=true and therefore cannot be reused.".format( reused_schema['field_details']['name']) @@ -115,7 +115,7 @@ def ensure_valid_reuse(reused_schema, destination_schema=None): def append_reused_here(reused_schema, reuse_entry, destination_schema): - '''Captures two ways of denoting what field sets are reused under a given field set''' + """Captures two ways of denoting what field sets are reused under a given field set""" # Legacy, too limited destination_schema['schema_details'].setdefault('nestings', []) destination_schema['schema_details']['nestings'] = sorted( @@ -136,7 +136,7 @@ def append_reused_here(reused_schema, reuse_entry, destination_schema): def set_original_fieldset(fields, original_fieldset): - '''Recursively set the 'original_fieldset' attribute for all fields in a group of fields''' + """Recursively set the 'original_fieldset' attribute for all fields in a group of fields""" def func(details): # Don't override if already set (e.g. 'group' for user.group.* fields) details['field_details'].setdefault('original_fieldset', original_fieldset) @@ -144,7 +144,7 @@ def func(details): def field_group_at_path(dotted_path, fields): - '''Returns the ['fields'] hash at the dotted_path.''' + """Returns the ['fields'] hash at the dotted_path.""" path = dotted_path.split('.') nesting = fields for next_field in path: @@ -163,17 +163,17 @@ def field_group_at_path(dotted_path, fields): def calculate_final_values(fields): - ''' + """ This function navigates all fields recursively. It populates a few more values for the fields, especially path-based values like flat_name. - ''' + """ visitor.visit_fields_with_path(fields, field_finalizer) def field_finalizer(details, path): - '''This is the function called by the visitor to perform the work of calculate_final_values''' + """This is the function called by the visitor to perform the work of calculate_final_values""" name_array = path + [details['field_details']['node_name']] flat_name = '.'.join(name_array) details['field_details']['flat_name'] = flat_name diff --git a/scripts/schema/loader.py b/scripts/schema/loader.py index 63097ed6f0..88b89bdde0 100644 --- a/scripts/schema/loader.py +++ b/scripts/schema/loader.py @@ -109,12 +109,12 @@ def read_schema_blob(blob, ref): def nest_schema(raw, file_name): - ''' + """ Raw schema files are an array of schema details: [{'name': 'base', ...}] This function loops over the array (usually 1 schema per file) and turns it into a dict with the schema name as the key: { 'base': { 'name': 'base', ...}} - ''' + """ fields = {} for schema in raw: if 'name' not in schema: @@ -264,7 +264,7 @@ def warn(message): def eval_globs(globs): - '''Accepts an array of glob patterns or file names, returns the array of actual files''' + """Accepts an array of glob patterns or file names, returns the array of actual files""" all_files = [] for g in globs: new_files = glob.glob(g) diff --git a/scripts/schema/subset_filter.py b/scripts/schema/subset_filter.py index e2e14e5aff..9fa74f9e82 100644 --- a/scripts/schema/subset_filter.py +++ b/scripts/schema/subset_filter.py @@ -22,7 +22,7 @@ def filter(fields, subset_file_globs, out_dir): def combine_all_subsets(subsets): - '''Merges N subsets into one. Strips top level 'name' and 'fields' keys as well as non-ECS field options since we can't know how to merge those.''' + """Merges N subsets into one. 
Strips top level 'name' and 'fields' keys as well as non-ECS field options since we can't know how to merge those.""" merged_subset = {} for subset in subsets: strip_non_ecs_options(subset['fields']) @@ -50,7 +50,7 @@ def strip_non_ecs_options(subset): def merge_subsets(a, b): - '''Merges field subset definitions together. The b subset is merged into the a subset. Assumes that subsets have been stripped of non-ecs options.''' + """Merges field subset definitions together. The b subset is merged into the a subset. Assumes that subsets have been stripped of non-ecs options.""" for key in b: if key not in a: a[key] = b[key] @@ -71,7 +71,7 @@ def merge_subsets(a, b): def extract_matching_fields(fields, subset_definitions): - '''Removes fields that are not in the subset definition. Returns a copy without modifying the input fields dict.''' + """Removes fields that are not in the subset definition. Returns a copy without modifying the input fields dict.""" retained_fields = {x: fields[x].copy() for x in subset_definitions} for key, val in subset_definitions.items(): retained_fields[key]['field_details'] = fields[key]['field_details'].copy() diff --git a/scripts/schema/visitor.py b/scripts/schema/visitor.py index 5c2e030da5..3c3d762bad 100644 --- a/scripts/schema/visitor.py +++ b/scripts/schema/visitor.py @@ -1,5 +1,5 @@ def visit_fields(fields, fieldset_func=None, field_func=None): - ''' + """ This function navigates the deeply nested tree structure and runs provided functions on each fieldset or field encountered (both optional). @@ -11,7 +11,7 @@ def visit_fields(fields, fieldset_func=None, field_func=None): The 'field_func(details)' provided will be called for each field, with the dictionary containing the field's details ({'field_details': {}, 'fields': {}). - ''' + """ for (name, details) in fields.items(): if fieldset_func and 'schema_details' in details: fieldset_func(details) @@ -24,14 +24,14 @@ def visit_fields(fields, fieldset_func=None, field_func=None): def visit_fields_with_path(fields, func, path=[]): - ''' + """ This function navigates the deeply nested tree structure and runs the provided function on all fields and field sets. The 'func' provided will be called for each field, with the dictionary containing their details ({'field_details': {}, 'fields': {}) as well as the path array leading to the location of the field in question. - ''' + """ for (name, details) in fields.items(): if 'field_details' in details: func(details, path) @@ -44,14 +44,14 @@ def visit_fields_with_path(fields, func, path=[]): def visit_fields_with_memo(fields, func, memo=None): - ''' + """ This function navigates the deeply nested tree structure and runs the provided function on all fields and field sets. The 'func' provided will be called for each field, with the dictionary containing their details ({'field_details': {}, 'fields': {}) as well as the 'memo' you pass in. 
- '''
+    """
     for (name, details) in fields.items():
         if 'field_details' in details:
             func(details, memo)
diff --git a/scripts/tests/unit/test_schema_cleaner.py b/scripts/tests/unit/test_schema_cleaner.py
index 3a6a7830c7..a6c8fd7284 100644
--- a/scripts/tests/unit/test_schema_cleaner.py
+++ b/scripts/tests/unit/test_schema_cleaner.py
@@ -425,7 +425,7 @@ def test_multiline_short_override_description_warns_strict_disabled(self):
             self.fail("cleaner.single_line_short_override_description() raised Exception unexpectedly.")
 
     def test_clean(self):
-        '''A high level sanity test'''
+        """A high level sanity test"""
         fields = self.schema_process()
         cleaner.clean(fields)
         # schemas are processed

From 84880690a4b28503c9286ba803a761386ae92225 Mon Sep 17 00:00:00 2001
From: djptek
Date: Thu, 20 May 2021 10:59:56 +0200
Subject: [PATCH 30/38] removed exclude-set.yml

---
 rfcs/text/0017/exclude-set.yml | 5 -----
 1 file changed, 5 deletions(-)
 delete mode 100644 rfcs/text/0017/exclude-set.yml

diff --git a/rfcs/text/0017/exclude-set.yml b/rfcs/text/0017/exclude-set.yml
deleted file mode 100644
index 55b72a213f..0000000000
--- a/rfcs/text/0017/exclude-set.yml
+++ /dev/null
@@ -1,5 +0,0 @@
----
-- name: log
-  fields:
-    - name: original
-    
\ No newline at end of file

From 3972beadabc1e3f884f105b9f3afd71dd8c9dda4 Mon Sep 17 00:00:00 2001
From: djptek
Date: Thu, 20 May 2021 20:29:33 +0200
Subject: [PATCH 31/38] update USAGE and add dot paths

---
 USAGE.md                         | 36 +++++++++++++++++++++++++++++++
 scripts/schema/exclude_filter.py | 37 ++++++++++++++++++++++++--------
 2 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/USAGE.md b/USAGE.md
index e8d83f94c1..5ba60b306f 100644
--- a/USAGE.md
+++ b/USAGE.md
@@ -27,6 +27,7 @@ relevant artifacts for their unique set of data sources.
 * [Generator Options](#generator-options)
   + [Out](#out)
   + [Include](#include)
+  + [Exclude](#exclude)
   + [Subset](#subset)
   + [Ref](#ref)
   + [Mapping & Template Settings](#mapping--template-settings)
@@ -231,6 +232,41 @@ Include can be used together with the `--ref` flag to merge custom fields into a
 
 > NOTE: The `--include` mechanism will not validate custom YAML files prior to merging. This allows for modifying existing ECS fields in a custom schema without having to redefine all the mandatory field attributes.
 
+#### Exclude
+
+Use the `--exclude` flag to generate ephemeral ECS artifacts based on the current ECS schema field definitions minus fields considered for removal, e.g. to assess the impact of removing these. Warning! This is not the recommended route to remove a field permanently, as it is not intended to be invoked during the build process. Definitive field removal should be implemented using a custom [Subset](#subset) or via the [RFC process](https://github.com/elastic/ecs/tree/master/rfcs/README.md). Example:
+
+```
+$ python scripts/generator.py --exclude=../my-project/my-exclude-file.yml
+$ python scripts/generator.py --exclude="../my-project/schemas/a*.yml"
+```
+
+The `--exclude` flag expects a path to one or more YAML files using the same [file format](https://github.com/elastic/ecs/tree/master/schemas#fields-supported-in-schemasyml) as the ECS schema files. You can also use a subset, provided that relevant `name` and `fields` fields are preserved.
+
+```
+---
+- name: log
+  fields:
+    - name: original
+```
+
+The root Field Set `name` must always be present and specified with no dots `.`. 
Subfields may be specified using dot notation, for example: + +``` +--- +- name: log + fields: + - name: syslog.severity.name +``` + +Generate artifacts using `--exclude` to load our custom definitions in addition to `--out` to place them in the desired output directory: + +``` +$ python scripts/generator.py --exclude ../myproject/exclude-set.yml/ --out ../myproject/out/ +Loading schemas from local files +Running generator. ECS version 1.11.0 +``` + #### Subset If your indices will never populate particular ECS fields, there's no need to include those field definitions in your index mappings. The `--subset` argument allows for passing a subset definition YAML file which indicates which field sets or specific fields to include in the generated artifacts. diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index 527bed64d5..ebc7027edf 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -17,31 +17,50 @@ def exclude(fields, exclude_file_globs, out_dir): return fields -def pop_field(fields, node_path, path): +def long_path(path_as_list): + return '.'.join([e for e in path_as_list]) + + +def pop_field(fields, node_path, path, removed): """pops a field from yaml derived dict using path derived from ordered list of nodes""" if node_path[0] in fields: if len(node_path) == 1: + flat_name = long_path(path) fields.pop(node_path[0]) + return flat_name else: inner_field = node_path.pop(0) if 'fields' in fields[inner_field]: - pop_field(fields[inner_field]['fields'], node_path, path) + popped = pop_field(fields[inner_field]['fields'], node_path, path, removed) + # is this an object field with no remaining children, if so, pop it + if fields[inner_field]['fields'] == {} and fields[inner_field]['field_details']['type'] == 'object': + fields.pop(inner_field) + return popped else: raise ValueError( - '--exclude specified, but no path to field {} found'.format('.'.join([e for e in path]))) + '--exclude specified, but no path to field {} found'.format(long_path(path))) else: - raise ValueError('--exclude specified, but no field {} found'.format('.'.join([e for e in path]))) + this_long_path = long_path(path) + # Check in case already removed parent + if not any([this_long_path.startswith(long_path) for long_path in removed if long_path != None]): + raise ValueError('--exclude specified, but no field {} found'.format(this_long_path)) -def exclude_trace_path(fields, item, path): +def exclude_trace_path(fields, item, path, removed): """traverses paths to one or more nodes in a yaml derived dict""" for list_item in item: node_path = path.copy() - node_path.append(list_item['name']) + # cater for name.with.dots + for name in list_item['name'].split('.'): + node_path.append(name) if not 'fields' in list_item: - pop_field(fields, node_path, node_path.copy()) + parent = node_path[0] + removed.append(pop_field(fields, node_path, node_path.copy(), removed)) + # did we consume ALL the fields? 
if so, delete the Parent, unless it's base + if parent != 'base' and parent in fields and len(fields[parent]['fields']) == 0: + fields.pop(parent) else: - exclude_trace_path(fields, list_item['fields'], node_path) + raise ValueError('--exclude specified, can\'t parse fields in file {}'.format(item)) def exclude_fields(fields, excludes): @@ -49,7 +68,7 @@ def exclude_fields(fields, excludes): if excludes: for ex_list in excludes: for item in ex_list: - exclude_trace_path(fields, item['fields'], [item['name']]) + exclude_trace_path(fields, item['fields'], [item['name']], []) return fields From 6782f67c3bd7f9bc9d70efa7b8dbc43157345e7a Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 20 May 2021 20:35:49 +0200 Subject: [PATCH 32/38] add test_exclude_field_dot_path test_exclude_field_base_always_persists --- .../tests/unit/test_schema_exclude_filter.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index c3ab85a8a9..846f3b777a 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -53,6 +53,32 @@ def test_exclude_field_deep_path(self): 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}, 'fields': {}}}}}}}}}}} self.assertEqual(fields, expect_persisted) + def test_exclude_field_dot_path(self): + fields = {'d0': {'fields': { + 'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': { + 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': { + 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}, 'fields': { + 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}, 'fields': { + 'd5': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4.d5'}}}}}}}}}}}}} + excludes = [[{'name': 'd0', 'fields': [{ + 'name': 'd1.d2.d3.d4.d5d5'}]}]] + fields = exclude_filter.exclude_fields(fields, excludes) + expect_persisted = {} + self.assertEqual(fields, expect_persisted) + + def test_exclude_field_base_always_persists(self): + fields = {'base': {'fields': { + 'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': { + 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': { + 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}, 'fields': { + 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}, 'fields': { + 'd5': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4.d5'}}}}}}}}}}}}} + excludes = [[{'name': 'base', 'fields': [{ + 'name': 'd1.d2.d3.d4.d5d5'}]}]] + fields = exclude_filter.exclude_fields(fields, excludes) + expect_persisted = {'base': {'fields': {}}} + self.assertEqual(fields, expect_persisted) + def test_exclude_fields(self): fields = {'my_field_set': {'fields': { 'my_field_exclude_1': {'field_details': {'flat_name': 'my_field_set.my_field_exclude_1'}}, From a17bc292aa806ac43c9599220685509810fa52a4 Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 20 May 2021 22:49:42 +0200 Subject: [PATCH 33/38] update tests to reflect delete vestigial parent --- scripts/schema/exclude_filter.py | 6 ++-- .../tests/unit/test_schema_exclude_filter.py | 34 ++++++------------- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index ebc7027edf..cb7e4a2eda 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -32,8 +32,8 @@ def pop_field(fields, node_path, path, removed): inner_field = node_path.pop(0) if 'fields' in fields[inner_field]: popped = pop_field(fields[inner_field]['fields'], node_path, path, 
removed) - # is this an object field with no remaining children, if so, pop it - if fields[inner_field]['fields'] == {} and fields[inner_field]['field_details']['type'] == 'object': + # if object field with no remaining fields and not 'base', pop it + if fields[inner_field]['fields'] == {} and inner_field != 'base': fields.pop(inner_field) return popped else: @@ -60,7 +60,7 @@ def exclude_trace_path(fields, item, path, removed): if parent != 'base' and parent in fields and len(fields[parent]['fields']) == 0: fields.pop(parent) else: - raise ValueError('--exclude specified, can\'t parse fields in file {}'.format(item)) + raise ValueError('--exclude specified, can\'t parse fields in file {}'.format(item)) def exclude_fields(fields, excludes): diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index 846f3b777a..255f48ee55 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -40,17 +40,9 @@ def test_exclude_field_deep_path(self): 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}, 'fields': { 'd5': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4.d5'}}}}}}}}}}}}} excludes = [[{'name': 'd0', 'fields': [{ - 'name': 'd1', 'fields': [{ - 'name': 'd2', 'fields': [{ - 'name': 'd3', 'fields': [{ - 'name': 'd4', 'fields': [{ - 'name': 'd5'}]}]}]}]}]}]] + 'name': 'd1.d2.d3.d4.d5'}]}]] fields = exclude_filter.exclude_fields(fields, excludes) - expect_persisted = {'d0': {'fields': { - 'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': { - 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': { - 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}, 'fields': { - 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}, 'fields': {}}}}}}}}}}} + expect_persisted = {} self.assertEqual(fields, expect_persisted) def test_exclude_field_dot_path(self): @@ -61,20 +53,20 @@ def test_exclude_field_dot_path(self): 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}, 'fields': { 'd5': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4.d5'}}}}}}}}}}}}} excludes = [[{'name': 'd0', 'fields': [{ - 'name': 'd1.d2.d3.d4.d5d5'}]}]] + 'name': 'd1.d2.d3.d4.d5'}]}]] fields = exclude_filter.exclude_fields(fields, excludes) expect_persisted = {} self.assertEqual(fields, expect_persisted) def test_exclude_field_base_always_persists(self): fields = {'base': {'fields': { - 'd1': {'field_details': {'flat_name': 'd0.d1'}, 'fields': { - 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}, 'fields': { - 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}, 'fields': { - 'd4': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4'}, 'fields': { - 'd5': {'field_details': {'flat_name': 'd0.d1.d2.d3.d4.d5'}}}}}}}}}}}}} + 'd1': {'field_details': {'flat_name': 'base.d1'}, 'fields': { + 'd2': {'field_details': {'flat_name': 'base.d1.d2'}, 'fields': { + 'd3': {'field_details': {'flat_name': 'base.d1.d2.d3'}, 'fields': { + 'd4': {'field_details': {'flat_name': 'base.d1.d2.d3.d4'}, 'fields': { + 'd5': {'field_details': {'flat_name': 'base.d1.d2.d3.d4.d5'}}}}}}}}}}}}} excludes = [[{'name': 'base', 'fields': [{ - 'name': 'd1.d2.d3.d4.d5d5'}]}]] + 'name': 'd1.d2.d3.d4.d5'}]}]] fields = exclude_filter.exclude_fields(fields, excludes) expect_persisted = {'base': {'fields': {}}} self.assertEqual(fields, expect_persisted) @@ -86,7 +78,7 @@ def test_exclude_fields(self): excludes = [[{'name': 'my_field_set', 'fields': [ {'name': 'my_field_exclude_1'}, {'name': 'my_field_exclude_2'}]}]] fields = 
exclude_filter.exclude_fields(fields, excludes) - expect_persisted = {'my_field_set': {'fields': {}}} + expect_persisted = {} self.assertEqual(fields, expect_persisted) def test_exclude_non_existing_field_set(self): @@ -113,11 +105,7 @@ def test_exclude_non_existing_field_deep_path(self): 'd2': {'field_details': {'flat_name': 'd0.d1.d2'}}, 'fields': { 'd3': {'field_details': {'flat_name': 'd0.d1.d2.d3'}}}}}}} excludes = [[{'name': 'd0', 'fields': [{ - 'name': 'd1', 'fields': [{ - 'name': 'd2', 'fields': [{ - 'name': 'd3', 'fields': [{ - 'name': 'd4', 'fields': [{ - 'name': 'd5'}]}]}]}]}]}]] + 'name': 'd1.d2.d3.d4.d5'}]}]] with self.assertRaisesRegex(ValueError, "--exclude specified, but no path to field d0.d1.d2.d3.d4.d5 found"): exclude_filter.exclude_fields(fields, excludes) From c1a57c6c5d3e99222435fa51b373d1bd4d03dfdb Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 20 May 2021 22:53:18 +0200 Subject: [PATCH 34/38] fix #1426 thanks @ebeahan --- scripts/generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generator.py b/scripts/generator.py index 5123ec2dd5..e14678d286 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -59,7 +59,7 @@ def main(): es_template.generate(nested, ecs_generated_version, out_dir, args.mapping_settings) es_template.generate_legacy(flat, ecs_generated_version, out_dir, args.template_settings, args.mapping_settings) beats.generate(nested, ecs_generated_version, out_dir) - if args.include or args.subset: + if args.include or args.subset or args.exclude: exit() ecs_helpers.make_dirs(docs_dir) From da24cc59aafe395f72107c8c74da72040c06c4f6 Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 20 May 2021 22:56:11 +0200 Subject: [PATCH 35/38] fix comment --- scripts/schema/exclude_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index cb7e4a2eda..a461e9081b 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -56,7 +56,7 @@ def exclude_trace_path(fields, item, path, removed): if not 'fields' in list_item: parent = node_path[0] removed.append(pop_field(fields, node_path, node_path.copy(), removed)) - # did we consume ALL the fields? 
if so, delete the Parent, unless it's base + # if parent field has no remaining fields and not 'base', pop it if parent != 'base' and parent in fields and len(fields[parent]['fields']) == 0: fields.pop(parent) else: From 847a0bcd4ac6d32bbc940749efa2a5cecc2bc45b Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 20 May 2021 22:59:51 +0200 Subject: [PATCH 36/38] remove unused imports --- scripts/schema/exclude_filter.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index a461e9081b..c07635df62 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -1,5 +1,3 @@ -import glob -import yaml import os from schema import loader From b66d63e18c717dbef70e68760322f2cce8f921cc Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 20 May 2021 23:05:27 +0200 Subject: [PATCH 37/38] remove unused imports part 2 --- scripts/generator.py | 2 +- scripts/schema/exclude_filter.py | 3 +-- scripts/schema/loader.py | 1 - scripts/schema/subset_filter.py | 2 -- 4 files changed, 2 insertions(+), 6 deletions(-) diff --git a/scripts/generator.py b/scripts/generator.py index e14678d286..31772f3c79 100644 --- a/scripts/generator.py +++ b/scripts/generator.py @@ -49,7 +49,7 @@ def main(): cleaner.clean(fields, strict=args.strict) finalizer.finalize(fields) fields = subset_filter.filter(fields, args.subset, out_dir) - fields = exclude_filter.exclude(fields, args.exclude, out_dir) + fields = exclude_filter.exclude(fields, args.exclude) nested, flat = intermediate_files.generate(fields, os.path.join(out_dir, 'ecs'), default_dirs) if args.intermediate_only: diff --git a/scripts/schema/exclude_filter.py b/scripts/schema/exclude_filter.py index c07635df62..5717ecfb6f 100644 --- a/scripts/schema/exclude_filter.py +++ b/scripts/schema/exclude_filter.py @@ -1,4 +1,3 @@ -import os from schema import loader # This script should be run downstream of the subset filters - it takes @@ -6,7 +5,7 @@ # removes a subset, for example, to simulate impact of future removals -def exclude(fields, exclude_file_globs, out_dir): +def exclude(fields, exclude_file_globs): excludes = load_exclude_definitions(exclude_file_globs) if excludes: diff --git a/scripts/schema/loader.py b/scripts/schema/loader.py index 88b89bdde0..a662622274 100644 --- a/scripts/schema/loader.py +++ b/scripts/schema/loader.py @@ -1,6 +1,5 @@ import copy import glob -import os import yaml from generators import ecs_helpers diff --git a/scripts/schema/subset_filter.py b/scripts/schema/subset_filter.py index 9fa74f9e82..8c91929f0d 100644 --- a/scripts/schema/subset_filter.py +++ b/scripts/schema/subset_filter.py @@ -1,5 +1,3 @@ -import glob -import yaml import os from generators import intermediate_files from schema import cleaner, loader From f1750a7a60654608af919dc1d2e41963fbca90b4 Mon Sep 17 00:00:00 2001 From: djptek Date: Thu, 20 May 2021 23:09:35 +0200 Subject: [PATCH 38/38] removed unused imports the gift that just keeps giving --- scripts/tests/unit/test_schema_exclude_filter.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/tests/unit/test_schema_exclude_filter.py b/scripts/tests/unit/test_schema_exclude_filter.py index 255f48ee55..5b6cb5d6ad 100644 --- a/scripts/tests/unit/test_schema_exclude_filter.py +++ b/scripts/tests/unit/test_schema_exclude_filter.py @@ -1,9 +1,6 @@ -from unittest import result -from unittest.case import TestCase from schema import exclude_filter import mock import os -import pprint import sys import unittest
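
For reference, the exclusion behaviour introduced by this series can be summarised with a small standalone sketch. This is not code from the patches themselves: the `pop_path` helper and the `d0`/`d1`/`d2` field names are illustrative only, and the sketch merely mirrors the dot-path semantics exercised by the unit tests above (remove the addressed leaf, then prune any parent left without subfields, except the `base` field set).

```
# Simplified sketch of the --exclude semantics; not part of the patch series.
def pop_path(fields, dotted_name):
    """Remove the field addressed by a dotted name from a nested fields dict,
    pruning parents left with no subfields (the 'base' field set always persists)."""
    path = dotted_name.split('.')
    node = fields
    parents = []
    for name in path[:-1]:
        # remember each parent so empty ones can be pruned afterwards
        parents.append((node, name))
        node = node[name]['fields']
    node.pop(path[-1])
    for parent, name in reversed(parents):
        if parent[name].get('fields') == {} and name != 'base':
            parent.pop(name)

fields = {'d0': {'fields': {
    'd1': {'field_details': {'flat_name': 'd0.d1'}},
    'd2': {'field_details': {'flat_name': 'd0.d2'}}}}}
pop_path(fields, 'd0.d1')
print(fields)  # {'d0': {'fields': {'d2': {'field_details': {'flat_name': 'd0.d2'}}}}}
```

Removing `d0.d1` keeps `d0.d2`; removing the last remaining subfield would drop the enclosing field set as well, which is the behaviour asserted by `test_exclude_fields` and `test_exclude_field_base_always_persists` above.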