From 955026030ea5c3a8763cd6863f7e092b51fe67ef Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Mon, 23 Dec 2024 15:57:53 +0100 Subject: [PATCH 01/10] allow FieldManager to merge dicts - Simplified and streamlined logic for merging fields and resolving overwrite/extend conflicts. - Improved support for merging dictionaries and extending lists in `add_fields_to`. - Added tests to validate new behaviors and handle edge cases more effectively. --- logprep/processor/field_manager/processor.py | 75 ++++--------------- logprep/util/helper.py | 24 +++--- .../field_manager/test_field_manager.py | 13 ++++ .../generic_adder/test_generic_adder.py | 2 +- tests/unit/util/test_helper_add_field.py | 37 ++++++--- 5 files changed, 68 insertions(+), 83 deletions(-) diff --git a/logprep/processor/field_manager/processor.py b/logprep/processor/field_manager/processor.py index 4c1ed4cd8..4db6a24f9 100644 --- a/logprep/processor/field_manager/processor.py +++ b/logprep/processor/field_manager/processor.py @@ -27,12 +27,9 @@ .. automodule:: logprep.processor.field_manager.rule """ -from collections import namedtuple - from logprep.abc.processor import Processor from logprep.processor.field_manager.rule import FieldManagerRule from logprep.util.helper import ( - add_and_overwrite, add_fields_to, get_dotted_field_value, pop_dotted_field_value, @@ -88,66 +85,20 @@ def _apply_mapping(self, event, rule, rule_args): def _write_to_single_target(self, args, extend_target_list, overwrite_target, rule): event, target_field, source_fields_values = args - target_field_value = get_dotted_field_value(event, target_field) - State = namedtuple( - "State", - ["overwrite", "extend", "single_source_element", "target_is_list", "target_is_none"], - ) - state = State( - overwrite=overwrite_target, - extend=extend_target_list, - single_source_element=len(source_fields_values) == 1, - target_is_list=isinstance(target_field_value, list), - target_is_none=target_field_value is None, - ) - if state.single_source_element and not state.extend: + if len(source_fields_values) == 1 and not extend_target_list: source_fields_values = source_fields_values.pop() - - match state: - case State( - extend=True, overwrite=True, single_source_element=False, target_is_list=False - ): - add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) - return - - case State( - extend=True, - overwrite=False, - single_source_element=False, - target_is_list=False, - target_is_none=True, - ): - flattened_source_fields = self._overwrite_from_source_values(source_fields_values) - source_fields_values = [*flattened_source_fields] - add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) - return - - case State(extend=True, overwrite=False, target_is_list=False, target_is_none=True): - add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) - return - - case State(extend=True, overwrite=False, target_is_list=False): - source_fields_values = [target_field_value, *source_fields_values] - add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) - return - - case State( - extend=True, overwrite=False, single_source_element=False, target_is_list=True - ): - flattened_source_fields = self._overwrite_from_source_values(source_fields_values) - source_fields_values = [*target_field_value, *flattened_source_fields] - add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) - return - - case State(overwrite=True, extend=True): - flattened_source_fields = self._overwrite_from_source_values(source_fields_values) - source_fields_values = [*flattened_source_fields] - add_and_overwrite(event, fields={target_field: source_fields_values}, rule=rule) - return - - case _: - field = {target_field: source_fields_values} - add_fields_to(event, field, rule, state.extend, state.overwrite) + if extend_target_list: + flattened_source_fields = self._overwrite_from_source_values(source_fields_values) + new_value = [*flattened_source_fields] + if all(isinstance(elem, dict) for elem in new_value): + new_value = {key: value for d in new_value for key, value in d.items()} + add_fields_to( + event, {target_field: new_value}, rule, extend_target_list, overwrite_target + ) + return + else: + field = {target_field: source_fields_values} + add_fields_to(event, field, rule, extend_target_list, overwrite_target) def _overwrite_from_source_values(self, source_fields_values): duplicates = [] diff --git a/logprep/util/helper.py b/logprep/util/helper.py index a040ac112..437dc0d64 100644 --- a/logprep/util/helper.py +++ b/logprep/util/helper.py @@ -86,14 +86,10 @@ def _add_field_to( Flag that determines whether the target_field should be overwritten Raises ------ - ValueError - If both extends_lists and overwrite_target_field are set to True. FieldExistsWarning If the target_field already exists and overwrite_target_field is False, or if extends_lists is True but the existing field is not a list. """ - if extends_lists and overwrite_target_field: - raise ValueError("An output field can't be overwritten and extended at the same time") target_field, content = field field_path = [event, *get_dotted_field_list(target_field)] target_key = field_path.pop() @@ -110,18 +106,28 @@ def _add_field_to( if existing_value is None: target_parent[target_key] = content return - if not extends_lists or not isinstance(existing_value, list): + if not extends_lists: raise FieldExistsWarning(rule, event, [target_field]) - if isinstance(content, list | set): - target_parent[target_key].extend(content) + if isinstance(existing_value, dict) and isinstance(content, dict): + existing_value.update(content) + target_parent[target_key] = existing_value + elif isinstance(existing_value, list) and isinstance(content, list): + existing_value.extend(content) + target_parent[target_key] = existing_value + elif isinstance(existing_value, list) and isinstance(content, (int, float, str, bool)): + target_parent[target_key] = existing_value + [content] + elif isinstance(existing_value, (int, float, str, bool)) and isinstance(content, list): + target_parent[target_key] = [existing_value] + content else: - target_parent[target_key].append(content) + if not overwrite_target_field: + raise FieldExistsWarning(rule, event, [target_field]) + target_parent[target_key] = [existing_value, content] def _add_field_to_silent_fail(*args, **kwargs) -> None | str: """ Adds a field to an object, ignoring the FieldExistsWarning if the field already exists. Is only needed in the - add_batch_to map function. Without this the map would terminate early. + add_batch_to map function. Without this, the map would terminate early. Parameters: args: tuple diff --git a/tests/unit/processor/field_manager/test_field_manager.py b/tests/unit/processor/field_manager/test_field_manager.py index 9938f47fb..7f0d88297 100644 --- a/tests/unit/processor/field_manager/test_field_manager.py +++ b/tests/unit/processor/field_manager/test_field_manager.py @@ -8,6 +8,19 @@ from tests.unit.processor.base import BaseProcessorTestCase test_cases = [ # testcase, rule, event, expected + ( + "Merge source dict into existing target dict", + { + "filter": "source", + "field_manager": { + "source_fields": ["source"], + "target_field": "target", + "extend_target_list": True, + }, + }, + {"source": {"source1": "value"}, "target": {"target1": "value"}}, + {"source": {"source1": "value"}, "target": {"source1": "value", "target1": "value"}}, + ), ( "copies single field to non existing target field", { diff --git a/tests/unit/processor/generic_adder/test_generic_adder.py b/tests/unit/processor/generic_adder/test_generic_adder.py index 6fa7944d8..54b1d898d 100644 --- a/tests/unit/processor/generic_adder/test_generic_adder.py +++ b/tests/unit/processor/generic_adder/test_generic_adder.py @@ -327,9 +327,9 @@ def test_generic_adder_testcases_failure_handling( ): self._load_rule(rule) result = self.object.process(event) + assert event == expected, testcase assert len(result.warnings) == 1 assert re.match(rf".*FieldExistsWarning.*{error_message}", str(result.warnings[0])) - assert event == expected, testcase def test_add_generic_fields_from_file_missing_and_existing_with_all_required(self): with pytest.raises(InvalidRuleDefinitionError, match=r"files do not exist"): diff --git a/tests/unit/util/test_helper_add_field.py b/tests/unit/util/test_helper_add_field.py index 7e7731415..f08e4b563 100644 --- a/tests/unit/util/test_helper_add_field.py +++ b/tests/unit/util/test_helper_add_field.py @@ -95,17 +95,6 @@ def test_add_field_to_extends_list_when_given_a_list(self): add_fields_to(document, {"some_list": ["first", "second"]}, extends_lists=True) assert document.get("some_list") == ["with a value", "first", "second"] - def test_add_field_to_raises_if_list_should_be_extended_and_overwritten_at_the_same_time(self): - document = {"some": "field", "some_list": ["with a value"]} - with pytest.raises(ValueError, match=r"can't be overwritten and extended at the same time"): - add_fields_to( - document, - {"some_list": ["first", "second"]}, - extends_lists=True, - overwrite_target_field=True, - ) - assert document - def test_returns_false_if_dotted_field_value_key_exists(self): document = {"user": "Franz"} with pytest.raises(FieldExistsWarning, match=r"could not be written"): @@ -174,3 +163,29 @@ def test_add_field_adds_multiple_fields_and_raises_one_field_exists_warning(self "exists_already": "original content", "new": "another content", } + + def test_add_fields_to_merges_existing_dict_with_new_dict(self): + document = {"some": "field", "existing": {"old": "dict"}} + expected = { + "some": "field", + "existing": {"new": "dict", "old": "dict"}, + } + add_fields_to(document, {"existing": {"new": "dict"}}, extends_lists=True) + assert document == expected + + def test_add_fields_to_converts_element_to_list_when_extends_lists_is_true(self): + document = {"existing": "element"} + expected = {"existing": ["element", "new element"]} + add_fields_to(document, {"existing": "new element"}, extends_lists=True) + assert document == expected + + def test_add_fields_to_extends_but_does_not_overwrite_target(self): + document = {"existing": "element"} + expected = {"existing": "element"} + add_fields_to( + document, {"existing": "new element"}, extends_lists=True, overwrite_target_field=False + ) + assert document == expected + + # merge with target (keeps existing target value) + # overwrite target (replaces existing target value) From af7d38e21286e1ace2944aa210510cda208c8f5a Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Mon, 23 Dec 2024 16:37:42 +0100 Subject: [PATCH 02/10] update field processing logic to use `merge_with_target` - Replaced `extend_target_list` with `merge_with_target` across processors and utilities for clarity and better functionality. - Ensured proper handling of scenarios where merging and overwriting targets could conflict. --- logprep/abc/processor.py | 4 +- logprep/processor/calculator/rule.py | 2 +- logprep/processor/clusterer/processor.py | 4 +- logprep/processor/dissector/processor.py | 4 +- .../domain_label_extractor/processor.py | 2 +- .../processor/domain_resolver/processor.py | 2 +- logprep/processor/field_manager/processor.py | 29 +++++----- logprep/processor/field_manager/rule.py | 8 +-- logprep/processor/generic_adder/processor.py | 2 +- logprep/processor/generic_adder/rule.py | 2 +- .../processor/generic_resolver/processor.py | 10 ++-- logprep/processor/geoip_enricher/processor.py | 4 +- logprep/processor/grokker/processor.py | 4 +- .../processor/hyperscan_resolver/processor.py | 6 +-- logprep/processor/labeler/processor.py | 4 +- .../processor/list_comparison/processor.py | 2 +- logprep/processor/pseudonymizer/processor.py | 2 +- logprep/processor/requester/processor.py | 6 +-- logprep/processor/selective_extractor/rule.py | 4 +- .../processor/template_replacer/processor.py | 2 +- logprep/util/helper.py | 40 +++++++------- .../processor/calculator/test_calculator.py | 2 +- .../field_manager/test_field_manager.py | 38 ++++++------- .../generic_adder/test_generic_adder.py | 14 ++--- .../generic_resolver/test_generic_resolver.py | 6 +-- .../test_generic_resolver_rule.py | 16 +++--- .../test_hyperscan_resolver.py | 18 +++---- .../test_selective_extractor_rule.py | 6 +-- .../timestamp_differ/test_timestamp_differ.py | 2 +- tests/unit/util/test_helper_add_field.py | 54 +++++++++---------- 30 files changed, 151 insertions(+), 148 deletions(-) diff --git a/logprep/abc/processor.py b/logprep/abc/processor.py index 391b366ac..681293eac 100644 --- a/logprep/abc/processor.py +++ b/logprep/abc/processor.py @@ -329,8 +329,8 @@ def _write_target_field(self, event: dict, rule: "Rule", result: any) -> None: add_fields_to( event, fields={rule.target_field: result}, - extends_lists=rule.extend_target_list, - overwrite_target_field=rule.overwrite_target, + merge_with_target=rule.merge_with_target, + overwrite_target=rule.overwrite_target, ) def setup(self): diff --git a/logprep/processor/calculator/rule.py b/logprep/processor/calculator/rule.py index c81d0ef40..8fd3e6f9e 100644 --- a/logprep/processor/calculator/rule.py +++ b/logprep/processor/calculator/rule.py @@ -107,7 +107,7 @@ class Config(FieldManagerRule.Config): """The calculation expression. Fields from the event can be used by surrounding them with :code:`${` and :code:`}`.""" source_fields: list = field(factory=list, init=False, repr=False, eq=False) - extend_target_list: bool = field(validator=validators.instance_of(bool), default=False) + merge_with_target: bool = field(validator=validators.instance_of(bool), default=False) """If the target field exists and is a list, the list will be extended with the values of the source fields. """ diff --git a/logprep/processor/clusterer/processor.py b/logprep/processor/clusterer/processor.py index 8f33a8093..f1094dd95 100644 --- a/logprep/processor/clusterer/processor.py +++ b/logprep/processor/clusterer/processor.py @@ -139,8 +139,8 @@ def _cluster(self, event: dict, rule: ClustererRule): add_fields_to( event, fields={self._config.output_field_name: cluster_signature}, - extends_lists=rule.extend_target_list, - overwrite_target_field=rule.overwrite_target, + merge_with_target=rule.merge_with_target, + overwrite_target=rule.overwrite_target, ) self._last_non_extracted_signature = sig_text diff --git a/logprep/processor/dissector/processor.py b/logprep/processor/dissector/processor.py index bc350ffe0..b6d63bfe2 100644 --- a/logprep/processor/dissector/processor.py +++ b/logprep/processor/dissector/processor.py @@ -92,8 +92,6 @@ def _apply_convert_datatype(self, event, rule): for target_field, converter in rule.convert_actions: try: target_value = converter(get_dotted_field_value(event, target_field)) - add_fields_to( - event, {target_field: target_value}, rule, overwrite_target_field=True - ) + add_fields_to(event, {target_field: target_value}, rule, overwrite_target=True) except ValueError as error: self._handle_warning_error(event, rule, error) diff --git a/logprep/processor/domain_label_extractor/processor.py b/logprep/processor/domain_label_extractor/processor.py index e8087fea0..8003eedcb 100644 --- a/logprep/processor/domain_label_extractor/processor.py +++ b/logprep/processor/domain_label_extractor/processor.py @@ -105,7 +105,7 @@ def _apply_rules(self, event, rule: DomainLabelExtractorRule): f"{rule.target_field}.top_level_domain": labels.suffix, f"{rule.target_field}.subdomain": labels.subdomain, } - add_fields_to(event, fields, rule, overwrite_target_field=rule.overwrite_target) + add_fields_to(event, fields, rule, overwrite_target=rule.overwrite_target) else: tagging_field.append(f"invalid_domain_in_{rule.source_fields[0].replace('.', '_')}") add_and_overwrite( diff --git a/logprep/processor/domain_resolver/processor.py b/logprep/processor/domain_resolver/processor.py index 098fce281..d74d48ee8 100644 --- a/logprep/processor/domain_resolver/processor.py +++ b/logprep/processor/domain_resolver/processor.py @@ -195,4 +195,4 @@ def _store_debug_infos(self, event, requires_storing): "cache_size": len(self._domain_ip_map.keys()), } } - add_fields_to(event, event_dbg, overwrite_target_field=True) + add_fields_to(event, event_dbg, overwrite_target=True) diff --git a/logprep/processor/field_manager/processor.py b/logprep/processor/field_manager/processor.py index 4db6a24f9..e0a2c5274 100644 --- a/logprep/processor/field_manager/processor.py +++ b/logprep/processor/field_manager/processor.py @@ -30,6 +30,7 @@ from logprep.abc.processor import Processor from logprep.processor.field_manager.rule import FieldManagerRule from logprep.util.helper import ( + add_and_overwrite, add_fields_to, get_dotted_field_value, pop_dotted_field_value, @@ -46,7 +47,7 @@ def _apply_rules(self, event, rule): rule.source_fields, rule.target_field, rule.mapping, - rule.extend_target_list, + rule.merge_with_target, rule.overwrite_target, ) if rule.mapping: @@ -55,17 +56,17 @@ def _apply_rules(self, event, rule): self._apply_single_target_processing(event, rule, rule_args) def _apply_single_target_processing(self, event, rule, rule_args): - source_fields, target_field, _, extend_target_list, overwrite_target = rule_args + source_fields, target_field, _, merge_with_target, overwrite_target = rule_args source_field_values = self._get_field_values(event, rule.source_fields) self._handle_missing_fields(event, rule, source_fields, source_field_values) source_field_values = list(filter(lambda x: x is not None, source_field_values)) if not source_field_values: return args = (event, target_field, source_field_values) - self._write_to_single_target(args, extend_target_list, overwrite_target, rule) + self._write_to_single_target(args, merge_with_target, overwrite_target, rule) def _apply_mapping(self, event, rule, rule_args): - source_fields, _, mapping, extend_target_list, overwrite_target = rule_args + source_fields, _, mapping, merge_with_target, overwrite_target = rule_args source_fields, targets = list(zip(*mapping.items())) source_field_values = self._get_field_values(event, mapping.keys()) self._handle_missing_fields(event, rule, source_fields, source_field_values) @@ -76,29 +77,31 @@ def _apply_mapping(self, event, rule, rule_args): event, dict(zip(targets, source_field_values)), rule, - extend_target_list, + merge_with_target, overwrite_target, ) if rule.delete_source_fields: for dotted_field in source_fields: pop_dotted_field_value(event, dotted_field) - def _write_to_single_target(self, args, extend_target_list, overwrite_target, rule): + def _write_to_single_target(self, args, merge_with_target, overwrite_target, rule): event, target_field, source_fields_values = args - if len(source_fields_values) == 1 and not extend_target_list: + if len(source_fields_values) == 1 and not merge_with_target: source_fields_values = source_fields_values.pop() - if extend_target_list: + if merge_with_target: flattened_source_fields = self._overwrite_from_source_values(source_fields_values) new_value = [*flattened_source_fields] if all(isinstance(elem, dict) for elem in new_value): new_value = {key: value for d in new_value for key, value in d.items()} - add_fields_to( - event, {target_field: new_value}, rule, extend_target_list, overwrite_target - ) - return + if overwrite_target: + add_and_overwrite(event, {target_field: new_value}, rule) + else: + add_fields_to( + event, {target_field: new_value}, rule, merge_with_target, overwrite_target + ) else: field = {target_field: source_fields_values} - add_fields_to(event, field, rule, extend_target_list, overwrite_target) + add_fields_to(event, field, rule, merge_with_target, overwrite_target) def _overwrite_from_source_values(self, source_fields_values): duplicates = [] diff --git a/logprep/processor/field_manager/rule.py b/logprep/processor/field_manager/rule.py index 8cef9ec97..f893e0c2e 100644 --- a/logprep/processor/field_manager/rule.py +++ b/logprep/processor/field_manager/rule.py @@ -20,7 +20,7 @@ - server.nat.ip - client.nat.ip target_field: related.ip - extend_target_list: True + merge_with_target: True description: '...' .. code-block:: json @@ -120,7 +120,7 @@ class Config(Rule.Config): """Whether to delete all the source fields or not. Defaults to :code:`False`""" overwrite_target: bool = field(validator=validators.instance_of(bool), default=False) """Overwrite the target field value if exists. Defaults to :code:`False`""" - extend_target_list: bool = field(validator=validators.instance_of(bool), default=False) + merge_with_target: bool = field(validator=validators.instance_of(bool), default=False) """If the target field exists and is a list, the list will be extended with the values of the source fields. If the source field is a list, the lists will be merged. If the target field does not exist, a new field will be added with the @@ -162,8 +162,8 @@ def overwrite_target(self): return self._config.overwrite_target @property - def extend_target_list(self): - return self._config.extend_target_list + def merge_with_target(self): + return self._config.merge_with_target @property def ignore_missing_fields(self): diff --git a/logprep/processor/generic_adder/processor.py b/logprep/processor/generic_adder/processor.py index 9913c1e83..090753c0b 100644 --- a/logprep/processor/generic_adder/processor.py +++ b/logprep/processor/generic_adder/processor.py @@ -37,4 +37,4 @@ class GenericAdder(Processor): def _apply_rules(self, event: dict, rule: GenericAdderRule): items_to_add = rule.add if items_to_add: - add_fields_to(event, items_to_add, rule, rule.extend_target_list, rule.overwrite_target) + add_fields_to(event, items_to_add, rule, rule.merge_with_target, rule.overwrite_target) diff --git a/logprep/processor/generic_adder/rule.py b/logprep/processor/generic_adder/rule.py index 73844114d..c6f70762f 100644 --- a/logprep/processor/generic_adder/rule.py +++ b/logprep/processor/generic_adder/rule.py @@ -94,7 +94,7 @@ class GenericAdderRule(FieldManagerRule): class Config(FieldManagerRule.Config): """Config for GenericAdderRule""" - extend_target_list: bool = field(validator=validators.instance_of(bool), default=False) + merge_with_target: bool = field(validator=validators.instance_of(bool), default=False) """If the target field exists and is a list, the list will be extended with the values of the source fields. """ diff --git a/logprep/processor/generic_resolver/processor.py b/logprep/processor/generic_resolver/processor.py index b5d4adbb4..c29691f66 100644 --- a/logprep/processor/generic_resolver/processor.py +++ b/logprep/processor/generic_resolver/processor.py @@ -24,8 +24,8 @@ """ from functools import cached_property, lru_cache - from typing import Optional + from attrs import define, field, validators from logprep.abc.processor import Processor @@ -52,7 +52,7 @@ class Config(Processor.Config): validator=validators.optional(validators.instance_of(int)), default=1 ) """(Optional) Cache metrics won't be updated immediately. - Instead updating is skipped for a number of events before it's next update. + Instead updating is skipped for a number of events before it's next update. :code:`cache_metrics_interval` sets the number of events between updates (default: 1).""" @define(kw_only=True) @@ -130,15 +130,15 @@ def _apply_rules(self, event, rule): current_content = get_dotted_field_value(event, target_field) if isinstance(current_content, list) and content in current_content: continue - if rule.extend_target_list and current_content is None: + if rule.merge_with_target and current_content is None: content = [content] try: add_fields_to( event, fields={target_field: content}, rule=rule, - extends_lists=rule.extend_target_list, - overwrite_target_field=rule.overwrite_target, + merge_with_target=rule.merge_with_target, + overwrite_target=rule.overwrite_target, ) except FieldExistsWarning as error: conflicting_fields.extend(error.skipped_fields) diff --git a/logprep/processor/geoip_enricher/processor.py b/logprep/processor/geoip_enricher/processor.py index fa8592b71..e6c61dc58 100644 --- a/logprep/processor/geoip_enricher/processor.py +++ b/logprep/processor/geoip_enricher/processor.py @@ -134,6 +134,6 @@ def _apply_rules(self, event, rule): event, fields, rule=rule, - extends_lists=False, - overwrite_target_field=rule.overwrite_target, + merge_with_target=False, + overwrite_target=rule.overwrite_target, ) diff --git a/logprep/processor/grokker/processor.py b/logprep/processor/grokker/processor.py index 77257c982..f2684e5a0 100644 --- a/logprep/processor/grokker/processor.py +++ b/logprep/processor/grokker/processor.py @@ -86,8 +86,8 @@ def _apply_rules(self, event: dict, rule: GrokkerRule): event, result, rule=rule, - extends_lists=rule.extend_target_list, - overwrite_target_field=rule.overwrite_target, + merge_with_target=rule.merge_with_target, + overwrite_target=rule.overwrite_target, ) if self._handle_missing_fields(event, rule, rule.actions.keys(), source_values): return diff --git a/logprep/processor/hyperscan_resolver/processor.py b/logprep/processor/hyperscan_resolver/processor.py index 960824369..f75c0174f 100644 --- a/logprep/processor/hyperscan_resolver/processor.py +++ b/logprep/processor/hyperscan_resolver/processor.py @@ -114,15 +114,15 @@ def _apply_rules(self, event: dict, rule: HyperscanResolverRule): current_content = get_dotted_field_value(event, resolve_target) if isinstance(current_content, list) and dest_val in current_content: continue - if rule.extend_target_list and current_content is None: + if rule.merge_with_target and current_content is None: dest_val = [dest_val] try: add_fields_to( event, fields={resolve_target: dest_val}, rule=rule, - extends_lists=rule.extend_target_list, - overwrite_target_field=rule.overwrite_target, + merge_with_target=rule.merge_with_target, + overwrite_target=rule.overwrite_target, ) except FieldExistsWarning as error: conflicting_fields.extend(error.skipped_fields) diff --git a/logprep/processor/labeler/processor.py b/logprep/processor/labeler/processor.py index 278aff0dd..7dd596120 100644 --- a/logprep/processor/labeler/processor.py +++ b/logprep/processor/labeler/processor.py @@ -72,10 +72,10 @@ def setup(self): def _apply_rules(self, event, rule): """Applies the rule to the current event""" fields = {key: value for key, value in rule.prefixed_label.items()} - add_fields_to(event, fields, rule=rule, extends_lists=True) + add_fields_to(event, fields, rule=rule, merge_with_target=True) # convert sets into sorted lists fields = { key: sorted(set(get_dotted_field_value(event, key))) for key, _ in rule.prefixed_label.items() } - add_fields_to(event, fields, rule=rule, overwrite_target_field=True) + add_fields_to(event, fields, rule=rule, overwrite_target=True) diff --git a/logprep/processor/list_comparison/processor.py b/logprep/processor/list_comparison/processor.py index 4bf85ad4d..a3c872a6d 100644 --- a/logprep/processor/list_comparison/processor.py +++ b/logprep/processor/list_comparison/processor.py @@ -72,7 +72,7 @@ def _apply_rules(self, event, rule): comparison_result, comparison_key = self._list_comparison(rule, event) if comparison_result is not None: fields = {f"{rule.target_field}.{comparison_key}": comparison_result} - add_fields_to(event, fields, rule=rule, extends_lists=True) + add_fields_to(event, fields, rule=rule, merge_with_target=True) def _list_comparison(self, rule: ListComparisonRule, event: dict): """ diff --git a/logprep/processor/pseudonymizer/processor.py b/logprep/processor/pseudonymizer/processor.py index dfeb9d1d0..735660425 100644 --- a/logprep/processor/pseudonymizer/processor.py +++ b/logprep/processor/pseudonymizer/processor.py @@ -243,7 +243,7 @@ def _apply_rules(self, event: dict, rule: PseudonymizerRule): else: field_value = self._pseudonymize_field(rule, dotted_field, regex, field_value) add_fields_to( - event, fields={dotted_field: field_value}, rule=rule, overwrite_target_field=True + event, fields={dotted_field: field_value}, rule=rule, overwrite_target=True ) if "@timestamp" in event: for pseudonym, _ in self.result.data: diff --git a/logprep/processor/requester/processor.py b/logprep/processor/requester/processor.py index 86b817466..265fd4115 100644 --- a/logprep/processor/requester/processor.py +++ b/logprep/processor/requester/processor.py @@ -72,8 +72,8 @@ def _handle_response(self, event, rule, response): event, fields={rule.target_field: self._get_result(response)}, rule=rule, - extends_lists=rule.extend_target_list, - overwrite_target_field=rule.overwrite_target, + merge_with_target=rule.merge_with_target, + overwrite_target=rule.overwrite_target, ) except FieldExistsWarning as error: conflicting_fields.extend(error.skipped_fields) @@ -86,7 +86,7 @@ def _handle_response(self, event, rule, response): event, dict(zip(targets, contents)), rule, - rule.extend_target_list, + rule.merge_with_target, rule.overwrite_target, ) except FieldExistsWarning as error: diff --git a/logprep/processor/selective_extractor/rule.py b/logprep/processor/selective_extractor/rule.py index 54f636372..860a033d6 100644 --- a/logprep/processor/selective_extractor/rule.py +++ b/logprep/processor/selective_extractor/rule.py @@ -55,7 +55,7 @@ filter: extract_test selective_extractor: extract_from_file: /path/to/file - outputs: + outputs: - opensearch: topic_to_send_to description: '...' @@ -160,7 +160,7 @@ class Config(FieldManagerRule.Config): is not tagged with the failure tag. Defaults to :code:`True`""" target_field: str = field(default="", init=False, repr=False, eq=False) overwrite_target: bool = field(default=False, init=False, repr=False, eq=False) - extend_target_list: bool = field(default=False, init=False, repr=False, eq=False) + merge_with_target: bool = field(default=False, init=False, repr=False, eq=False) mapping: dict = field(default="", init=False, repr=False, eq=False) def __attrs_post_init__(self): diff --git a/logprep/processor/template_replacer/processor.py b/logprep/processor/template_replacer/processor.py index 9b036aec9..05317b126 100644 --- a/logprep/processor/template_replacer/processor.py +++ b/logprep/processor/template_replacer/processor.py @@ -116,7 +116,7 @@ def _perform_replacement(self, event: dict, replacement: str, rule: TemplateRepl event, fields={self._target_field: replacement}, rule=rule, - overwrite_target_field=overwrite, + overwrite_target=overwrite, ) def setup(self): diff --git a/logprep/util/helper.py b/logprep/util/helper.py index 437dc0d64..ec14201c3 100644 --- a/logprep/util/helper.py +++ b/logprep/util/helper.py @@ -64,8 +64,8 @@ def _add_field_to( event: dict, field: tuple, rule: "Rule", - extends_lists: bool = False, - overwrite_target_field: bool = False, + merge_with_target: bool = False, + overwrite_target: bool = False, ) -> None: """ Add content to the target_field in the given event. target_field can be a dotted subfield. @@ -80,21 +80,23 @@ def _add_field_to( str, float, int, list, dict. rule: Rule A rule that initiated the field addition, is used for proper error handling. - extends_lists: bool - Flag that determines whether target_field lists should be extended - overwrite_target_field: bool - Flag that determines whether the target_field should be overwritten + merge_with_target: bool + Flag that determines whether the content should be merged with an existing target_field + overwrite_target: bool + Flag that determines whether the target_field should be overwritten by content Raises ------ FieldExistsWarning If the target_field already exists and overwrite_target_field is False, or if extends_lists is True but the existing field is not a list. """ + if merge_with_target and overwrite_target: + raise ValueError("Can't merge with and overwrite a target field at the same time") target_field, content = field field_path = [event, *get_dotted_field_list(target_field)] target_key = field_path.pop() - if overwrite_target_field: + if overwrite_target: target_parent = reduce(_add_and_overwrite_key, field_path) target_parent[target_key] = content return @@ -106,7 +108,7 @@ def _add_field_to( if existing_value is None: target_parent[target_key] = content return - if not extends_lists: + if not merge_with_target: raise FieldExistsWarning(rule, event, [target_field]) if isinstance(existing_value, dict) and isinstance(content, dict): existing_value.update(content) @@ -119,7 +121,7 @@ def _add_field_to( elif isinstance(existing_value, (int, float, str, bool)) and isinstance(content, list): target_parent[target_key] = [existing_value] + content else: - if not overwrite_target_field: + if not overwrite_target: raise FieldExistsWarning(rule, event, [target_field]) target_parent[target_key] = [existing_value, content] @@ -151,8 +153,8 @@ def add_fields_to( event: dict, fields: dict, rule: "Rule" = None, - extends_lists: bool = False, - overwrite_target_field: bool = False, + merge_with_target: bool = False, + overwrite_target: bool = False, ) -> None: """ Handles the batch addition operation while raising a FieldExistsWarning with all unsuccessful targets. @@ -166,9 +168,9 @@ def add_fields_to( content can be of type: str, float, int, list, dict. rule: Rule A rule that initiated the field addition, is used for proper error handling. - extends_lists: bool - A boolean indicating whether to extend lists if the target field already exists. - overwrite_target_field: bool + merge_with_target: bool + A boolean indicating whether to merge if the target field already exists. + overwrite_target: bool A boolean indicating whether to overwrite the target field if it already exists. Raises: @@ -179,15 +181,15 @@ def add_fields_to( fields = {key: value for key, value in fields.items() if value is not None} number_fields = len(dict(fields)) if number_fields == 1: - _add_field_to(event, list(fields.items())[0], rule, extends_lists, overwrite_target_field) + _add_field_to(event, list(fields.items())[0], rule, merge_with_target, overwrite_target) return unsuccessful_targets = map( _add_field_to_silent_fail, itertools.repeat(event, number_fields), fields.items(), itertools.repeat(rule, number_fields), - itertools.repeat(extends_lists, number_fields), - itertools.repeat(overwrite_target_field, number_fields), + itertools.repeat(merge_with_target, number_fields), + itertools.repeat(overwrite_target, number_fields), ) unsuccessful_targets = [item for item in unsuccessful_targets if item is not None] if unsuccessful_targets: @@ -360,12 +362,12 @@ def snake_to_camel(snake: str) -> str: return camel -append_as_list = partial(add_fields_to, extends_lists=True) +append_as_list = partial(add_fields_to, merge_with_target=True) def add_and_overwrite(event, fields, rule, *_): """wrapper for add_field_to""" - add_fields_to(event, fields, rule, overwrite_target_field=True) + add_fields_to(event, fields, rule, overwrite_target=True) def append(event, field, separator, rule): diff --git a/tests/unit/processor/calculator/test_calculator.py b/tests/unit/processor/calculator/test_calculator.py index e3cbd6cbc..a5704a04c 100644 --- a/tests/unit/processor/calculator/test_calculator.py +++ b/tests/unit/processor/calculator/test_calculator.py @@ -125,7 +125,7 @@ "calculator": { "calc": "${field1} + ${field2} +${field3}", "target_field": "target", - "extend_target_list": True, + "merge_with_target": True, }, }, {"field1": "6", "field2": "4", "field3": 2, "target": [1, 5, 3]}, diff --git a/tests/unit/processor/field_manager/test_field_manager.py b/tests/unit/processor/field_manager/test_field_manager.py index 7f0d88297..dee40032b 100644 --- a/tests/unit/processor/field_manager/test_field_manager.py +++ b/tests/unit/processor/field_manager/test_field_manager.py @@ -15,7 +15,7 @@ "field_manager": { "source_fields": ["source"], "target_field": "target", - "extend_target_list": True, + "merge_with_target": True, }, }, {"source": {"source1": "value"}, "target": {"target1": "value"}}, @@ -80,7 +80,7 @@ "field_manager": { "source_fields": ["message"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, "delete_source_fields": True, }, }, @@ -94,7 +94,7 @@ "field_manager": { "source_fields": ["field1", "field2", "field3"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, "delete_source_fields": True, }, }, @@ -112,7 +112,7 @@ "field_manager": { "source_fields": ["field1", "field2", "field3"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, "delete_source_fields": True, "overwrite_target": True, }, @@ -132,7 +132,7 @@ "field_manager": { "source_fields": ["field1", "field2", "field3"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, "delete_source_fields": True, }, }, @@ -151,7 +151,7 @@ "field_manager": { "source_fields": ["field1", "field2", "field3"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, "delete_source_fields": True, }, }, @@ -170,7 +170,7 @@ "field_manager": { "source_fields": ["field1", "field2", "field3"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, "delete_source_fields": True, }, }, @@ -189,7 +189,7 @@ "field_manager": { "source_fields": ["field1", "field2", "field3"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, "delete_source_fields": True, }, }, @@ -208,7 +208,7 @@ "field_manager": { "source_fields": ["field1", "field2", "field3"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, "delete_source_fields": True, }, }, @@ -230,7 +230,7 @@ "field_manager": { "source_fields": ["field1", "field2", "field3"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, "delete_source_fields": True, }, }, @@ -252,7 +252,7 @@ "field_manager": { "source_fields": ["field1", "field2", "field3"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, "delete_source_fields": True, "overwrite_target": True, }, @@ -281,7 +281,7 @@ "client.nat.ip", ], "target_field": "related.ip", - "extend_target_list": True, + "merge_with_target": True, }, }, { @@ -342,7 +342,7 @@ "filter": "field", "field_manager": { "mapping": {"field.one": "one", "field.two": "two", "field.three": "three"}, - "extend_target_list": True, + "merge_with_target": True, }, }, {"field": {"one": 1, "two": 2, "three": 3}, "three": ["exists already"]}, @@ -359,7 +359,7 @@ "filter": "field", "field_manager": { "mapping": {"field.one": "one", "field.two": "two", "field.three": "three"}, - "extend_target_list": True, + "merge_with_target": True, }, }, {"field": {"one": 1, "two": 2, "three": [3, 3]}, "three": ["exists already"]}, @@ -420,7 +420,7 @@ "source_fields": ["source.one", "source.two"], "target_field": "merged", "mapping": {"field.one": "one", "field.two": "two", "field.three": "three"}, - "extend_target_list": True, + "merge_with_target": True, }, }, {"field": {"one": 1, "two": 2, "three": 3}, "source": {"one": ["a"], "two": ["b"]}}, @@ -452,7 +452,7 @@ }, ), ( - "extend_target_list preserves list ordering", + "merge_with_target preserves list ordering", { "filter": "(foo) OR (test)", "field_manager": { @@ -461,7 +461,7 @@ "target_field": "existing_list", "delete_source_fields": False, "overwrite_target": False, - "extend_target_list": True, + "merge_with_target": True, }, }, {"existing_list": ["hello", "world"], "foo": "bar", "test": "value"}, @@ -474,7 +474,7 @@ "field_manager": { "source_fields": ["message"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, }, }, {"message": "Value B", "new_field": "Value A"}, @@ -487,7 +487,7 @@ "field_manager": { "source_fields": ["field1", "field2", "field3"], "target_field": "new_field", - "extend_target_list": True, + "merge_with_target": True, }, }, { diff --git a/tests/unit/processor/generic_adder/test_generic_adder.py b/tests/unit/processor/generic_adder/test_generic_adder.py index 54b1d898d..b6b27fe4a 100644 --- a/tests/unit/processor/generic_adder/test_generic_adder.py +++ b/tests/unit/processor/generic_adder/test_generic_adder.py @@ -193,7 +193,7 @@ class TestGenericAdder(BaseProcessorTestCase): }, ), ( - "Extend list field with 'extend_target_list' enabled", + "Extend list field with 'merge_with_target' enabled", { "filter": "extend_generic_test", "generic_adder": { @@ -202,7 +202,7 @@ class TestGenericAdder(BaseProcessorTestCase): "another_added_field": "another_value", "dotted.added.field": "yet_another_value", }, - "extend_target_list": True, + "merge_with_target": True, }, }, {"extend_generic_test": "Test", "event_id": 123, "some_added_field": []}, @@ -215,7 +215,7 @@ class TestGenericAdder(BaseProcessorTestCase): }, ), ( - "Extend list field with 'extend_target_list' enabled", + "Extend list field with 'merge_with_target' enabled", { "filter": "*", "generic_adder": { @@ -259,7 +259,7 @@ class TestGenericAdder(BaseProcessorTestCase): r"subfields existed and could not be extended: some_added_field", ), ( - "Extend list field with 'extend_target_list' disabled", + "Extend list field with 'merge_with_target' disabled", { "filter": "extend_generic_test", "generic_adder": { @@ -268,7 +268,7 @@ class TestGenericAdder(BaseProcessorTestCase): "another_added_field": "another_value", "dotted.added.field": "yet_another_value", }, - "extend_target_list": False, + "merge_with_target": False, }, }, {"extend_generic_test": "Test", "event_id": 123, "some_added_field": []}, @@ -283,7 +283,7 @@ class TestGenericAdder(BaseProcessorTestCase): r"subfields existed and could not be extended: some_added_field", ), ( - "Extend list field with 'extend_target_list' enabled, but non-list target", + "Extend list field with 'merge_with_target' enabled, but non-list target", { "filter": "extend_generic_test", "generic_adder": { @@ -292,7 +292,7 @@ class TestGenericAdder(BaseProcessorTestCase): "another_added_field": "another_value", "dotted.added.field": "yet_another_value", }, - "extend_target_list": True, + "merge_with_target": True, }, }, {"extend_generic_test": "Test", "event_id": 123, "some_added_field": "not_a_list"}, diff --git a/tests/unit/processor/generic_resolver/test_generic_resolver.py b/tests/unit/processor/generic_resolver/test_generic_resolver.py index 1b909d809..9bb1f4af4 100644 --- a/tests/unit/processor/generic_resolver/test_generic_resolver.py +++ b/tests/unit/processor/generic_resolver/test_generic_resolver.py @@ -270,7 +270,7 @@ def test_resolve_dotted_field_no_conflict_match_from_file_and_list( "path": "tests/testdata/unit/generic_resolver/resolve_mapping.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": True, + "merge_with_target": True, }, } self._load_rule(rule) @@ -294,7 +294,7 @@ def test_resolve_dotted_field_no_conflict_match_from_file_and_list_has_conflict( "path": "tests/testdata/unit/generic_resolver/resolve_mapping.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": True, + "merge_with_target": True, }, } self._load_rule(rule) @@ -322,7 +322,7 @@ def test_resolve_dotted_field_no_conflict_match_from_file_and_list_has_conflict_ "path": "tests/testdata/unit/generic_resolver/resolve_mapping.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": True, + "merge_with_target": True, }, } self._load_rule(rule) diff --git a/tests/unit/processor/generic_resolver/test_generic_resolver_rule.py b/tests/unit/processor/generic_resolver/test_generic_resolver_rule.py index 411702a91..6e7175eba 100644 --- a/tests/unit/processor/generic_resolver/test_generic_resolver_rule.py +++ b/tests/unit/processor/generic_resolver/test_generic_resolver_rule.py @@ -19,7 +19,7 @@ def fixture_rule_definition(): "path": "tests/testdata/unit/generic_resolver/resolve_mapping.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": False, + "merge_with_target": False, }, "description": "insert a description text", } @@ -40,7 +40,7 @@ class TestGenericResolverRule: "path": "tests/testdata/unit/generic_resolver/resolve_mapping.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": False, + "merge_with_target": False, }, }, True, @@ -71,7 +71,7 @@ class TestGenericResolverRule: "path": "tests/testdata/unit/generic_resolver/resolve_mapping.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": False, + "merge_with_target": False, }, }, False, @@ -87,7 +87,7 @@ class TestGenericResolverRule: "path": "tests/testdata/unit/generic_resolver/resolve_mapping.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": False, + "merge_with_target": False, }, }, False, @@ -103,7 +103,7 @@ class TestGenericResolverRule: "path": "tests/testdata/unit/generic_resolver/resolve_mapping.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": False, + "merge_with_target": False, }, }, False, @@ -118,7 +118,7 @@ class TestGenericResolverRule: "path": "tests/testdata/unit/generic_resolver/resolve_mapping.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": False, + "merge_with_target": False, }, }, False, @@ -134,7 +134,7 @@ class TestGenericResolverRule: "path": "tests/testdata/unit/generic_resolver/resolve_mapping.yml", "pattern": r"other_\d*(?P[a-z]+)\d*", }, - "extend_target_list": False, + "merge_with_target": False, }, }, False, @@ -146,7 +146,7 @@ class TestGenericResolverRule: "generic_resolver": { "field_mapping": {"to_resolve": "resolved"}, "resolve_list": {"pattern": "result"}, - "extend_target_list": False, + "merge_with_target": False, }, }, False, diff --git a/tests/unit/processor/hyperscan_resolver/test_hyperscan_resolver.py b/tests/unit/processor/hyperscan_resolver/test_hyperscan_resolver.py index 83bb92ae9..7b0c81f68 100644 --- a/tests/unit/processor/hyperscan_resolver/test_hyperscan_resolver.py +++ b/tests/unit/processor/hyperscan_resolver/test_hyperscan_resolver.py @@ -194,7 +194,7 @@ def test_resolve_dotted_no_conflict_from_file_and_list( "field_mapping": {"to_resolve": "resolved"}, "resolve_from_file": "tests/testdata/unit/hyperscan_resolver/" "resolve_mapping_no_regex.yml", - "extend_target_list": True, + "merge_with_target": True, }, } @@ -216,7 +216,7 @@ def test_resolve_dotted_no_conflict_from_file_and_list_has_conflict( "field_mapping": {"to_resolve": "resolved"}, "resolve_from_file": "tests/testdata/unit/hyperscan_resolver/" "resolve_mapping_no_regex.yml", - "extend_target_list": True, + "merge_with_target": True, }, } @@ -239,7 +239,7 @@ def test_resolve_dotted_no_conflict_from_file_and_list_has_conflict_and_diff_inp "field_mapping": {"to_resolve": "resolved", "other_to_resolve": "resolved"}, "resolve_from_file": "tests/testdata/unit/hyperscan_resolver/" "resolve_mapping_no_regex.yml", - "extend_target_list": True, + "merge_with_target": True, }, } @@ -563,7 +563,7 @@ def test_resolve_no_conflict_from_file_and_list( "/resolve_mapping_no_regex.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": True, + "merge_with_target": True, }, } @@ -586,7 +586,7 @@ def test_resolve_with_parenthesis_in_mapping(self): "/resolve_mapping_with_parenthesis.yml", "pattern": r"\d*(?P(([a-z])+)())\d*", }, - "extend_target_list": True, + "merge_with_target": True, }, } @@ -609,7 +609,7 @@ def test_resolve_with_partially_matching_mapping(self): "/resolve_mapping_no_regex.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": True, + "merge_with_target": True, }, } @@ -632,7 +632,7 @@ def test_resolve_no_matching_pattern(self): "/resolve_mapping_no_regex.yml", "pattern": r"\d*(?P[123]+)\d*", }, - "extend_target_list": True, + "merge_with_target": True, }, } self._load_rule(rule) @@ -652,7 +652,7 @@ def test_resolve_no_conflict_from_file_and_list_has_conflict( "/resolve_mapping_no_regex.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": True, + "merge_with_target": True, }, } @@ -678,7 +678,7 @@ def test_resolve_no_conflict_from_file_and_list_has_conflict_and_diff_inputs( "/resolve_mapping_no_regex.yml", "pattern": r"\d*(?P[a-z]+)\d*", }, - "extend_target_list": True, + "merge_with_target": True, }, } diff --git a/tests/unit/processor/selective_extractor/test_selective_extractor_rule.py b/tests/unit/processor/selective_extractor/test_selective_extractor_rule.py index e6e7c7b4e..53d086073 100644 --- a/tests/unit/processor/selective_extractor/test_selective_extractor_rule.py +++ b/tests/unit/processor/selective_extractor/test_selective_extractor_rule.py @@ -4,10 +4,10 @@ from typing import Hashable from unittest import mock + import pytest from logprep.processor.base.exceptions import InvalidRuleDefinitionError -from logprep.factory_error import InvalidConfigurationError from logprep.processor.selective_extractor.rule import ( SelectiveExtractorRule, SelectiveExtractorRuleError, @@ -280,12 +280,12 @@ def test_rules_equality(self, rule_definition, testcase, other_rule_definition, "selective_extractor": { "source_fields": ["field1"], "outputs": [{"kafka": "topic"}], - "extend_target_list": True, + "merge_with_target": True, }, }, b"", TypeError, - "got an unexpected keyword argument 'extend_target_list'", + "got an unexpected keyword argument 'merge_with_target'", ), ], ) diff --git a/tests/unit/processor/timestamp_differ/test_timestamp_differ.py b/tests/unit/processor/timestamp_differ/test_timestamp_differ.py index cd8943772..0bdb0b92f 100644 --- a/tests/unit/processor/timestamp_differ/test_timestamp_differ.py +++ b/tests/unit/processor/timestamp_differ/test_timestamp_differ.py @@ -183,7 +183,7 @@ "timestamp_differ": { "diff": "${subfield.field2} - ${field1:%Y-%m-%d %H:%M:%S}", "target_field": "time_diff", - "extend_target_list": True, + "merge_with_target": True, }, }, { diff --git a/tests/unit/util/test_helper_add_field.py b/tests/unit/util/test_helper_add_field.py index f08e4b563..5559f40ba 100644 --- a/tests/unit/util/test_helper_add_field.py +++ b/tests/unit/util/test_helper_add_field.py @@ -75,26 +75,35 @@ def test_provoke_dict_duplicate_in_dotted_subfield(self): def test_add_field_to_overwrites_output_field_in_root_level(self): document = {"some": "field", "output_field": "has already content"} - add_fields_to(document, {"output_field": {"dict": "content"}}, overwrite_target_field=True) + add_fields_to(document, {"output_field": {"dict": "content"}}, overwrite_target=True) assert document.get("output_field") == {"dict": "content"} def test_add_field_to_overwrites_output_field_in_nested_level(self): document = {"some": "field", "nested": {"output": {"field": "has already content"}}} - add_fields_to( - document, {"nested.output.field": {"dict": "content"}}, overwrite_target_field=True - ) + add_fields_to(document, {"nested.output.field": {"dict": "content"}}, overwrite_target=True) assert document.get("nested", {}).get("output", {}).get("field") == {"dict": "content"} - def test_add_field_to_extends_list_when_only_given_a_string(self): + def test_add_field_to_merges_with_target_when_only_given_a_string(self): document = {"some": "field", "some_list": ["with a value"]} - add_fields_to(document, {"some_list": "new value"}, extends_lists=True) + add_fields_to(document, {"some_list": "new value"}, merge_with_target=True) assert document.get("some_list") == ["with a value", "new value"] - def test_add_field_to_extends_list_when_given_a_list(self): + def test_add_field_to_merges_with_target_when_given_a_list(self): document = {"some": "field", "some_list": ["with a value"]} - add_fields_to(document, {"some_list": ["first", "second"]}, extends_lists=True) + add_fields_to(document, {"some_list": ["first", "second"]}, merge_with_target=True) assert document.get("some_list") == ["with a value", "first", "second"] + def test_add_field_to_raises_if_list_should_be_extended_and_overwritten_at_the_same_time(self): + document = {"some": "field", "some_list": ["with a value"]} + with pytest.raises(ValueError, match=r"Can't merge with and overwrite a target"): + add_fields_to( + document, + {"some_list": ["first", "second"]}, + merge_with_target=True, + overwrite_target=True, + ) + assert document + def test_returns_false_if_dotted_field_value_key_exists(self): document = {"user": "Franz"} with pytest.raises(FieldExistsWarning, match=r"could not be written"): @@ -112,13 +121,15 @@ def test_add_list_with_nested_keys(self): } } } - add_fields_to(testdict, {"key1.key2.key3.key4.key5.list": ["content"]}, extends_lists=True) + add_fields_to( + testdict, {"key1.key2.key3.key4.key5.list": ["content"]}, merge_with_target=True + ) assert testdict == expected def test_add_field_to_adds_value_not_as_list(self): - # checks if a newly added field is added not as list, even when `extends_list` is True + # checks if a newly added field is added not as list, even when `merge_with_target` is True document = {"some": "field"} - add_fields_to(document, {"new": "list"}, extends_lists=True) + add_fields_to(document, {"new": "list"}, merge_with_target=True) assert document.get("new") == "list" assert not isinstance(document.get("new"), list) @@ -140,7 +151,7 @@ def test_add_field_too_adds_multiple_fields_and_overwrites_one(self): "new": "another content", } new_fields = {"exists_already": {"updated": "content"}, "new": "another content"} - add_fields_to(document, new_fields, overwrite_target_field=True) + add_fields_to(document, new_fields, overwrite_target=True) assert document == expected def test_add_field_too_adds_multiple_fields_and_extends_one(self): @@ -151,7 +162,7 @@ def test_add_field_too_adds_multiple_fields_and_extends_one(self): "new": "another content", } new_fields = {"exists_already": ["extended content"], "new": "another content"} - add_fields_to(document, new_fields, extends_lists=True) + add_fields_to(document, new_fields, merge_with_target=True) assert document == expected def test_add_field_adds_multiple_fields_and_raises_one_field_exists_warning(self): @@ -170,22 +181,11 @@ def test_add_fields_to_merges_existing_dict_with_new_dict(self): "some": "field", "existing": {"new": "dict", "old": "dict"}, } - add_fields_to(document, {"existing": {"new": "dict"}}, extends_lists=True) + add_fields_to(document, {"existing": {"new": "dict"}}, merge_with_target=True) assert document == expected - def test_add_fields_to_converts_element_to_list_when_extends_lists_is_true(self): + def test_add_fields_to_converts_element_to_list_when_merge_with_target_is_true(self): document = {"existing": "element"} expected = {"existing": ["element", "new element"]} - add_fields_to(document, {"existing": "new element"}, extends_lists=True) + add_fields_to(document, {"existing": ["new element"]}, merge_with_target=True) assert document == expected - - def test_add_fields_to_extends_but_does_not_overwrite_target(self): - document = {"existing": "element"} - expected = {"existing": "element"} - add_fields_to( - document, {"existing": "new element"}, extends_lists=True, overwrite_target_field=False - ) - assert document == expected - - # merge with target (keeps existing target value) - # overwrite target (replaces existing target value) From 35bf7a1afa4d53f5b4232d649484e8558b7a5b50 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Mon, 23 Dec 2024 17:13:59 +0100 Subject: [PATCH 03/10] convert labeler label values to lists in prefixed_label property - Ensures all values in the `prefixed_label` dictionary are lists. - Adds a unit test to validate the behavior for labeler rules. --- logprep/processor/labeler/rule.py | 2 +- tests/unit/processor/labeler/test_labeler_rule.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/logprep/processor/labeler/rule.py b/logprep/processor/labeler/rule.py index 0b7f9ea90..0663a83a4 100644 --- a/logprep/processor/labeler/rule.py +++ b/logprep/processor/labeler/rule.py @@ -62,7 +62,7 @@ def label(self) -> dict: @property def prefixed_label(self) -> dict: - return {f"label.{key}": value for key, value in self.label.items()} + return {f"label.{key}": list(value) for key, value in self.label.items()} def conforms_to_schema(self, schema: LabelingSchema) -> bool: """Check if labels are valid.""" diff --git a/tests/unit/processor/labeler/test_labeler_rule.py b/tests/unit/processor/labeler/test_labeler_rule.py index 0bf877017..30494ea15 100644 --- a/tests/unit/processor/labeler/test_labeler_rule.py +++ b/tests/unit/processor/labeler/test_labeler_rule.py @@ -4,6 +4,7 @@ # pylint: disable=missing-function-docstring # pylint: disable=wrong-import-position from copy import deepcopy + import pytest from logprep.filter.expression.filter_expression import StringFilterExpression @@ -268,3 +269,13 @@ def test_complex_lucene_regex_does_not_match_returns_true_for_matching_document( assert not rule.matches({"applyrule": "UPloXXXX"}) assert not rule.matches({"applyrule": "88888888"}) assert not rule.matches({"applyrule": "UPlo$$7"}) + + def test_prefixed_label_property_is_a_dicts_with_only_list_values(self): + rule_definition = { + "filter": 'applyrule: "yes"', + "labeler": {"label": {"reporter": {"windows"}}}, # label is given as set + } + rule = LabelerRule._create_from_dict(rule_definition) + assert all( + isinstance(val, list) for val in rule.prefixed_label.values() + ), "prefixed_labels contain non-list values" From d3385a33d2367330c03c6f18d185b6f8df06580e Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Mon, 23 Dec 2024 17:20:48 +0100 Subject: [PATCH 04/10] add CHANGELOG.md entry --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a06ada075..e8aa06451 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ the list is now fixed inside the packaged logprep * remove SQL feature from `generic_adder`, fields can only be added from rule config or from file * use a single rule tree instead of a generic and a specific rule tree +* replace the `extend_target_list` parameter with `merge_with_target` for improved naming clarity +and functionality across `FieldManager` based processors (e.g., `FieldManager`, `Clusterer`, +`GenericAdder`). ### Features @@ -34,6 +37,7 @@ the list is now fixed inside the packaged logprep * fix wrong documentation for `timestamp_differ` * add container signatures to images build in ci pipeline * add sbom to images build in ci pipeline +* `FieldManager` supports merging dictionaries ### Bugfix From 35bb304f11c24aeb882edd185bbf611c19b95a87 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Mon, 23 Dec 2024 17:29:57 +0100 Subject: [PATCH 05/10] add test to merge multiple dicts - moves test at the end of the list --- .../field_manager/test_field_manager.py | 52 ++++++++++++++----- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/tests/unit/processor/field_manager/test_field_manager.py b/tests/unit/processor/field_manager/test_field_manager.py index dee40032b..9fdcd8bf4 100644 --- a/tests/unit/processor/field_manager/test_field_manager.py +++ b/tests/unit/processor/field_manager/test_field_manager.py @@ -8,19 +8,6 @@ from tests.unit.processor.base import BaseProcessorTestCase test_cases = [ # testcase, rule, event, expected - ( - "Merge source dict into existing target dict", - { - "filter": "source", - "field_manager": { - "source_fields": ["source"], - "target_field": "target", - "merge_with_target": True, - }, - }, - {"source": {"source1": "value"}, "target": {"target1": "value"}}, - {"source": {"source1": "value"}, "target": {"source1": "value", "target1": "value"}}, - ), ( "copies single field to non existing target field", { @@ -503,6 +490,45 @@ "new_field": ["Value A", "Value B", "Value C", "Value D"], }, ), + ( + "Merge source dict into existing target dict", + { + "filter": "source", + "field_manager": { + "source_fields": ["source"], + "target_field": "target", + "merge_with_target": True, + }, + }, + {"source": {"source1": "value"}, "target": {"target1": "value"}}, + {"source": {"source1": "value"}, "target": {"source1": "value", "target1": "value"}}, + ), + ( + "Merge multiple source dicts into existing target dict", + { + "filter": "source1", + "field_manager": { + "source_fields": ["source1", "source2", "source3"], + "target_field": "target", + "delete_source_fields": True, + "merge_with_target": True, + }, + }, + { + "source1": {"source1": "value"}, + "source2": {"source2": "value"}, + "source3": {"source-nested": {"foo": "bar"}}, + "target": {"target1": "value"}, + }, + { + "target": { + "source1": "value", + "source2": "value", + "source-nested": {"foo": "bar"}, + "target1": "value", + }, + }, + ), ] failure_test_cases = [ From 1509f03e66e409b3135fdf67829524251112a7d8 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Mon, 23 Dec 2024 17:30:45 +0100 Subject: [PATCH 06/10] revert assertion order --- tests/unit/processor/generic_adder/test_generic_adder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/processor/generic_adder/test_generic_adder.py b/tests/unit/processor/generic_adder/test_generic_adder.py index b6b27fe4a..be5cf6590 100644 --- a/tests/unit/processor/generic_adder/test_generic_adder.py +++ b/tests/unit/processor/generic_adder/test_generic_adder.py @@ -327,9 +327,9 @@ def test_generic_adder_testcases_failure_handling( ): self._load_rule(rule) result = self.object.process(event) - assert event == expected, testcase assert len(result.warnings) == 1 assert re.match(rf".*FieldExistsWarning.*{error_message}", str(result.warnings[0])) + assert event == expected, testcase def test_add_generic_fields_from_file_missing_and_existing_with_all_required(self): with pytest.raises(InvalidRuleDefinitionError, match=r"files do not exist"): From abf837642161d0c0ce8f628bf6748787edb04bca Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Mon, 23 Dec 2024 17:31:15 +0100 Subject: [PATCH 07/10] fix test name --- tests/unit/processor/labeler/test_labeler_rule.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/processor/labeler/test_labeler_rule.py b/tests/unit/processor/labeler/test_labeler_rule.py index 30494ea15..72afb22da 100644 --- a/tests/unit/processor/labeler/test_labeler_rule.py +++ b/tests/unit/processor/labeler/test_labeler_rule.py @@ -270,7 +270,7 @@ def test_complex_lucene_regex_does_not_match_returns_true_for_matching_document( assert not rule.matches({"applyrule": "88888888"}) assert not rule.matches({"applyrule": "UPlo$$7"}) - def test_prefixed_label_property_is_a_dicts_with_only_list_values(self): + def test_prefixed_label_property_is_a_dict_with_only_list_values(self): rule_definition = { "filter": 'applyrule: "yes"', "labeler": {"label": {"reporter": {"windows"}}}, # label is given as set From 9532f0d688505b786bcafa62dd044d22fc379152 Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Mon, 23 Dec 2024 17:59:42 +0100 Subject: [PATCH 08/10] refactor field merging logic in FieldManager processor - Simplifies conditional branches by reordering merge cases. - Streamlines how source values and targets are handled. - Ensures proper handling of dict merging and overwrite flags. --- logprep/processor/field_manager/processor.py | 24 +++++++++----------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/logprep/processor/field_manager/processor.py b/logprep/processor/field_manager/processor.py index e0a2c5274..8928c4728 100644 --- a/logprep/processor/field_manager/processor.py +++ b/logprep/processor/field_manager/processor.py @@ -30,7 +30,6 @@ from logprep.abc.processor import Processor from logprep.processor.field_manager.rule import FieldManagerRule from logprep.util.helper import ( - add_and_overwrite, add_fields_to, get_dotted_field_value, pop_dotted_field_value, @@ -88,20 +87,19 @@ def _write_to_single_target(self, args, merge_with_target, overwrite_target, rul event, target_field, source_fields_values = args if len(source_fields_values) == 1 and not merge_with_target: source_fields_values = source_fields_values.pop() - if merge_with_target: - flattened_source_fields = self._overwrite_from_source_values(source_fields_values) - new_value = [*flattened_source_fields] - if all(isinstance(elem, dict) for elem in new_value): - new_value = {key: value for d in new_value for key, value in d.items()} - if overwrite_target: - add_and_overwrite(event, {target_field: new_value}, rule) - else: - add_fields_to( - event, {target_field: new_value}, rule, merge_with_target, overwrite_target - ) - else: + if not merge_with_target: field = {target_field: source_fields_values} add_fields_to(event, field, rule, merge_with_target, overwrite_target) + return + new_values = self._overwrite_from_source_values(source_fields_values) + if all(isinstance(element, dict) for element in new_values): + # merge a list of dicts into one dict + new_values = {key: value for dict_ in new_values for key, value in dict_.items()} + if overwrite_target: + # source values are already included in new_values, add_fields_to also does not + # allow overwrite_target=True and marge_with_target=True at the same time + merge_with_target = False + add_fields_to(event, {target_field: new_values}, rule, merge_with_target, overwrite_target) def _overwrite_from_source_values(self, source_fields_values): duplicates = [] From ae710a83b0e8d3c5ec2e8ca9f1d139f61b050a5c Mon Sep 17 00:00:00 2001 From: dtrai2 Date: Mon, 23 Dec 2024 18:21:13 +0100 Subject: [PATCH 09/10] add example requester test case for merging response with existing dict - Introduced a new test case to validate merging response data with an existing dictionary. - Ensures proper behavior when "merge_with_target" is enabled in the requester configuration. --- .../processor/requester/test_requester.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit/processor/requester/test_requester.py b/tests/unit/processor/requester/test_requester.py index 04157ebbb..035f867d1 100644 --- a/tests/unit/processor/requester/test_requester.py +++ b/tests/unit/processor/requester/test_requester.py @@ -277,6 +277,28 @@ "status": 200, }, ), + ( + "merge response with existing dict", + { + "filter": "data", + "requester": { + "url": "http://localhost/", + "method": "GET", + "target_field": "data", + "delete_source_fields": True, + "merge_with_target": True, + }, + }, + {"data": {"existing": "data"}}, + {"data": {"existing": "data", "new-data": {"dict": "value"}}}, + { + "method": "GET", + "url": "http://localhost/", + "json": {"new-data": {"dict": "value"}}, + "content_type": "text/plain", + "status": 200, + }, + ), ] # testcase, rule, event, expected, response failure_test_cases = [ From ae7546f2933878cede089a1c476965827bd20aea Mon Sep 17 00:00:00 2001 From: ekneg54 Date: Fri, 3 Jan 2025 10:28:27 +0100 Subject: [PATCH 10/10] add feedback from review --- logprep/processor/field_manager/rule.py | 7 +++++-- tests/unit/processor/field_manager/test_field_manager.py | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/logprep/processor/field_manager/rule.py b/logprep/processor/field_manager/rule.py index f893e0c2e..0b278fa9c 100644 --- a/logprep/processor/field_manager/rule.py +++ b/logprep/processor/field_manager/rule.py @@ -122,9 +122,12 @@ class Config(Rule.Config): """Overwrite the target field value if exists. Defaults to :code:`False`""" merge_with_target: bool = field(validator=validators.instance_of(bool), default=False) """If the target field exists and is a list, the list will be extended with the values - of the source fields. If the source field is a list, the lists will be merged. + of the source fields. If the source field is a list, the lists will be merged by appending + the source fields list to the target list. If the source field is a dict, the dict will be + merged with the target dict. If the source keys exist in the target dict, the values will be + overwritten. So this is not e deep merge. If the target field does not exist, a new field will be added with the - source field value as list. Defaults to :code:`False`. + source field value as list or dict. Defaults to :code:`False`. """ ignore_missing_fields: bool = field(validator=validators.instance_of(bool), default=False) """If set to :code:`True` missing fields will be ignored, no warning is logged and the event diff --git a/tests/unit/processor/field_manager/test_field_manager.py b/tests/unit/processor/field_manager/test_field_manager.py index 9fdcd8bf4..4fbbe9ee3 100644 --- a/tests/unit/processor/field_manager/test_field_manager.py +++ b/tests/unit/processor/field_manager/test_field_manager.py @@ -345,7 +345,11 @@ { "filter": "field", "field_manager": { - "mapping": {"field.one": "one", "field.two": "two", "field.three": "three"}, + "mapping": { + "field.one": "one", + "field.two": "two", + "field.three": "three", + }, "merge_with_target": True, }, },