From c69c872b8f406b1989a0e5a80a227353f0510fb3 Mon Sep 17 00:00:00 2001 From: Eran Rivlis Date: Sun, 4 Aug 2024 00:42:34 +0300 Subject: [PATCH] add stream methods + unit tests + bump version to 0.2.0 - stream - stream_dict_records --- README.md | 82 ++++++++++++++ pyproject.toml | 2 +- src/mappingtools.py | 53 +++++++-- tests/test_stream.py | 176 ++++++++++++++++++++++++++++++ tests/test_stream_dict_records.py | 162 +++++++++++++++++++++++++++ tests/test_stream_namedtuples.py | 155 ++++++++++++++++++++++++++ 6 files changed, 622 insertions(+), 8 deletions(-) create mode 100644 tests/test_stream.py create mode 100644 tests/test_stream_dict_records.py create mode 100644 tests/test_stream_namedtuples.py diff --git a/README.md b/README.md index 4d058de..1a3ccf6 100644 --- a/README.md +++ b/README.md @@ -208,6 +208,88 @@ print(unwrapped_data) # Output: [{'key': 'key1', 'value': [{'key': 'subkey', 'value': 'value'}]}, {'key': 'key2', 'value': ['item1', 'item2']}] ``` +#### `stream` + +Takes a mapping and an optional item factory function, and generates items from the mapping. +If the item factory is provided, it applies the factory to each key-value pair before yielding. + +```python +from collections import namedtuple + +from mappingtools import stream + + +def custom_factory(key, value): + return f"{key}: {value}" + + +my_mapping = {'a': 1, 'b': 2, 'c': 3} + +for item in stream(my_mapping, custom_factory): + print(item) +# Output: +# a: 1 +# b: 2 +# c: 3 + + +MyTuple = namedtuple('MyTuple', ['key', 'value']) +data = {'a': 1, 'b': 2} + +for item in stream(data, MyTuple): + print(item) +# Output: +# MyTuple(key='a', value=1) +# MyTuple(key='b', value=2) + + +``` + +#### `stream_dict_records` + +generates dictionary records from a given mapping, where each record contains a key-value pair from the mapping with +customizable key and value names. 
+ +```python +from mappingtools import stream_dict_records + +mapping = {'a': 1, 'b': 2} +records = stream_dict_records(mapping, key_name='letter', value_name='number') +for record in records: + print(record) +# Output: +# {'letter': 'a', 'number': 1} +# {'letter': 'b', 'number': 2} +``` + +#### `stream_namedtuples` + +Generates named tuple instances from a given mapping and a named tuple class by passing the class to `stream` as the item factory. + +```python +from collections import namedtuple + +from mappingtools import stream + +MyTuple = namedtuple('MyTuple', ['key', 'value']) +data = {'a': 1, 'b': 2} + +for item in stream(data, MyTuple): + print(item) +# Output: +# MyTuple(key='a', value=1) +# MyTuple(key='b', value=2) + +def record(k, v): + return {'key': k, 'value': v} + +for item in stream(data, record): + print(item) +# Output: +# {'key': 'a', 'value': 1} +# {'key': 'b', 'value': 2} +``` + ### Collectors #### `nested_defaultdict` diff --git a/pyproject.toml b/pyproject.toml index 65ec776..951f892 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mappingtools" -version = "0.1.0" +version = "0.2.0" authors = [ { name = "Eran Rivlis", email = "eran@rivlis.info" }, ] diff --git a/src/mappingtools.py b/src/mappingtools.py index 497695c..13e0d58 100644 --- a/src/mappingtools.py +++ b/src/mappingtools.py @@ -4,7 +4,7 @@ from collections.abc import Callable, Generator, Iterable, Mapping from enum import Enum, auto from itertools import chain -from typing import Any, TypeVar +from typing import Any, NamedTuple, TypeVar K = TypeVar('K') KT = TypeVar('KT') @@ -204,7 +204,8 @@ def remove(keys: Iterable[K], *mappings: Mapping[K, Any]) -> Generator[Mapping[K def inverse(mapping: Mapping[Any, set]) -> Mapping[Any, set]: - """Return a new dictionary with keys and values swapped from the input mapping. + """ + Return a new dictionary with keys and values swapped from the input mapping. Args: mapping (Mapping[Any, set]): The input mapping to invert. 
@@ -342,10 +343,9 @@ def listify(obj: Any, key_name: str = 'key', value_name: str = 'value') -> Any: Returns: Any: The unwrapped object. - """ - return _process_obj(obj, _listify_mapping, _listify_iterable, _listify_class, - key_name=key_name, value_name=value_name) + return _process_obj(obj, _listify_mapping, _listify_iterable, _listify_class, key_name=key_name, + value_name=value_name) def _listify_mapping(obj: Mapping, key_name, value_name) -> list[dict]: @@ -361,7 +361,46 @@ def _listify_class(obj, key_name, value_name): not k.startswith('_')] +def stream(mapping: Mapping, item_factory: Callable[[Any, Any], Any] | None = None) -> Generator[Any, Any, None]: + """ + Generate a stream of items from a mapping. + + Args: + mapping (Mapping): The mapping object to stream items from. + item_factory (Callable[[Any, Any], Any], optional): A function that transforms each key-value pair from + the mapping. Defaults to None. + + Yields: + The streamed items from the mapping. + """ + + items = mapping.items() if item_factory is None else iter(item_factory(k, v) for k, v in mapping.items()) + yield from items + + +def stream_dict_records(mapping: Mapping, + key_name: str = 'key', + value_name: str = 'value') -> Generator[Mapping[str, Any], Any, None]: + """ + Generate dictionary records from a mapping. + + Args: + mapping (Mapping): The input mapping to generate records from. + key_name (str): The name to use for the key in the generated records. Defaults to 'key'. + value_name (str): The name to use for the value in the generated records. Defaults to 'value'. + + Yields: + dictionary records based on the input mapping. 
+ """ + + def record(k, v): + return {key_name: k, value_name: v} + + yield from stream(mapping, record) + + __all__ = ( - 'distinct', 'keep', 'remove', 'inverse', 'nested_defaultdict', 'listify', 'simplify', 'strictify', 'Category', - 'CategoryCounter', 'MappingCollector', 'MappingCollectorMode' + 'distinct', 'keep', 'remove', 'inverse', 'nested_defaultdict', 'listify', 'simplify', 'stream', + 'stream_dict_records', 'strictify', 'Category', 'CategoryCounter', + 'MappingCollector', 'MappingCollectorMode' ) diff --git a/tests/test_stream.py b/tests/test_stream.py new file mode 100644 index 0000000..55beafb --- /dev/null +++ b/tests/test_stream.py @@ -0,0 +1,176 @@ +# Generated by CodiumAI +import dataclasses + +from mappingtools import stream + + +# stream function yields items from the mapping when item_factory is None +def test_yields_items_without_item_factory(): + # Arrange + mapping = {'a': 1, 'b': 2} + + # Act + result = list(stream(mapping)) + + # Assert + assert result == [('a', 1), ('b', 2)] + + +# stream function yields transformed items when item_factory is provided +def test_yields_transformed_items_with_item_factory(): + # Arrange + mapping = {'a': 1, 'b': 2} + + def item_factory(k, v): + return k, v * 2 + + # Act + result = list(stream(mapping, item_factory)) + + # Assert + assert result == [('a', 2), ('b', 4)] + + +# stream function works with different types of mappings (e.g., dict, defaultdict) +def test_works_with_different_mappings(): + # Arrange + from collections import defaultdict + mapping = defaultdict(int, {'a': 1, 'b': 2}) + + # Act + result = list(stream(mapping)) + + # Assert + assert result == [('a', 1), ('b', 2)] + + +# stream function handles empty mappings correctly +def test_handles_empty_mappings(): + # Arrange + mapping = {} + + # Act + result = list(stream(mapping)) + + # Assert + assert result == [] + + +# stream function works with various item_factory functions +def test_works_with_various_item_factories(): + # Arrange + 
mapping = {'a': 1, 'b': 2} + + def item_factory(k, v): + return k.upper(), v + 10 + + # Act + result = list(stream(mapping, item_factory)) + + # Assert + assert result == [('A', 11), ('B', 12)] + + +# stream function handles mappings with non-hashable keys +def test_handles_non_hashable_keys(): + # Arrange + mapping = {('a',): 1, ('b',): 2} + + # Act + result = list(stream(mapping)) + + # Assert + assert result == [(('a',), 1), (('b',), 2)] + + +# stream function handles mappings with None values +def test_handles_none_values(): + # Arrange + mapping = {'a': None, 'b': 2} + + # Act + result = list(stream(mapping)) + + # Assert + assert result == [('a', None), ('b', 2)] + + +# stream function handles mappings with mixed data types +def test_handles_mixed_data_types(): + # Arrange + mapping = {'a': 1, 'b': 'two', 'c': [3]} + + # Act + result = list(stream(mapping)) + + # Assert + assert result == [('a', 1), ('b', 'two'), ('c', [3])] + + +# stream function handles large mappings efficiently +def test_handles_large_mappings_efficiently(): + # Arrange + mapping = {i: i for i in range(1000000)} + + # Act & Assert + for i, item in enumerate(stream(mapping)): + assert item == (i, i) + if i >= 10: + break # Only check the first few items for efficiency + + +# stream function handles mappings with special characters in keys or values +def test_handles_special_characters_in_keys_or_values(): + # Arrange + mapping = {'a!@#': 'value$%^', 'b&*(': 'value)'} + + # Act + result = list(stream(mapping)) + + # Assert + assert result == [('a!@#', 'value$%^'), ('b&*(', 'value)')] + + +# stream function handles mappings with nested structures +def test_handles_nested_structures(): + # Arrange + mapping = {'a': {'nested': 1}, 'b': [2, 3]} + + # Act + result = list(stream(mapping)) + + # Assert + assert result == [('a', {'nested': 1}), ('b', [2, 3])] + + +# stream function handles mappings with cyclic references +def test_handles_cyclic_references(): + # Arrange + a = {} + b = {'a': a} + 
a['b'] = b + + mapping = {'a': a, 'b': b} + + # Act & Assert (checking for no infinite loop) + result = list(stream(mapping)) + + assert len(result) == 2 + assert ('a', a) in result + assert ('b', b) in result + + +def test_handles_dataclass_factory(): + mapping = {'a': 1, 'b': 2} + + @dataclasses.dataclass + class CustomDC: + key: str + value: int + + result = list(stream(mapping, CustomDC)) + + assert result[0].key == 'a' + assert result[0].value == 1 + assert result[1].key == 'b' + assert result[1].value == 2 diff --git a/tests/test_stream_dict_records.py b/tests/test_stream_dict_records.py new file mode 100644 index 0000000..a7ffc13 --- /dev/null +++ b/tests/test_stream_dict_records.py @@ -0,0 +1,162 @@ +# Generated by CodiumAI +import pytest +from mappingtools import stream_dict_records + + +# Convert a dictionary with string keys and values into a generator of dictionaries with 'key' and 'value' fields +def test_convert_string_keys_values(): + # Arrange + input_dict = {'a': '1', 'b': '2'} + expected_output = [{'key': 'a', 'value': '1'}, {'key': 'b', 'value': '2'}] + + # Act + result = list(stream_dict_records(input_dict)) + + # Assert + assert result == expected_output + + +# Use a custom key name and value name for the output dictionaries +def test_custom_key_value_names(): + # Arrange + input_dict = {'a': '1', 'b': '2'} + expected_output = [{'custom_key': 'a', 'custom_value': '1'}, {'custom_key': 'b', 'custom_value': '2'}] + + # Act + result = list(stream_dict_records(input_dict, key_name='custom_key', value_name='custom_value')) + + # Assert + assert result == expected_output + + +# Handle an empty dictionary gracefully +def test_empty_dictionary(): + # Arrange + input_dict = {} + expected_output = [] + + # Act + result = list(stream_dict_records(input_dict)) + + # Assert + assert result == expected_output + + +# Process a dictionary with mixed data types for keys and values +def test_mixed_data_types(): + # Arrange + input_dict = {1: 'one', 'two': 2, 3.0: 
[3], (4,): {4}} + expected_output = [ + {'key': 1, 'value': 'one'}, + {'key': 'two', 'value': 2}, + {'key': 3.0, 'value': [3]}, + {'key': (4,), 'value': {4}} + ] + + # Act + result = list(stream_dict_records(input_dict)) + + # Assert + assert result == expected_output + + +# Handle a dictionary with non-string keys +def test_non_string_keys(): + # Arrange + input_dict = {1: 'one', 2.0: 'two'} + expected_output = [{'key': 1, 'value': 'one'}, {'key': 2.0, 'value': 'two'}] + + # Act + result = list(stream_dict_records(input_dict)) + + # Assert + assert result == expected_output + + +# Handle a dictionary with non-string values +def test_non_string_values(): + # Arrange + input_dict = {'one': 1, 'two': 2.0} + expected_output = [{'key': 'one', 'value': 1}, {'key': 'two', 'value': 2.0}] + + # Act + result = list(stream_dict_records(input_dict)) + + # Assert + assert result == expected_output + + +# Process a dictionary with nested dictionaries as values +def test_nested_dictionaries_as_values(): + # Arrange + input_dict = {'a': {'nested_key': 'nested_value'}} + expected_output = [{'key': 'a', 'value': {'nested_key': 'nested_value'}}] + + # Act + result = list(stream_dict_records(input_dict)) + + # Assert + assert result == expected_output + + +# Handle a dictionary with None as a key or value +def test_none_as_key_or_value(): + # Arrange + input_dict = {None: 'none_value', 'none_key': None} + expected_output = [{'key': None, 'value': 'none_value'}, {'key': 'none_key', 'value': None}] + + # Act + result = list(stream_dict_records(input_dict)) + + # Assert + assert result == expected_output + + +# Process a dictionary with special characters in keys or values +def test_special_characters_in_keys_values(): + # Arrange + input_dict = {'sp@cial_k#y!': '@special_value#'} + expected_output = [{'key': 'sp@cial_k#y!', 'value': '@special_value#'}] + + # Act + result = list(stream_dict_records(input_dict)) + + # Assert + assert result == expected_output + + +# Ensure the generator 
stops correctly after all items are processed +def test_generator_stops_correctly(): + # Arrange + input_dict = {'a': 1, 'b': 2} + + # Act & Assert + gen = stream_dict_records(input_dict) + assert next(gen) == {'key': 'a', 'value': 1} + assert next(gen) == {'key': 'b', 'value': 2} + + with pytest.raises(StopIteration): + next(gen) + + +# Verify the order of items in the output matches the input dictionary +def test_order_of_items_matches_input(): + # Arrange + input_dict = {'first': 1, 'second': 2, 'third': 3} + expected_output = [{'key': 'first', 'value': 1}, {'key': 'second', 'value': 2}, {'key': 'third', 'value': 3}] + + # Act + result = list(stream_dict_records(input_dict)) + + # Assert + assert result == expected_output + + +# Handle large dictionaries efficiently without performance degradation +def test_large_dictionaries_performance(): + # Arrange + input_dict = {f'key_{i}': f'value_{i}' for i in range(100000)} + + # Act & Assert (no assertion needed for performance, just ensure it runs) + for _ in stream_dict_records(input_dict): + pass diff --git a/tests/test_stream_namedtuples.py b/tests/test_stream_namedtuples.py new file mode 100644 index 0000000..d57ffdd --- /dev/null +++ b/tests/test_stream_namedtuples.py @@ -0,0 +1,155 @@ +# Generated by CodiumAI + +from collections import OrderedDict, namedtuple + +import pytest +from mappingtools import stream + + +# Converts mapping to namedtuples correctly +def test_converts_mapping_to_namedtuples_correctly(): + # Arrange + TestTuple = namedtuple('TestTuple', ['key', 'value']) + mapping = {'a': 1, 'b': 2} + + # Act + result = list(stream(mapping, TestTuple)) + + # Assert + expected = [TestTuple('a', 1), TestTuple('b', 2)] + assert result == expected + + +# Uses provided NamedTuple class for conversion +def test_uses_provided_namedtuple_class_for_conversion(): + # Arrange + CustomTuple = namedtuple('CustomTuple', ['key', 'value']) + mapping = {'x': 10, 'y': 20} + + # Act + result = list(stream(mapping, 
CustomTuple)) + + # Assert + expected = [CustomTuple('x', 10), CustomTuple('y', 20)] + assert result == expected + + +# Yields all items from the mapping +def test_yields_all_items_from_mapping(): + # Arrange + TestTuple = namedtuple('TestTuple', ['key', 'value']) + mapping = {'a': 1, 'b': 2, 'c': 3} + + # Act + result = list(stream(mapping, TestTuple)) + + # Assert + assert len(result) == 3 + + +# Handles mappings with multiple key-value pairs +def test_handles_multiple_key_value_pairs(): + # Arrange + TestTuple = namedtuple('TestTuple', ['key', 'value']) + mapping = {'a': 1, 'b': 2, 'c': 3, 'd': 4} + + # Act + result = list(stream(mapping, TestTuple)) + + # Assert + expected = [TestTuple(k, v) for k, v in mapping.items()] + assert result == expected + + +# Works with different types of mappings +def test_works_with_different_types_of_mappings(): + # Arrange + TestTuple = namedtuple('TestTuple', ['key', 'value']) + mapping = OrderedDict([('a', 1), ('b', 2)]) + + # Act + result = list(stream(mapping, TestTuple)) + + # Assert + expected = [TestTuple('a', 1), TestTuple('b', 2)] + assert result == expected + + +# Empty mapping input +def test_empty_mapping_input(): + # Arrange + TestTuple = namedtuple('TestTuple', ['key', 'value']) + mapping = {} + + # Act + result = list(stream(mapping, TestTuple)) + + # Assert + assert result == [] + + +# Mapping with non-hashable keys +def test_mapping_with_non_hashable_keys(): + # Arrange + TestTuple = namedtuple('TestTuple', ['key', 'value']) + mapping = {('a',): 1, ('b',): 2} + + # Act + result = list(stream(mapping, TestTuple)) + + # Assert + expected = [TestTuple(('a',), 1), TestTuple(('b',), 2)] + assert result == expected + + +# Mapping with None values +def test_mapping_with_none_values(): + # Arrange + TestTuple = namedtuple('TestTuple', ['key', 'value']) + mapping = {'a': None, 'b': None} + + # Act + result = list(stream(mapping, TestTuple)) + + # Assert + expected = [TestTuple('a', None), TestTuple('b', None)] + assert 
result == expected + + +# NamedTuple with no fields +def test_namedtuple_with_no_fields(): + # Arrange + EmptyTuple = namedtuple('EmptyTuple', []) + mapping = {'a': 1} + + # Act and Assert (should raise TypeError) + with pytest.raises(TypeError): + list(stream(mapping, EmptyTuple)) + + +# Mapping with mixed data types +def test_mapping_with_mixed_data_types(): + # Arrange + MixedTypeTuple = namedtuple('MixedTypeTuple', ['key', 'value']) + mapping = {'a': 1, 'b': "string", 'c': [1, 2, 3]} + + # Act + result = list(stream(mapping, MixedTypeTuple)) + + # Assert + expected = [MixedTypeTuple('a', 1), MixedTypeTuple('b', "string"), MixedTypeTuple('c', [1, 2, 3])] + assert result == expected + + +# Handles large mappings efficiently +def test_handles_large_mappings_efficiently(): + # Arrange + LargeTuple = namedtuple('LargeTuple', ['key', 'value']) + large_mapping = {i: i for i in range(10000)} + + # Act and Assert (ensure it does not raise any exceptions) + + result = list(stream(large_mapping, LargeTuple)) + assert len(result) == 10000 + assert all(isinstance(item, LargeTuple) for item in result) + assert all(item.key == item.value for item in result)