From c69c872b8f406b1989a0e5a80a227353f0510fb3 Mon Sep 17 00:00:00 2001
From: Eran Rivlis <eran.rivlis@yahoo.com>
Date: Sun, 4 Aug 2024 00:42:34 +0300
Subject: [PATCH] add stream methods + unit tests + bump version to 0.2.0 -
 stream - stream_dict_records

---
 README.md                         |  82 ++++++++++++++
 pyproject.toml                    |   2 +-
 src/mappingtools.py               |  53 +++++++--
 tests/test_stream.py              | 176 ++++++++++++++++++++++++++++++
 tests/test_stream_dict_records.py | 162 +++++++++++++++++++++++++++
 tests/test_stream_namedtuples.py  | 155 ++++++++++++++++++++++++++
 6 files changed, 622 insertions(+), 8 deletions(-)
 create mode 100644 tests/test_stream.py
 create mode 100644 tests/test_stream_dict_records.py
 create mode 100644 tests/test_stream_namedtuples.py

diff --git a/README.md b/README.md
index 4d058de..1a3ccf6 100644
--- a/README.md
+++ b/README.md
@@ -208,6 +208,88 @@ print(unwrapped_data)
 # Output: [{'key': 'key1', 'value': [{'key': 'subkey', 'value': 'value'}]}, {'key': 'key2', 'value': ['item1', 'item2']}]
 ```
 
+#### `stream`
+
+Takes a mapping and an optional item factory function, and generates items from the mapping.
+If the item factory is provided, it is called with each key and value and its result is yielded; otherwise the `(key, value)` tuples themselves are yielded.
+
+```python
+from collections import namedtuple
+
+from mappingtools import stream
+
+
+def custom_factory(key, value):
+    return f"{key}: {value}"
+
+
+my_mapping = {'a': 1, 'b': 2, 'c': 3}
+
+for item in stream(my_mapping, custom_factory):
+    print(item)
+# Output:
+# a: 1
+# b: 2
+# c: 3
+
+
+MyTuple = namedtuple('MyTuple', ['key', 'value'])
+data = {'a': 1, 'b': 2}
+
+for item in stream(data, MyTuple):
+    print(item)
+# Output:
+# MyTuple(key='a', value=1)
+# MyTuple(key='b', value=2)
+
+
+```
+
+#### `stream_dict_records`
+
+Generates dictionary records from a given mapping, where each record holds one key-value pair from the mapping
+stored under customizable key and value names.
+
+```python
+from mappingtools import stream_dict_records
+
+mapping = {'a': 1, 'b': 2}
+records = stream_dict_records(mapping, key_name='letter', value_name='number')
+for record in records:
+    print(record)
+# Output:
+# {'letter': 'a', 'number': 1}
+# {'letter': 'b', 'number': 2}
+```
+
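+#### `stream` with named tuples and record factories
+
+Generates named tuple instances (or any other record type) from a given mapping by passing the named tuple class, or any two-argument callable, to `stream` as the item factory.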
+
+```python
+from collections import namedtuple
+
+from mappingtools import stream
+
+MyTuple = namedtuple('MyTuple', ['key', 'value'])
+data = {'a': 1, 'b': 2}
+
+for item in stream(data, MyTuple):
+    print(item)
+# Output:
+# MyTuple(key='a', value=1)
+# MyTuple(key='b', value=2)
+
+def record(k, v):
+    return {'key': k, 'value': v}
+
+for item in stream(data, record):
+    print(item)
+# Output:
+# {'key': 'a', 'value': 1}
+# {'key': 'b', 'value': 2}
+```
+
 ### Collectors
 
 #### `nested_defaultdict`
diff --git a/pyproject.toml b/pyproject.toml
index 65ec776..951f892 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "mappingtools"
-version = "0.1.0"
+version = "0.2.0"
 authors = [
     { name = "Eran Rivlis", email = "eran@rivlis.info" },
 ]
diff --git a/src/mappingtools.py b/src/mappingtools.py
index 497695c..13e0d58 100644
--- a/src/mappingtools.py
+++ b/src/mappingtools.py
@@ -4,7 +4,7 @@
 from collections.abc import Callable, Generator, Iterable, Mapping
 from enum import Enum, auto
 from itertools import chain
-from typing import Any, TypeVar
+from typing import Any, NamedTuple, TypeVar
 
 K = TypeVar('K')
 KT = TypeVar('KT')
@@ -204,7 +204,8 @@ def remove(keys: Iterable[K], *mappings: Mapping[K, Any]) -> Generator[Mapping[K
 
 
 def inverse(mapping: Mapping[Any, set]) -> Mapping[Any, set]:
-    """Return a new dictionary with keys and values swapped from the input mapping.
+    """
+    Return a new dictionary with keys and values swapped from the input mapping.
 
     Args:
         mapping (Mapping[Any, set]): The input mapping to invert.
@@ -342,10 +343,9 @@ def listify(obj: Any, key_name: str = 'key', value_name: str = 'value') -> Any:
 
     Returns:
         Any: The unwrapped object.
-
     """
-    return _process_obj(obj, _listify_mapping, _listify_iterable, _listify_class,
-                        key_name=key_name, value_name=value_name)
+    return _process_obj(obj, _listify_mapping, _listify_iterable, _listify_class, key_name=key_name,
+                        value_name=value_name)
 
 
 def _listify_mapping(obj: Mapping, key_name, value_name) -> list[dict]:
@@ -361,7 +361,46 @@ def _listify_class(obj, key_name, value_name):
             not k.startswith('_')]
 
 
+def stream(mapping: Mapping, item_factory: Callable[[Any, Any], Any] | None = None) -> Generator[Any, Any, None]:
+    """
+    Generate a stream of items from a mapping.
+
+    Args:
+        mapping (Mapping): The mapping object to stream items from.
+        item_factory (Callable[[Any, Any], Any], optional): A function called with each key and value from
+            the mapping; its result is yielded in place of the pair. Defaults to None.
+
+    Yields:
+        (key, value) tuples from the mapping, or the item_factory results when a factory is given.
+    """
+
+    items = mapping.items() if item_factory is None else iter(item_factory(k, v) for k, v in mapping.items())
+    yield from items
+
+
+def stream_dict_records(mapping: Mapping,
+                        key_name: str = 'key',
+                        value_name: str = 'value') -> Generator[Mapping[str, Any], Any, None]:
+    """
+    Generate dictionary records from a mapping.
+
+    Args:
+        mapping (Mapping): The input mapping to generate records from.
+        key_name (str): The name to use for the key in the generated records. Defaults to 'key'.
+        value_name (str): The name to use for the value in the generated records. Defaults to 'value'.
+
+    Yields:
+        One dictionary of the form {key_name: key, value_name: value} for each item in the input mapping.
+    """
+
+    def record(k, v):
+        return {key_name: k, value_name: v}
+
+    yield from stream(mapping, record)
+
+
 __all__ = (
-    'distinct', 'keep', 'remove', 'inverse', 'nested_defaultdict', 'listify', 'simplify', 'strictify', 'Category',
-    'CategoryCounter', 'MappingCollector', 'MappingCollectorMode'
+    'distinct', 'keep', 'remove', 'inverse', 'nested_defaultdict', 'listify', 'simplify', 'stream',
+    'stream_dict_records', 'strictify', 'Category', 'CategoryCounter',
+    'MappingCollector', 'MappingCollectorMode'
 )
diff --git a/tests/test_stream.py b/tests/test_stream.py
new file mode 100644
index 0000000..55beafb
--- /dev/null
+++ b/tests/test_stream.py
@@ -0,0 +1,176 @@
+# Generated by CodiumAI
+import dataclasses
+
+from mappingtools import stream
+
+
+# stream function yields items from the mapping when item_factory is None
+def test_yields_items_without_item_factory():
+    # Arrange
+    mapping = {'a': 1, 'b': 2}
+
+    # Act
+    result = list(stream(mapping))
+
+    # Assert
+    assert result == [('a', 1), ('b', 2)]
+
+
+# stream function yields transformed items when item_factory is provided
+def test_yields_transformed_items_with_item_factory():
+    # Arrange
+    mapping = {'a': 1, 'b': 2}
+
+    def item_factory(k, v):
+        return k, v * 2
+
+    # Act
+    result = list(stream(mapping, item_factory))
+
+    # Assert
+    assert result == [('a', 2), ('b', 4)]
+
+
+# stream function works with different types of mappings (e.g., dict, defaultdict)
+def test_works_with_different_mappings():
+    # Arrange
+    from collections import defaultdict
+    mapping = defaultdict(int, {'a': 1, 'b': 2})
+
+    # Act
+    result = list(stream(mapping))
+
+    # Assert
+    assert result == [('a', 1), ('b', 2)]
+
+
+# stream function handles empty mappings correctly
+def test_handles_empty_mappings():
+    # Arrange
+    mapping = {}
+
+    # Act
+    result = list(stream(mapping))
+
+    # Assert
+    assert result == []
+
+
+# stream function works with various item_factory functions
+def test_works_with_various_item_factories():
+    # Arrange
+    mapping = {'a': 1, 'b': 2}
+
+    def item_factory(k, v):
+        return k.upper(), v + 10
+
+    # Act
+    result = list(stream(mapping, item_factory))
+
+    # Assert
+    assert result == [('A', 11), ('B', 12)]
+
+
+# stream function handles mappings with tuple keys (tuples are hashable; mapping keys are always hashable)
+def test_handles_non_hashable_keys():
+    # Arrange
+    mapping = {('a',): 1, ('b',): 2}
+
+    # Act
+    result = list(stream(mapping))
+
+    # Assert
+    assert result == [(('a',), 1), (('b',), 2)]
+
+
+# stream function handles mappings with None values
+def test_handles_none_values():
+    # Arrange
+    mapping = {'a': None, 'b': 2}
+
+    # Act
+    result = list(stream(mapping))
+
+    # Assert
+    assert result == [('a', None), ('b', 2)]
+
+
+# stream function handles mappings with mixed data types
+def test_handles_mixed_data_types():
+    # Arrange
+    mapping = {'a': 1, 'b': 'two', 'c': [3]}
+
+    # Act
+    result = list(stream(mapping))
+
+    # Assert
+    assert result == [('a', 1), ('b', 'two'), ('c', [3])]
+
+
+# stream function handles large mappings efficiently
+def test_handles_large_mappings_efficiently():
+    # Arrange
+    mapping = {i: i for i in range(1000000)}
+
+    # Act & Assert
+    for i, item in enumerate(stream(mapping)):
+        assert item == (i, i)
+        if i >= 10:
+            break  # Only check the first few items for efficiency
+
+
+# stream function handles mappings with special characters in keys or values
+def test_handles_special_characters_in_keys_or_values():
+    # Arrange
+    mapping = {'a!@#': 'value$%^', 'b&*(': 'value)'}
+
+    # Act
+    result = list(stream(mapping))
+
+    # Assert
+    assert result == [('a!@#', 'value$%^'), ('b&*(', 'value)')]
+
+
+# stream function handles mappings with nested structures
+def test_handles_nested_structures():
+    # Arrange
+    mapping = {'a': {'nested': 1}, 'b': [2, 3]}
+
+    # Act
+    result = list(stream(mapping))
+
+    # Assert
+    assert result == [('a', {'nested': 1}), ('b', [2, 3])]
+
+
+# stream function handles mappings with cyclic references
+def test_handles_cyclic_references():
+    # Arrange
+    a = {}
+    b = {'a': a}
+    a['b'] = b
+
+    mapping = {'a': a, 'b': b}
+
+    # Act & Assert (checking for no infinite loop)
+    result = list(stream(mapping))
+
+    assert len(result) == 2
+    assert ('a', a) in result
+    assert ('b', b) in result
+
+
+def test_handles_dataclass_factory():
+    mapping = {'a': 1, 'b': 2}
+
+    @dataclasses.dataclass
+    class CustomDC:
+        key: str
+        value: int
+
+    result = list(stream(mapping, CustomDC))
+
+    assert result[0].key == 'a'
+    assert result[0].value == 1
+    assert result[1].key == 'b'
+    assert result[1].value == 2
diff --git a/tests/test_stream_dict_records.py b/tests/test_stream_dict_records.py
new file mode 100644
index 0000000..a7ffc13
--- /dev/null
+++ b/tests/test_stream_dict_records.py
@@ -0,0 +1,162 @@
+# Generated by CodiumAI
+import pytest
+from mappingtools import stream_dict_records
+
+
+# Convert a dictionary with string keys and values into a generator of dictionaries with 'key' and 'value' fields
+def test_convert_string_keys_values():
+    # Arrange
+    input_dict = {'a': '1', 'b': '2'}
+    expected_output = [{'key': 'a', 'value': '1'}, {'key': 'b', 'value': '2'}]
+
+    # Act
+    result = list(stream_dict_records(input_dict))
+
+    # Assert
+    assert result == expected_output
+
+
+# Use a custom key name and value name for the output dictionaries
+def test_custom_key_value_names():
+    # Arrange
+    input_dict = {'a': '1', 'b': '2'}
+    expected_output = [{'custom_key': 'a', 'custom_value': '1'}, {'custom_key': 'b', 'custom_value': '2'}]
+
+    # Act
+    result = list(stream_dict_records(input_dict, key_name='custom_key', value_name='custom_value'))
+
+    # Assert
+    assert result == expected_output
+
+
+# Handle an empty dictionary gracefully
+def test_empty_dictionary():
+    # Arrange
+    input_dict = {}
+    expected_output = []
+
+    # Act
+    result = list(stream_dict_records(input_dict))
+
+    # Assert
+    assert result == expected_output
+
+
+# Process a dictionary with mixed data types for keys and values
+def test_mixed_data_types():
+    # Arrange
+    input_dict = {1: 'one', 'two': 2, 3.0: [3], (4,): {4}}
+    expected_output = [
+        {'key': 1, 'value': 'one'},
+        {'key': 'two', 'value': 2},
+        {'key': 3.0, 'value': [3]},
+        {'key': (4,), 'value': {4}}
+    ]
+
+    # Act
+    result = list(stream_dict_records(input_dict))
+
+    # Assert
+    assert result == expected_output
+
+
+# Handle a dictionary with non-string keys
+def test_non_string_keys():
+    # Arrange
+    input_dict = {1: 'one', 2.0: 'two'}
+    expected_output = [{'key': 1, 'value': 'one'}, {'key': 2.0, 'value': 'two'}]
+
+    # Act
+    result = list(stream_dict_records(input_dict))
+
+    # Assert
+    assert result == expected_output
+
+
+# Handle a dictionary with non-string values
+def test_non_string_values():
+    # Arrange
+    input_dict = {'one': 1, 'two': 2.0}
+    expected_output = [{'key': 'one', 'value': 1}, {'key': 'two', 'value': 2.0}]
+
+    # Act
+    result = list(stream_dict_records(input_dict))
+
+    # Assert
+    assert result == expected_output
+
+
+# Process a dictionary with nested dictionaries as values
+def test_nested_dictionaries_as_values():
+    # Arrange
+    input_dict = {'a': {'nested_key': 'nested_value'}}
+    expected_output = [{'key': 'a', 'value': {'nested_key': 'nested_value'}}]
+
+    # Act
+    result = list(stream_dict_records(input_dict))
+
+    # Assert
+    assert result == expected_output
+
+
+# Handle a dictionary with None as a key or value
+def test_none_as_key_or_value():
+    # Arrange
+    input_dict = {None: 'none_value', 'none_key': None}
+    expected_output = [{'key': None, 'value': 'none_value'}, {'key': 'none_key', 'value': None}]
+
+    # Act
+    result = list(stream_dict_records(input_dict))
+
+    # Assert
+    assert result == expected_output
+
+
+# Process a dictionary with special characters in keys or values
+def test_special_characters_in_keys_values():
+    # Arrange
+    input_dict = {'sp@cial_k#y!': '@special_value#'}
+    expected_output = [{'key': 'sp@cial_k#y!', 'value': '@special_value#'}]
+
+    # Act
+    result = list(stream_dict_records(input_dict))
+
+    # Assert
+    assert result == expected_output
+
+
+# Ensure the generator stops correctly after all items are processed
+def test_generator_stops_correctly():
+    # Arrange
+    input_dict = {'a': 1, 'b': 2}
+
+    # Act & Assert
+    gen = stream_dict_records(input_dict)
+    assert next(gen) == {'key': 'a', 'value': 1}
+    assert next(gen) == {'key': 'b', 'value': 2}
+
+    with pytest.raises(StopIteration):
+        next(gen)
+
+
+# Verify the order of items in the output matches the input dictionary
+def test_order_of_items_matches_input():
+    # Arrange
+    input_dict = {'first': 1, 'second': 2, 'third': 3}
+    expected_output = [{'key': 'first', 'value': 1}, {'key': 'second', 'value': 2}, {'key': 'third', 'value': 3}]
+
+    # Act
+    result = list(stream_dict_records(input_dict))
+
+    # Assert
+    assert result == expected_output
+
+
+# Handle large dictionaries efficiently without performance degradation
+def test_large_dictionaries_performance():
+    # Arrange
+    input_dict = {f'key_{i}': f'value_{i}' for i in range(100000)}
+
+    # Act & Assert (no assertion needed for performance, just ensure it runs)
+    for _ in stream_dict_records(input_dict):
+        pass
diff --git a/tests/test_stream_namedtuples.py b/tests/test_stream_namedtuples.py
new file mode 100644
index 0000000..d57ffdd
--- /dev/null
+++ b/tests/test_stream_namedtuples.py
@@ -0,0 +1,155 @@
+# Generated by CodiumAI
+
+from collections import OrderedDict, namedtuple
+
+import pytest
+from mappingtools import stream
+
+
+# Converts mapping to namedtuples correctly
+def test_converts_mapping_to_namedtuples_correctly():
+    # Arrange
+    TestTuple = namedtuple('TestTuple', ['key', 'value'])
+    mapping = {'a': 1, 'b': 2}
+
+    # Act
+    result = list(stream(mapping, TestTuple))
+
+    # Assert
+    expected = [TestTuple('a', 1), TestTuple('b', 2)]
+    assert result == expected
+
+
+# Uses provided NamedTuple class for conversion
+def test_uses_provided_namedtuple_class_for_conversion():
+    # Arrange
+    CustomTuple = namedtuple('CustomTuple', ['key', 'value'])
+    mapping = {'x': 10, 'y': 20}
+
+    # Act
+    result = list(stream(mapping, CustomTuple))
+
+    # Assert
+    expected = [CustomTuple('x', 10), CustomTuple('y', 20)]
+    assert result == expected
+
+
+# Yields all items from the mapping
+def test_yields_all_items_from_mapping():
+    # Arrange
+    TestTuple = namedtuple('TestTuple', ['key', 'value'])
+    mapping = {'a': 1, 'b': 2, 'c': 3}
+
+    # Act
+    result = list(stream(mapping, TestTuple))
+
+    # Assert
+    assert len(result) == 3
+
+
+# Handles mappings with multiple key-value pairs
+def test_handles_multiple_key_value_pairs():
+    # Arrange
+    TestTuple = namedtuple('TestTuple', ['key', 'value'])
+    mapping = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
+
+    # Act
+    result = list(stream(mapping, TestTuple))
+
+    # Assert
+    expected = [TestTuple(k, v) for k, v in mapping.items()]
+    assert result == expected
+
+
+# Works with different types of mappings
+def test_works_with_different_types_of_mappings():
+    # Arrange
+    TestTuple = namedtuple('TestTuple', ['key', 'value'])
+    mapping = OrderedDict([('a', 1), ('b', 2)])
+
+    # Act
+    result = list(stream(mapping, TestTuple))
+
+    # Assert
+    expected = [TestTuple('a', 1), TestTuple('b', 2)]
+    assert result == expected
+
+
+# Empty mapping input
+def test_empty_mapping_input():
+    # Arrange
+    TestTuple = namedtuple('TestTuple', ['key', 'value'])
+    mapping = {}
+
+    # Act
+    result = list(stream(mapping, TestTuple))
+
+    # Assert
+    assert result == []
+
+
+# Mapping with tuple keys (tuples are hashable; mapping keys are always hashable)
+def test_mapping_with_non_hashable_keys():
+    # Arrange
+    TestTuple = namedtuple('TestTuple', ['key', 'value'])
+    mapping = {('a',): 1, ('b',): 2}
+
+    # Act
+    result = list(stream(mapping, TestTuple))
+
+    # Assert
+    expected = [TestTuple(('a',), 1), TestTuple(('b',), 2)]
+    assert result == expected
+
+
+# Mapping with None values
+def test_mapping_with_none_values():
+    # Arrange
+    TestTuple = namedtuple('TestTuple', ['key', 'value'])
+    mapping = {'a': None, 'b': None}
+
+    # Act
+    result = list(stream(mapping, TestTuple))
+
+    # Assert
+    expected = [TestTuple('a', None), TestTuple('b', None)]
+    assert result == expected
+
+
+# NamedTuple with no fields
+def test_namedtuple_with_no_fields():
+    # Arrange
+    EmptyTuple = namedtuple('EmptyTuple', [])
+    mapping = {'a': 1}
+
+    # Act and Assert (should raise TypeError)
+    with pytest.raises(TypeError):
+        list(stream(mapping, EmptyTuple))
+
+
+# Mapping with mixed data types
+def test_mapping_with_mixed_data_types():
+    # Arrange
+    MixedTypeTuple = namedtuple('MixedTypeTuple', ['key', 'value'])
+    mapping = {'a': 1, 'b': "string", 'c': [1, 2, 3]}
+
+    # Act
+    result = list(stream(mapping, MixedTypeTuple))
+
+    # Assert
+    expected = [MixedTypeTuple('a', 1), MixedTypeTuple('b', "string"), MixedTypeTuple('c', [1, 2, 3])]
+    assert result == expected
+
+
+# Handles large mappings efficiently
+def test_handles_large_mappings_efficiently():
+    # Arrange
+    LargeTuple = namedtuple('LargeTuple', ['key', 'value'])
+    large_mapping = {i: i for i in range(10000)}
+
+    # Act and Assert (ensure it does not raise any exceptions)
+
+    result = list(stream(large_mapping, LargeTuple))
+    assert len(result) == 10000
+    assert all(isinstance(item, LargeTuple) for item in result)
+    assert all(item.key == item.value for item in result)