Skip to content

Commit

Permalink
add CategoryCounter
Browse files Browse the repository at this point in the history
  • Loading branch information
erivlis committed Jul 25, 2024
1 parent 887a046 commit 5818c2c
Show file tree
Hide file tree
Showing 3 changed files with 233 additions and 3 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,26 @@ print(unwrapped_data)
# Output: [{'key': 'key1', 'value': [{'key': 'subkey', 'value': 'value'}]}, {'key': 'key2', 'value': ['item1', 'item2']}]
```

### `CategoryCounter`

The `CategoryCounter` class extends `dict` to count occurrences of data items under one or more named categories.
It maintains a total count of all data items, and each category can be given either as a direct value or as a function that derives the value from the data.

```python
from mappingtools import CategoryCounter

counter = CategoryCounter()

for fruit in ['apple', 'banana', 'apple']:
    counter.update({fruit: 1}, type='fruit', char_count=len(fruit), unique_char_count=len(set(fruit)))

print(counter.total)
# Output: Counter({'apple': 2, 'banana': 1})

print(counter)
# Output: CategoryCounter({'type': defaultdict(<class 'collections.Counter'>, {'fruit': Counter({'apple': 2, 'banana': 1})}), 'char_count': defaultdict(<class 'collections.Counter'>, {5: Counter({'apple': 2}), 6: Counter({'banana': 1})}), 'unique_char_count': defaultdict(<class 'collections.Counter'>, {4: Counter({'apple': 2}), 3: Counter({'banana': 1})})})
```

### `MappingCollector`

A class designed to collect key-value pairs into an internal mapping,
Expand Down
37 changes: 34 additions & 3 deletions src/mappingtools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import dataclasses
import inspect
from collections import defaultdict
from collections import Counter, defaultdict
from collections.abc import Callable, Generator, Iterable, Mapping
from enum import Enum, auto
from itertools import chain
Expand All @@ -11,6 +11,37 @@
VT = TypeVar('VT')
VT_co = TypeVar('VT_co')

Category = TypeVar('Category', bound=str | tuple | int | float)


class CategoryCounter(dict[str, defaultdict[Category, Counter]]):

def __init__(self):
super().__init__()
self.total = Counter()

def __repr__(self):
return f"CategoryCounter({super().__repr__()})"

def update(self, data, **categories: Category | Callable[[Any], Category]):
"""
Updates a CategoryCounter object with data and corresponding categories.
Parameters:
data: Any - The data to update the counter with (see Counter update method documentation).
**categories: Category | Callable[[Any], Category] - categories to associate the data with.
The categories can be either a direct value or a function that extracts the category from the data.
Returns:
None
"""
self.total.update(data)
for category_name, category_value in categories.items():
category_value = category_value(data) if callable(category_value) else category_value
if category_name not in self:
self[category_name] = defaultdict(Counter)
self[category_name][category_value].update(data)


class MappingCollectorMode(Enum):
"""
Expand Down Expand Up @@ -283,5 +314,5 @@ def _unwrap_class(obj):
return [{'key': k, 'value': unwrap(v)} for k, v in inspect.getmembers(obj) if not k.startswith('_')]


# Public API: the names exported by `from mappingtools import *`.
# NOTE(review): this view shows two assignments back to back (it looks like the
# before/after pair of a diff); at runtime only the last assignment takes effect.
__all__ = ('dictify', 'distinct', 'keep', 'inverse', 'nested_defaultdict', 'remove', 'unwrap', 'MappingCollector',
'MappingCollectorMode')
__all__ = ('dictify', 'distinct', 'keep', 'inverse', 'nested_defaultdict', 'remove', 'unwrap', 'Category',
'CategoryCounter', 'MappingCollector', 'MappingCollectorMode')
179 changes: 179 additions & 0 deletions tests/test_category_counter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
# Generated by CodiumAI
from collections import Counter

import pytest
from mappingtools import CategoryCounter


# Initialize CategoryCounter and update with a list of items
def test_initialize_and_update_with_list():
    """A plain list passed to ``update`` is tallied into ``total``."""
    tally = CategoryCounter()

    tally.update(['apple', 'banana', 'apple'])

    expected_total = Counter(apple=2, banana=1)
    assert tally.total == expected_total


# Categorize items using direct category values
def test_categorize_with_direct_category_values():
    """Category values given directly bucket each item under that value."""
    tally = CategoryCounter()
    names = ('apple', 'apricot', 'banana', 'cherry', 'pear', 'pineapple', 'plum', 'banana')

    for name in names:
        length = len(name)
        distinct_letters = len(set(name))
        tally.update({name: 1}, char_count=length, unique_char_count=distinct_letters)

    expected_total = Counter(
        banana=2, apple=1, apricot=1, cherry=1, pear=1, pineapple=1, plum=1,
    )
    expected_by_length = {
        4: Counter(pear=1, plum=1),
        5: Counter(apple=1),
        6: Counter(banana=2, cherry=1),
        7: Counter(apricot=1),
        9: Counter(pineapple=1),
    }
    expected_by_distinct = {
        3: Counter(banana=2),
        4: Counter(apple=1, pear=1, plum=1),
        5: Counter(cherry=1),
        6: Counter(pineapple=1),
        7: Counter(apricot=1),
    }

    assert tally.total == expected_total
    assert tally == {
        'char_count': expected_by_length,
        'unique_char_count': expected_by_distinct,
    }


# Categorize items using functions to determine categories
def test_categorize_with_functions():
    """Callable categories are invoked on the data to derive the category value."""

    def only_key(mapping):
        # Each update below passes a single-entry mapping; grab its key.
        return next(iter(mapping))

    def char_count(mapping):
        return len(only_key(mapping))

    def unique_char_count(mapping):
        return len(set(only_key(mapping)))

    tally = CategoryCounter()
    names = ('apple', 'apricot', 'banana', 'cherry', 'pear', 'pineapple', 'plum', 'banana')

    for name in names:
        tally.update({name: 1}, char_count=char_count, unique_char_count=unique_char_count)

    expected_total = Counter(
        banana=2, apple=1, apricot=1, cherry=1, pear=1, pineapple=1, plum=1,
    )
    expected_by_length = {
        4: Counter(pear=1, plum=1),
        5: Counter(apple=1),
        6: Counter(banana=2, cherry=1),
        7: Counter(apricot=1),
        9: Counter(pineapple=1),
    }
    expected_by_distinct = {
        3: Counter(banana=2),
        4: Counter(apple=1, pear=1, plum=1),
        5: Counter(cherry=1),
        6: Counter(pineapple=1),
        7: Counter(apricot=1),
    }

    assert tally.total == expected_total
    assert tally == {
        'char_count': expected_by_length,
        'unique_char_count': expected_by_distinct,
    }


# Retrieve counts for specific categories
def test_retrieve_counts_for_specific_categories():
    """Counts recorded under a category value can be read back by indexing."""
    tally = CategoryCounter()

    tally.update(['apple', 'banana', 'apple'], type='fruit')

    fruit_counts = tally['type']['fruit']
    assert fruit_counts == Counter(apple=2, banana=1)


# Update with an empty list
def test_update_with_empty_list():
    """Updating with no items leaves ``total`` empty."""
    tally = CategoryCounter()

    nothing = []
    tally.update(nothing)

    assert tally.total == Counter()


# Provide static category
def test_categories_not_matching_any_items():
    """Looking up a category value that was never recorded yields an empty Counter."""
    tally = CategoryCounter()
    items = ['apple', 'banana', 'pear']

    tally.update(items, type='fruit')

    # 'car' was never used as a 'type' value, so its bucket must be empty.
    unseen_bucket = tally['type']['car']
    assert unseen_bucket == Counter()


# Update with mixed data types
def test_update_with_mixed_data_types():
    """Items of different hashable types are counted together in ``total``."""
    tally = CategoryCounter()
    mixed_items = ['apple', 1, 2.5, 1, 1]

    tally.update(mixed_items)

    expected_total = Counter({'apple': 1, 1: 3, 2.5: 1})
    assert tally.total == expected_total


# Verify initialization without any updates
def test_initialization_without_updates():
    """A freshly constructed counter starts with an empty ``total``."""
    fresh = CategoryCounter()

    empty = Counter()
    assert fresh.total == empty


# Check behavior with nested data structures
def test_nested_data_structures_behavior():
    """Unhashable items (dicts) cannot be counted, so ``update`` raises ``TypeError``."""
    tally = CategoryCounter()
    records = [{'name': 'apple'}, {'name': 'banana'}, {'name': 'apple'}]

    with pytest.raises(TypeError):
        tally.update(records)


# Correctly formats the string representation of CategoryCounter
def test_correct_formatting():
    """The repr of an empty counter is the class name wrapping an empty dict repr."""
    rendered = repr(CategoryCounter())

    assert rendered == "CategoryCounter({})"

0 comments on commit 5818c2c

Please sign in to comment.