From 27f36e05358d8f59212fc5a9ee5742d70f3528b9 Mon Sep 17 00:00:00 2001 From: leavers Date: Sat, 7 Dec 2024 23:57:13 +0800 Subject: [PATCH] build: bump version 0.0.2 -> 0.1.0 (#8) * doc: update readme --- README.md | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++- fluentmap.py | 5 ++- 2 files changed, 112 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 71c72f4..eccf68b 100644 --- a/README.md +++ b/README.md @@ -4,5 +4,113 @@ [![Package version](https://img.shields.io/pypi/v/fluentmap.svg)](https://pypi.org/project/fluentmap/) [![Python](https://img.shields.io/pypi/pyversions/fluentmap.svg)](https://pypi.org/project/fluentmap/) -Drop-in replacement for Python map with concurrency support. +Fluentmap provides a drop-in Python map replacement featuring parallel and batch +processing. +## Features + +- Use `executor` to run tasks in parallel. +- Use `batch_size`/`chunk_size` to send parameters in batches/chunks. +- Use `num_prepare` to prepare data in advance for better performance. +- Call `on_return` hook to process the return value of each task. + +## Installation + +Fluentmap is available on [PyPI](https://pypi.org/project/fluentmap/): + +```shell +pip install fluentmap +``` + +## Usage + +### Drop-in replacement + +You can start to use fluentmap just like built-in `map`: + +```python +from typing import Any, List + +from fluentmap import map + + +items: List[str] = [...] + + +def heavy_task(item: str) -> Any: + """Suppose this function represents a computationally expensive task.""" + + +def postprocessing(result: Any): + """Suppose this function represents a postprocessing task.""" + + +for result in map(heavy_task, items): + postprocessing(result) +``` + +### Parallel processing + +As `heavy_task` is a computationally expensive task, you can use `executor` to +run it in parallel. + +```python +from concurrent.futures import ProcessPoolExecutor + +from fluentmap import map + +# ...... 
+ +with ProcessPoolExecutor() as executor: + # each heavy_task invocation runs in a separate process + for result in map(heavy_task, items, executor=executor): + postprocessing(result) +``` + +### Batch/chunk processing + +You can use `batch_size`/`chunk_size` to send arguments in batches/chunks. + +The difference between them is that `batch_size` packs multiple arguments into a batch +before sending them to the function, therefore the function needs to be modified to +handle a list of arguments. + +On the other hand, `chunk_size` packs multiple arguments into chunks before passing +them to executor workers, while workers still process each argument sequentially. + +```python +from concurrent.futures import ProcessPoolExecutor +from typing import Any, List + +from fluentmap import map + + +# ...... + + +def heavy_task_in_batch(item: List[str]) -> Any: + """Note that `item` is a list since when `batch_size` is set, + fluentmap will concatenate multiple items into a batch before sending them to + the function which is to be invoked. + """ + + +# An example of using `batch_size` +with ProcessPoolExecutor() as executor: + for result in map( + heavy_task_in_batch, + items, + executor=executor, + batch_size=64, + ): + postprocessing(result) + +# An example of using `chunk_size` +with ProcessPoolExecutor() as executor: + for result in map( + heavy_task, + items, + executor=executor, + chunk_size=32, + ): + postprocessing(result) diff --git a/fluentmap.py b/fluentmap.py index fef451a..b908257 100644 --- a/fluentmap.py +++ b/fluentmap.py @@ -1,5 +1,6 @@ """ -Fluentmap is a drop-in replacement for Python map with concurrency support. +Fluentmap is a drop-in replacement for Python map featuring parallel and batch +processing. Copyright (c) 2020-2024, Leavers. License: MIT @@ -25,7 +26,7 @@ runtime_checkable, ) -__version__ = "0.0.2" +__version__ = "0.1.0" __all__ = ("Arguments", "map")