Skip to content

Commit

Permalink
noop
Browse files Browse the repository at this point in the history
  • Loading branch information
p9f committed Nov 14, 2023
0 parents commit 41f863d
Show file tree
Hide file tree
Showing 17 changed files with 941 additions and 0 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# CI workflow: lint, format-check, type-check and test the package on every
# push and pull request.
name: test

permissions:
  contents: read

on:
  - pull_request
  - push


jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Install poetry
        run: curl -sSL https://install.python-poetry.org | python3 -
        env:
          POETRY_VERSION: 1.7.0
      # The official installer (install.python-poetry.org) puts the `poetry`
      # launcher in $HOME/.local/bin; $HOME/.poetry/bin was the location used
      # by the retired get-poetry.py script.
      - name: Add Poetry to path
        run: echo "${HOME}/.local/bin" >> $GITHUB_PATH
      # Poetry must already be installed for `cache: "poetry"` to work.
      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
          cache: "poetry"
      - name: Install Poetry Packages
        run: |
          poetry env use "3.11"
          poetry install --only dev
      # Expose the virtualenv's executables (ruff, mypy, pytest, pip) to the
      # following steps.
      - name: Add venv to path
        run: echo `poetry env info --path`/bin/ >> $GITHUB_PATH

      - run: ruff check --output-format github .
      - run: ruff format --check .
      - run: mypy .
      - run: |
          pip install .
          pytest -s
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Bright Network

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
## `iter_pipes`: Iterable Pipes

Functional pythonic pipelines for iterables

[Documentation](./tests/docs/)
1 change: 1 addition & 0 deletions iter_pipes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .main import * # noqa
250 changes: 250 additions & 0 deletions iter_pipes/functional.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
from __future__ import annotations

import math
from collections import deque
from collections.abc import Callable, Iterable, Iterator
from functools import partial
from itertools import count, groupby
from typing import Any, Generic, Literal, TypeGuard, TypeVar, overload

# Names exported by `from <this module> import *`. Note that `map` and `filter`
# deliberately share their names with the builtins, and that helpers defined in
# this module but not listed here (`flatten`, `identity`, `IteratorWrapper`)
# are not part of the star-export.
__all__ = [
"map",
"filter",
"for_each",
"for_batch",
"for_all",
"batch",
"fork_forget",
"not_none",
"batch_where",
"fork",
]

# Type variables shared by the step signatures in this module.
T_contra = TypeVar("T_contra", contravariant=True)
V_co = TypeVar("V_co", covariant=True)
T = TypeVar("T")
U = TypeVar("U")
W = TypeVar("W")


# Keep a reference to the builtin `filter`, because this module defines its own
# function named `filter` further down.
raw_filter = filter


# A "step" is any callable that takes an iterable and returns an iterable.
Step = Callable[[Iterable[T_contra]], Iterable[V_co]]


def not_none(item: T | None) -> TypeGuard[T]:
return item is not None


def map(step: Callable[[V_co], W]) -> Step[V_co, W]:
    """Build a step that lazily applies *step* to every item.

    The returned callable takes an iterable and yields ``step(item)`` for each
    item, in order. Nothing is computed until the result is iterated.
    """

    def apply_to_each(data: Iterable[V_co]) -> Iterable[W]:
        yield from (step(element) for element in data)

    return apply_to_each


def for_each(step: Callable[[V_co], Any]) -> Step[V_co, V_co]:
    """Build a step that calls *step* on each item for its side effect.

    Every item is passed to *step* (whose return value is ignored) and then
    yielded unchanged, so the stream itself is not modified.
    """

    def observe(element: V_co) -> V_co:
        # Run the side effect first, then hand the very same element back.
        step(element)
        return element

    def tap(data: Iterable[V_co]) -> Iterable[V_co]:
        yield from (observe(element) for element in data)

    return tap


def for_batch(step: Callable[[list[V_co]], Any], batch_size: int) -> Step[V_co, V_co]:
    """Build a step that calls *step* once per batch, for its side effect.

    Incoming items are grouped, in order, into lists of at most *batch_size*
    items (the last list may be shorter). *step* is called with each list and
    its return value is ignored; the items of the list are then yielded
    unchanged.

    Raises:
        ValueError: if *batch_size* is smaller than 1. Previously a value of 0
            failed with ``ZeroDivisionError`` only once iteration started, and
            a negative value silently produced mis-sized batches.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size!r}")

    def f(data: Iterable[V_co]) -> Iterable[V_co]:
        # Consecutive items share a group key while index // batch_size is
        # constant, which is exactly one batch.
        for _, indexed_items in groupby(
            enumerate(data),
            key=lambda pair: pair[0] // batch_size,
        ):
            batch = [item for _, item in indexed_items]
            step(batch)
            yield from batch

    return f


def for_all(f: Step[V_co, Any]) -> Step[V_co, V_co]:
    """Build a step that feeds the stream to *f* while passing items through.

    This is ``fork_forget(f)`` with its default ``max_inflight``: *f* receives
    the items as an iterable, and the returned step yields the original items.
    """
    side_branch = fork_forget(f)
    return side_branch


def batch(step: Callable[[list[V_co]], Iterable[U]], batch_size: int) -> Step[V_co, U]:
    """Build a step that transforms the stream one batch at a time.

    Incoming items are grouped, in order, into lists of at most *batch_size*
    items (the last list may be shorter). *step* is called with each list and
    everything in the iterable it returns is yielded.

    Raises:
        ValueError: if *batch_size* is smaller than 1. Previously a value of 0
            failed with ``ZeroDivisionError`` only once iteration started, and
            a negative value silently produced mis-sized batches.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size!r}")

    def f(data: Iterable[V_co]) -> Iterable[U]:
        # Consecutive items share a group key while index // batch_size is
        # constant, which is exactly one batch.
        for _, indexed_items in groupby(
            enumerate(data),
            key=lambda pair: pair[0] // batch_size,
        ):
            yield from step([item for _, item in indexed_items])

    return f


@overload
def filter(step: Callable[[V_co], TypeGuard[W]]) -> Step[V_co, W]: ...


@overload
def filter(step: Callable[[V_co], bool]) -> Step[V_co, V_co]: ...


def filter(step: Callable[[V_co], bool]) -> Step[V_co, V_co]:  # type: ignore
    """Build a step that keeps only the items for which *step* is truthy.

    The result is the builtin ``filter`` (kept as ``raw_filter``) with *step*
    already bound as its predicate. The first overload lets a ``TypeGuard``
    predicate narrow the item type of the resulting step.
    """
    keep_matching = partial(raw_filter, step)
    return keep_matching  # type: ignore


def batch_where(
    step: Callable[[list[V_co]], Iterable[U]],
    where: Callable[[V_co], bool],
    batch_size: int,
) -> Step[V_co, U | V_co]:
    """Build a step that batch-processes only the items matching *where*.

    Items for which ``where(item)`` is falsy are yielded immediately and
    unchanged. Matching items are buffered; each time the buffer holds
    *batch_size* items it is passed to *step* as a list and everything in the
    iterable *step* returns is yielded. Whatever is still buffered when the
    input ends is passed to *step* as a final, possibly shorter, batch.

    Because matching items are held back until their batch is complete, they
    may be emitted after non-matching items that arrived later.
    """

    def f(data: Iterable[V_co]) -> Iterable[U | V_co]:
        buffer: list[V_co] = []

        for item in data:
            if not where(item):
                yield item
                continue

            buffer.append(item)
            # `>=` so a batch holds exactly batch_size items, not one more.
            if len(buffer) >= batch_size:
                full_batch, buffer = buffer, []
                yield from step(full_batch)

        # Flush the tail; without this, trailing matching items were lost.
        if buffer:
            yield from step(buffer)

    return f


def flatten(iterable: Iterable[Iterable[T_contra]]) -> Iterable[T_contra]:
    """Yield every element of every inner iterable, in order (one level deep)."""
    for inner in iterable:
        for element in inner:
            yield element


class IteratorWrapper(Generic[W]):
    """Iterator that drains a deque and asks for a refill when it is empty.

    ``queue`` is the deque items are popped from (left side first).
    ``consume_next`` is called with no arguments whenever ``__next__`` finds
    the deque empty; it is expected to put at least one item into the deque
    or to raise (for example ``StopIteration``) to end the iteration.
    """

    def __init__(
        self,
        queue: deque[W],
        consume_next: Callable[[], Any],
    ):
        self._consume_next = consume_next
        self._queue = queue

    def __iter__(self) -> Iterator[W]:
        return self

    def __next__(self) -> W:
        if len(self._queue) == 0:
            # Any exception raised by the refill callback propagates as-is.
            self._consume_next()
        return self._queue.popleft()


@overload
def fork(
    step1: Step[T_contra, U] | None,
    step2: Step[T_contra, V_co],
    max_inflight: int | None,
    pick_first: Literal[True],
) -> Step[T_contra, U]:
    ...


@overload
def fork(
    step1: Step[T_contra, U] | None,
    step2: Step[T_contra, V_co],
    pick_first: Literal[True],
) -> Step[T_contra, U]:
    ...


@overload
def fork(
    step1: Step[T_contra, U],
    step2: Step[T_contra, V_co],
    max_inflight: int | None,
    pick_first: Literal[False] | None,
) -> Step[T_contra, V_co | U]:
    ...


@overload
def fork(
    step1: Step[T_contra, U],
    step2: Step[T_contra, V_co],
    step3: Step[T_contra, W],
    max_inflight: int | None,
    pick_first: Literal[False] | None,
) -> Step[T_contra, V_co | U | W]:
    ...


@overload
def fork(
    *steps: Step[T_contra, Any] | None,
    max_inflight: int | None,
    pick_first: Literal[False] | None,
) -> Step[T_contra, Any]:
    ...


def fork(  # type: ignore
    *steps: Step[T_contra, Any] | None,
    max_inflight: int = 1000,
    pick_first: bool = False,
) -> Step[T_contra, Any]:
    """Build a step that feeds one input stream to several branches.

    Every item read from the input is appended to one queue per running
    branch. Each branch is ``steps[i]`` applied to an iterator over its own
    queue; a ``None`` step is replaced by ``identity``, so that branch simply
    replays the input. The outputs of the branches are interleaved into a
    single stream, always advancing the running branch with the most queued
    items.

    Args:
        *steps: one step per branch, or ``None`` for a pass-through branch.
        max_inflight: when the total number of queued items exceeds this
            value after a read, the reading branch is paused: its input
            iterator raises ``StopIteration`` and the step is invoked again
            on the same queue once its current iterator ends. A step
            therefore sees its input in several chunks when pauses occur.
        pick_first: when true, only the output of the first branch
            (index 0) is yielded; the other branches are still run, but
            their output is discarded.

    Returns:
        A step that takes the input iterable and yields the merged output.
    """
    # NOTE(review): the overloads above advertise ``max_inflight: int | None``,
    # but ``None`` cannot be compared to the in-flight count below. Confirm
    # the intended meaning of ``None`` before supporting it.

    def f(iterable: Iterable[T_contra]) -> Iterable[Any]:
        queues: list[deque] = [deque() for _ in steps]
        it = iter(iterable)
        paused_iterators: set[int] = set()
        # Branch indices that are still running. Indices are never
        # renumbered, so `steps`, `queues`, `iterators`, `paused_iterators`
        # and the `pick_first` check always refer to the same branch.
        active: list[int] = list(range(len(steps)))

        def consume_next(i: int) -> Callable[[], None]:
            def wrapper() -> None:
                # StopIteration from an exhausted input propagates as-is.
                val = next(it)
                # Finished branches never read again, so don't feed them.
                for j in active:
                    queues[j].append(val)
                nb_inflights = sum(len(queues[j]) for j in active)
                if nb_inflights > max_inflight:
                    # Too much buffered: end this branch's current input
                    # chunk so that the other branches get to catch up.
                    paused_iterators.add(i)
                    raise StopIteration

            return wrapper

        def start(i: int) -> Iterator[Any]:
            # (Re)invoke branch i's step on an iterator over its queue.
            return iter(
                (steps[i] or identity)(IteratorWrapper(queues[i], consume_next(i)))
            )

        iterators = [start(i) for i in active]

        while active:
            # The running branch with the most inflight items goes first;
            # ties go to the lowest index.
            i = max(active, key=lambda j: len(queues[j]))
            try:
                val = next(iterators[i])
            except StopIteration:
                if i in paused_iterators:  # resume the iterator
                    # Clear the flag before restarting, so that a pause hit
                    # while the step is being restarted is not lost.
                    paused_iterators.remove(i)
                    iterators[i] = start(i)
                else:
                    # The branch is finished for good; drop its backlog.
                    active.remove(i)
                    queues[i].clear()
            else:
                if not (i and pick_first):
                    yield val

    return f


def identity(item: W) -> W:
    """Return *item* itself, unchanged."""
    unchanged = item
    return unchanged


def fork_forget(step: Step[U, Any], max_inflight: int = 3) -> Step[U, U]:
    """Build a step that runs *step* on the side and passes items through.

    The stream is forked into a pass-through branch and a branch running
    *step*, with ``pick_first=True`` so that only the pass-through branch's
    items are yielded. ``max_inflight`` is forwarded to ``fork``.
    """

    def passthrough(data: Iterable[U]) -> Iterable[U]:
        forked = fork(None, step, pick_first=True, max_inflight=max_inflight)
        yield from forked(data)

    return passthrough
Loading

0 comments on commit 41f863d

Please sign in to comment.