-
Notifications
You must be signed in to change notification settings - Fork 240
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[OPIK-99] Implement batching for spans creation (#298)
* Batch processing draft * Batching draft * [DRAFT] Update backend emulator for tests, update message processing to use batching * Fix lint errors * Implement _process_create_span_batch_message method in MessageProcessor * Remove assert, add debug log message * Add base batcher unit test * Make batch manager flush all batches if streamer.flush() was called, add tests for span batcher * Add tests for batch manager * Add test for flushing thread * Add one more test for batch manager * Fix lint errors * Fix lint errors * Fix bug when backend emulator didn't add feedback scores to traces * Fix lint errors * Rename flush_interval to flush_interval_seconds * Enable debug logs for e2e tests. Update debug messages in message_processors.py * Remove export statement from e2e workflow * Enable backend build. Enable debug logs for e2e tests * Update docker compose and e2e workflow file * Make batching disabled by default in Opik client. It is now enabled manually in Opik clients created under the hood of SDK * Add more unit tests for message processing * Add docstring for _use_batching parameter * Add missing _SECONDS suffix * Rename constant * Undone e2e tests infra changes --------- Co-authored-by: Andres Cruz <[email protected]>
- Loading branch information
1 parent
5f99fae
commit 939accf
Showing
22 changed files
with
623 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
49 changes: 49 additions & 0 deletions
49
sdks/python/src/opik/message_processing/batching/base_batcher.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import threading | ||
import time | ||
import abc | ||
|
||
from typing import List, Callable | ||
from .. import messages | ||
|
||
|
||
class BaseBatcher(abc.ABC): | ||
def __init__( | ||
self, | ||
flush_callback: Callable[[messages.BaseMessage], None], | ||
max_batch_size: int, | ||
flush_interval_seconds: float, | ||
): | ||
self._flush_interval_seconds: float = flush_interval_seconds | ||
self._flush_callback: Callable[[messages.BaseMessage], None] = flush_callback | ||
self._accumulated_messages: List[messages.BaseMessage] = [] | ||
self._max_batch_size: int = max_batch_size | ||
|
||
self._last_time_flush_callback_called: float = time.time() | ||
self._lock = threading.RLock() | ||
|
||
def add(self, message: messages.BaseMessage) -> None: | ||
with self._lock: | ||
self._accumulated_messages.append(message) | ||
if len(self._accumulated_messages) == self._max_batch_size: | ||
self.flush() | ||
|
||
def flush(self) -> None: | ||
with self._lock: | ||
if len(self._accumulated_messages) > 0: | ||
batch_message = self._create_batch_from_accumulated_messages() | ||
self._accumulated_messages = [] | ||
|
||
self._flush_callback(batch_message) | ||
self._last_time_flush_callback_called = time.time() | ||
|
||
def is_ready_to_flush(self) -> bool: | ||
return ( | ||
time.time() - self._last_time_flush_callback_called | ||
) >= self._flush_interval_seconds | ||
|
||
def is_empty(self) -> bool: | ||
with self._lock: | ||
return len(self._accumulated_messages) == 0 | ||
|
||
@abc.abstractmethod | ||
def _create_batch_from_accumulated_messages(self) -> messages.BaseMessage: ... |
41 changes: 41 additions & 0 deletions
41
sdks/python/src/opik/message_processing/batching/batch_manager.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from typing import Type, Dict | ||
from .. import messages | ||
from . import base_batcher | ||
from . import flushing_thread | ||
|
||
|
||
class BatchManager: | ||
def __init__( | ||
self, | ||
message_to_batcher_mapping: Dict[ | ||
Type[messages.BaseMessage], base_batcher.BaseBatcher | ||
], | ||
) -> None: | ||
self._message_to_batcher_mapping = message_to_batcher_mapping | ||
self._flushing_thread = flushing_thread.FlushingThread( | ||
batchers=list(self._message_to_batcher_mapping.values()) | ||
) | ||
|
||
def start(self) -> None: | ||
self._flushing_thread.start() | ||
|
||
def stop(self) -> None: | ||
self._flushing_thread.close() | ||
|
||
def message_supports_batching(self, message: messages.BaseMessage) -> bool: | ||
return message.__class__ in self._message_to_batcher_mapping | ||
|
||
def process_message(self, message: messages.BaseMessage) -> None: | ||
self._message_to_batcher_mapping[type(message)].add(message) | ||
|
||
def is_empty(self) -> bool: | ||
return all( | ||
[ | ||
batcher.is_empty() | ||
for batcher in self._message_to_batcher_mapping.values() | ||
] | ||
) | ||
|
||
def flush(self) -> None: | ||
for batcher in self._message_to_batcher_mapping.values(): | ||
batcher.flush() |
29 changes: 29 additions & 0 deletions
29
sdks/python/src/opik/message_processing/batching/batch_manager_constuctors.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import queue | ||
from typing import Type, Dict | ||
|
||
from .. import messages | ||
|
||
from . import base_batcher | ||
from . import create_span_message_batcher | ||
from . import batch_manager | ||
|
||
CREATE_SPANS_MESSAGE_BATCHER_FLUSH_INTERVAL_SECONDS = 1.0 | ||
CREATE_SPANS_MESSAGE_BATCHER_MAX_BATCH_SIZE = 1000 | ||
|
||
|
||
def create_batch_manager(message_queue: queue.Queue) -> batch_manager.BatchManager: | ||
create_span_message_batcher_ = create_span_message_batcher.CreateSpanMessageBatcher( | ||
flush_interval_seconds=CREATE_SPANS_MESSAGE_BATCHER_FLUSH_INTERVAL_SECONDS, | ||
max_batch_size=CREATE_SPANS_MESSAGE_BATCHER_MAX_BATCH_SIZE, | ||
flush_callback=message_queue.put, | ||
) | ||
|
||
MESSAGE_TO_BATCHER_MAPPING: Dict[ | ||
Type[messages.BaseMessage], base_batcher.BaseBatcher | ||
] = {messages.CreateSpanMessage: create_span_message_batcher_} | ||
|
||
batch_manager_ = batch_manager.BatchManager( | ||
message_to_batcher_mapping=MESSAGE_TO_BATCHER_MAPPING | ||
) | ||
|
||
return batch_manager_ |
9 changes: 9 additions & 0 deletions
9
sdks/python/src/opik/message_processing/batching/create_span_message_batcher.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from . import base_batcher | ||
from .. import messages | ||
|
||
|
||
class CreateSpanMessageBatcher(base_batcher.BaseBatcher): | ||
def _create_batch_from_accumulated_messages( | ||
self, | ||
) -> messages.CreateSpansBatchMessage: | ||
return messages.CreateSpansBatchMessage(batch=self._accumulated_messages) # type: ignore |
30 changes: 30 additions & 0 deletions
30
sdks/python/src/opik/message_processing/batching/flushing_thread.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import threading | ||
import time | ||
from typing import List | ||
|
||
from . import base_batcher | ||
|
||
|
||
class FlushingThread(threading.Thread): | ||
def __init__( | ||
self, | ||
batchers: List[base_batcher.BaseBatcher], | ||
probe_interval_seconds: float = 0.1, | ||
) -> None: | ||
threading.Thread.__init__(self, daemon=True) | ||
self._batchers = batchers | ||
self._probe_interval_seconds = probe_interval_seconds | ||
self._closed = False | ||
|
||
def close(self) -> None: | ||
for batcher in self._batchers: | ||
batcher.flush() | ||
|
||
self._closed = True | ||
|
||
def run(self) -> None: | ||
while not self._closed: | ||
for batcher in self._batchers: | ||
if batcher.is_ready_to_flush(): | ||
batcher.flush() | ||
time.sleep(self._probe_interval_seconds) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.