diff --git a/examples/python-operator-dataflow/README.md b/examples/python-operator-dataflow/README.md index 3c4bd335d..e3b6eb0f3 100644 --- a/examples/python-operator-dataflow/README.md +++ b/examples/python-operator-dataflow/README.md @@ -13,21 +13,144 @@ The [`dataflow.yml`](./dataflow.yml) defines a simple dataflow graph with the fo ## Getting started ```bash -cargo run --example python-dataflow +cargo run --example python-operator-dataflow ``` ## Installation -To install, you should run the `install.sh` script. +```bash +conda create -n example_env python=3.12 +pip install -r requirements.txt +``` + +## Run the dataflow + +- Start the object detection dataflow alone: + +```bash +dora start dataflow.yml +``` + +- Start the llm dataflow: ```bash -install.sh +dora start dataflow_llm.yml ``` -## Run the dataflow as a standalone +Within the window, you can ask questions such as: -- Start the `dora-coordinator`: +```bash +ask how are you +change bounding box plot to red +change confidence value to percentage +change object detection to only detect person +send 200 200 200 400 to topic line +record +``` +```bash +wget https://raw.githubusercontent.com/dora-rs/dora/v0.3.2/examples/python-operator-dataflow/keyboard_op.py +wget https://raw.githubusercontent.com/dora-rs/dora/v0.3.2/examples/python-operator-dataflow/microphone_op.py +wget https://raw.githubusercontent.com/dora-rs/dora/v0.3.2/examples/python-operator-dataflow/whisper_op.py +wget https://raw.githubusercontent.com/dora-rs/dora/v0.3.2/examples/python-operator-dataflow/sentence_transformers_op.py +wget https://raw.githubusercontent.com/dora-rs/dora/v0.3.2/examples/python-operator-dataflow/llm_op.py +wget https://raw.githubusercontent.com/dora-rs/dora/v0.3.2/examples/python-operator-dataflow/file_saver_op.py ``` -../../target/release/dora-daemon --run-dataflow dataflow.yml + +Then add the following to the dataflow configuration: + +```yaml +nodes: + - id: webcam + operator: + python: webcam.py + inputs: + tick: 
dora/timer/millis/50 + outputs: + - image + + - id: object_detection + operator: + python: object_detection.py + inputs: + image: webcam/image + outputs: + - bbox + + - id: plot + operator: + python: plot.py + inputs: + image: webcam/image + bbox: object_detection/bbox + line: llm/line + keyboard_buffer: keyboard/buffer + user_message: keyboard/submitted + assistant_message: llm/assistant_message + + ## Speech to text + - id: keyboard + custom: + source: keyboard_op.py + outputs: + - buffer + - submitted + - record + - ask + - send + - change + inputs: + recording: whisper/text + + - id: microphone + operator: + python: microphone_op.py + inputs: + record: keyboard/record + outputs: + - audio + + - id: whisper + operator: + python: whisper_op.py + inputs: + audio: microphone/audio + outputs: + - text + + ## Code Modifier + - id: vectordb + operator: + python: sentence_transformers_op.py + inputs: + query: keyboard/change + saved_file: file_saver/saved_file + outputs: + - raw_file + + - id: llm + operator: + python: llm_op.py + inputs: + code_modifier: vectordb/raw_file + assistant: keyboard/ask + message_sender: keyboard/send + outputs: + - modified_file + - line + - assistant_message + + - id: file_saver + operator: + python: file_saver_op.py + inputs: + file: llm/modified_file + outputs: + - saved_file ``` + +The keyboard, microphone, and whisper nodes work in a very similar fashion to the object detection dataflow, and I'll let you check them out by yourself. + +The code modification flow works by first comparing an instruction with a vectordb of operators' source code and then feeding the most similar operator to an LLM with the instruction for code modification. 
+ +The end result is then saved using the file saver diff --git a/examples/python-operator-dataflow/dataflow.yml b/examples/python-operator-dataflow/dataflow.yml index 92bf5f2b3..400f881f5 100644 --- a/examples/python-operator-dataflow/dataflow.yml +++ b/examples/python-operator-dataflow/dataflow.yml @@ -9,11 +9,13 @@ nodes: - id: object_detection operator: + send_stdout_as: stdout python: object_detection.py inputs: image: webcam/image outputs: - bbox + - stdout - id: plot operator: @@ -21,3 +23,4 @@ nodes: inputs: image: webcam/image bbox: object_detection/bbox + assistant_message: object_detection/stdout diff --git a/examples/python-operator-dataflow/dataflow_llm.yml b/examples/python-operator-dataflow/dataflow_llm.yml index 6d3be9a58..faf978af4 100644 --- a/examples/python-operator-dataflow/dataflow_llm.yml +++ b/examples/python-operator-dataflow/dataflow_llm.yml @@ -37,6 +37,24 @@ nodes: - ask - send - change + inputs: + recording: whisper/text + + - id: microphone + operator: + python: microphone_op.py + inputs: + record: keyboard/record + outputs: + - audio + + - id: whisper + operator: + python: whisper_op.py + inputs: + audio: microphone/audio + outputs: + - text ## Code Modifier - id: vectordb diff --git a/examples/python-operator-dataflow/dataflow_record.yml b/examples/python-operator-dataflow/dataflow_record.yml deleted file mode 100644 index faf978af4..000000000 --- a/examples/python-operator-dataflow/dataflow_record.yml +++ /dev/null @@ -1,87 +0,0 @@ -nodes: - - id: webcam - operator: - python: webcam.py - inputs: - tick: dora/timer/millis/50 - outputs: - - image - - - id: object_detection - operator: - python: object_detection.py - inputs: - image: webcam/image - outputs: - - bbox - - - id: plot - operator: - python: plot.py - inputs: - image: webcam/image - bbox: object_detection/bbox - line: llm/line - keyboard_buffer: keyboard/buffer - user_message: keyboard/submitted - assistant_message: llm/assistant_message - - ## Speech to text - - id: 
keyboard - custom: - source: keyboard_op.py - outputs: - - buffer - - submitted - - record - - ask - - send - - change - inputs: - recording: whisper/text - - - id: microphone - operator: - python: microphone_op.py - inputs: - record: keyboard/record - outputs: - - audio - - - id: whisper - operator: - python: whisper_op.py - inputs: - audio: microphone/audio - outputs: - - text - - ## Code Modifier - - id: vectordb - operator: - python: sentence_transformers_op.py - inputs: - query: keyboard/change - saved_file: file_saver/saved_file - outputs: - - raw_file - - - id: llm - operator: - python: llm_op.py - inputs: - code_modifier: vectordb/raw_file - assistant: keyboard/ask - message_sender: keyboard/send - outputs: - - modified_file - - line - - assistant_message - - - id: file_saver - operator: - python: file_saver_op.py - inputs: - file: llm/modified_file - outputs: - - saved_file \ No newline at end of file diff --git a/examples/python-operator-dataflow/keyboard_op.py b/examples/python-operator-dataflow/keyboard_op.py index 79a1cd4c3..2d179ac63 100644 --- a/examples/python-operator-dataflow/keyboard_op.py +++ b/examples/python-operator-dataflow/keyboard_op.py @@ -2,8 +2,6 @@ from pynput.keyboard import Key, Events import pyarrow as pa from dora import Node -from tkinter import Tk -import tkinter as tk node = Node() @@ -30,35 +28,8 @@ if event is not None and isinstance(event, Events.Press): if hasattr(event.key, "char"): cursor = 0 - if ctrl and event.key.char == "v": - r = Tk() - r.update() - try: - selection = r.clipboard_get() - r.withdraw() - r.update() - except tk.TclError: - selection = "" - r.destroy() - buffer_text += selection - node.send_output("buffer", pa.array([buffer_text])) - elif ctrl and event.key.char == "c": - r = Tk() - r.clipboard_clear() - r.clipboard_append(buffer_text) - r.update() - r.destroy() - elif ctrl and event.key.char == "x": - r = Tk() - r.clipboard_clear() - r.clipboard_append(buffer_text) - r.update() - r.destroy() - 
buffer_text = "" - node.send_output("buffer", pa.array([buffer_text])) - else: - buffer_text += event.key.char - node.send_output("buffer", pa.array([buffer_text])) + buffer_text += event.key.char + node.send_output("buffer", pa.array([buffer_text])) else: if event.key == Key.backspace: buffer_text = buffer_text[:-1] diff --git a/examples/python-operator-dataflow/llm_op.py b/examples/python-operator-dataflow/llm_op.py index 2d0510962..5f5957688 100644 --- a/examples/python-operator-dataflow/llm_op.py +++ b/examples/python-operator-dataflow/llm_op.py @@ -33,10 +33,7 @@ The response should look like this: ```json - - [ - {{ "topic": "line", "data": [10, 10, 90, 10] }}, -] + {{ "topic": "line", "data": [10, 10, 90, 10] }} ``` {user_message} @@ -83,6 +80,8 @@ def extract_python_code_blocks(text): matches = re.findall(pattern, text, re.DOTALL) if len(matches) == 0: return [text] + else: + matches = [remove_last_line(matches[0])] return matches @@ -172,7 +171,6 @@ def replace_code_in_source(source_code, replacement_block: str): Replace the best matching block in the source_code with the replacement_block, considering variable block lengths. 
""" replacement_block = extract_python_code_blocks(replacement_block)[0] - replacement_block = remove_last_line(replacement_block) start_index, end_index = find_best_match_location(source_code, replacement_block) if start_index != -1 and end_index != -1: # Replace the best matching part with the replacement block @@ -232,23 +230,20 @@ def on_event( ) outputs = extract_json_code_blocks(output)[0] try: - outputs = json.loads(outputs) - if not isinstance(outputs, list): - outputs = [outputs] - for output in outputs: - if not isinstance(output["data"], list): - output["data"] = [output["data"]] - - if output["topic"] in [ - "line", - ]: - send_output( - output["topic"], - pa.array(output["data"]), - dora_event["metadata"], - ) - else: - print("Could not find the topic: {}".format(output["topic"])) + output = json.loads(outputs) + if not isinstance(output["data"], list): + output["data"] = [output["data"]] + + if output["topic"] in [ + "line", + ]: + send_output( + output["topic"], + pa.array(output["data"]), + dora_event["metadata"], + ) + else: + print("Could not find the topic: {}".format(output["topic"])) except: print("Could not parse json") # if data is not iterable, put data in a list @@ -270,7 +265,7 @@ def ask_llm(self, prompt): input_ids = input.input_ids.cuda() # add attention mask here - attention_mask = input["attention_mask"] + attention_mask = input["attention_mask"].cuda() output = model.generate( inputs=input_ids, diff --git a/examples/python-operator-dataflow/plot.py b/examples/python-operator-dataflow/plot.py index 180af6fa4..c7b0a0bed 100755 --- a/examples/python-operator-dataflow/plot.py +++ b/examples/python-operator-dataflow/plot.py @@ -1,8 +1,17 @@ +import os import cv2 - +import time from dora import DoraStatus -from utils import LABELS, put_text, CAMERA_HEIGHT, CAMERA_WIDTH, FONT, CI +from utils import LABELS + + +CI = os.environ.get("CI") + +CAMERA_WIDTH = 640 +CAMERA_HEIGHT = 480 + +FONT = cv2.FONT_HERSHEY_SIMPLEX class Operator: @@ -50,26 
+59,34 @@ def on_event( f"{LABELS[int(label)]}, {confidence:0.2f}", (int(max_x), int(max_y)), FONT, - 0.45, + 0.5, (0, 255, 0), - 2, - 1, ) - put_text( - image, - self.buffer, - (20, 12 * 25), - (190, 250, 0), + cv2.putText( + image, self.buffer, (20, 14 + 21 * 14), FONT, 0.5, (190, 250, 0), 1 ) - for i, text in enumerate(self.submitted[::-1]): - put_text( + i = 0 + for text in self.submitted[::-1]: + color = ( + (0, 255, 190) + if text["role"] == "user_message" + else (0, 190, 255) + ) + cv2.putText( image, text["content"], - (20, 25 + (10 - i) * 25), - (0, 255, 190), + ( + 20, + 14 + (19 - i) * 14, + ), + FONT, + 0.5, + color, + 1, ) + i += 1 for line in self.lines: cv2.line( diff --git a/examples/python-operator-dataflow/requirements.txt b/examples/python-operator-dataflow/requirements.txt index 4f0330c64..3fb39579d 100644 --- a/examples/python-operator-dataflow/requirements.txt +++ b/examples/python-operator-dataflow/requirements.txt @@ -46,8 +46,12 @@ seaborn>=0.11.0 opencv-python>=4.1.1 maturin -whisper +openai-whisper sounddevice pynput sentence-transformers -transformers \ No newline at end of file +transformers +pylcs +accelerate +optimum +auto-gptq>=0.7.1 \ No newline at end of file diff --git a/examples/python-operator-dataflow/utils.py b/examples/python-operator-dataflow/utils.py index 4ec04ed09..a40bd6d6f 100644 --- a/examples/python-operator-dataflow/utils.py +++ b/examples/python-operator-dataflow/utils.py @@ -1,26 +1,3 @@ -import os -import cv2 - - -def put_text(image, text, position, color): - cv2.putText( - image, - text, - position, - cv2.FONT_HERSHEY_SIMPLEX, - 0.45, - color, - 2, - 1, - ) - - -CI = os.environ.get("CI") - -CAMERA_WIDTH = 640 -CAMERA_HEIGHT = 480 - -FONT = cv2.FONT_HERSHEY_SIMPLEX LABELS = [ "person", "bicycle", diff --git a/examples/python-operator-dataflow/webcam.py b/examples/python-operator-dataflow/webcam.py index 9867ad7c4..c7d451919 100755 --- a/examples/python-operator-dataflow/webcam.py +++ 
b/examples/python-operator-dataflow/webcam.py @@ -52,7 +52,9 @@ def on_event( 2, 1, ) + else: self.failure_count += 1 + return DoraStatus.CONTINUE send_output( "image",