diff --git a/examples/argostranslate/README.md b/examples/argostranslate/README.md deleted file mode 100644 index 832640735..000000000 --- a/examples/argostranslate/README.md +++ /dev/null @@ -1,12 +0,0 @@ -# Dora argo example - -Make sure to have, dora, pip and cargo installed. - -```bash -dora up -dora build dataflow.yml -dora start dataflow.yml - -# In another terminal -terminal-print -``` diff --git a/examples/argostranslate/argo_translate_op.py b/examples/argostranslate/argo_translate_op.py deleted file mode 100644 index a04d314d4..000000000 --- a/examples/argostranslate/argo_translate_op.py +++ /dev/null @@ -1,38 +0,0 @@ -import argostranslate.package -import argostranslate.translate -from dora import Node - -node = Node() - -from_code = "fr" -to_code = "en" - -file_path = 'translated.txt' - -def write_string_to_file(file_path, content): - with open(file_path, 'w') as file: - file.write(content) - -# Download and install Argos Translate package -argostranslate.package.update_package_index() -available_packages = argostranslate.package.get_available_packages() -package_to_install = next( - filter( - lambda x: x.from_code == from_code and x.to_code == to_code, available_packages - ) -) -argostranslate.package.install_from_path(package_to_install.download()) - -# Translate - - -for event in node: - if event["type"] == "INPUT": - text = event["value"][0].as_py() - translatedText = argostranslate.translate.translate( - text, - from_code, - to_code, - ) - write_string_to_file(file_path, translatedText) - print(f"translated: {translatedText}", flush=True) diff --git a/examples/argostranslate/dataflow.yml b/examples/argostranslate/dataflow.yml deleted file mode 100644 index dbc7f6448..000000000 --- a/examples/argostranslate/dataflow.yml +++ /dev/null @@ -1,15 +0,0 @@ -nodes: - - id: type - custom: - source: shell - args: python write_op.py - inputs: - tick: dora/timer/millis/5000 - outputs: - - text - - id: argo - custom: - source: shell - args: python 
argo_translate_op.py - inputs: - text: type/text diff --git a/examples/argostranslate/write_op.py b/examples/argostranslate/write_op.py deleted file mode 100644 index 27acf9cb7..000000000 --- a/examples/argostranslate/write_op.py +++ /dev/null @@ -1,20 +0,0 @@ -import pyarrow as pa -from dora import Node - -node = Node() - -file_path = 'write_down_to_translate.txt' -text_base = "" - -def read_file_as_string(file_path): - with open(file_path, 'r') as file: - file_contents = file.read() - return file_contents - -for event in node: - if event["type"] == "INPUT": - to_be_translated = read_file_as_string(file_path) - if to_be_translated != text_base: - text_base = to_be_translated - node.send_output("text", pa.array([text_base])) - print(f"go for translation: " + text_base, flush=True) \ No newline at end of file diff --git a/examples/argostranslate/.gitignore b/examples/translation/.gitignore similarity index 100% rename from examples/argostranslate/.gitignore rename to examples/translation/.gitignore diff --git a/examples/translation/README.md b/examples/translation/README.md new file mode 100644 index 000000000..3af2c6a24 --- /dev/null +++ b/examples/translation/README.md @@ -0,0 +1,14 @@ +# Dora argo example + +Make sure to have dora, pip and cargo installed.
+ +```bash +# Install rerun if it's not done already +dora up + +dora build dataflow_cn_terminal.yml +dora start dataflow_cn_terminal.yml + +# In another terminal +python pretty_print.py +``` diff --git a/examples/translation/dataflow.yml b/examples/translation/dataflow.yml new file mode 100644 index 000000000..55e41033f --- /dev/null +++ b/examples/translation/dataflow.yml @@ -0,0 +1,47 @@ +nodes: + - id: dora-microphone + build: pip install -e ../../node-hub/dora-microphone + path: dora-microphone + outputs: + - audio + + - id: dora-vad + build: pip install -e ../../node-hub/dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-distil-whisper + build: pip install -e ../../node-hub/dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: chinese + TRANSLATE: false + + - id: dora-argotranslate + build: pip install -e ../../node-hub/dora-argotranslate + path: dora-argotranslate + inputs: + text: + source: dora-distil-whisper/text + queue_size: 1 + outputs: + - text + env: + SOURCE_LANGUAGE: zh + TARGET_LANGUAGE: en + + - id: plot + build: cargo build -p dora-rerun --release + path: dora-rerun + inputs: + text: dora-argotranslate/text + env: + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 diff --git a/examples/translation/dataflow_cn.yml b/examples/translation/dataflow_cn.yml new file mode 100644 index 000000000..b687b9945 --- /dev/null +++ b/examples/translation/dataflow_cn.yml @@ -0,0 +1,46 @@ +nodes: + - id: dora-microphone + build: pip install -e ../../node-hub/dora-microphone + path: dora-microphone + outputs: + - audio + + - id: dora-vad + build: pip install -e ../../node-hub/dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-distil-whisper + build: pip install -e ../../node-hub/dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + 
TARGET_LANGUAGE: chinese + TRANSLATE: false + + - id: dora-opus + build: pip install -e ../../node-hub/dora-opus + path: dora-opus + inputs: + text: dora-distil-whisper/text + outputs: + - text + env: + SOURCE_LANGUAGE: zh + TARGET_LANGUAGE: en + + - id: plot + build: cargo build -p dora-rerun --release + path: dora-rerun + inputs: + translated_text: dora-opus/text + original_text: dora-distil-whisper/text + env: + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 diff --git a/examples/translation/dataflow_cn_terminal.yml b/examples/translation/dataflow_cn_terminal.yml new file mode 100644 index 000000000..fd039901c --- /dev/null +++ b/examples/translation/dataflow_cn_terminal.yml @@ -0,0 +1,42 @@ +nodes: + - id: dora-microphone + build: pip install -e ../../node-hub/dora-microphone + path: dora-microphone + outputs: + - audio + + - id: dora-vad + build: pip install -e ../../node-hub/dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-distil-whisper + build: pip install -e ../../node-hub/dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: chinese + TRANSLATE: false + + - id: dora-opus + build: pip install -e ../../node-hub/dora-opus + path: dora-opus + inputs: + text: dora-distil-whisper/text + outputs: + - text + env: + SOURCE_LANGUAGE: zh + TARGET_LANGUAGE: en + + - id: pretty-print + path: dynamic + inputs: + translated_text: dora-opus/text + original_text: dora-distil-whisper/text diff --git a/examples/translation/dataflow_en.yml b/examples/translation/dataflow_en.yml new file mode 100644 index 000000000..a429156f5 --- /dev/null +++ b/examples/translation/dataflow_en.yml @@ -0,0 +1,45 @@ +nodes: + - id: dora-microphone + build: pip install -e ../../node-hub/dora-microphone + path: dora-microphone + outputs: + - audio + + - id: dora-vad + build: pip install -e ../../node-hub/dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + 
outputs: + - audio + + - id: dora-distil-whisper + build: pip install -e ../../node-hub/dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: english + TRANSLATE: false + + - id: dora-argotranslate + build: pip install -e ../../node-hub/dora-argotranslate + path: dora-argotranslate + inputs: + text: dora-distil-whisper/text + outputs: + - text + env: + SOURCE_LANGUAGE: en + TARGET_LANGUAGE: zh + + - id: dora-rerun + build: cargo build -p dora-rerun --release + path: dora-rerun + inputs: + text: dora-argotranslate/text + env: + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 diff --git a/examples/translation/dataflow_en_terminal.yml b/examples/translation/dataflow_en_terminal.yml new file mode 100644 index 000000000..e58661897 --- /dev/null +++ b/examples/translation/dataflow_en_terminal.yml @@ -0,0 +1,45 @@ +nodes: + - id: dora-microphone + build: pip install -e ../../node-hub/dora-microphone + path: dora-microphone + outputs: + - audio + + - id: dora-vad + build: pip install -e ../../node-hub/dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-distil-whisper + build: pip install -e ../../node-hub/dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: english + TRANSLATE: false + + - id: dora-opus + build: pip install -e ../../node-hub/dora-opus + path: dora-opus + inputs: + text: dora-distil-whisper/text + outputs: + - text + env: + SOURCE_LANGUAGE: en + TARGET_LANGUAGE: zh + + - id: pretty-print + path: dynamic + inputs: + translated_text: dora-opus/text + original_text: dora-distil-whisper/text + env: + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 diff --git a/examples/translation/dataflow_en_terminal_argo.yml b/examples/translation/dataflow_en_terminal_argo.yml new file mode 100644 index 000000000..65a96ca57 --- /dev/null +++ b/examples/translation/dataflow_en_terminal_argo.yml @@ -0,0 
+1,46 @@ +nodes: + - id: dora-microphone + build: pip install -e ../../node-hub/dora-microphone + path: dora-microphone + outputs: + - audio + + - id: dora-vad + build: pip install -e ../../node-hub/dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-distil-whisper + build: pip install -e ../../node-hub/dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: english + TRANSLATE: false + + - id: dora-argotranslate + build: pip install -e ../../node-hub/dora-argotranslate + path: dora-argotranslate + inputs: + text: dora-distil-whisper/text + outputs: + - text + env: + SOURCE_LANGUAGE: en + TARGET_LANGUAGE: zh + + - id: pretty-print + build: cargo build -p dora-rerun --release + path: dynamic + inputs: + translated_text: dora-argotranslate/text + original_text: dora-distil-whisper/text + env: + IMAGE_WIDTH: 640 + IMAGE_HEIGHT: 480 diff --git a/examples/translation/dataflow_fr.yml b/examples/translation/dataflow_fr.yml new file mode 100644 index 000000000..f4f90eed3 --- /dev/null +++ b/examples/translation/dataflow_fr.yml @@ -0,0 +1,46 @@ +nodes: + - id: dora-microphone + build: pip install -e ../../node-hub/dora-microphone + path: dora-microphone + outputs: + - audio + + - id: dora-vad + build: pip install -e ../../node-hub/dora-vad + path: dora-vad + inputs: + audio: dora-microphone/audio + outputs: + - audio + + - id: dora-distil-whisper + build: pip install -e ../../node-hub/dora-distil-whisper + path: dora-distil-whisper + inputs: + input: dora-vad/audio + outputs: + - text + env: + TARGET_LANGUAGE: french + TRANSLATE: false + + - id: dora-argotranslate + build: pip install -e ../../node-hub/dora-argotranslate + path: dora-argotranslate + inputs: + text: dora-distil-whisper/text + outputs: + - text + env: + SOURCE_LANGUAGE: fr + TARGET_LANGUAGE: en + + - id: dora-rerun + build: cargo build -p dora-rerun --release + path: dora-rerun + inputs: 
def print_centered(texts):
    """Print each entry of *texts* as a label followed by its centered lines.

    Args:
        texts: Mapping from a label to a list of strings. The label is
            printed as-is; every string of the list is centered on the
            current terminal width.

    Each label and each group of lines is followed by ``print("\\n" * 1)``,
    i.e. two empty lines, to visually separate the sections.
    """
    # Width is queried once; it is the same value for every line printed.
    columns = shutil.get_terminal_size().columns
    blank = "\n" * 1

    for source_id, sentences in texts.items():
        print(source_id)
        print(blank)
        # Pad each sentence on both sides so it sits in the middle of the row.
        for sentence in sentences:
            print(sentence.center(columns))
        print(blank)
b/node-hub/dora-argotranslate/dora_argotranslate/main.py similarity index 66% rename from node-hub/dora-argo/dora_argo/main.py rename to node-hub/dora-argotranslate/dora_argotranslate/main.py index 5cf8a4160..94a41918a 100644 --- a/node-hub/dora-argo/dora_argo/main.py +++ b/node-hub/dora-argotranslate/dora_argotranslate/main.py @@ -1,10 +1,14 @@ -import argostranslate.package -import argostranslate.translate +import os + +os.environ["ARGOS_DEVICE_TYPE"] = "auto" from dora import Node +import pyarrow as pa +import argostranslate.package +import argostranslate.translate -from_code = "fr" -to_code = "en" +from_code = os.getenv("SOURCE_LANGUAGE", "fr") +to_code = os.getenv("TARGET_LANGUAGE", "en") # Download and install Argos Translate package argostranslate.package.update_package_index() @@ -30,4 +34,10 @@ def main(): from_code, to_code, ) - print(f"translated: {translatedText}", flush=True) + print(text, flush=True) + print("translated: " + translatedText, flush=True) + node.send_output( + "text", + pa.array([translatedText]), + {"language": to_code}, + ) diff --git a/node-hub/dora-argo/pyproject.toml b/node-hub/dora-argotranslate/pyproject.toml similarity index 71% rename from node-hub/dora-argo/pyproject.toml rename to node-hub/dora-argotranslate/pyproject.toml index 4ccbfaf49..d477020a7 100644 --- a/node-hub/dora-argo/pyproject.toml +++ b/node-hub/dora-argotranslate/pyproject.toml @@ -1,15 +1,15 @@ [tool.poetry] -name = "dora-parler" +name = "dora-argotranslate" version = "0.3.6" description = "Dora Node for Text translating using Argostranslate" readme = "README.md" authors = [ "Haixuan Xavier Tao ", "Enzo Le Van ", - "Félix Huang " + "Félix Huang ", ] -packages = [{ include = "dora_argo" }] +packages = [{ include = "dora_argotranslate" }] [tool.poetry.dependencies] dora-rs = "^0.3.6" @@ -18,7 +18,7 @@ python = "^3.7" argostranslate = "^1.9.6" [tool.poetry.scripts] -dora-argo = "dora_argo.main:main" +dora-argotranslate = "dora_argotranslate.main:main" 
# Checkpoint handed to `from_pretrained` for both the model and the processor.
MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", "openai/whisper-large-v3-turbo")
# Forwarded as the `language` entry of the pipeline's `generate_kwargs`.
TARGET_LANGUAGE = os.getenv("TARGET_LANGUAGE", "chinese")
# Environment variables are strings, and `bool()` of ANY non-empty string
# (including "false" and "False") is True.  Parse the text explicitly so that
# `TRANSLATE: false` in a dataflow really disables the "translate" task.
TRANSLATE = os.getenv("TRANSLATE", "False").strip().lower() in (
    "1",
    "true",
    "yes",
    "on",
)
def _first_repeat_end(units, min_repeat_length, max_repeat_length):
    """Return the index just past the first chunk that is immediately repeated.

    *units* is any sliceable sequence (a string of characters or a list of
    words).  Chunk lengths are tried from ``min_repeat_length`` upwards and,
    for each length, start positions from left to right.  Returns ``None``
    when no chunk is directly followed by an identical copy of itself.
    """
    # `len(units) // 2` is the longest chunk that can still be followed by a
    # full copy, so it must be *included*; `max_repeat_length` stays an
    # exclusive bound as before.
    stop = min(max_repeat_length, len(units) // 2 + 1)
    for repeat_length in range(min_repeat_length, stop):
        for start in range(len(units) - repeat_length * 2 + 1):
            middle = start + repeat_length
            if units[start:middle] == units[middle : middle + repeat_length]:
                return middle
    return None


def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):
    """Truncate *text* right after the first chunk that is immediately repeated.

    Text in which more than half of the characters fall in U+4E00..U+9FFF is
    scanned character by character; any other text is scanned word by word
    (split on whitespace).

    Args:
        text: The sentence to clean up.
        min_repeat_length: Smallest chunk size (characters or words) that
            counts as a repetition.
        max_repeat_length: Exclusive upper bound on the chunk size.

    Returns:
        *text* unchanged when it is empty or contains no repetition.
        Otherwise the prefix ending with the first occurrence of the repeated
        chunk; on the word path the kept words are re-joined with single
        spaces.
    """
    # An empty string would make the ratio below divide by zero.
    if not text:
        return text

    cjk_count = sum(1 for char in text if "\u4e00" <= char <= "\u9fff")
    if cjk_count / len(text) > 0.5:
        # Chinese has no word separators: compare raw character chunks.
        end = _first_repeat_end(text, min_repeat_length, max_repeat_length)
        return text if end is None else text[:end]

    # Space-separated languages: compare chunks of whole words.
    words = text.split()
    end = _first_repeat_end(words, min_repeat_length, max_repeat_length)
    return text if end is None else " ".join(words[:end])
b/node-hub/dora-microphone/dora_microphone/main.py @@ -2,66 +2,31 @@ import numpy as np import pyarrow as pa import time as tm -from enum import Enum +import os from dora import Node - -class RecordingState(Enum): - """Enum for recording states.""" - - PENDING = 0 - RUNNING = 1 - SILENCE = 2 - - -def detect_speech(audio_data, threshold): - """Check if the amplitude of the audio signal exceeds the threshold.""" - return np.any(np.abs(audio_data) > threshold) +MAX_DURATION = float(os.getenv("MAX_DURATION", "0.1")) +SAMPLE_RATE = int(os.getenv("SAMPLE_RATE", "16000")) def main(): - # Parameters - threshold = 500 # Threshold for detecting speech (adjust this as needed) - SAMPLE_RATE = 16000 - silence_duration = 0.5 # Duration of silence before stopping the recording - # Initialize buffer and recording flag buffer = [] - state = RecordingState.PENDING - silence_start_time = tm.time() start_recording_time = tm.time() - max_duration = 20 node = Node() # pylint: disable=unused-argument def callback(indata, frames, time, status): - nonlocal buffer, state, silence_start_time, node, max_duration, start_recording_time + nonlocal buffer, node, start_recording_time - is_speaking = detect_speech(indata[:, 0], threshold) - if is_speaking: - if state == RecordingState.PENDING: - buffer = [] - state = RecordingState.RUNNING - start_recording_time = tm.time() - buffer.extend(indata[:, 0]) - elif not is_speaking and state == RecordingState.RUNNING: - silence_start_time = tm.time() # Reset silence timer - buffer.extend(indata[:, 0]) - state = RecordingState.SILENCE - elif ( - state == RecordingState.RUNNING or state == RecordingState.SILENCE - ) and tm.time() - start_recording_time > max_duration: + if tm.time() - start_recording_time > MAX_DURATION: audio_data = np.array(buffer).ravel().astype(np.float32) / 32768.0 node.send_output("audio", pa.array(audio_data)) - state = RecordingState.PENDING - elif not is_speaking and state == RecordingState.SILENCE: - if tm.time() - 
silence_start_time > silence_duration: - audio_data = np.array(buffer).ravel().astype(np.float32) / 32768.0 - node.send_output("audio", pa.array(audio_data)) - state = RecordingState.PENDING - else: - buffer.extend(indata[:, 0]) + buffer = [] + start_recording_time = tm.time() + else: + buffer.extend(indata[:, 0]) # Start recording with sd.InputStream( diff --git a/node-hub/dora-opus/README.md b/node-hub/dora-opus/README.md new file mode 100644 index 000000000..0fa6b0b69 --- /dev/null +++ b/node-hub/dora-opus/README.md @@ -0,0 +1 @@ +# Dora text translation Node using OPUS MT diff --git a/node-hub/dora-opus/dora_opus/__init__.py b/node-hub/dora-opus/dora_opus/__init__.py new file mode 100644 index 000000000..ac3cbef9f --- /dev/null +++ b/node-hub/dora-opus/dora_opus/__init__.py @@ -0,0 +1,11 @@ +import os + +# Define the path to the README file relative to the package directory +readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md") + +# Read the content of the README file +try: + with open(readme_path, "r", encoding="utf-8") as f: + __doc__ = f.read() +except FileNotFoundError: + __doc__ = "README file not found." 
def _first_repeat_end(units, min_repeat_length, max_repeat_length):
    """Return the index just past the first chunk that is immediately repeated.

    *units* is any sliceable sequence (a string of characters or a list of
    words).  Chunk lengths are tried from ``min_repeat_length`` upwards and,
    for each length, start positions from left to right.  Returns ``None``
    when no chunk is directly followed by an identical copy of itself.
    """
    # `len(units) // 2` is the longest chunk that can still be followed by a
    # full copy, so it must be *included*; `max_repeat_length` stays an
    # exclusive bound as before.
    stop = min(max_repeat_length, len(units) // 2 + 1)
    for repeat_length in range(min_repeat_length, stop):
        for start in range(len(units) - repeat_length * 2 + 1):
            middle = start + repeat_length
            if units[start:middle] == units[middle : middle + repeat_length]:
                return middle
    return None


def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):
    """Truncate *text* right after the first chunk that is immediately repeated.

    Text in which more than half of the characters fall in U+4E00..U+9FFF is
    scanned character by character; any other text is scanned word by word
    (split on whitespace).

    Args:
        text: The sentence to clean up.
        min_repeat_length: Smallest chunk size (characters or words) that
            counts as a repetition.
        max_repeat_length: Exclusive upper bound on the chunk size.

    Returns:
        *text* unchanged when it is empty or contains no repetition.
        Otherwise the prefix ending with the first occurrence of the repeated
        chunk; on the word path the kept words are re-joined with single
        spaces.
    """
    # An empty translation would make the ratio below divide by zero.
    if not text:
        return text

    cjk_count = sum(1 for char in text if "\u4e00" <= char <= "\u9fff")
    if cjk_count / len(text) > 0.5:
        # Chinese has no word separators: compare raw character chunks.
        end = _first_repeat_end(text, min_repeat_length, max_repeat_length)
        return text if end is None else text[:end]

    # Space-separated languages: compare chunks of whole words.
    words = text.split()
    end = _first_repeat_end(words, min_repeat_length, max_repeat_length)
    return text if end is None else " ".join(words[:end])
.detach() + .numpy() + .ravel() + ) + + array = np.array(tokenizer.decode(translated, skip_special_tokens=True)) + array = np.array(array).ravel() + array = [cut_repetition(array[0])] + node.send_output( + "text", + pa.array(array), + {"language": to_code}, + ) diff --git a/node-hub/dora-opus/pyproject.toml b/node-hub/dora-opus/pyproject.toml new file mode 100644 index 000000000..8a948719d --- /dev/null +++ b/node-hub/dora-opus/pyproject.toml @@ -0,0 +1,25 @@ +[tool.poetry] +name = "dora-opus" +version = "0.3.6" +description = "Dora Node for Text translating using Opus" +readme = "README.md" +authors = [ + "Haixuan Xavier Tao ", + "Enzo Le Van ", + "Félix Huang ", +] + +packages = [{ include = "dora_opus" }] + +[tool.poetry.dependencies] +dora-rs = "^0.3.6" +numpy = "< 2.0.0" +python = "^3.7" +transformers = "^4.45" + +[tool.poetry.scripts] +dora-opus = "dora_opus.main:main" + +[build-system] +requires = ["poetry-core>=1.8.0"] +build-backend = "poetry.core.masonry.api" diff --git a/node-hub/dora-argo/tests/test_translate.py b/node-hub/dora-opus/tests/test_translate.py similarity index 85% rename from node-hub/dora-argo/tests/test_translate.py rename to node-hub/dora-opus/tests/test_translate.py index 250ffa53c..63ba4f809 100644 --- a/node-hub/dora-argo/tests/test_translate.py +++ b/node-hub/dora-opus/tests/test_translate.py @@ -2,7 +2,7 @@ def test_import_main(): - from dora_argo.main import main + from dora_opus.main import main # Check that everything is working, and catch dora Runtime Exception as we're not running in a dora dataflow. with pytest.raises(RuntimeError): diff --git a/node-hub/dora-vad/README.md b/node-hub/dora-vad/README.md new file mode 100644 index 000000000..b5e7ad8a1 --- /dev/null +++ b/node-hub/dora-vad/README.md @@ -0,0 +1,3 @@ +# Speech Activity Detection(VAD) + +This is using Silero VAD. 
def main():
    """Accumulate incoming audio chunks and forward them once speech has ended.

    Every ``audio`` input is appended to a rolling buffer (at most the last
    100 chunks), the concatenated buffer is run through Silero's
    ``get_speech_timestamps`` and then:

    * if speech was found, enough chunks were collected and the last speech
      segment does not reach the very end of the buffer, everything up to the
      end of that segment is sent on the ``audio`` output and the remainder
      is kept as the start of the next buffer;
    * if speech reaches the end of the buffer, nothing is sent yet (the
      speaker is presumably still talking);
    * otherwise, once more than ``MAX_AUDIO_DURAION_S`` chunks piled up, the
      whole buffer is flushed.

    NOTE(review): both thresholds are compared against the *number of
    buffered chunks*, not against seconds as their names suggest -- confirm
    the intended unit with the microphone node's chunk duration.
    """
    node = Node()
    last_audios = []
    while True:
        event = node.next()
        if event is None:
            break
        if event["type"] != "INPUT" or event["id"] != "audio":
            continue

        last_audios.append(event["value"].to_numpy())
        # Bound memory: keep only the most recent 100 chunks.
        last_audios = last_audios[-100:]
        audio = np.concatenate(last_audios)
        speech_timestamps = get_speech_timestamps(
            torch.from_numpy(audio),
            model,
            threshold=0.2,
            min_speech_duration_ms=MIN_SPEECH_DURATION_MS,
            min_silence_duration_ms=MIN_SILENCE_DURATION_MS,
        )

        # Check if there is a timestamp and enough audio was collected.
        if speech_timestamps and len(last_audios) > MIN_AUDIO_SAMPLING_DURAION_S:
            speech_end = speech_timestamps[-1]["end"]

            # Speech runs up to the end of the buffer: wait for more audio so
            # the sentence is not cut in the middle.
            if speech_end == len(audio):
                continue

            node.send_output("audio", pa.array(audio[:speech_end]))
            # Slice the remainder from the FULL buffer.  Slicing the already
            # truncated array (as before) always produced an empty leftover.
            last_audios = [audio[speech_end:]]

        # If there is no usable speech for too long, flush the buffer.
        elif len(last_audios) > MAX_AUDIO_DURAION_S:
            node.send_output("audio", pa.array(audio))
            last_audios = []