Skip to content

Commit

Permalink
deps: update kokoro-onnx
Browse files (browse the repository at this point in the history)
Should also fix #255
  • Loading branch information
Fedir Zadniprovskyi authored and fedirz committed Jan 26, 2025
1 parent 2f0c9a2 commit 5809d79
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 22 deletions.
4 changes: 2 additions & 2 deletions docs/usage/text-to-speech.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ Download the Kokoro model and voices.
export KOKORO_REVISION=c97b7bbc3e60f447383c79b2f94fee861ff156ac
# Download the ONNX model (~346 MBs)
docker exec -it speaches huggingface-cli download hexgrad/Kokoro-82M --include 'kokoro-v0_19.onnx' --revision $KOKORO_REVISION
# Download the voices.json (~54 MBs) file
docker exec -it speaches curl --location --output /home/ubuntu/.cache/huggingface/hub/models--hexgrad--Kokoro-82M/snapshots/$KOKORO_REVISION/voices.json https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.json
# Download the voices.bin (~5.5 MBs) file
docker exec -it speaches curl --location --output /home/ubuntu/.cache/huggingface/hub/models--hexgrad--Kokoro-82M/snapshots/$KOKORO_REVISION/voices.bin https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin
```

!!! note
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ dependencies = [
"fastapi>=0.115.6",
"faster-whisper>=1.1.1",
"huggingface-hub[hf-transfer]>=0.25.1",
"kokoro-onnx>=0.2.2",
"kokoro-onnx[gpu]>=0.3.6,<0.4.0",
"numpy>=2.1.1",
"onnxruntime-gpu>=1.20.1 ; platform_machine == 'x86_64'",
"piper-phonemize ; platform_machine == 'x86_64'",
"piper-tts>=1.2.0 ; platform_machine == 'x86_64'",
"pydantic-settings>=2.5.2",
Expand Down
4 changes: 2 additions & 2 deletions src/speaches/hf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,9 @@ def download_kokoro_model() -> None:
)
# HACK
res = httpx.get(
"https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.json", follow_redirects=True
"https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin", follow_redirects=True
).raise_for_status()
voices_path = model_repo_path / "voices.json"
voices_path = model_repo_path / "voices.bin"
voices_path.touch(exist_ok=True)
voices_path.write_bytes(res.content)

Expand Down
2 changes: 1 addition & 1 deletion src/speaches/model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def __init__(self, ttl: int) -> None:
# TODO
def _load_fn(self, _model_id: str) -> Kokoro:
model_path = get_kokoro_model_path()
voices_path = model_path.parent / "voices.json"
voices_path = model_path.parent / "voices.bin"
inf_sess = InferenceSession(model_path, providers=ONNX_PROVIDERS)
return Kokoro.from_session(inf_sess, str(voices_path))

Expand Down
2 changes: 1 addition & 1 deletion src/speaches/routers/speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ async def synthesize(
) -> StreamingResponse:
match body.model:
case "hexgrad/Kokoro-82M":
# TODO: download the `voices.json` file
# TODO: download the `voices.bin` file
with kokoro_model_manager.load_model(body.voice) as tts:
audio_generator = kokoro_utils.generate_audio(
tts,
Expand Down
Loading

0 comments on commit 5809d79

Please sign in to comment.