Adding note on ffmpeg + fix for faster whisper on macOS
raivisdejus committed Aug 10, 2024
1 parent ecb85ae commit 5e498df
Showing 4 changed files with 18 additions and 14 deletions.
3 changes: 3 additions & 0 deletions README.md
@@ -22,6 +22,9 @@ OpenAI's [Whisper](https://github.com/openai/whisper).

**PyPI**:

Install [ffmpeg](https://www.ffmpeg.org/download.html)
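
For example, with a system package manager (assuming Homebrew on macOS or apt on Debian/Ubuntu; any install method that puts `ffmpeg` on your PATH works):
```shell
# macOS (Homebrew)
brew install ffmpeg

# Debian/Ubuntu
sudo apt install ffmpeg

# verify the install
ffmpeg -version
```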

Install Buzz
```shell
pip install buzz-captions
python -m buzz
10 changes: 5 additions & 5 deletions buzz/transcriber/recording_transcriber.py

```diff
@@ -8,9 +8,9 @@
 from typing import Optional
 from platformdirs import user_cache_dir
 
-import torch
 import numpy as np
 import sounddevice
+from torch import cuda, version
 from sounddevice import PortAudioError
 from openai import OpenAI
 from PyQt6.QtCore import QObject, pyqtSignal
```

```diff
@@ -62,11 +62,11 @@ def start(self):
         model_path = self.model_path
         keep_samples = int(0.15 * self.sample_rate)
 
-        if torch.cuda.is_available():
-            logging.debug(f"CUDA version detected: {torch.version.cuda}")
+        if cuda.is_available():
+            logging.debug(f"CUDA version detected: {version.cuda}")
 
         if self.transcription_options.model.model_type == ModelType.WHISPER:
-            device = "cuda" if torch.cuda.is_available() else "cpu"
+            device = "cuda" if cuda.is_available() else "cpu"
             model = whisper.load_model(model_path, device=device)
         elif self.transcription_options.model.model_type == ModelType.WHISPER_CPP:
             model = WhisperCpp(model_path)
```

```diff
@@ -79,7 +79,7 @@ def start(self):
                 logging.debug("CUDA GPUs are currently not supported on Windows, using CPU")
                 device = "cpu"
 
-            if torch.cuda.is_available() and torch.version.cuda < "12":
+            if cuda.is_available() and version.cuda < "12":
                 logging.debug("Unsupported CUDA version (<12), using CPU")
                 device = "cpu"
 
```

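The change repeated across these hunks is a single device-selection rule: use CUDA only when a GPU is present and the detected CUDA version is 12 or newer. A minimal standalone sketch of that logic, assuming only that torch is installed (`pick_device` is an illustrative name, not Buzz's API):
```python
# Minimal sketch of the device-selection rule in the hunks above.
from torch import cuda, version


def pick_device() -> str:
    if not cuda.is_available():
        return "cpu"
    # version.cuda is a string like "11.8" or "12.1"; the diff compares it
    # lexicographically against "12", so e.g. "11.8" falls back to CPU.
    if version.cuda is not None and version.cuda < "12":
        return "cpu"
    return "cuda"
```
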
10 changes: 5 additions & 5 deletions buzz/transcriber/whisper_file_transcriber.py

```diff
@@ -5,8 +5,8 @@
 import re
 import os
 import sys
-import torch
 import platform
+from torch import cuda, version
 from platformdirs import user_cache_dir
 from multiprocessing.connection import Connection
 from threading import Thread
```

```diff
@@ -52,8 +52,8 @@ def transcribe(self) -> List[Segment]:
             "Starting whisper file transcription, task = %s", self.transcription_task
         )
 
-        if torch.cuda.is_available():
-            logging.debug(f"CUDA version detected: {torch.version.cuda}")
+        if cuda.is_available():
+            logging.debug(f"CUDA version detected: {version.cuda}")
 
         recv_pipe, send_pipe = multiprocessing.Pipe(duplex=False)
 
```
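
`multiprocessing.Pipe(duplex=False)` yields a one-way `(receive, send)` connection pair, which lets the transcription child process stream results back to the parent. A minimal standalone sketch of that pattern (the worker and its payload are illustrative, not Buzz's actual messages):
```python
# Standalone sketch of the one-way pipe pattern: the child sends results,
# the parent receives them.
import multiprocessing


def worker(send_pipe) -> None:
    send_pipe.send("segment 1 transcribed")  # stand-in for real segments
    send_pipe.close()


if __name__ == "__main__":
    recv_pipe, send_pipe = multiprocessing.Pipe(duplex=False)
    proc = multiprocessing.Process(target=worker, args=(send_pipe,))
    proc.start()
    print(recv_pipe.recv())  # -> "segment 1 transcribed"
    proc.join()
```
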

```diff
@@ -149,7 +149,7 @@ def transcribe_faster_whisper(cls, task: FileTranscriptionTask) -> List[Segment]
             logging.debug("CUDA GPUs are currently not supported on Windows, using CPU")
             device = "cpu"
 
-        if torch.cuda.is_available() and torch.version.cuda < "12":
+        if cuda.is_available() and version.cuda < "12":
             logging.debug("Unsupported CUDA version (<12), using CPU")
             device = "cpu"
 
```

```diff
@@ -195,7 +195,7 @@ def transcribe_faster_whisper(cls, task: FileTranscriptionTask) -> List[Segment]
 
     @classmethod
     def transcribe_openai_whisper(cls, task: FileTranscriptionTask) -> List[Segment]:
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = "cuda" if cuda.is_available() else "cpu"
         model = whisper.load_model(task.model_path, device=device)
 
         if task.transcription_options.word_level_timings:
```
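
`whisper.load_model` here is openai-whisper's public loader; a minimal usage sketch, assuming the openai-whisper package is installed (`"tiny"` and `"audio.mp3"` are placeholders, not Buzz defaults):
```python
# Minimal openai-whisper usage sketch mirroring the call above.
import whisper
from torch import cuda

device = "cuda" if cuda.is_available() else "cpu"
model = whisper.load_model("tiny", device=device)
result = model.transcribe("audio.mp3")
print(result["text"])
```
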
9 changes: 5 additions & 4 deletions buzz/transformers_whisper.py

```diff
@@ -1,8 +1,9 @@
 import os
 import sys
 import numpy as np
-import torch
 import requests
+from torch import from_numpy, cuda, float16, float32
+
 from typing import Optional, Union
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 from transformers.pipelines import AutomaticSpeechRecognitionPipeline
```

```diff
@@ -83,7 +84,7 @@ def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):
                 )
 
             inputs = F.resample(
-                torch.from_numpy(inputs), in_sampling_rate, self.feature_extractor.sampling_rate
+                from_numpy(inputs), in_sampling_rate, self.feature_extractor.sampling_rate
             ).numpy()
             ratio = self.feature_extractor.sampling_rate / in_sampling_rate
         else:
```

```diff
@@ -161,8 +162,8 @@ def transcribe(
         language: str,
         task: str,
     ):
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+        device = "cuda" if cuda.is_available() else "cpu"
+        torch_dtype = float16 if cuda.is_available() else float32
 
         safetensors_path = os.path.join(self.model_id, "model.safetensors")
         use_safetensors = os.path.exists(safetensors_path)
```
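
Alongside the device, this hunk picks the tensor precision and weight format: `float16` on GPU, `float32` on CPU, and safetensors weights only when the file exists on disk. A minimal sketch of those choices (`model_load_options` is an illustrative helper, not Buzz's API):
```python
# Sketch of the load options chosen above: fp16 on CUDA, fp32 on CPU,
# and safetensors weights only when the file is present.
import os

from torch import cuda, float16, float32


def model_load_options(model_id: str):
    has_cuda = cuda.is_available()
    device = "cuda" if has_cuda else "cpu"
    torch_dtype = float16 if has_cuda else float32
    use_safetensors = os.path.exists(os.path.join(model_id, "model.safetensors"))
    return device, torch_dtype, use_safetensors
```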
