Skip to content

Commit

Permalink
Add support for faster-whisper multilingual option
Browse files Browse the repository at this point in the history
The faster-whisper multilingual option performs language detection on every
segment. With the option enabled, you can get better transcription results
when the language was detected incorrectly at the start, or when the spoken
language changes partway through the audio.
  • Loading branch information
snoesberger committed Dec 19, 2024
1 parent 02ed561 commit 5650d69
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/whisper_ctranslate2/commandline.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,13 @@ def read_command_line():
help="When using Batched transcription the maximum number of parallel requests to model for decoding.",
)

algorithm_args.add_argument(
"--multilingual",
type=CommandLine._str2bool,
default=False,
help="Perform language detection on every segment",
)

vad_args = parser.add_argument_group("VAD filter arguments")

vad_args.add_argument(
Expand Down
2 changes: 2 additions & 0 deletions src/whisper_ctranslate2/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class TranscriptionOptions(NamedTuple):
vad_min_speech_duration_ms: Optional[int]
vad_max_speech_duration_s: Optional[int]
vad_min_silence_duration_ms: Optional[int]
multilingual: bool


class Transcribe:
Expand Down Expand Up @@ -179,6 +180,7 @@ def inference(
vad_filter=vad,
vad_parameters=vad_parameters,
**batch_size,
multilingual=options.multilingual,
)

language_name = LANGUAGES[info.language].title()
Expand Down
1 change: 1 addition & 0 deletions src/whisper_ctranslate2/whisper_ctranslate2.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def get_transcription_options(args):
vad_min_speech_duration_ms=args.pop("vad_min_speech_duration_ms"),
vad_max_speech_duration_s=args.pop("vad_max_speech_duration_s"),
vad_min_silence_duration_ms=args.pop("vad_min_silence_duration_ms"),
multilingual=args.pop("multilingual"),
)


Expand Down

0 comments on commit 5650d69

Please sign in to comment.