Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a separate thread to initialize models for lazarus examples. #1270

Merged
merged 1 commit into from
Aug 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/lazarus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
branches:
- master
- lazarus
- fix-lazarus
paths:
- '.github/workflows/lazarus.yaml'
- 'CMakeLists.txt'
Expand Down
4 changes: 4 additions & 0 deletions lazarus-examples/generate_subtitles/generate_subtitles.lpi
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@
<Filename Value="my_worker.pas"/>
<IsPartOfProject Value="True"/>
</Unit>
<Unit>
<Filename Value="my_init.pas"/>
<IsPartOfProject Value="True"/>
</Unit>
</Units>
</ProjectOptions>
<CompilerOptions>
Expand Down
2 changes: 1 addition & 1 deletion lazarus-examples/generate_subtitles/generate_subtitles.lpr
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
athreads,
{$ENDIF}
Interfaces, // this includes the LCL widgetset
Forms, unit1, my_worker
Forms, unit1, my_worker, my_init
{ you can add units after this };

{$R *.res}
Expand Down
358 changes: 358 additions & 0 deletions lazarus-examples/generate_subtitles/my_init.pas
Original file line number Diff line number Diff line change
@@ -0,0 +1,358 @@
unit my_init;

{$mode ObjFPC}{$H+}

interface

uses
{$IFDEF UNIX}
cthreads,
cmem,
{$ENDIF}
{$IFDEF HASAMIGA}
athreads,
{$ENDIF}
Classes, SysUtils;

type
TMyInitThread = class(TThread)
private
Status: AnsiString;
ModelDir: AnsiString;
procedure ShowStatus;

protected
procedure Execute; override;
public
Constructor Create(CreateSuspended: Boolean; ModelDirectory: AnsiString);
end;

var
MyInitThread: TMyInitThread;

implementation

uses
unit1, sherpa_onnx;

function CreateVad(VadFilename: AnsiString): TSherpaOnnxVoiceActivityDetector;
var
Config: TSherpaOnnxVadModelConfig;

SampleRate: Integer;
WindowSize: Integer;
begin
Initialize(Config);

SampleRate := 16000; {Please don't change it unless you know the details}
WindowSize := 512; {Please don't change it unless you know the details}

Config.SileroVad.Model := VadFilename;
Config.SileroVad.MinSpeechDuration := 0.5;
Config.SileroVad.MinSilenceDuration := 0.5;
Config.SileroVad.Threshold := 0.5;
Config.SileroVad.WindowSize := WindowSize;
Config.NumThreads:= 2;
Config.Debug:= True;
Config.Provider:= 'cpu';
Config.SampleRate := SampleRate;

Result := TSherpaOnnxVoiceActivityDetector.Create(Config, 30);
end;

function CreateOfflineRecognizerTransducer(
Tokens: AnsiString;
Encoder: AnsiString;
Decoder: AnsiString;
Joiner: AnsiString;
ModelType: AnsiString): TSherpaOnnxOfflineRecognizer;
var
Config: TSherpaOnnxOfflineRecognizerConfig;
begin
Initialize(Config);

Config.ModelConfig.Transducer.Encoder := Encoder;
Config.ModelConfig.Transducer.Decoder := Decoder;
Config.ModelConfig.Transducer.Joiner := Joiner;

Config.ModelConfig.ModelType := ModelType;
Config.ModelConfig.Tokens := Tokens;
Config.ModelConfig.Provider := 'cpu';
Config.ModelConfig.NumThreads := 2;
Config.ModelConfig.Debug := False;

Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

function CreateOfflineRecognizerTeleSpeech(
Tokens: AnsiString;
TeleSpeech: AnsiString): TSherpaOnnxOfflineRecognizer;
var
Config: TSherpaOnnxOfflineRecognizerConfig;
begin
Initialize(Config);

Config.ModelConfig.TeleSpeechCtc := TeleSpeech;

Config.ModelConfig.Tokens := Tokens;
Config.ModelConfig.Provider := 'cpu';
Config.ModelConfig.NumThreads := 2;
Config.ModelConfig.Debug := False;

Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

function CreateOfflineRecognizerParaformer(
Tokens: AnsiString;
Paraformer: AnsiString): TSherpaOnnxOfflineRecognizer;
var
Config: TSherpaOnnxOfflineRecognizerConfig;
begin
Initialize(Config);

Config.ModelConfig.Paraformer.Model := Paraformer;

Config.ModelConfig.Tokens := Tokens;
Config.ModelConfig.Provider := 'cpu';
Config.ModelConfig.NumThreads := 2;
Config.ModelConfig.Debug := False;

Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

function CreateOfflineRecognizerSenseVoice(
Tokens: AnsiString;
SenseVoice: AnsiString): TSherpaOnnxOfflineRecognizer;
var
Config: TSherpaOnnxOfflineRecognizerConfig;
begin
Initialize(Config);

Config.ModelConfig.SenseVoice.Model := SenseVoice;
Config.ModelConfig.SenseVoice.Language := 'auto';
Config.ModelConfig.SenseVoice.UseItn := True;
Config.ModelConfig.Tokens := Tokens;
Config.ModelConfig.Provider := 'cpu';
Config.ModelConfig.NumThreads := 2;
Config.ModelConfig.Debug := False;

Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

function CreateOfflineRecognizerWhisper(
Tokens: AnsiString;
WhisperEncoder: AnsiString;
WhisperDecoder: AnsiString): TSherpaOnnxOfflineRecognizer;
var
Config: TSherpaOnnxOfflineRecognizerConfig;
begin
Initialize(Config);

Config.ModelConfig.Whisper.Encoder := WhisperEncoder;
Config.ModelConfig.Whisper.Decoder := WhisperDecoder;
Config.ModelConfig.Tokens := Tokens;
Config.ModelConfig.Provider := 'cpu';
Config.ModelConfig.NumThreads := 2;
Config.ModelConfig.Debug := False;

Result := TSherpaOnnxOfflineRecognizer.Create(Config);
end;

constructor TMyInitThread.Create(CreateSuspended : boolean; ModelDirectory: AnsiString);
begin
inherited Create(CreateSuspended);
ModelDir := ModelDirectory;
FreeOnTerminate := True;
end;

procedure TMyInitThread.ShowStatus;
begin
Form1.UpdateInitStatus(Status);
end;

procedure TMyInitThread.Execute;
var
Msg: AnsiString;
VadFilename: AnsiString;
Tokens: AnsiString;

WhisperEncoder: AnsiString;
WhisperDecoder: AnsiString;

SenseVoice: AnsiString;

Paraformer: AnsiString;

TeleSpeech: AnsiString;

TransducerEncoder: AnsiString; // from icefall
TransducerDecoder: AnsiString;
TransducerJoiner: AnsiString;

NeMoTransducerEncoder: AnsiString;
NeMoTransducerDecoder: AnsiString;
NeMoTransducerJoiner: AnsiString;
begin
VadFilename := ModelDir + 'silero_vad.onnx';
Tokens := ModelDir + 'tokens.txt';

{
Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/export-onnx.html#available-models
for a list of whisper models.

In the code, we use the normalized filename whisper-encoder.onnx, whisper-decoder.onnx, and tokens.txt
You need to rename the existing model files.

For instance, if you use sherpa-onnx-whisper-tiny.en, you have to do
mv tiny.en-tokens.txt tokens.txt

mv tiny.en-encoder.onnx whisper-encoder.onnx
mv tiny.en-decoder.onnx whisper-decoder.onnx

// or use the int8.onnx

mv tiny.en-encoder.int8.onnx whisper-encoder.onnx
mv tiny.en-decoder.int8.onnx whisper-decoder.onnx
}
WhisperEncoder := ModelDir + 'whisper-encoder.onnx';
WhisperDecoder := ModelDir + 'whisper-decoder.onnx';


{
Please refer to
https://k2-fsa.github.io/sherpa/onnx/sense-voice/pretrained.html#pre-trained-models
to download models for SenseVoice.

In the code, we use the normalized model name sense-voice.onnx. You have
to rename the downloaded model files.

For example, you need to use

mv model.onnx sense-voice.onnx

// or use the int8.onnx
mv model.int8.onnx sense-voice.onnx
}

SenseVoice := ModelDir + 'sense-voice.onnx';

{
Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html
to download paraformer models.

Note that you have to rename model.onnx or model.int8.onnx to paraformer.onnx.
An example is given below for the rename:

cp model.onnx paraformer.onnx

// or use int8.onnx
cp model.int8.onnx paraformer.onnx
}
Paraformer := ModelDir + 'paraformer.onnx';


{
please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/telespeech/models.html
to download TeleSpeech models.

Note that you have to rename model files after downloading. The following
is an example

mv model.onnx telespeech.onnx

// or to use int8.onnx

mv model.int8.onnx telespeech.onnx
}

TeleSpeech := ModelDir + 'telespeech.onnx';


{
Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
to download an icefall offline transducer model. Note that you need to rename the
model files to transducer-encoder.onnx, transducer-decoder.onnx, and
transducer-joiner.onnx
}
TransducerEncoder := ModelDir + 'transducer-encoder.onnx';
TransducerDecoder := ModelDir + 'transducer-decoder.onnx';
TransducerJoiner := ModelDir + 'transducer-joiner.onnx';

{
Please visit
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
to donwload a NeMo transducer model.
}
NeMoTransducerEncoder := ModelDir + 'nemo-transducer-encoder.onnx';
NeMoTransducerDecoder := ModelDir + 'nemo-transducer-decoder.onnx';
NeMoTransducerJoiner := ModelDir + 'nemo-transducer-joiner.onnx';

if not FileExists(VadFilename) then
begin
Status := VadFilename + ' does not exist! Please download it from' +
sLineBreak + 'https://github.com/k2-fsa/sherpa-onnx/tree/asr-models';
Synchronize(@ShowStatus);
Exit;
end;

if Form1.Vad = nil then
begin
Form1.Vad := CreateVad(VadFilename);
end;

if not FileExists(Tokens) then
begin
Status := Tokens + ' not found. Please download a non-streaming ASR model first!';
Synchronize(@ShowStatus);
Exit;
end;

if FileExists(WhisperEncoder) and FileExists(WhisperDecoder) then
begin
Form1.OfflineRecognizer := CreateOfflineRecognizerWhisper(Tokens, WhisperEncoder, WhisperDecoder);
Msg := 'Whisper';
end
else if FileExists(SenseVoice) then
begin
Form1.OfflineRecognizer := CreateOfflineRecognizerSenseVoice(Tokens, SenseVoice);
Msg := 'SenseVoice';
end
else if FileExists(Paraformer) then
begin
Form1.OfflineRecognizer := CreateOfflineRecognizerParaformer(Tokens, Paraformer);
Msg := 'Paraformer';
end
else if FileExists(TeleSpeech) then
begin
Form1.OfflineRecognizer := CreateOfflineRecognizerTeleSpeech(Tokens, TeleSpeech);
Msg := 'TeleSpeech';
end
else if FileExists(TransducerEncoder) and FileExists(TransducerDecoder) and FileExists(TransducerJoiner) then
begin
Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
TransducerEncoder, TransducerDecoder, TransducerJoiner, 'transducer');
Msg := 'Zipformer transducer';
end
else if FileExists(NeMoTransducerEncoder) and FileExists(NeMoTransducerDecoder) and FileExists(NeMoTransducerJoiner) then
begin
Form1.OfflineRecognizer := CreateOfflineRecognizerTransducer(Tokens,
NeMoTransducerEncoder, NeMoTransducerDecoder, NeMoTransducerJoiner, 'nemo_transducer');
Msg := 'NeMo transducer';
end
else
begin
Status := 'Please download at least one non-streaming speech recognition model first.';
Synchronize(@ShowStatus);
Exit;
end;

Status := 'Congratulations! The ' + Msg + ' model is initialized succesfully!';
Synchronize(@ShowStatus);
end;

end.

Loading
Loading