Skip to content

Commit

Permalink
Add Go API for SenseVoice (#1154)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Jul 20, 2024
1 parent e472180 commit 8f4d332
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/test-go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ jobs:
go build
ls -lh
echo "Test SenseVoice ctc"
./run-sense-voice-small.sh
rm -rf sherpa-onnx-sense-*
echo "Test telespeech ctc"
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-telespeech-ctc-*
Expand Down
4 changes: 4 additions & 0 deletions go-api-examples/non-streaming-decode-files/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ func main() {

flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model")

flag.StringVar(&config.ModelConfig.SenseVoice.Model, "sense-voice-model", "", "Path to the SenseVoice model")
flag.StringVar(&config.ModelConfig.SenseVoice.Language, "sense-voice-language", "", "If not empty, specify the Language for the input wave")
flag.IntVar(&config.ModelConfig.SenseVoice.UseInverseTextNormalization, "sense-voice-use-itn", 1, " 1 to use inverse text normalization")

flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Builds the non-streaming Go example and decodes the bundled
# test_wavs/zh.wav with the SenseVoice int8 model.
#
# The model archive is fetched from the sherpa-onnx "asr-models" GitHub
# release on first use; later runs reuse the extracted directory.

# -e: stop at the first failing command; -x: echo each command as it runs.
set -ex

model_dir="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17"
archive="${model_dir}.tar.bz2"

# Download and unpack only when the model directory is missing, then drop
# the archive so that just the extracted files remain.
if [ ! -d "${model_dir}" ]; then
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${archive}"
  tar xvf "${archive}"
  rm "${archive}"
fi

go mod tidy
go build

./non-streaming-decode-files \
  --sense-voice-model "./${model_dir}/model.int8.onnx" \
  --tokens "./${model_dir}/tokens.txt" \
  --debug 0 \
  "./${model_dir}/test_wavs/zh.wav"
15 changes: 15 additions & 0 deletions scripts/go/sherpa_onnx.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,12 @@ type OfflineTdnnModelConfig struct {
Model string
}

// OfflineSenseVoiceModelConfig is the configuration for an offline
// SenseVoice model.
type OfflineSenseVoiceModelConfig struct {
	// Model is the path to the SenseVoice model.
	Model string

	// Language, if not empty, specifies the language of the input wave.
	Language string

	// UseInverseTextNormalization is 1 to use inverse text normalization.
	UseInverseTextNormalization int
}

// Configuration for offline LM.
type OfflineLMConfig struct {
Model string // Path to the model
Expand All @@ -382,6 +388,7 @@ type OfflineModelConfig struct {
NemoCTC OfflineNemoEncDecCtcModelConfig
Whisper OfflineWhisperModelConfig
Tdnn OfflineTdnnModelConfig
SenseVoice OfflineSenseVoiceModelConfig
Tokens string // Path to tokens.txt

// Number of threads to use for neural network computation
Expand Down Expand Up @@ -478,6 +485,14 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model)
defer C.free(unsafe.Pointer(c.model_config.tdnn.model))

c.model_config.sense_voice.model = C.CString(config.ModelConfig.SenseVoice.Model)
defer C.free(unsafe.Pointer(c.model_config.sense_voice.model))

c.model_config.sense_voice.language = C.CString(config.ModelConfig.SenseVoice.Language)
defer C.free(unsafe.Pointer(c.model_config.sense_voice.language))

c.model_config.sense_voice.use_itn = C.int(config.ModelConfig.SenseVoice.UseInverseTextNormalization)

c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
defer C.free(unsafe.Pointer(c.model_config.tokens))

Expand Down

0 comments on commit 8f4d332

Please sign in to comment.