Skip to content

Commit

Permalink
Support CED models (#792)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Apr 19, 2024
1 parent d97a283 commit c1608b3
Show file tree
Hide file tree
Showing 33 changed files with 605 additions and 46 deletions.
78 changes: 78 additions & 0 deletions .github/workflows/export-ced-to-onnx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Manually triggered workflow that runs scripts/ced/run.sh, uploads the
# resulting *.tar.bz2 archives to the "audio-tagging-models" release of
# k2-fsa/sherpa-onnx, and pushes each CED model directory to Hugging Face.
name: export-ced-to-onnx

on:
  workflow_dispatch:

concurrency:
  group: export-ced-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-ced-to-onnx:
    # Only run in the upstream repository or the maintainer's fork.
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export ced
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run
        shell: bash
        run: |
          cd scripts/ced
          ./run.sh

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: audio-tagging-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            # Clone without downloading existing LFS objects.
            export GIT_LFS_SKIP_SMUDGE=1

            models=(
              tiny
              mini
              small
              base
            )

            for m in "${models[@]}"; do
              rm -rf huggingface
              d=sherpa-onnx-ced-$m-audio-tagging-2024-04-19
              git clone "https://huggingface.co/k2-fsa/$d" huggingface

              # Copy instead of move: this command may be re-run by the retry
              # action, and "rm -rf huggingface" above would otherwise delete
              # files that were moved but not yet pushed.
              cp -av "$d"/* huggingface

              cd huggingface
              git lfs track "*.onnx"
              git status
              git add .
              git status

              # On a retry the files may already be committed upstream;
              # only commit when something is actually staged.
              git diff --cached --quiet || git commit -m "first commit"

              # HF_TOKEN comes from the step's env block.
              git push "https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/$d" main
              cd ..
            done
2 changes: 1 addition & 1 deletion android/SherpaOnnx/app/src/main/res/values/strings.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<resources>
<string name="app_name">ASR with Next-gen Kaldi</string>
<string name="app_name">ASR</string>
<string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi.
\n
\n\n\n
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<resources>
<string name="app_name">ASR with Next-gen Kaldi</string>
<string name="app_name">ASR2pass </string>
<string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi.
\n
\n\n\n
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager
import android.util.Log

private val TAG = "sherpa-onnx"
const val TAG = "sherpa-onnx"

data class OfflineZipformerAudioTaggingModelConfig(
var model: String,
var model: String = "",
)

data class AudioTaggingModelConfig(
var zipformer: OfflineZipformerAudioTaggingModelConfig,
var zipformer: OfflineZipformerAudioTaggingModelConfig = OfflineZipformerAudioTaggingModelConfig(),
var ced: String = "",
var numThreads: Int = 1,
var debug: Boolean = false,
var provider: String = "cpu",
Expand Down Expand Up @@ -103,7 +103,7 @@ class AudioTagging(
//
// See also
// https://k2-fsa.github.io/sherpa/onnx/audio-tagging/
fun getAudioTaggingConfig(type: Int, numThreads: Int=1): AudioTaggingConfig? {
fun getAudioTaggingConfig(type: Int, numThreads: Int = 1): AudioTaggingConfig? {
when (type) {
0 -> {
val modelDir = "sherpa-onnx-zipformer-small-audio-tagging-2024-04-15"
Expand All @@ -123,14 +123,65 @@ fun getAudioTaggingConfig(type: Int, numThreads: Int=1): AudioTaggingConfig? {
return AudioTaggingConfig(
model = AudioTaggingModelConfig(
zipformer = OfflineZipformerAudioTaggingModelConfig(model = "$modelDir/model.int8.onnx"),
numThreads = 1,
numThreads = numThreads,
debug = true,
),
labels = "$modelDir/class_labels_indices.csv",
topK = 3,
)
}

2 -> {
val modelDir = "sherpa-onnx-ced-tiny-audio-tagging-2024-04-19"
return AudioTaggingConfig(
model = AudioTaggingModelConfig(
ced = "$modelDir/model.int8.onnx",
numThreads = numThreads,
debug = true,
),
labels = "$modelDir/class_labels_indices.csv",
topK = 3,
)
}

3 -> {
val modelDir = "sherpa-onnx-ced-mini-audio-tagging-2024-04-19"
return AudioTaggingConfig(
model = AudioTaggingModelConfig(
ced = "$modelDir/model.int8.onnx",
numThreads = numThreads,
debug = true,
),
labels = "$modelDir/class_labels_indices.csv",
topK = 3,
)
}

4 -> {
val modelDir = "sherpa-onnx-ced-small-audio-tagging-2024-04-19"
return AudioTaggingConfig(
model = AudioTaggingModelConfig(
ced = "$modelDir/model.int8.onnx",
numThreads = numThreads,
debug = true,
),
labels = "$modelDir/class_labels_indices.csv",
topK = 3,
)
}

5 -> {
val modelDir = "sherpa-onnx-ced-base-audio-tagging-2024-04-19"
return AudioTaggingConfig(
model = AudioTaggingModelConfig(
ced = "$modelDir/model.int8.onnx",
numThreads = numThreads,
debug = true,
),
labels = "$modelDir/class_labels_indices.csv",
topK = 3,
)
}
}

return null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,15 @@
package com.k2fsa.sherpa.onnx.audio.tagging

import android.Manifest

import android.app.Activity
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import androidx.compose.foundation.lazy.items
import android.media.MediaRecorder
import android.util.Log
import androidx.compose.foundation.ExperimentalFoundationApi
import androidx.compose.foundation.background
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Box
import androidx.compose.material3.CenterAlignedTopAppBar
import androidx.compose.runtime.Composable
import androidx.compose.material3.Scaffold
import androidx.compose.material3.TopAppBarDefaults
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Text
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.PaddingValues
import androidx.compose.foundation.layout.Row
Expand All @@ -30,25 +21,32 @@ import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.lazy.LazyColumn
import androidx.compose.foundation.lazy.items
import androidx.compose.material3.Button
import androidx.compose.material3.CenterAlignedTopAppBar
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.Scaffold
import androidx.compose.material3.Slider
import androidx.compose.material3.Surface
import androidx.compose.material3.Text
import androidx.compose.material3.TopAppBarDefaults
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateListOf
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.setValue
import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.graphics.Color
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.text.style.TextAlign
import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.AudioEvent
import com.k2fsa.sherpa.onnx.Tagger
import kotlin.concurrent.thread


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import androidx.compose.material3.Surface
import androidx.compose.runtime.Composable
import androidx.compose.ui.Modifier
import androidx.core.app.ActivityCompat
import com.k2fsa.sherpa.onnx.Tagger
import com.k2fsa.sherpa.onnx.audio.tagging.ui.theme.SherpaOnnxAudioTaggingTheme

const val TAG = "sherpa-onnx"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,25 +1,24 @@
package com.k2fsa.sherpa.onnx.audio.tagging
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager
import android.util.Log
import com.k2fsa.sherpa.onnx.AudioTagging
import com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation.TAG
import com.k2fsa.sherpa.onnx.getAudioTaggingConfig


object Tagger {
// Backing field for [tagger]; assigned once by initTagger().
private var _tagger: AudioTagging? = null

/**
 * The shared [AudioTagging] instance created by [initTagger].
 *
 * @throws IllegalStateException if read before [initTagger] has created the instance.
 */
val tagger: AudioTagging
    get() = checkNotNull(_tagger) { "Tagger is not initialized; call initTagger() first" }

/**
 * Creates the shared [AudioTagging] instance on the first call; later calls return
 * without doing anything.
 *
 * The null check and the assignment both run inside `synchronized(this)`.
 *
 * @param assetManager passed unchanged to the [AudioTagging] constructor; may be null.
 * @param numThreads passed to [getAudioTaggingConfig].
 * @throws IllegalStateException if [getAudioTaggingConfig] returns null for model type 0.
 */
fun initTagger(assetManager: AssetManager? = null, numThreads: Int = 1) {
    synchronized(this) {
        if (_tagger != null) {
            return
        }

        Log.i(TAG, "Initializing audio tagger")
        // Model type is hard-coded to 0 here.
        val config = checkNotNull(getAudioTaggingConfig(type = 0, numThreads = numThreads)) {
            "getAudioTaggingConfig returned null for type 0"
        }
        _tagger = AudioTagging(assetManager, config)
    }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import androidx.wear.compose.material.Button
import androidx.wear.compose.material.MaterialTheme
import androidx.wear.compose.material.Text
import com.k2fsa.sherpa.onnx.AudioEvent
import com.k2fsa.sherpa.onnx.audio.tagging.Tagger
import com.k2fsa.sherpa.onnx.Tagger
import com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation.theme.SherpaOnnxAudioTaggingWearOsTheme
import kotlin.concurrent.thread

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import androidx.activity.compose.setContent
import androidx.compose.runtime.Composable
import androidx.core.app.ActivityCompat
import androidx.core.splashscreen.SplashScreen.Companion.installSplashScreen
import com.k2fsa.sherpa.onnx.audio.tagging.Tagger
import com.k2fsa.sherpa.onnx.Tagger

const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<resources>
<string name="app_name">AudioTagging</string>
<string name="app_name">Audio Tagging</string>
<!--
This string is used for square devices and overridden by hello_world in
values-round/strings.xml for round devices.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<resources>
<string name="app_name">Speaker Identification</string>
<string name="app_name">Speaker ID</string>
<string name="start">Start recording</string>
<string name="stop">Stop recording</string>
<string name="add">Add speaker</string>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
<resources>
<string name="app_name">SherpaOnnxSpokenLanguageIdentification</string>
<string name="app_name">Language ID</string>
</resources>
2 changes: 1 addition & 1 deletion android/SherpaOnnxTts/app/src/main/res/values/strings.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<resources>
<string name="app_name">Next-gen Kaldi: TTS</string>
<string name="app_name">TTS</string>
<string name="sid_label">Speaker ID</string>
<string name="sid_hint">0</string>
<string name="speed_label">Speech speed (large->fast)</string>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
<resources>
<string name="app_name">Next-gen Kaldi: TTS</string>
<string name="app_name">TTS Engine</string>
</resources>
2 changes: 1 addition & 1 deletion android/SherpaOnnxVad/app/src/main/res/values/strings.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<resources>
<string name="app_name">Next-gen Kaldi: SileroVAD</string>
<string name="app_name">VAD</string>

<string name="hint">Click the Start button to play Silero VAD with Next-gen Kaldi.</string>
<string name="start">Start</string>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<resources>
<string name="app_name">ASR with Next-gen Kaldi</string>
<string name="app_name">VAD-ASR</string>
<string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi.
\n
\n\n\n
Expand Down
25 changes: 24 additions & 1 deletion scripts/apk/generate-audio-tagging-apk-script.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,30 @@ def get_models():
),
]

return icefall_models
ced_models = [
AudioTaggingModel(
model_name="sherpa-onnx-ced-tiny-audio-tagging-2024-04-19",
idx=2,
short_name="ced_tiny",
),
AudioTaggingModel(
model_name="sherpa-onnx-ced-mini-audio-tagging-2024-04-19",
idx=3,
short_name="ced_mini",
),
AudioTaggingModel(
model_name="sherpa-onnx-ced-small-audio-tagging-2024-04-19",
idx=4,
short_name="ced_small",
),
AudioTaggingModel(
model_name="sherpa-onnx-ced-base-audio-tagging-2024-04-19",
idx=5,
short_name="ced_base",
),
]

return icefall_models + ced_models


def main():
Expand Down
Loading

0 comments on commit c1608b3

Please sign in to comment.