From 9dd0e03568295b1faef64d3cbf3916a36b04b547 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 22 Jun 2024 18:18:36 +0800 Subject: [PATCH 001/201] Enable to stop TTS generation (#1041) --- CMakeLists.txt | 2 +- .../com/k2fsa/sherpa/onnx/MainActivity.kt | 35 ++++++++-- .../main/java/com/k2fsa/sherpa/onnx/Tts.kt | 4 +- .../app/src/main/res/layout/activity_main.xml | 12 ++++ .../app/src/main/res/values/strings.xml | 1 + .../sherpa/onnx/tts/engine/TtsService.kt | 7 +- .../non-streaming-asr/pubspec.yaml | 2 +- dart-api-examples/streaming-asr/pubspec.yaml | 2 +- dart-api-examples/tts/bin/piper.dart | 4 ++ dart-api-examples/tts/pubspec.yaml | 2 +- dart-api-examples/vad/pubspec.yaml | 2 +- dotnet-examples/offline-tts-play/Program.cs | 4 ++ kotlin-api-examples/test_tts.kt | 42 +++++++++++- .../NonStreamingTextToSpeech.rc | Bin 12926 -> 13026 bytes .../NonStreamingTextToSpeechDlg.cpp | 62 ++++++++++++------ .../NonStreamingTextToSpeechDlg.h | 3 + .../NonStreamingTextToSpeech/Resource.h | 3 +- nodejs-addon-examples/package.json | 2 +- python-api-examples/offline-tts-play.py | 7 ++ scripts/dotnet/OfflineTts.cs | 6 +- sherpa-onnx/c-api/c-api.cc | 10 +-- sherpa-onnx/c-api/c-api.h | 15 +++-- sherpa-onnx/csrc/offline-tts-vits-impl.h | 10 +-- sherpa-onnx/csrc/offline-tts.h | 4 +- .../csrc/sherpa-onnx-offline-tts-play-alsa.cc | 11 +++- .../csrc/sherpa-onnx-offline-tts-play.cc | 10 ++- sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc | 3 +- sherpa-onnx/flutter/CHANGELOG.md | 4 ++ .../flutter/lib/src/sherpa_onnx_bindings.dart | 2 +- sherpa-onnx/flutter/lib/src/tts.dart | 6 +- sherpa-onnx/jni/offline-tts.cc | 34 +++++++++- sherpa-onnx/python/csrc/offline-tts.cc | 6 +- 32 files changed, 248 insertions(+), 69 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d474c6aa9..ec529bab6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ project(sherpa-onnx) # ./nodejs-addon-examples # ./dart-api-examples/ # ./sherpa-onnx/flutter/CHANGELOG.md -set(SHERPA_ONNX_VERSION 
"1.10.0") +set(SHERPA_ONNX_VERSION "1.10.1") # Disable warning about # diff --git a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index f44bef8eb..b95ad7d78 100644 --- a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -26,6 +26,9 @@ class MainActivity : AppCompatActivity() { private lateinit var speed: EditText private lateinit var generate: Button private lateinit var play: Button + private lateinit var stop: Button + private var stopped: Boolean = false + private var mediaPlayer: MediaPlayer? = null // see // https://developer.android.com/reference/kotlin/android/media/AudioTrack @@ -49,9 +52,11 @@ class MainActivity : AppCompatActivity() { generate = findViewById(R.id.generate) play = findViewById(R.id.play) + stop = findViewById(R.id.stop) generate.setOnClickListener { onClickGenerate() } play.setOnClickListener { onClickPlay() } + stop.setOnClickListener { onClickStop() } sid.setText("0") speed.setText("1.0") @@ -70,7 +75,7 @@ class MainActivity : AppCompatActivity() { AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_PCM_FLOAT ) - Log.i(TAG, "sampleRate: ${sampleRate}, buffLength: ${bufLength}") + Log.i(TAG, "sampleRate: $sampleRate, buffLength: $bufLength") val attr = AudioAttributes.Builder().setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) .setUsage(AudioAttributes.USAGE_MEDIA) @@ -90,8 +95,14 @@ class MainActivity : AppCompatActivity() { } // this function is called from C++ - private fun callback(samples: FloatArray) { - track.write(samples, 0, samples.size, AudioTrack.WRITE_BLOCKING) + private fun callback(samples: FloatArray): Int { + if (!stopped) { + track.write(samples, 0, samples.size, AudioTrack.WRITE_BLOCKING) + return 1 + } else { + track.stop() + return 0 + } } private fun onClickGenerate() { @@ -127,6 +138,8 @@ 
class MainActivity : AppCompatActivity() { track.play() play.isEnabled = false + generate.isEnabled = false + stopped = false Thread { val audio = tts.generateWithCallback( text = textStr, @@ -140,6 +153,7 @@ class MainActivity : AppCompatActivity() { if (ok) { runOnUiThread { play.isEnabled = true + generate.isEnabled = true track.stop() } } @@ -148,11 +162,22 @@ class MainActivity : AppCompatActivity() { private fun onClickPlay() { val filename = application.filesDir.absolutePath + "/generated.wav" - val mediaPlayer = MediaPlayer.create( + mediaPlayer?.stop() + mediaPlayer = MediaPlayer.create( applicationContext, Uri.fromFile(File(filename)) ) - mediaPlayer.start() + mediaPlayer?.start() + } + + private fun onClickStop() { + stopped = true + play.isEnabled = true + generate.isEnabled = true + track.pause() + track.flush() + mediaPlayer?.stop() + mediaPlayer = null } private fun initTts() { diff --git a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt index b25869d07..4f9c4b6f6 100644 --- a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt +++ b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt @@ -76,7 +76,7 @@ class OfflineTts( text: String, sid: Int = 0, speed: Float = 1.0f, - callback: (samples: FloatArray) -> Unit + callback: (samples: FloatArray) -> Int ): GeneratedAudio { val objArray = generateWithCallbackImpl( ptr, @@ -146,7 +146,7 @@ class OfflineTts( text: String, sid: Int = 0, speed: Float = 1.0f, - callback: (samples: FloatArray) -> Unit + callback: (samples: FloatArray) -> Int ): Array companion object { diff --git a/android/SherpaOnnxTts/app/src/main/res/layout/activity_main.xml b/android/SherpaOnnxTts/app/src/main/res/layout/activity_main.xml index 3547de872..c66022a8c 100644 --- a/android/SherpaOnnxTts/app/src/main/res/layout/activity_main.xml +++ b/android/SherpaOnnxTts/app/src/main/res/layout/activity_main.xml @@ 
-84,4 +84,16 @@ app:layout_constraintLeft_toLeftOf="parent" app:layout_constraintRight_toRightOf="parent" app:layout_constraintTop_toBottomOf="@id/generate" /> + + + + +
+
+ + + +
+
+ + + + + diff --git a/wasm/vad/sherpa-onnx-vad.js b/wasm/vad/sherpa-onnx-vad.js new file mode 100644 index 000000000..154bbea0f --- /dev/null +++ b/wasm/vad/sherpa-onnx-vad.js @@ -0,0 +1,253 @@ +function freeConfig(config, Module) { + if ('buffer' in config) { + Module._free(config.buffer); + } + + if ('sileroVad' in config) { + freeConfig(config.sileroVad, Module) + } + + + Module._free(config.ptr); +} + +// The user should free the returned pointers +function initSherpaOnnxSileroVadModelConfig(config, Module) { + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1; + + const n = modelLen; + + const buffer = Module._malloc(n); + + const len = 5 * 4; + const ptr = Module._malloc(len); + + Module.stringToUTF8(config.model || '', buffer, modelLen); + + offset = 0; + Module.setValue(ptr, buffer, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.threshold || 0.5, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.minSilenceDuration || 0.5, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.minSpeechDuration || 0.25, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.windowSize || 512, 'i32'); + offset += 4; + + return { + buffer: buffer, ptr: ptr, len: len, + } +} + +function initSherpaOnnxVadModelConfig(config, Module) { + if (!('sileroVad' in config)) { + config.sileroVad = { + model: '', + threshold: 0.50, + minSilenceDuration: 0.50, + minSpeechDuration: 0.25, + windowSize: 512, + }; + } + + const sileroVad = + initSherpaOnnxSileroVadModelConfig(config.sileroVad, Module); + + const len = sileroVad.len + 4 * 4; + const ptr = Module._malloc(len); + + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; + const buffer = Module._malloc(providerLen); + Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen); + + let offset = 0; + Module._CopyHeap(sileroVad.ptr, sileroVad.len, ptr + offset); + offset += sileroVad.len; + + Module.setValue(ptr + offset, config.sampleRate 
|| 16000, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, buffer, 'i8*'); // provider + offset += 4; + + Module.setValue(ptr + offset, config.debug || 0, 'i32'); + offset += 4; + + return { + buffer: buffer, ptr: ptr, len: len, sileroVad: sileroVad, + } +} + +function createVad(Module, myConfig) { + const sileroVad = { + model: './silero_vad.onnx', + threshold: 0.50, + minSilenceDuration: 0.50, + minSpeechDuration: 0.25, + windowSize: 512, + }; + + let config = { + sileroVad: sileroVad, + sampleRate: 16000, + numThreads: 1, + provider: 'cpu', + debug: 1, + bufferSizeInSeconds: 30, + }; + + if (myConfig) { + config = myConfig; + } + + return new Vad(config, Module); +} + + +class CircularBuffer { + constructor(capacity, Module) { + this.handle = Module._SherpaOnnxCreateCircularBuffer(capacity); + this.Module = Module; + } + + free() { + this.Module._SherpaOnnxDestroyCircularBuffer(this.handle); + this.handle = 0 + } + + /** + * @param samples {Float32Array} + */ + push(samples) { + const pointer = + this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT); + this.Module._SherpaOnnxCircularBufferPush( + this.handle, pointer, samples.length); + this.Module._free(pointer); + } + + get(startIndex, n) { + const p = + this.Module._SherpaOnnxCircularBufferGet(this.handle, startIndex, n); + + const samplesPtr = p / 4; + const samples = new Float32Array(n); + for (let i = 0; i < n; i++) { + samples[i] = this.Module.HEAPF32[samplesPtr + i]; + } + + this.Module._SherpaOnnxCircularBufferFree(p); + + return samples; + } + + pop(n) { + this.Module._SherpaOnnxCircularBufferPop(this.handle, n); + } + + size() { + return this.Module._SherpaOnnxCircularBufferSize(this.handle); + } + + head() { + return this.Module._SherpaOnnxCircularBufferHead(this.handle); + } + + reset() { + 
this.Module._SherpaOnnxCircularBufferReset(this.handle); + } +} + +class Vad { + constructor(configObj, Module) { + this.config = configObj; + const config = initSherpaOnnxVadModelConfig(configObj, Module); + Module._MyPrint(config.ptr); + const handle = Module._SherpaOnnxCreateVoiceActivityDetector( + config.ptr, configObj.bufferSizeInSeconds || 30); + freeConfig(config, Module); + + this.handle = handle; + this.Module = Module; + } + + free() { + this.Module._SherpaOnnxDestroyVoiceActivityDetector(this.handle); + this.handle = 0 + } + + // samples is a float32 array + acceptWaveform(samples) { + const pointer = + this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT); + this.Module._SherpaOnnxVoiceActivityDetectorAcceptWaveform( + this.handle, pointer, samples.length); + this.Module._free(pointer); + } + + isEmpty() { + return this.Module._SherpaOnnxVoiceActivityDetectorEmpty(this.handle) == 1; + } + + isDetected() { + return this.Module._SherpaOnnxVoiceActivityDetectorDetected(this.handle) == + 1; + } + + pop() { + this.Module._SherpaOnnxVoiceActivityDetectorPop(this.handle); + } + + clear() { + this.Module._SherpaOnnxVoiceActivityDetectorClear(this.handle); + } + + /* +{ + samples: a 1-d float32 array, + start: an int32 +} + */ + front() { + const h = this.Module._SherpaOnnxVoiceActivityDetectorFront(this.handle); + + const start = this.Module.HEAP32[h / 4]; + const samplesPtr = this.Module.HEAP32[h / 4 + 1] / 4; + const numSamples = this.Module.HEAP32[h / 4 + 2]; + + const samples = new Float32Array(numSamples); + for (let i = 0; i < numSamples; i++) { + samples[i] = this.Module.HEAPF32[samplesPtr + i]; + } + + this.Module._SherpaOnnxDestroySpeechSegment(h); + return {samples: samples, start: start}; + } + + reset() { + this.Module._SherpaOnnxVoiceActivityDetectorReset(this.handle); + } + + flush() { + this.Module._SherpaOnnxVoiceActivityDetectorFlush(this.handle); + } +}; + +if 
(typeof process == 'object' && typeof process.versions == 'object' && + typeof process.versions.node == 'string') { + module.exports = { + createVad, + CircularBuffer, + }; +} diff --git a/wasm/vad/sherpa-onnx-wasm-main-vad.cc b/wasm/vad/sherpa-onnx-wasm-main-vad.cc new file mode 100644 index 000000000..3c1600ba1 --- /dev/null +++ b/wasm/vad/sherpa-onnx-wasm-main-vad.cc @@ -0,0 +1,45 @@ +// wasm/sherpa-onnx-wasm-main-vad.cc +// +// Copyright (c) 2024 Xiaomi Corporation +#include + +#include +#include + +#include "sherpa-onnx/c-api/c-api.h" + +// see also +// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html + +extern "C" { + +static_assert(sizeof(SherpaOnnxSileroVadModelConfig) == 5 * 4, ""); + +static_assert(sizeof(SherpaOnnxVadModelConfig) == + sizeof(SherpaOnnxSileroVadModelConfig) + 4 * 4, + ""); +void MyPrint(SherpaOnnxVadModelConfig *config) { + auto silero_vad = &config->silero_vad; + + fprintf(stdout, "----------silero_vad config----------\n"); + fprintf(stdout, "model: %s\n", silero_vad->model); + fprintf(stdout, "threshold: %.3f\n", silero_vad->threshold); + fprintf(stdout, "min_silence_duration: %.3f\n", + silero_vad->min_silence_duration); + fprintf(stdout, "min_speech_duration: %.3f\n", + silero_vad->min_speech_duration); + fprintf(stdout, "window_size: %d\n", silero_vad->window_size); + + fprintf(stdout, "----------config----------\n"); + + fprintf(stdout, "sample_rate: %d\n", config->sample_rate); + fprintf(stdout, "num_threads: %d\n", config->num_threads); + + fprintf(stdout, "provider: %s\n", config->provider); + fprintf(stdout, "debug: %d\n", config->debug); +} + +void CopyHeap(const char *src, int32_t num_bytes, char *dst) { + std::copy(src, src + num_bytes, dst); +} +} From 537e163dd012aec3b250af77697908d37759c057 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Aug 2024 13:24:52 +0800 Subject: [PATCH 128/201] WebAssembly example for VAD + Non-streaming ASR (#1284) --- 
.../workflows/wasm-simd-hf-space-de-tts.yaml | 4 + .../wasm-simd-hf-space-en-asr-zipformer.yaml | 3 + .../workflows/wasm-simd-hf-space-en-tts.yaml | 4 + .../wasm-simd-hf-space-silero-vad.yaml | 1 + .../workflows/wasm-simd-hf-space-vad-asr.yaml | 93 +++++ ...-space-zh-cantonese-en-asr-paraformer.yaml | 4 + ...sm-simd-hf-space-zh-en-asr-paraformer.yaml | 4 + ...asm-simd-hf-space-zh-en-asr-zipformer.yaml | 4 + CMakeLists.txt | 15 +- README.md | 239 ++++++++--- build-wasm-simd-vad-asr.sh | 68 +++ scripts/wasm/generate-vad-asr.py | 229 +++++++++++ scripts/wasm/run-vad-asr.sh.in | 92 +++++ sherpa-onnx/c-api/c-api.cc | 5 + sherpa-onnx/c-api/c-api.h | 3 + wasm/CMakeLists.txt | 4 + wasm/asr/assets/README.md | 7 + wasm/asr/index.html | 2 +- wasm/tts/assets/README.md | 5 + wasm/vad-asr/CMakeLists.txt | 83 ++++ wasm/vad-asr/app-vad-asr.js | 389 ++++++++++++++++++ wasm/vad-asr/assets/README.md | 23 ++ wasm/vad-asr/index.html | 43 ++ wasm/vad-asr/sherpa-onnx-asr.js | 1 + wasm/vad-asr/sherpa-onnx-vad.js | 1 + wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc | 19 + wasm/vad/assets/README.md | 3 + wasm/vad/index.html | 2 +- wasm/vad/sherpa-onnx-vad.js | 1 - 29 files changed, 1281 insertions(+), 70 deletions(-) create mode 100644 .github/workflows/wasm-simd-hf-space-vad-asr.yaml create mode 100755 build-wasm-simd-vad-asr.sh create mode 100755 scripts/wasm/generate-vad-asr.py create mode 100644 scripts/wasm/run-vad-asr.sh.in create mode 100644 wasm/vad-asr/CMakeLists.txt create mode 100644 wasm/vad-asr/app-vad-asr.js create mode 100644 wasm/vad-asr/assets/README.md create mode 100644 wasm/vad-asr/index.html create mode 120000 wasm/vad-asr/sherpa-onnx-asr.js create mode 120000 wasm/vad-asr/sherpa-onnx-vad.js create mode 100644 wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc diff --git a/.github/workflows/wasm-simd-hf-space-de-tts.yaml b/.github/workflows/wasm-simd-hf-space-de-tts.yaml index f51535379..cbd3b1fce 100644 --- a/.github/workflows/wasm-simd-hf-space-de-tts.yaml +++ 
b/.github/workflows/wasm-simd-hf-space-de-tts.yaml @@ -25,8 +25,12 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 + with: + version: 3.1.51 + actions-cache-folder: 'emsdk-cache' - name: View emsdk version shell: bash diff --git a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml index 975266917..510a003c7 100644 --- a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml +++ b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml @@ -27,6 +27,9 @@ jobs: fetch-depth: 0 - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 + with: + version: 3.1.51 + actions-cache-folder: 'emsdk-cache' - name: View emsdk version shell: bash diff --git a/.github/workflows/wasm-simd-hf-space-en-tts.yaml b/.github/workflows/wasm-simd-hf-space-en-tts.yaml index f5f950c3c..9c5c1d446 100644 --- a/.github/workflows/wasm-simd-hf-space-en-tts.yaml +++ b/.github/workflows/wasm-simd-hf-space-en-tts.yaml @@ -25,8 +25,12 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 + with: + version: 3.1.51 + actions-cache-folder: 'emsdk-cache' - name: View emsdk version shell: bash diff --git a/.github/workflows/wasm-simd-hf-space-silero-vad.yaml b/.github/workflows/wasm-simd-hf-space-silero-vad.yaml index e384af3fb..dc8bada70 100644 --- a/.github/workflows/wasm-simd-hf-space-silero-vad.yaml +++ b/.github/workflows/wasm-simd-hf-space-silero-vad.yaml @@ -25,6 +25,7 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 with: diff --git a/.github/workflows/wasm-simd-hf-space-vad-asr.yaml b/.github/workflows/wasm-simd-hf-space-vad-asr.yaml new file mode 100644 index 000000000..726b69826 --- /dev/null +++ b/.github/workflows/wasm-simd-hf-space-vad-asr.yaml @@ -0,0 +1,93 @@ +name: wasm-simd-hf-space-vad-asr + +on: + push: + branches: + - 
wasm + tags: + - 'v[0-9]+.[0-9]+.[0-9]+*' + + workflow_dispatch: + +concurrency: + group: wasm-simd-hf-space-vad-asr${{ github.ref }} + cancel-in-progress: true + +jobs: + wasm-simd-hf-space-vad-asr: + name: ${{ matrix.index }}/${{ matrix.total }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + total: ["8"] + index: ["0", "1", "2", "3", "4", "5", "6", "7"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip jinja2 + + - name: Install emsdk + uses: mymindstorm/setup-emsdk@v14 + with: + version: 3.1.51 + actions-cache-folder: 'emsdk-cache' + + - name: View emsdk version + shell: bash + run: | + emcc -v + echo "--------------------" + emcc --check + + - name: Generate build script + shell: bash + run: | + cd scripts/wasm + + total=${{ matrix.total }} + index=${{ matrix.index }} + + ./generate-vad-asr.py --total $total --index $index + + chmod +x run-vad-asr.sh + mv -v ./run-vad-asr.sh ../.. 
+ + - name: Show build scripts + shell: bash + run: | + cat ./run-vad-asr.sh + + - uses: actions/upload-artifact@v4 + with: + name: run-vad-asr-${{ matrix.index }} + path: ./run-vad-asr.sh + + - name: Build sherpa-onnx for WebAssembly + shell: bash + env: + MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + ./run-vad-asr.sh + + - name: Release jar + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: ./*.tar.bz2 + + - name: Upload wasm files + uses: actions/upload-artifact@v4 + with: + name: sherpa-onnx-wasm-simd-vad-asr-${{ matrix.index }} + path: ./sherpa-onnx-wasm-simd-*.tar.bz2 diff --git a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml index e0c665737..c72e0cef2 100644 --- a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml +++ b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml @@ -25,8 +25,12 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 + with: + version: 3.1.51 + actions-cache-folder: 'emsdk-cache' - name: View emsdk version shell: bash diff --git a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml index 500305420..b76f912b4 100644 --- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml +++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml @@ -25,8 +25,12 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 + with: + version: 3.1.51 + actions-cache-folder: 'emsdk-cache' - name: View emsdk version shell: bash diff --git 
a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml index dfa0e1614..9bdd90ee2 100644 --- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml +++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml @@ -25,8 +25,12 @@ jobs: - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Install emsdk uses: mymindstorm/setup-emsdk@v14 + with: + version: 3.1.51 + actions-cache-folder: 'emsdk-cache' - name: View emsdk version shell: bash diff --git a/CMakeLists.txt b/CMakeLists.txt index 7408f8d69..b71bb133d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,7 @@ option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF) option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF) option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF) option(SHERPA_ONNX_ENABLE_WASM_VAD "Whether to enable WASM for VAD" OFF) +option(SHERPA_ONNX_ENABLE_WASM_VAD_ASR "Whether to enable WASM for VAD+ASR" OFF) option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF) option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON) @@ -137,6 +138,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD ${SHERPA_ONNX_ENABLE_WASM_VAD}") +message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD_ASR ${SHERPA_ONNX_ENABLE_WASM_VAD_ASR}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}") message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}") message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}") @@ -211,11 +213,22 @@ if(SHERPA_ONNX_ENABLE_WASM) endif() if(SHERPA_ONNX_ENABLE_WASM_KWS) + if(NOT 
SHERPA_ONNX_ENABLE_WASM) + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for KWS") + endif() add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1) endif() if(SHERPA_ONNX_ENABLE_WASM_VAD) - add_definitions(-DSHERPA_ONNX_ENABLE_WASM_VAD=1) + if(NOT SHERPA_ONNX_ENABLE_WASM) + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for VAD") + endif() +endif() + +if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR) + if(NOT SHERPA_ONNX_ENABLE_WASM) + message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for VAD+ASR") + endif() endif() if(NOT CMAKE_CXX_STANDARD) diff --git a/README.md b/README.md index dcdaec2f2..cc9acb2b1 100644 --- a/README.md +++ b/README.md @@ -14,13 +14,13 @@ ### Supported platforms -|Architecture| Android | iOS | Windows | macOS | linux | -|------------|------------------|---------------|------------|-------|-------| -| x64 | ✔️ | | ✔️ | ✔️ | ✔️ | -| x86 | ✔️ | | ✔️ | | | -| arm64 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | -| arm32 | ✔️ | | | | ✔️ | -| riscv64 | | | | | ✔️ | +|Architecture| Android | iOS | Windows | macOS | linux | +|------------|---------|---------|------------|-------|-------| +| x64 | ✔️ | | ✔️ | ✔️ | ✔️ | +| x86 | ✔️ | | ✔️ | | | +| arm64 | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | +| arm32 | ✔️ | | | | ✔️ | +| riscv64 | | | | | ✔️ | ### Supported programming languages @@ -37,7 +37,7 @@ |-------|----------|----------|------------| | ✔️ | ✔️ | ✔️ | ✔️ | -For Rust support, please see https://github.com/thewh1teagle/sherpa-rs +For Rust support, please see [sherpa-rs][sherpa-rs] It also supports WebAssembly. 
@@ -51,7 +51,7 @@ This repository supports running the following functions **locally** - Speaker verification - Spoken language identification - Audio tagging - - VAD (e.g., [silero-vad](https://github.com/snakers4/silero-vad)) + - VAD (e.g., [silero-vad][silero-vad]) - Keyword spotting on the following platforms and operating systems: @@ -62,11 +62,12 @@ on the following platforms and operating systems: - iOS - NodeJS - WebAssembly - - [Raspberry Pi](https://www.raspberrypi.com/) - - [RV1126](https://www.rock-chips.com/uploads/pdf/2022.8.26/191/RV1126%20Brief%20Datasheet.pdf) - - [LicheePi4A](https://sipeed.com/licheepi4a) - - [VisionFive 2](https://www.starfivetech.com/en/site/boards) - - [旭日X3派](https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html) + - [Raspberry Pi][Raspberry Pi] + - [RV1126][RV1126] + - [LicheePi4A][LicheePi4A] + - [VisionFive 2][VisionFive 2] + - [旭日X3派][旭日X3派] + - [爱芯派][爱芯派] - etc with the following APIs @@ -81,59 +82,68 @@ with the following APIs You can visit the following Huggingface spaces to try `sherpa-onnx` without installing anything. All you need is a browser. 
-| Description | URL | -|---|---| -| Speech recognition | [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition)| -| Speech recognition with [Whisper](https://github.com/openai/whisper)| [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper)| -| Speech synthesis | [Click me](https://huggingface.co/spaces/k2-fsa/text-to-speech)| -| Generate subtitles| [Click me](https://huggingface.co/spaces/k2-fsa/generate-subtitles-for-videos)| -|Audio tagging| [Click me](https://huggingface.co/spaces/k2-fsa/audio-tagging)| -|Spoken language identification with [Whisper](https://github.com/openai/whisper)|[Click me](https://huggingface.co/spaces/k2-fsa/spoken-language-identification)| +| Description | URL | +|-------------------------------------------------------|------------------------------------| +| Speech recognition | [Click me][hf-space-asr] | +| Speech recognition with [Whisper][Whisper] | [Click me][hf-space-asr-whisper] | +| Speech synthesis | [Click me][hf-space-tts] | +| Generate subtitles | [Click me][hf-space-subtitle] | +| Audio tagging | [Click me][hf-space-audio-tagging] | +| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper] | We also have spaces built using WebAssembly. 
The are listed below: -| Description | URL| Chinese users| -|---|---|---| -|Voice activity detection with [silero-vad](https://github.com/snakers4/silero-vad)| [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx)|[地址](https://modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx)| -|Real-time speech recognition (Chinese + English) with Zipformer | [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)| -|Real-time speech recognition (Chinese + English) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)| -|Real-time speech recognition (Chinese + English + Cantonese) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)| -|Real-time speech recognition (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en)| -|Speech synthesis (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en)| -|Speech synthesis (German)|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de)| +| Description | Huggingface space| ModelScope space| +|------------------------------------------------------------------------------------------|------------------|-----------------| +|Voice activity detection with [silero-vad][silero-vad] | [Click me][wasm-hf-vad]|[地址][wasm-ms-vad]| +|Real-time speech recognition (Chinese + English) with 
Zipformer | [Click me][wasm-hf-streaming-asr-zh-en-zipformer]|[地址][wasm-hf-streaming-asr-zh-en-zipformer]| +|Real-time speech recognition (Chinese + English) with Paraformer |[Click me][wasm-hf-streaming-asr-zh-en-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-paraformer]| +|Real-time speech recognition (Chinese + English + Cantonese) with [Paraformer-large][Paraformer-large]|[Click me][wasm-hf-streaming-asr-zh-en-yue-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-yue-paraformer]| +|Real-time speech recognition (English) |[Click me][wasm-hf-streaming-asr-en-zipformer] |[地址][wasm-ms-streaming-asr-en-zipformer]| +|VAD + speech recognition (Chinese + English + Korean + Japanese + Cantonese) with [SenseVoice][SenseVoice]|[Click me][wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]| [地址][wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]| +|VAD + speech recognition (English) with [Whisper][Whisper] tiny.en|[Click me][wasm-hf-vad-asr-en-whisper-tiny-en]| [地址][wasm-ms-vad-asr-en-whisper-tiny-en]| +|VAD + speech recognition (English) with Zipformer trained with [GigaSpeech][GigaSpeech] |[Click me][wasm-hf-vad-asr-en-zipformer-gigaspeech]| [地址][wasm-ms-vad-asr-en-zipformer-gigaspeech]| +|VAD + speech recognition (Chinese) with Zipformer trained with [WenetSpeech][WenetSpeech] |[Click me][wasm-hf-vad-asr-zh-zipformer-wenetspeech]| [地址][wasm-ms-vad-asr-zh-zipformer-wenetspeech]| +|VAD + speech recognition (Japanese) with Zipformer trained with [ReazonSpeech][ReazonSpeech]|[Click me][wasm-hf-vad-asr-ja-zipformer-reazonspeech]| [地址][wasm-ms-vad-asr-ja-zipformer-reazonspeech]| +|VAD + speech recognition (Thai) with Zipformer trained with [GigaSpeech2][GigaSpeech2] |[Click me][wasm-hf-vad-asr-th-zipformer-gigaspeech2]| [地址][wasm-ms-vad-asr-th-zipformer-gigaspeech2]| +|VAD + speech recognition (Chinese 多种方言) with a [TeleSpeech-ASR][TeleSpeech-ASR] CTC model|[Click me][wasm-hf-vad-asr-zh-telespeech]| [地址][wasm-ms-vad-asr-zh-telespeech]| +|VAD + speech recognition (English + Chinese, 及多种中文方言) 
with Paraformer-large |[Click me][wasm-hf-vad-asr-zh-en-paraformer-large]| [地址][wasm-ms-vad-asr-zh-en-paraformer-large]| +|VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-small |[Click me][wasm-hf-vad-asr-zh-en-paraformer-small]| [地址][wasm-ms-vad-asr-zh-en-paraformer-small]| +|Speech synthesis (English) |[Click me][wasm-hf-tts-piper-en]| [地址][wasm-ms-tts-piper-en]| +|Speech synthesis (German) |[Click me][wasm-hf-tts-piper-de]| [地址][wasm-ms-tts-piper-de]| ### Links for pre-built Android APKs -| Description | URL | 中国用户 | -|--------------------------------|-----------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------| -| Streaming speech recognition | [Address](https://k2-fsa.github.io/sherpa/onnx/android/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html) | -| Text-to-speech | [Address](https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine-cn.html) | -|Voice activity detection (VAD) | [Address](https://k2-fsa.github.io/sherpa/onnx/vad/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/vad/apk-cn.html)| -|VAD + non-streaming speech recognition| [Address](https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr-cn.html)| -|Two-pass speech recognition| [Address](https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass-cn.html)| -| Audio tagging | [Address](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-cn.html) | -| Audio tagging (WearOS) | [Address](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos-cn.html) | -| Speaker identification | 
[Address](https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk-cn.html) | -| Spoken language identification | [Address](https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk-cn.html) | -|Keyword spotting| [Address](https://k2-fsa.github.io/sherpa/onnx/kws/apk.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/kws/apk-cn.html)| +| Description | URL | 中国用户 | +|----------------------------------------|------------------------------|-----------------------------| +| Streaming speech recognition | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn]| +| Text-to-speech | [Address][apk-tts] | [点此][apk-tts-cn] | +| Voice activity detection (VAD) | [Address][apk-vad] | [点此][apk-vad-cn] | +| VAD + non-streaming speech recognition | [Address][apk-vad-asr] | [点此][apk-vad-asr-cn] | +| Two-pass speech recognition | [Address][apk-2pass] | [点此][apk-2pass-cn] | +| Audio tagging | [Address][apk-at] | [点此][apk-at-cn] | +| Audio tagging (WearOS) | [Address][apk-at-wearos] | [点此][apk-at-wearos-cn] | +| Speaker identification | [Address][apk-sid] | [点此][apk-sid-cn] | +| Spoken language identification | [Address][apk-slid] | [点此][apk-slid-cn] | +| Keyword spotting | [Address][apk-kws] | [点此][apk-kws-cn] | ### Links for pre-built Flutter APPs #### Real-time speech recognition -| Description | URL | 中国用户 | -|--------------------------------|---------------------------------------------------------------------|---------------------------------------------------------------------| -| Streaming speech recognition | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app-cn.html)| +| Description | URL | 中国用户 | +|--------------------------------|-------------------------------------|-------------------------------------| +| Streaming 
speech recognition | [Address][apk-flutter-streaming-asr]| [点此][apk-flutter-streaming-asr-cn]| #### Text-to-speech -| Description | URL | 中国用户 | -|--------------------------------|--------------------------------------------------------------|-----------------------------------------------------------------------------| -| Android (arm64-v8a, armeabi-v7a, x86_64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android-cn.html)| -| Linux (x64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux-cn.html) | -| macOS (x64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64-cn.html) | -| macOS (arm64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64-cn.html)| -| Windows (x64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win-cn.html) | +| Description | URL | 中国用户 | +|------------------------------------------|------------------------------------|------------------------------------| +| Android (arm64-v8a, armeabi-v7a, x86_64) | [Address][flutter-tts-android] | [点此][flutter-tts-android-cn] | +| Linux (x64) | [Address][flutter-tts-linux] | [点此][flutter-tts-linux-cn] | +| macOS (x64) | [Address][flutter-tts-macos-x64] | [点此][flutter-tts-macos-arm64-cn] | +| macOS (arm64) | [Address][flutter-tts-macos-arm64] | [点此][flutter-tts-macos-x64-cn] | +| Windows (x64) | [Address][flutter-tts-win-x64] | [点此][flutter-tts-win-x64-cn] | > Note: You need to build from source for iOS. @@ -141,23 +151,23 @@ We also have spaces built using WebAssembly. 
The are listed below: #### Generating subtitles -| Description | URL | 中国用户 | -|--------------------------------|---------------------------------------------------------------------|---------------------------------------------------------------------| -| Generate subtitles (生成字幕) | [Address](https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles-cn.html)| +| Description | URL | 中国用户 | +|--------------------------------|----------------------------|----------------------------| +| Generate subtitles (生成字幕) | [Address][lazarus-subtitle]| [点此][lazarus-subtitle-cn]| ### Links for pre-trained models -| Description | URL | -|--------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| Speech recognition (speech to text, ASR) | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models) | -| Text-to-speech (TTS) | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models) | -| VAD | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx)| -| Keyword spotting |[Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models)| -| Audio tagging | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models)| -| Speaker identification (Speaker ID) | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models)| -| Spoken language identification (Language ID) | See multi-lingual [Whisper](https://github.com/openai/whisper) ASR models from [Speech recognition](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models) | -| Punctuation| [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models)| +| Description | URL | 
+|---------------------------------------------|---------------------------------------------------------------------------------------| +| Speech recognition (speech to text, ASR) | [Address][asr-models] | +| Text-to-speech (TTS) | [Address][tts-models] | +| VAD | [Address][vad-models] | +| Keyword spotting | [Address][kws-models] | +| Audio tagging | [Address][at-models] | +| Speaker identification (Speaker ID) | [Address][sid-models] | +| Spoken language identification (Language ID)| See multi-lingual [Whisper][Whisper] ASR models from [Speech recognition][asr-models]| +| Punctuation | [Address][punct-models] | ### Useful links @@ -169,3 +179,100 @@ We also have spaces built using WebAssembly. The are listed below: Please see https://k2-fsa.github.io/sherpa/social-groups.html for 新一代 Kaldi **微信交流群** and **QQ 交流群**. + +[sherpa-rs]: https://github.com/thewh1teagle/sherpa-rs +[silero-vad]: https://github.com/snakers4/silero-vad +[Raspberry Pi]: https://www.raspberrypi.com/ +[RV1126]: https://www.rock-chips.com/uploads/pdf/2022.8.26/191/RV1126%20Brief%20Datasheet.pdf +[LicheePi4A]: https://sipeed.com/licheepi4a +[VisionFive 2]: https://www.starfivetech.com/en/site/boards +[旭日X3派]: https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html +[爱芯派]: https://wiki.sipeed.com/hardware/zh/maixIII/ax-pi/axpi.html +[hf-space-asr]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition +[Whisper]: https://github.com/openai/whisper +[hf-space-asr-whisper]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper +[hf-space-tts]: https://huggingface.co/spaces/k2-fsa/text-to-speech +[hf-space-subtitle]: https://huggingface.co/spaces/k2-fsa/generate-subtitles-for-videos +[hf-space-audio-tagging]: https://huggingface.co/spaces/k2-fsa/audio-tagging +[hf-space-slid-whisper]: https://huggingface.co/spaces/k2-fsa/spoken-language-identification +[wasm-hf-vad]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx +[wasm-ms-vad]: 
https://modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx +[wasm-hf-streaming-asr-zh-en-zipformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en +[wasm-ms-streaming-asr-zh-en-zipformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en +[wasm-hf-streaming-asr-zh-en-paraformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer +[wasm-ms-streaming-asr-zh-en-paraformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer +[Paraformer-large]: https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary +[wasm-hf-streaming-asr-zh-en-yue-paraformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer +[wasm-ms-streaming-asr-zh-en-yue-paraformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer +[wasm-hf-streaming-asr-en-zipformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en +[wasm-ms-streaming-asr-en-zipformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en +[SenseVoice]: https://github.com/FunAudioLLM/SenseVoice +[wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-ja-ko-cantonese-sense-voice +[wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice +[wasm-hf-vad-asr-en-whisper-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny +[wasm-ms-vad-asr-en-whisper-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny +[wasm-hf-vad-asr-en-zipformer-gigaspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech +[wasm-ms-vad-asr-en-zipformer-gigaspeech]: 
https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech +[wasm-hf-vad-asr-zh-zipformer-wenetspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech +[wasm-ms-vad-asr-zh-zipformer-wenetspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech +[ReazonSpeech]: https://research.reazon.jp/_static/reazonspeech_nlp2023.pdf +[wasm-hf-vad-asr-ja-zipformer-reazonspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-ja-zipformer +[wasm-ms-vad-asr-ja-zipformer-reazonspeech]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-ja-zipformer +[GigaSpeech2]: https://github.com/SpeechColab/GigaSpeech2 +[wasm-hf-vad-asr-th-zipformer-gigaspeech2]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-th-zipformer +[wasm-ms-vad-asr-th-zipformer-gigaspeech2]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-th-zipformer +[TeleSpeech-ASR]: https://github.com/Tele-AI/TeleSpeech-ASR +[wasm-hf-vad-asr-zh-telespeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech +[wasm-ms-vad-asr-zh-telespeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech +[wasm-hf-vad-asr-zh-en-paraformer-large]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer +[wasm-ms-vad-asr-zh-en-paraformer-large]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer +[wasm-hf-vad-asr-zh-en-paraformer-small]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small +[wasm-ms-vad-asr-zh-en-paraformer-small]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small +[wasm-hf-tts-piper-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en 
+[wasm-ms-tts-piper-en]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en +[wasm-hf-tts-piper-de]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de +[wasm-ms-tts-piper-de]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de +[apk-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk.html +[apk-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html +[apk-tts]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html +[apk-tts-cn]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine-cn.html +[apk-vad]: https://k2-fsa.github.io/sherpa/onnx/vad/apk.html +[apk-vad-cn]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-cn.html +[apk-vad-asr]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr.html +[apk-vad-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr-cn.html +[apk-2pass]: https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass.html +[apk-2pass-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass-cn.html +[apk-at]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk.html +[apk-at-cn]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-cn.html +[apk-at-wearos]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos.html +[apk-at-wearos-cn]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos-cn.html +[apk-sid]: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html +[apk-sid-cn]: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk-cn.html +[apk-slid]: https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk.html +[apk-slid-cn]: https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk-cn.html +[apk-kws]: https://k2-fsa.github.io/sherpa/onnx/kws/apk.html +[apk-kws-cn]: https://k2-fsa.github.io/sherpa/onnx/kws/apk-cn.html +[apk-flutter-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app.html +[apk-flutter-streaming-asr-cn]: 
https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app-cn.html +[flutter-tts-android]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android.html +[flutter-tts-android-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android-cn.html +[flutter-tts-linux]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux.html +[flutter-tts-linux-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux-cn.html +[flutter-tts-macos-x64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64.html +[flutter-tts-macos-arm64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64-cn.html +[flutter-tts-macos-arm64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64.html +[flutter-tts-macos-x64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64-cn.html +[flutter-tts-win-x64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win.html +[flutter-tts-win-x64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win-cn.html +[lazarus-subtitle]: https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles.html +[lazarus-subtitle-cn]: https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles-cn.html +[asr-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models +[tts-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models +[vad-models]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx +[kws-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models +[at-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models +[sid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models +[slid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models +[punct-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models +[GigaSpeech]: https://github.com/SpeechColab/GigaSpeech +[WenetSpeech]: https://github.com/wenet-e2e/WenetSpeech diff --git 
a/build-wasm-simd-vad-asr.sh b/build-wasm-simd-vad-asr.sh new file mode 100755 index 000000000..5d15cf651 --- /dev/null +++ b/build-wasm-simd-vad-asr.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# Copyright (c) 2024 Xiaomi Corporation +# +# This script is to build sherpa-onnx for WebAssembly (VAD+ASR) +# Note: ASR here means non-streaming ASR + +set -ex + +if [ x"$EMSCRIPTEN" == x"" ]; then + if ! command -v emcc &> /dev/null; then + echo "Please install emscripten first" + echo "" + echo "You can use the following commands to install it:" + echo "" + echo "git clone https://github.com/emscripten-core/emsdk.git" + echo "cd emsdk" + echo "git pull" + echo "./emsdk install latest" + echo "./emsdk activate latest" + echo "source ./emsdk_env.sh" + exit 1 + else + EMSCRIPTEN=$(dirname $(realpath $(which emcc))) + fi +fi + +export EMSCRIPTEN=$EMSCRIPTEN +echo "EMSCRIPTEN: $EMSCRIPTEN" +if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then + echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" + echo "Please make sure you have installed emsdk correctly" + exit 1 +fi + +mkdir -p build-wasm-simd-vad-asr +pushd build-wasm-simd-vad-asr + +export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON + +cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \ + \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=OFF \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=OFF \ + -DSHERPA_ONNX_ENABLE_TTS=OFF \ + -DSHERPA_ONNX_ENABLE_C_API=ON \ + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ + -DSHERPA_ONNX_ENABLE_GPU=OFF \ + -DSHERPA_ONNX_ENABLE_WASM=ON \ + -DSHERPA_ONNX_ENABLE_WASM_VAD_ASR=ON \ + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ + -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \ + .. 
+make -j2 +make install + +echo "pwd: $PWD" + +cp -fv ../wasm/vad/sherpa-onnx-vad.js ./install/bin/wasm/vad-asr/ +cp -fv ../wasm/asr/sherpa-onnx-asr.js ./install/bin/wasm/vad-asr/ + +ls -lh install/bin/wasm/vad-asr diff --git a/scripts/wasm/generate-vad-asr.py b/scripts/wasm/generate-vad-asr.py new file mode 100755 index 000000000..4c0099af8 --- /dev/null +++ b/scripts/wasm/generate-vad-asr.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python3 + +import argparse +from dataclasses import dataclass +from typing import List, Optional + +import jinja2 + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--total", + type=int, + default=1, + help="Number of runners", + ) + parser.add_argument( + "--index", + type=int, + default=0, + help="Index of the current runner", + ) + return parser.parse_args() + + +@dataclass +class Model: + model_name: str + hf: str # huggingface space name + ms: str # modelscope space name + short_name: str + cmd: str = "" + + +def get_models(): + models = [ + Model( + model_name="sherpa-onnx-whisper-tiny.en", + hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny", + ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny", + short_name="vad-asr-en-whisper_tiny", + cmd=""" + pushd $model_name + mv -v tiny.en-encoder.int8.onnx ../whisper-encoder.onnx + mv -v tiny.en-decoder.int8.onnx ../whisper-decoder.onnx + mv -v tiny.en-tokens.txt ../tokens.txt + popd + rm -rf $model_name + sed -i.bak 's/Zipformer/Whisper tiny.en supporting English 英文/g' ../index.html + git diff + """, + ), + Model( + model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17", + hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-ja-ko-cantonese-sense-voice", + ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice", + short_name="vad-asr-zh_en_ja_ko_cantonese-sense_voice_small", + cmd=""" + pushd $model_name + mv -v model.int8.onnx ../sense-voice.onnx + mv -v tokens.txt ../ + popd + rm -rf $model_name + sed 
-i.bak 's/Zipformer/SenseVoice Small supporting English, Chinese, Japanese, Korean, Cantonese 中英日韩粤/g' ../index.html + git diff + """, + ), + Model( + model_name="sherpa-onnx-paraformer-zh-2023-09-14", + hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer", + ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer", + short_name="vad-asr-zh_en-paraformer_large", + cmd=""" + pushd $model_name + mv -v model.int8.onnx ../paraformer.onnx + mv -v tokens.txt ../ + popd + rm -rf $model_name + sed -i.bak 's/Zipformer/Paraformer supporting Chinese, English 中英/g' ../index.html + git diff + """, + ), + Model( + model_name="sherpa-onnx-paraformer-zh-small-2024-03-09", + hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small", + ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small", + short_name="vad-asr-zh_en-paraformer_small", + cmd=""" + pushd $model_name + mv -v model.int8.onnx ../paraformer.onnx + mv -v tokens.txt ../ + popd + rm -rf $model_name + sed -i.bak 's/Zipformer/Paraformer-small supporting Chinese, English 中英文/g' ../index.html + git diff + """, + ), + Model( + model_name="sherpa-onnx-zipformer-gigaspeech-2023-12-12", + hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech", + ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech", + short_name="vad-asr-en-zipformer_gigaspeech", + cmd=""" + pushd $model_name + mv encoder-epoch-30-avg-1.int8.onnx ../transducer-encoder.onnx + mv decoder-epoch-30-avg-1.onnx ../transducer-decoder.onnx + mv joiner-epoch-30-avg-1.int8.onnx ../transducer-joiner.onnx + mv tokens.txt ../ + popd + rm -rf $model_name + sed -i.bak 's/Zipformer/Zipformer supporting English 英语/g' ../index.html + git diff + """, + ), + Model( + model_name="icefall-asr-zipformer-wenetspeech-20230615", + hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech", + ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech", + short_name="vad-asr-zh-zipformer_wenetspeech", + 
cmd=""" + pushd $model_name + mv -v data/lang_char/tokens.txt ../ + mv -v exp/encoder-epoch-12-avg-4.int8.onnx ../transducer-encoder.onnx + mv -v exp/decoder-epoch-12-avg-4.onnx ../transducer-decoder.onnx + mv -v exp/joiner-epoch-12-avg-4.int8.onnx ../transducer-joiner.onnx + popd + rm -rf $model_name + sed -i.bak 's/Zipformer/Zipformer supporting Chinese 中文/g' ../index.html + git diff + """, + ), + Model( + model_name="sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01", + hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-ja-zipformer", + ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-ja-zipformer", + short_name="vad-asr-ja-zipformer_reazonspeech", + cmd=""" + pushd $model_name + mv encoder-epoch-99-avg-1.int8.onnx ../transducer-encoder.onnx + mv decoder-epoch-99-avg-1.onnx ../transducer-decoder.onnx + mv joiner-epoch-99-avg-1.int8.onnx ../transducer-joiner.onnx + mv tokens.txt ../ + popd + rm -rf $model_name + sed -i.bak 's/Zipformer/Zipformer supporting Japanese 日语/g' ../index.html + git diff + """, + ), + Model( + model_name="sherpa-onnx-zipformer-thai-2024-06-20", + hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-th-zipformer", + ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-th-zipformer", + short_name="vad-asr-th-zipformer_gigaspeech2", + cmd=""" + pushd $model_name + mv encoder-epoch-12-avg-5.int8.onnx ../transducer-encoder.onnx + mv decoder-epoch-12-avg-5.onnx ../transducer-decoder.onnx + mv joiner-epoch-12-avg-5.int8.onnx ../transducer-joiner.onnx + mv tokens.txt ../ + popd + rm -rf $model_name + sed -i.bak 's/Zipformer/Zipformer supporting Thai 泰语/g' ../index.html + git diff + """, + ), + Model( + model_name="sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04", + hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech", + ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech", + short_name="vad-asr-zh-telespeech", + cmd=""" + pushd $model_name + mv model.int8.onnx ../telespeech.onnx + mv tokens.txt ../ + popd + rm -rf $model_name + sed -i.bak 
's/Zipformer/TeleSpeech-ASR supporting Chinese 多种中文方言/g' ../index.html + git diff + """, + ), + ] + return models + + +def main(): + args = get_args() + index = args.index + total = args.total + assert 0 <= index < total, (index, total) + + all_model_list = get_models() + + num_models = len(all_model_list) + + num_per_runner = num_models // total + if num_per_runner <= 0: + raise ValueError(f"num_models: {num_models}, num_runners: {total}") + + start = index * num_per_runner + end = start + num_per_runner + + remaining = num_models - args.total * num_per_runner + + print(f"{index}/{total}: {start}-{end}/{num_models}") + + d = dict() + d["model_list"] = all_model_list[start:end] + if index < remaining: + s = args.total * num_per_runner + index + d["model_list"].append(all_model_list[s]) + print(f"{s}/{num_models}") + + filename_list = [ + "./run-vad-asr.sh", + ] + for filename in filename_list: + environment = jinja2.Environment() + with open(f"{filename}.in") as f: + s = f.read() + template = environment.from_string(s) + + s = template.render(**d) + with open(filename, "w") as f: + print(s, file=f) + + +if __name__ == "__main__": + main() diff --git a/scripts/wasm/run-vad-asr.sh.in b/scripts/wasm/run-vad-asr.sh.in new file mode 100644 index 000000000..8d5e1d206 --- /dev/null +++ b/scripts/wasm/run-vad-asr.sh.in @@ -0,0 +1,92 @@ +#!/usr/bin/env bash +# +# Build WebAssembly APPs for huggingface spaces and modelscope spaces + +set -ex + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + + +{% for model in model_list %} +model_name={{ model.model_name }} +short_name={{ model.short_name }} +hf_name={{ model.hf }} +ms_name={{ model.ms }} + +pushd wasm/vad-asr +git checkout . 
+rm -rf assets +mkdir assets +cd assets +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2 +tar xvf ${model_name}.tar.bz2 +rm ${model_name}.tar.bz2 + +{{ model.cmd }} + +popd + +ls -lh wasm/vad-asr/assets + +rm -rf build-wasm-simd-vad-asr/install +rm -rf build-wasm-simd-vad-asr/wasm + +./build-wasm-simd-vad-asr.sh + +dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-${short_name} +mv build-wasm-simd-vad-asr/install/bin/wasm/vad-asr $dst +ls -lh $dst +tar cjfv $dst.tar.bz2 ./$dst +ls -lh *.tar.bz2 + +git config --global user.email "csukuangfj@gmail.com" +git config --global user.name "Fangjun Kuang" + +export GIT_LFS_SKIP_SMUDGE=1 +export GIT_CLONE_PROTECTION_ACTIVE=false + +rm -rf ms +git clone https://www.modelscope.cn/studios/$ms_name.git ms + +cd ms +cp -v ../$dst/* . + +git status +git lfs track "*.data" +git lfs track "*.wasm" +ls -lh + +git add . +git commit -m "update model" +git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/$ms_name.git +cd .. +rm -rf ms + +rm -rf huggingface + +git clone https://huggingface.co/spaces/$hf_name huggingface +cd huggingface +cp -v ../$dst/* . + +git status +git lfs track "*.data" +git lfs track "*.wasm" +ls -lh + +git add . +git commit -m "update model" +git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/$hf_name main +cd .. 
+rm -rf huggingface +rm -rf $dst + +ls -lh *.tar.bz2 + +{% endfor %} diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index e01ae0478..f2bbf9d76 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -13,6 +13,7 @@ #include "sherpa-onnx/csrc/audio-tagging.h" #include "sherpa-onnx/csrc/circular-buffer.h" #include "sherpa-onnx/csrc/display.h" +#include "sherpa-onnx/csrc/file-utils.h" #include "sherpa-onnx/csrc/keyword-spotter.h" #include "sherpa-onnx/csrc/macros.h" #include "sherpa-onnx/csrc/offline-punctuation.h" @@ -1638,3 +1639,7 @@ int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate( void SherpaOnnxLinearResamplerReset(SherpaOnnxLinearResampler *p) { p->impl->Reset(); } + +int32_t SherpaOnnxFileExists(const char *filename) { + return sherpa_onnx::FileExists(filename); +} diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 97b8d8081..d4844aed1 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -1361,6 +1361,9 @@ SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate( SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate( const SherpaOnnxLinearResampler *p); +// Return 1 if the file exists; return 0 if the file does not exist. 
+SHERPA_ONNX_API int32_t SherpaOnnxFileExists(const char *filename); + #if defined(__GNUC__) #pragma GCC diagnostic pop #endif diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index 075dfbf8d..b143e57b8 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -14,6 +14,10 @@ if(SHERPA_ONNX_ENABLE_WASM_VAD) add_subdirectory(vad) endif() +if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR) + add_subdirectory(vad-asr) +endif() + if(SHERPA_ONNX_ENABLE_WASM_NODEJS) add_subdirectory(nodejs) endif() diff --git a/wasm/asr/assets/README.md b/wasm/asr/assets/README.md index d37c431a7..983347f78 100644 --- a/wasm/asr/assets/README.md +++ b/wasm/asr/assets/README.md @@ -80,3 +80,10 @@ assets fangjun$ tree -L 1 0 directories, 4 files ``` + +You can find example build scripts at: + + - Streaming Zipformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml + - Streaming Zipformer (English): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml + - Streaming Paraformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml + - Streaming Paraformer (English + Chinese + Cantonese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml diff --git a/wasm/asr/index.html b/wasm/asr/index.html index 3156321c6..53ee43d8f 100644 --- a/wasm/asr/index.html +++ b/wasm/asr/index.html @@ -3,7 +3,7 @@ - Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech + Next-gen Kaldi WebAssembly with sherpa-onnx for ASR + + + +

+ Next-gen Kaldi + WebAssembly
+ VAD+ASR Demo with sherpa-onnx
+ (with Zipformer) +

+ +
+ Loading model ... ... +
+
+ + + +
+
+ +
+ +
+
+ + + + + + diff --git a/wasm/vad-asr/sherpa-onnx-asr.js b/wasm/vad-asr/sherpa-onnx-asr.js new file mode 120000 index 000000000..fada5db1d --- /dev/null +++ b/wasm/vad-asr/sherpa-onnx-asr.js @@ -0,0 +1 @@ +../asr/sherpa-onnx-asr.js \ No newline at end of file diff --git a/wasm/vad-asr/sherpa-onnx-vad.js b/wasm/vad-asr/sherpa-onnx-vad.js new file mode 120000 index 000000000..47b3c8d0b --- /dev/null +++ b/wasm/vad-asr/sherpa-onnx-vad.js @@ -0,0 +1 @@ +../vad/sherpa-onnx-vad.js \ No newline at end of file diff --git a/wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc b/wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc new file mode 100644 index 000000000..1e2fc00b2 --- /dev/null +++ b/wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc @@ -0,0 +1,19 @@ +// wasm/sherpa-onnx-wasm-main-vad-asr.cc +// +// Copyright (c) 2024 Xiaomi Corporation +#include + +#include +#include + +#include "sherpa-onnx/c-api/c-api.h" + +// see also +// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html + +extern "C" { + +void CopyHeap(const char *src, int32_t num_bytes, char *dst) { + std::copy(src, src + num_bytes, dst); +} +} diff --git a/wasm/vad/assets/README.md b/wasm/vad/assets/README.md index 99510982a..3d5a76210 100644 --- a/wasm/vad/assets/README.md +++ b/wasm/vad/assets/README.md @@ -3,3 +3,6 @@ Please download https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`. + +You can find an example build script at +https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-silero-vad.yaml diff --git a/wasm/vad/index.html b/wasm/vad/index.html index 5d8e0372c..7ae2a76e6 100644 --- a/wasm/vad/index.html +++ b/wasm/vad/index.html @@ -3,7 +3,7 @@ - Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech + Next-gen Kaldi WebAssembly with sherpa-onnx for VAD + + + +

+ Next-gen Kaldi + WebAssembly
+ Speaker Diarization
with sherpa-onnx +

+
+ Loading model ... ... +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+ + + + + diff --git a/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js b/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js new file mode 100644 index 000000000..ccfc8373c --- /dev/null +++ b/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js @@ -0,0 +1,295 @@ + +function freeConfig(config, Module) { + if ('buffer' in config) { + Module._free(config.buffer); + } + + if ('config' in config) { + freeConfig(config.config, Module) + } + + if ('segmentation' in config) { + freeConfig(config.segmentation, Module) + } + + if ('embedding' in config) { + freeConfig(config.embedding, Module) + } + + if ('clustering' in config) { + freeConfig(config.clustering, Module) + } + + Module._free(config.ptr); +} + +function initSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig( + config, Module) { + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1; + const n = modelLen; + const buffer = Module._malloc(n); + + const len = 1 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8(config.model || '', buffer + offset, modelLen); + offset += modelLen; + + offset = 0; + Module.setValue(ptr, buffer + offset, 'i8*'); + + return { + buffer: buffer, ptr: ptr, len: len, + } +} + +function initSherpaOnnxOfflineSpeakerSegmentationModelConfig(config, Module) { + if (!('pyannote' in config)) { + config.pyannote = { + model: '', + }; + } + + const pyannote = initSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig( + config.pyannote, Module); + + const len = pyannote.len + 3 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module._CopyHeap(pyannote.ptr, pyannote.len, ptr + offset); + offset += pyannote.len; + + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.debug || 1, 'i32'); + offset += 4; + + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; + const buffer = Module._malloc(providerLen); + 
Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen); + Module.setValue(ptr + offset, buffer, 'i8*'); + + return { + buffer: buffer, + ptr: ptr, + len: len, + config: pyannote, + }; +} + +function initSherpaOnnxSpeakerEmbeddingExtractorConfig(config, Module) { + const modelLen = Module.lengthBytesUTF8(config.model || '') + 1; + const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; + const n = modelLen + providerLen; + const buffer = Module._malloc(n); + + const len = 4 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.stringToUTF8(config.model || '', buffer + offset, modelLen); + offset += modelLen; + + Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen); + offset += providerLen; + + offset = 0 + Module.setValue(ptr + offset, buffer, 'i8*'); + offset += 4; + + Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.debug || 1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, buffer + modelLen, 'i8*'); + offset += 4; + + return { + buffer: buffer, + ptr: ptr, + len: len, + }; +} + +function initSherpaOnnxFastClusteringConfig(config, Module) { + const len = 2 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module.setValue(ptr + offset, config.numClusters || -1, 'i32'); + offset += 4; + + Module.setValue(ptr + offset, config.threshold || 0.5, 'float'); + offset += 4; + + return { + ptr: ptr, + len: len, + }; +} + +function initSherpaOnnxOfflineSpeakerDiarizationConfig(config, Module) { + if (!('segmentation' in config)) { + config.segmentation = { + pyannote: {model: ''}, + numThreads: 1, + debug: 0, + provider: 'cpu', + }; + } + + if (!('embedding' in config)) { + config.embedding = { + model: '', + numThreads: 1, + debug: 0, + provider: 'cpu', + }; + } + + if (!('clustering' in config)) { + config.clustering = { + numClusters: -1, + threshold: 0.5, + }; + } + + const segmentation = 
initSherpaOnnxOfflineSpeakerSegmentationModelConfig( + config.segmentation, Module); + + const embedding = + initSherpaOnnxSpeakerEmbeddingExtractorConfig(config.embedding, Module); + + const clustering = + initSherpaOnnxFastClusteringConfig(config.clustering, Module); + + const len = segmentation.len + embedding.len + clustering.len + 2 * 4; + const ptr = Module._malloc(len); + + let offset = 0; + Module._CopyHeap(segmentation.ptr, segmentation.len, ptr + offset); + offset += segmentation.len; + + Module._CopyHeap(embedding.ptr, embedding.len, ptr + offset); + offset += embedding.len; + + Module._CopyHeap(clustering.ptr, clustering.len, ptr + offset); + offset += clustering.len; + + Module.setValue(ptr + offset, config.minDurationOn || 0.2, 'float'); + offset += 4; + + Module.setValue(ptr + offset, config.minDurationOff || 0.5, 'float'); + offset += 4; + + return { + ptr: ptr, len: len, segmentation: segmentation, embedding: embedding, + clustering: clustering, + } +} + +class OfflineSpeakerDiarization { + constructor(configObj, Module) { + const config = + initSherpaOnnxOfflineSpeakerDiarizationConfig(configObj, Module) + // Module._MyPrint(config.ptr); + + const handle = + Module._SherpaOnnxCreateOfflineSpeakerDiarization(config.ptr); + + freeConfig(config, Module); + + this.handle = handle; + this.sampleRate = + Module._SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(this.handle); + this.Module = Module + + this.config = configObj; + } + + free() { + this.Module._SherpaOnnxDestroyOfflineSpeakerDiarization(this.handle); + this.handle = 0 + } + + setConfig(configObj) { + if (!('clustering' in configObj)) { + return; + } + + const config = + initSherpaOnnxOfflineSpeakerDiarizationConfig(configObj, this.Module); + + this.Module._SherpaOnnxOfflineSpeakerDiarizationSetConfig( + this.handle, config.ptr); + + freeConfig(config, Module); + + this.config.clustering = configObj.clustering; + } + + process(samples) { + const pointer = + this.Module._malloc(samples.length 
* samples.BYTES_PER_ELEMENT); + this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT); + + let r = this.Module._SherpaOnnxOfflineSpeakerDiarizationProcess( + this.handle, pointer, samples.length); + this.Module._free(pointer); + + let numSegments = + this.Module._SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r); + + let segments = + this.Module._SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime( + r); + + let ans = []; + + let sizeOfSegment = 3 * 4; + for (let i = 0; i < numSegments; ++i) { + let p = segments + i * sizeOfSegment + + let start = this.Module.HEAPF32[p / 4 + 0]; + let end = this.Module.HEAPF32[p / 4 + 1]; + let speaker = this.Module.HEAP32[p / 4 + 2]; + + ans.push({start: start, end: end, speaker: speaker}); + } + + this.Module._SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments); + this.Module._SherpaOnnxOfflineSpeakerDiarizationDestroyResult(r); + + return ans; + } +} + +function createOfflineSpeakerDiarization(Module, myConfig) { + const config = { + segmentation: { + pyannote: {model: './segmentation.onnx'}, + }, + embedding: {model: './embedding.onnx'}, + clustering: {numClusters: -1, threshold: 0.5}, + minDurationOn: 0.3, + minDurationOff: 0.5, + }; + + if (myConfig) { + config = myConfig; + } + + return new OfflineSpeakerDiarization(config, Module); +} + +if (typeof process == 'object' && typeof process.versions == 'object' && + typeof process.versions.node == 'string') { + module.exports = { + createOfflineSpeakerDiarization, + }; +} diff --git a/wasm/speaker-diarization/sherpa-onnx-wasm-main-speaker-diarization.cc b/wasm/speaker-diarization/sherpa-onnx-wasm-main-speaker-diarization.cc new file mode 100644 index 000000000..6e83f61d8 --- /dev/null +++ b/wasm/speaker-diarization/sherpa-onnx-wasm-main-speaker-diarization.cc @@ -0,0 +1,63 @@ +// wasm/sherpa-onnx-wasm-main-speaker-diarization.cc +// +// Copyright (c) 2024 Xiaomi Corporation +#include + +#include +#include + +#include 
"sherpa-onnx/c-api/c-api.h" + +// see also +// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html + +extern "C" { + +static_assert(sizeof(SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig) == + 1 * 4, + ""); + +static_assert( + sizeof(SherpaOnnxOfflineSpeakerSegmentationModelConfig) == + sizeof(SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig) + 3 * 4, + ""); + +static_assert(sizeof(SherpaOnnxFastClusteringConfig) == 2 * 4, ""); + +static_assert(sizeof(SherpaOnnxSpeakerEmbeddingExtractorConfig) == 4 * 4, ""); + +static_assert(sizeof(SherpaOnnxOfflineSpeakerDiarizationConfig) == + sizeof(SherpaOnnxOfflineSpeakerSegmentationModelConfig) + + sizeof(SherpaOnnxSpeakerEmbeddingExtractorConfig) + + sizeof(SherpaOnnxFastClusteringConfig) + 2 * 4, + ""); + +void MyPrint(const SherpaOnnxOfflineSpeakerDiarizationConfig *sd_config) { + const auto &segmentation = sd_config->segmentation; + const auto &embedding = sd_config->embedding; + const auto &clustering = sd_config->clustering; + + fprintf(stdout, "----------segmentation config----------\n"); + fprintf(stdout, "pyannote model: %s\n", segmentation.pyannote.model); + fprintf(stdout, "num threads: %d\n", segmentation.num_threads); + fprintf(stdout, "debug: %d\n", segmentation.debug); + fprintf(stdout, "provider: %s\n", segmentation.provider); + + fprintf(stdout, "----------embedding config----------\n"); + fprintf(stdout, "model: %s\n", embedding.model); + fprintf(stdout, "num threads: %d\n", embedding.num_threads); + fprintf(stdout, "debug: %d\n", embedding.debug); + fprintf(stdout, "provider: %s\n", embedding.provider); + + fprintf(stdout, "----------clustering config----------\n"); + fprintf(stdout, "num_clusters: %d\n", clustering.num_clusters); + fprintf(stdout, "threshold: %.3f\n", clustering.threshold); + + fprintf(stdout, "min_duration_on: %.3f\n", sd_config->min_duration_on); + fprintf(stdout, "min_duration_off: %.3f\n", sd_config->min_duration_off); +} + +void 
CopyHeap(const char *src, int32_t num_bytes, char *dst) { + std::copy(src, src + num_bytes, dst); +} +} From f1b311ee4fe4d84468ed93d8479097b21d13c5d2 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 11 Oct 2024 10:27:16 +0800 Subject: [PATCH 191/201] Handle audio files less than 10s long for speaker diarization. (#1412) If the input audio file is less than 10 seconds long, there is only one chunk, and there is no need to compute embeddings or do clustering. We can use the segmentation result from the speaker segmentation model directly. --- ...ffline-speaker-diarization-pyannote-impl.h | 35 +++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h b/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h index 9667088d5..8f669e27c 100644 --- a/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h +++ b/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h @@ -99,6 +99,14 @@ class OfflineSpeakerDiarizationPyannoteImpl segmentations.clear(); + if (labels.size() == 1) { + if (callback) { + callback(1, 1, callback_arg); + } + + return HandleOneChunkSpecialCase(labels[0], n); + } + // labels[i] is a 0-1 matrix of shape (num_frames, num_speakers) // speaker count per frame @@ -201,7 +209,7 @@ class OfflineSpeakerDiarizationPyannoteImpl } int32_t num_chunks = (n - window_size) / window_shift + 1; - bool has_last_chunk = (n - window_size) % window_shift > 0; + bool has_last_chunk = ((n - window_size) % window_shift) > 0; ans.reserve(num_chunks + has_last_chunk); @@ -524,9 +532,9 @@ class OfflineSpeakerDiarizationPyannoteImpl count(seq, Eigen::all).array() += labels[i].array(); } - bool has_last_chunk = (num_samples - window_size) % window_shift > 0; + bool has_last_chunk = ((num_samples - window_size) % window_shift) > 0; - if (has_last_chunk) { + if (!has_last_chunk) { return count; } @@ -622,6 +630,27 @@ class OfflineSpeakerDiarizationPyannoteImpl return ans; } 
+ OfflineSpeakerDiarizationResult HandleOneChunkSpecialCase( + const Matrix2DInt32 &final_labels, int32_t num_samples) const { + const auto &meta_data = segmentation_model_.GetModelMetaData(); + int32_t window_size = meta_data.window_size; + int32_t window_shift = meta_data.window_shift; + int32_t receptive_field_shift = meta_data.receptive_field_shift; + + bool has_last_chunk = (num_samples - window_size) % window_shift > 0; + if (!has_last_chunk) { + return ComputeResult(final_labels); + } + + int32_t num_frames = final_labels.rows(); + + int32_t new_num_frames = num_samples / receptive_field_shift; + + num_frames = (new_num_frames <= num_frames) ? new_num_frames : num_frames; + + return ComputeResult(final_labels(Eigen::seq(0, num_frames), Eigen::all)); + } + void MergeSegments( std::vector *segments) const { float min_duration_off = config_.min_duration_off; From eefc17209589fe3b950f561bc9c8b8d1c9b8a742 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 11 Oct 2024 11:40:10 +0800 Subject: [PATCH 192/201] JavaScript API with WebAssembly for speaker diarization (#1414) #1408 uses [node-addon-api](https://github.com/nodejs/node-addon-api) to call C API from JavaScript, whereas this pull request uses WebAssembly to call C API from JavaScript. 
--- .github/scripts/test-nodejs-npm.sh | 12 ++++ .github/workflows/test-build-wheel.yaml | 2 +- .github/workflows/test-pip-install.yaml | 2 +- nodejs-examples/README.md | 16 +++++ .../test-offline-speaker-diarization.js | 64 +++++++++++++++++++ scripts/nodejs/index.js | 8 +++ wasm/nodejs/CMakeLists.txt | 12 ++++ wasm/speaker-diarization/assets/README.md | 4 -- .../sherpa-onnx-speaker-diarization.js | 12 ++-- 9 files changed, 122 insertions(+), 10 deletions(-) create mode 100644 nodejs-examples/test-offline-speaker-diarization.js diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh index c41a0de65..03dec04aa 100755 --- a/.github/scripts/test-nodejs-npm.sh +++ b/.github/scripts/test-nodejs-npm.sh @@ -9,6 +9,18 @@ git status ls -lh ls -lh node_modules +echo '-----speaker diarization----------' +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav + +node ./test-offline-speaker-diarization.js +rm -rfv *.wav *.onnx sherpa-onnx-pyannote-* + echo '-----vad+whisper----------' curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 diff --git a/.github/workflows/test-build-wheel.yaml b/.github/workflows/test-build-wheel.yaml index a9b2db589..8b7472b84 100644 --- a/.github/workflows/test-build-wheel.yaml +++ b/.github/workflows/test-build-wheel.yaml @@ -139,7 +139,7 @@ jobs: export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH export 
PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH - export PATH=/c/hostedtoolcache/windows/Python/3.12.6/x64/bin:$PATH + export PATH=/c/hostedtoolcache/windows/Python/3.12.7/x64/bin:$PATH which sherpa-onnx sherpa-onnx --help diff --git a/.github/workflows/test-pip-install.yaml b/.github/workflows/test-pip-install.yaml index 0f73e3643..b59b66b53 100644 --- a/.github/workflows/test-pip-install.yaml +++ b/.github/workflows/test-pip-install.yaml @@ -104,7 +104,7 @@ jobs: export PATH=/c/hostedtoolcache/windows/Python/3.9.13/x64/bin:$PATH export PATH=/c/hostedtoolcache/windows/Python/3.10.11/x64/bin:$PATH export PATH=/c/hostedtoolcache/windows/Python/3.11.9/x64/bin:$PATH - export PATH=/c/hostedtoolcache/windows/Python/3.12.6/x64/bin:$PATH + export PATH=/c/hostedtoolcache/windows/Python/3.12.7/x64/bin:$PATH sherpa-onnx --help sherpa-onnx-keyword-spotter --help diff --git a/nodejs-examples/README.md b/nodejs-examples/README.md index 73a85de77..496a0062b 100644 --- a/nodejs-examples/README.md +++ b/nodejs-examples/README.md @@ -22,6 +22,22 @@ In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa for text-to-speech and speech-to-text. +# Speaker diarization + +In the following, we demonstrate how to run speaker diarization. + +```bash +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav + +node ./test-offline-speaker-diarization.js +``` + # Text-to-speech In the following, we demonstrate how to run text-to-speech. 
diff --git a/nodejs-examples/test-offline-speaker-diarization.js b/nodejs-examples/test-offline-speaker-diarization.js new file mode 100644 index 000000000..de0f4a45b --- /dev/null +++ b/nodejs-examples/test-offline-speaker-diarization.js @@ -0,0 +1,64 @@ +// Copyright (c) 2024 Xiaomi Corporation +const sherpa_onnx = require('sherpa-onnx'); + +// clang-format off +/* Please use the following commands to download files + used in this script + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav + + */ +// clang-format on + +const config = { + segmentation: { + pyannote: { + model: './sherpa-onnx-pyannote-segmentation-3-0/model.onnx', + debug: 1, + }, + }, + embedding: { + model: './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx', + debug: 1, + }, + clustering: { + // since we know that the test wave file + // ./0-four-speakers-zh.wav contains 4 speakers, we use 4 for numClusters + // here. if you don't have such information, please set numClusters to -1 + numClusters: 4, + + // If numClusters is not -1, then threshold is ignored. + // + // A larger threshold leads to fewer clusters, i.e., fewer speakers + // A smaller threshold leads to more clusters, i.e., more speakers + // You need to tune it by yourself. 
+ threshold: 0.5, + }, + + // If a segment is shorter than minDurationOn, we discard it + minDurationOn: 0.2, // in seconds + + // If the gap between two segments is less than minDurationOff, then we + // merge these two segments into a single one + minDurationOff: 0.5, // in seconds +}; + +const waveFilename = './0-four-speakers-zh.wav'; + +const sd = sherpa_onnx.createOfflineSpeakerDiarization(config); +console.log('Started') + +const wave = sherpa_onnx.readWave(waveFilename); +if (sd.sampleRate != wave.sampleRate) { + throw new Error( + `Expected sample rate: ${sd.sampleRate}, given: ${wave.sampleRate}`); +} + +const segments = sd.process(wave.samples); +console.log(segments); diff --git a/scripts/nodejs/index.js b/scripts/nodejs/index.js index 3f0789edb..b1b77841c 100644 --- a/scripts/nodejs/index.js +++ b/scripts/nodejs/index.js @@ -7,6 +7,8 @@ const sherpa_onnx_tts = require('./sherpa-onnx-tts.js'); const sherpa_onnx_kws = require('./sherpa-onnx-kws.js'); const sherpa_onnx_wave = require('./sherpa-onnx-wave.js'); const sherpa_onnx_vad = require('./sherpa-onnx-vad.js'); +const sherpa_onnx_speaker_diarization = + require('./sherpa-onnx-speaker-diarization.js'); function createOnlineRecognizer(config) { return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config); @@ -32,6 +34,11 @@ function createVad(config) { return sherpa_onnx_vad.createVad(wasmModule, config); } +function createOfflineSpeakerDiarization(config) { + return sherpa_onnx_speaker_diarization.createOfflineSpeakerDiarization( + wasmModule, config); +} + function readWave(filename) { return sherpa_onnx_wave.readWave(filename, wasmModule); } @@ -51,4 +58,5 @@ module.exports = { writeWave, createCircularBuffer, createVad, + createOfflineSpeakerDiarization, }; diff --git a/wasm/nodejs/CMakeLists.txt b/wasm/nodejs/CMakeLists.txt index 4efc879a1..dc8d8c854 100644 --- a/wasm/nodejs/CMakeLists.txt +++ b/wasm/nodejs/CMakeLists.txt @@ -70,6 +70,17 @@ set(exported_functions SherpaOnnxDestroySpeechSegment 
SherpaOnnxVoiceActivityDetectorReset SherpaOnnxVoiceActivityDetectorFlush + # Speaker diarization + SherpaOnnxCreateOfflineSpeakerDiarization + SherpaOnnxDestroyOfflineSpeakerDiarization + SherpaOnnxOfflineSpeakerDiarizationDestroyResult + SherpaOnnxOfflineSpeakerDiarizationDestroySegment + SherpaOnnxOfflineSpeakerDiarizationGetSampleRate + SherpaOnnxOfflineSpeakerDiarizationProcess + SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback + SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments + SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime + SherpaOnnxOfflineSpeakerDiarizationSetConfig # SherpaOnnxFileExists SherpaOnnxReadWave @@ -109,6 +120,7 @@ install( ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js ${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js + ${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js ${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js "$/sherpa-onnx-wasm-nodejs.js" "$/sherpa-onnx-wasm-nodejs.wasm" diff --git a/wasm/speaker-diarization/assets/README.md b/wasm/speaker-diarization/assets/README.md index 5c06139e2..f09a5899d 100644 --- a/wasm/speaker-diarization/assets/README.md +++ b/wasm/speaker-diarization/assets/README.md @@ -12,7 +12,6 @@ Remember to rename the downloaded files. The following is an example. 
- ```bash cd wasm/speaker-diarization/assets/ @@ -22,9 +21,6 @@ rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 cp sherpa-onnx-pyannote-segmentation-3-0/model.onnx ./segmentation.onnx rm -rf sherpa-onnx-pyannote-segmentation-3-0 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx mv 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ./embedding.onnx - - ``` diff --git a/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js b/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js index ccfc8373c..741013480 100644 --- a/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js +++ b/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js @@ -64,7 +64,7 @@ function initSherpaOnnxOfflineSpeakerSegmentationModelConfig(config, Module) { Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); offset += 4; - Module.setValue(ptr + offset, config.debug || 1, 'i32'); + Module.setValue(ptr + offset, config.debug || 0, 'i32'); offset += 4; const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1; @@ -103,7 +103,7 @@ function initSherpaOnnxSpeakerEmbeddingExtractorConfig(config, Module) { Module.setValue(ptr + offset, config.numThreads || 1, 'i32'); offset += 4; - Module.setValue(ptr + offset, config.debug || 1, 'i32'); + Module.setValue(ptr + offset, config.debug || 0, 'i32'); offset += 4; Module.setValue(ptr + offset, buffer + modelLen, 'i8*'); @@ -270,11 +270,15 @@ class OfflineSpeakerDiarization { } function createOfflineSpeakerDiarization(Module, myConfig) { - const config = { + let config = { segmentation: { pyannote: {model: './segmentation.onnx'}, + debug: 1, + }, + embedding: { + model: './embedding.onnx', + debug: 1, }, - embedding: {model: './embedding.onnx'}, clustering: {numClusters: -1, threshold: 0.5}, minDurationOn: 0.3, minDurationOff: 0.5, From 2d412b1190778bc35f337ef1feeb12292b5c9f92 Mon Sep 17 00:00:00 2001 
From: Fangjun Kuang Date: Fri, 11 Oct 2024 14:41:53 +0800 Subject: [PATCH 193/201] Kotlin API for speaker diarization (#1415) --- .../OfflineSpeakerDiarization.kt | 1 + kotlin-api-examples/run.sh | 31 +++ .../test_offline_speaker_diarization.kt | 53 +++++ .../csrc/offline-speaker-diarization-result.h | 2 +- sherpa-onnx/jni/CMakeLists.txt | 6 + .../jni/offline-speaker-diarization.cc | 219 ++++++++++++++++++ .../kotlin-api/OfflineSpeakerDiarization.kt | 101 ++++++++ 7 files changed, 412 insertions(+), 1 deletion(-) create mode 120000 kotlin-api-examples/OfflineSpeakerDiarization.kt create mode 100644 kotlin-api-examples/test_offline_speaker_diarization.kt create mode 100644 sherpa-onnx/jni/offline-speaker-diarization.cc create mode 100644 sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt diff --git a/kotlin-api-examples/OfflineSpeakerDiarization.kt b/kotlin-api-examples/OfflineSpeakerDiarization.kt new file mode 120000 index 000000000..870612b4c --- /dev/null +++ b/kotlin-api-examples/OfflineSpeakerDiarization.kt @@ -0,0 +1 @@ +../sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt \ No newline at end of file diff --git a/kotlin-api-examples/run.sh b/kotlin-api-examples/run.sh index 23e86886e..50e7816f1 100755 --- a/kotlin-api-examples/run.sh +++ b/kotlin-api-examples/run.sh @@ -285,6 +285,37 @@ function testPunctuation() { java -Djava.library.path=../build/lib -jar $out_filename } +function testOfflineSpeakerDiarization() { + if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + fi + + if [ ! 
-f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + fi + + if [ ! -f ./0-four-speakers-zh.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav + fi + + out_filename=test_offline_speaker_diarization.jar + kotlinc-jvm -include-runtime -d $out_filename \ + test_offline_speaker_diarization.kt \ + OfflineSpeakerDiarization.kt \ + Speaker.kt \ + OnlineStream.kt \ + WaveReader.kt \ + faked-asset-manager.kt \ + faked-log.kt + + ls -lh $out_filename + + java -Djava.library.path=../build/lib -jar $out_filename +} + +testOfflineSpeakerDiarization testSpeakerEmbeddingExtractor testOnlineAsr testTts diff --git a/kotlin-api-examples/test_offline_speaker_diarization.kt b/kotlin-api-examples/test_offline_speaker_diarization.kt new file mode 100644 index 000000000..96c33f062 --- /dev/null +++ b/kotlin-api-examples/test_offline_speaker_diarization.kt @@ -0,0 +1,53 @@ +package com.k2fsa.sherpa.onnx + +fun main() { + testOfflineSpeakerDiarization() +} + +fun callback(numProcessedChunks: Int, numTotalChunks: Int, arg: Long): Int { + val progress = numProcessedChunks.toFloat() / numTotalChunks * 100 + val s = "%.2f".format(progress) + println("Progress: ${s}%"); + + return 0 +} + +fun testOfflineSpeakerDiarization() { + var config = OfflineSpeakerDiarizationConfig( + segmentation=OfflineSpeakerSegmentationModelConfig( + pyannote=OfflineSpeakerSegmentationPyannoteModelConfig("./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"), + ), + embedding=SpeakerEmbeddingExtractorConfig( + model="./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx", + ), + + // The test wave file ./0-four-speakers-zh.wav contains four speakers, so + // we use numClusters=4 here. 
If you don't know the number of speakers + // in the test wave file, please set the threshold like below. + // + // clustering=FastClusteringConfig(threshold=0.5), + // + // WARNING: You need to tune threshold by yourself. + // A larger threshold leads to fewer clusters, i.e., fewer speakers. + // A smaller threshold leads to more clusters, i.e., more speakers. + // + clustering=FastClusteringConfig(numClusters=4), + ) + + val sd = OfflineSpeakerDiarization(config=config) + + val waveData = WaveReader.readWave( + filename = "./0-four-speakers-zh.wav", + ) + + if (sd.sampleRate() != waveData.sampleRate) { + println("Expected sample rate: ${sd.sampleRate()}, given: ${waveData.sampleRate}") + return + } + + // val segments = sd.process(waveData.samples) // this one is also ok + val segments = sd.processWithCallback(waveData.samples, callback=::callback) + for (segment in segments) { + println("${segment.start} -- ${segment.end} speaker_${segment.speaker}") + } +} diff --git a/sherpa-onnx/csrc/offline-speaker-diarization-result.h b/sherpa-onnx/csrc/offline-speaker-diarization-result.h index 5fb144f5c..6298a87c7 100644 --- a/sherpa-onnx/csrc/offline-speaker-diarization-result.h +++ b/sherpa-onnx/csrc/offline-speaker-diarization-result.h @@ -58,7 +58,7 @@ class OfflineSpeakerDiarizationResult { std::vector> SortBySpeaker() const; - public: + private: std::vector segments_; }; diff --git a/sherpa-onnx/jni/CMakeLists.txt b/sherpa-onnx/jni/CMakeLists.txt index 998379084..23544c177 100644 --- a/sherpa-onnx/jni/CMakeLists.txt +++ b/sherpa-onnx/jni/CMakeLists.txt @@ -33,6 +33,12 @@ if(SHERPA_ONNX_ENABLE_TTS) ) endif() +if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION) + list(APPEND sources + offline-speaker-diarization.cc + ) +endif() + add_library(sherpa-onnx-jni ${sources}) target_compile_definitions(sherpa-onnx-jni PRIVATE SHERPA_ONNX_BUILD_SHARED_LIBS=1) diff --git a/sherpa-onnx/jni/offline-speaker-diarization.cc b/sherpa-onnx/jni/offline-speaker-diarization.cc new file mode 100644
index 000000000..a0eef8b9c --- /dev/null +++ b/sherpa-onnx/jni/offline-speaker-diarization.cc @@ -0,0 +1,219 @@ +// sherpa-onnx/jni/offline-speaker-diarization.cc +// +// Copyright (c) 2024 Xiaomi Corporation + +#include "sherpa-onnx/csrc/offline-speaker-diarization.h" + +#include "sherpa-onnx/csrc/macros.h" +#include "sherpa-onnx/jni/common.h" + +namespace sherpa_onnx { + +static OfflineSpeakerDiarizationConfig GetOfflineSpeakerDiarizationConfig( + JNIEnv *env, jobject config) { + OfflineSpeakerDiarizationConfig ans; + + jclass cls = env->GetObjectClass(config); + jfieldID fid; + + //---------- segmentation ---------- + fid = env->GetFieldID( + cls, "segmentation", + "Lcom/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationModelConfig;"); + jobject segmentation_config = env->GetObjectField(config, fid); + jclass segmentation_config_cls = env->GetObjectClass(segmentation_config); + + fid = env->GetFieldID( + segmentation_config_cls, "pyannote", + "Lcom/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationPyannoteModelConfig;"); + jobject pyannote_config = env->GetObjectField(segmentation_config, fid); + jclass pyannote_config_cls = env->GetObjectClass(pyannote_config); + + fid = env->GetFieldID(pyannote_config_cls, "model", "Ljava/lang/String;"); + jstring s = (jstring)env->GetObjectField(pyannote_config, fid); + const char *p = env->GetStringUTFChars(s, nullptr); + ans.segmentation.pyannote.model = p; + env->ReleaseStringUTFChars(s, p); + + fid = env->GetFieldID(segmentation_config_cls, "numThreads", "I"); + ans.segmentation.num_threads = env->GetIntField(segmentation_config, fid); + + fid = env->GetFieldID(segmentation_config_cls, "debug", "Z"); + ans.segmentation.debug = env->GetBooleanField(segmentation_config, fid); + + fid = env->GetFieldID(segmentation_config_cls, "provider", + "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(segmentation_config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.segmentation.provider = p; + env->ReleaseStringUTFChars(s, p); + + 
//---------- embedding ---------- + fid = env->GetFieldID( + cls, "embedding", + "Lcom/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig;"); + jobject embedding_config = env->GetObjectField(config, fid); + jclass embedding_config_cls = env->GetObjectClass(embedding_config); + + fid = env->GetFieldID(embedding_config_cls, "model", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(embedding_config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.embedding.model = p; + env->ReleaseStringUTFChars(s, p); + + fid = env->GetFieldID(embedding_config_cls, "numThreads", "I"); + ans.embedding.num_threads = env->GetIntField(embedding_config, fid); + + fid = env->GetFieldID(embedding_config_cls, "debug", "Z"); + ans.embedding.debug = env->GetBooleanField(embedding_config, fid); + + fid = env->GetFieldID(embedding_config_cls, "provider", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(embedding_config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.embedding.provider = p; + env->ReleaseStringUTFChars(s, p); + + //---------- clustering ---------- + fid = env->GetFieldID(cls, "clustering", + "Lcom/k2fsa/sherpa/onnx/FastClusteringConfig;"); + jobject clustering_config = env->GetObjectField(config, fid); + jclass clustering_config_cls = env->GetObjectClass(clustering_config); + + fid = env->GetFieldID(clustering_config_cls, "numClusters", "I"); + ans.clustering.num_clusters = env->GetIntField(clustering_config, fid); + + fid = env->GetFieldID(clustering_config_cls, "threshold", "F"); + ans.clustering.threshold = env->GetFloatField(clustering_config, fid); + + // its own fields + fid = env->GetFieldID(cls, "minDurationOn", "F"); + ans.min_duration_on = env->GetFloatField(config, fid); + + fid = env->GetFieldID(cls, "minDurationOff", "F"); + ans.min_duration_off = env->GetFloatField(config, fid); + + return ans; +} + +} // namespace sherpa_onnx + +SHERPA_ONNX_EXTERN_C +JNIEXPORT jlong JNICALL 
+Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_newFromAsset( + JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) { + return 0; +} + +SHERPA_ONNX_EXTERN_C +JNIEXPORT jlong JNICALL +Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_newFromFile( + JNIEnv *env, jobject /*obj*/, jobject _config) { + auto config = sherpa_onnx::GetOfflineSpeakerDiarizationConfig(env, _config); + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); + + if (!config.Validate()) { + SHERPA_ONNX_LOGE("Errors found in config!"); + return 0; + } + + auto sd = new sherpa_onnx::OfflineSpeakerDiarization(config); + + return (jlong)sd; +} + +SHERPA_ONNX_EXTERN_C +JNIEXPORT void JNICALL +Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_setConfig( + JNIEnv *env, jobject /*obj*/, jlong ptr, jobject _config) { + auto config = sherpa_onnx::GetOfflineSpeakerDiarizationConfig(env, _config); + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); + + auto sd = reinterpret_cast(ptr); + sd->SetConfig(config); +} + +SHERPA_ONNX_EXTERN_C +JNIEXPORT void JNICALL +Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_delete(JNIEnv * /*env*/, + jobject /*obj*/, + jlong ptr) { + delete reinterpret_cast(ptr); +} + +static jobjectArray ProcessImpl( + JNIEnv *env, + const std::vector + &segments) { + jclass cls = + env->FindClass("com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationSegment"); + + jobjectArray obj_arr = + (jobjectArray)env->NewObjectArray(segments.size(), cls, nullptr); + + jmethodID constructor = env->GetMethodID(cls, "", "(FFI)V"); + + for (int32_t i = 0; i != segments.size(); ++i) { + const auto &s = segments[i]; + jobject segment = + env->NewObject(cls, constructor, s.Start(), s.End(), s.Speaker()); + env->SetObjectArrayElement(obj_arr, i, segment); + } + + return obj_arr; +} + +SHERPA_ONNX_EXTERN_C +JNIEXPORT jobjectArray JNICALL +Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_process( + JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples) { 
+ auto sd = reinterpret_cast(ptr); + + jfloat *p = env->GetFloatArrayElements(samples, nullptr); + jsize n = env->GetArrayLength(samples); + auto segments = sd->Process(p, n).SortByStartTime(); + env->ReleaseFloatArrayElements(samples, p, JNI_ABORT); + + return ProcessImpl(env, segments); +} + +SHERPA_ONNX_EXTERN_C +JNIEXPORT jobjectArray JNICALL +Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_processWithCallback( + JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples, + jobject callback, jlong arg) { + std::function callback_wrapper = + [env, callback](int32_t num_processed_chunks, int32_t num_total_chunks, + void *data) -> int { + jclass cls = env->GetObjectClass(callback); + + jmethodID mid = env->GetMethodID(cls, "invoke", "(IIJ)Ljava/lang/Integer;"); + if (mid == nullptr) { + SHERPA_ONNX_LOGE("Failed to get the callback. Ignore it."); + return 0; + } + + jobject ret = env->CallObjectMethod(callback, mid, num_processed_chunks, + num_total_chunks, (jlong)data); + jclass jklass = env->GetObjectClass(ret); + jmethodID int_value_mid = env->GetMethodID(jklass, "intValue", "()I"); + return env->CallIntMethod(ret, int_value_mid); + }; + + auto sd = reinterpret_cast(ptr); + + jfloat *p = env->GetFloatArrayElements(samples, nullptr); + jsize n = env->GetArrayLength(samples); + auto segments = + sd->Process(p, n, callback_wrapper, (void *)arg).SortByStartTime(); + env->ReleaseFloatArrayElements(samples, p, JNI_ABORT); + + return ProcessImpl(env, segments); +} + +SHERPA_ONNX_EXTERN_C +JNIEXPORT jint JNICALL +Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_getSampleRate( + JNIEnv * /*env*/, jobject /*obj*/, jlong ptr) { + return reinterpret_cast(ptr) + ->SampleRate(); +} diff --git a/sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt b/sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt new file mode 100644 index 000000000..de0a9dffd --- /dev/null +++ b/sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt @@ -0,0 +1,101 @@ +package com.k2fsa.sherpa.onnx 
+ +import android.content.res.AssetManager + +data class OfflineSpeakerSegmentationPyannoteModelConfig( + var model: String, +) + +data class OfflineSpeakerSegmentationModelConfig( + var pyannote: OfflineSpeakerSegmentationPyannoteModelConfig, + var numThreads: Int = 1, + var debug: Boolean = false, + var provider: String = "cpu", +) + +data class FastClusteringConfig( + var numClusters: Int = -1, + var threshold: Float = 0.5f, +) + +data class OfflineSpeakerDiarizationConfig( + var segmentation: OfflineSpeakerSegmentationModelConfig, + var embedding: SpeakerEmbeddingExtractorConfig, + var clustering: FastClusteringConfig, + var minDurationOn: Float = 0.2f, + var minDurationOff: Float = 0.5f, +) + +data class OfflineSpeakerDiarizationSegment( + val start: Float, // in seconds + val end: Float, // in seconds + val speaker: Int, // ID of the speaker; count from 0 +) + +class OfflineSpeakerDiarization( + assetManager: AssetManager? = null, + config: OfflineSpeakerDiarizationConfig, +) { + private var ptr: Long + + init { + ptr = if (assetManager != null) { + newFromAsset(assetManager, config) + } else { + newFromFile(config) + } + } + + protected fun finalize() { + if (ptr != 0L) { + delete(ptr) + ptr = 0 + } + } + + fun release() = finalize() + + // Only config.clustering is used. 
All other fields in config + // are ignored + fun setConfig(config: OfflineSpeakerDiarizationConfig) = setConfig(ptr, config) + + fun sampleRate() = getSampleRate(ptr) + + fun process(samples: FloatArray) = process(ptr, samples) + + fun processWithCallback( + samples: FloatArray, + callback: (numProcessedChunks: Int, numTotalChunks: Int, arg: Long) -> Int, + arg: Long = 0, + ) = processWithCallback(ptr, samples, callback, arg) + + private external fun delete(ptr: Long) + + private external fun newFromAsset( + assetManager: AssetManager, + config: OfflineSpeakerDiarizationConfig, + ): Long + + private external fun newFromFile( + config: OfflineSpeakerDiarizationConfig, + ): Long + + private external fun setConfig(ptr: Long, config: OfflineSpeakerDiarizationConfig) + + private external fun getSampleRate(ptr: Long): Int + + private external fun process(ptr: Long, samples: FloatArray): Array + + private external fun processWithCallback( + ptr: Long, + samples: FloatArray, + callback: (numProcessedChunks: Int, numTotalChunks: Int, arg: Long) -> Int, + arg: Long, + ): Array + + companion object { + init { + System.loadLibrary("sherpa-onnx-jni") + } + } +} From 1851ff63373ed1d3ef614b431a153bcc6528e4e2 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 11 Oct 2024 16:51:40 +0800 Subject: [PATCH 194/201] Java API for speaker diarization (#1416) --- .github/workflows/run-java-test.yaml | 7 ++ .../OfflineSpeakerDiarizationDemo.java | 99 +++++++++++++++++++ java-api-examples/README.md | 6 ++ .../run-offline-speaker-diarization.sh | 45 +++++++++ sherpa-onnx/java-api/Makefile | 9 ++ .../sherpa/onnx/FastClusteringConfig.java | 44 +++++++++ .../onnx/OfflineSpeakerDiarization.java | 61 ++++++++++++ .../OfflineSpeakerDiarizationCallback.java | 8 ++ .../onnx/OfflineSpeakerDiarizationConfig.java | 79 +++++++++++++++ .../OfflineSpeakerDiarizationSegment.java | 27 +++++ ...OfflineSpeakerSegmentationModelConfig.java | 52 ++++++++++ ...peakerSegmentationPyannoteModelConfig.java | 32 
++++++ .../k2fsa/sherpa/onnx/OfflineTtsCallback.java | 2 + .../onnx/SpeakerEmbeddingExtractorConfig.java | 1 - 14 files changed, 471 insertions(+), 1 deletion(-) create mode 100644 java-api-examples/OfflineSpeakerDiarizationDemo.java create mode 100755 java-api-examples/run-offline-speaker-diarization.sh create mode 100644 sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/FastClusteringConfig.java create mode 100644 sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.java create mode 100644 sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationCallback.java create mode 100644 sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationConfig.java create mode 100644 sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationSegment.java create mode 100644 sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationModelConfig.java create mode 100644 sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationPyannoteModelConfig.java diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index 3e932707c..5759ea5d8 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -107,6 +107,13 @@ jobs: make -j4 ls -lh lib + - name: Run java test (speaker diarization) + shell: bash + run: | + cd ./java-api-examples + ./run-offline-speaker-diarization.sh + rm -rfv *.onnx *.wav sherpa-onnx-pyannote-* + - name: Run java test (kws) shell: bash run: | diff --git a/java-api-examples/OfflineSpeakerDiarizationDemo.java b/java-api-examples/OfflineSpeakerDiarizationDemo.java new file mode 100644 index 000000000..a5ef8d1f4 --- /dev/null +++ b/java-api-examples/OfflineSpeakerDiarizationDemo.java @@ -0,0 +1,99 @@ +// Copyright 2024 Xiaomi Corporation + +// This file shows how to use sherpa-onnx Java API for speaker diarization, +import com.k2fsa.sherpa.onnx.*; + +public class OfflineSpeakerDiarizationDemo { + public static void 
main(String[] args) { + /* Please use the following commands to download files used in this file + Step 1: Download a speaker segmentation model + + Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models + for a list of available models. The following is an example + + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + + Step 2: Download a speaker embedding extractor model + + Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models + for a list of available models. The following is an example + + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + + Step 3. Download test wave files + + Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models + for a list of available test wave files. The following is an example + + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav + + Step 4. 
Run it + */ + + String segmentationModel = "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"; + String embeddingModel = "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"; + String waveFilename = "./0-four-speakers-zh.wav"; + + WaveReader reader = new WaveReader(waveFilename); + + OfflineSpeakerSegmentationPyannoteModelConfig pyannote = + OfflineSpeakerSegmentationPyannoteModelConfig.builder().setModel(segmentationModel).build(); + + OfflineSpeakerSegmentationModelConfig segmentation = + OfflineSpeakerSegmentationModelConfig.builder() + .setPyannote(pyannote) + .setDebug(true) + .build(); + + SpeakerEmbeddingExtractorConfig embedding = + SpeakerEmbeddingExtractorConfig.builder().setModel(embeddingModel).setDebug(true).build(); + + // The test wave file ./0-four-speakers-zh.wav contains four speakers, so + // we use numClusters=4 here. If you don't know the number of speakers + // in the test wave file, please set the numClusters to -1 and provide + // threshold for clustering + FastClusteringConfig clustering = + FastClusteringConfig.builder() + .setNumClusters(4) // set it to -1 if you don't know the actual number + .setThreshold(0.5f) + .build(); + + OfflineSpeakerDiarizationConfig config = + OfflineSpeakerDiarizationConfig.builder() + .setSegmentation(segmentation) + .setEmbedding(embedding) + .setClustering(clustering) + .setMinDurationOn(0.2f) + .setMinDurationOff(0.5f) + .build(); + + OfflineSpeakerDiarization sd = new OfflineSpeakerDiarization(config); + if (sd.getSampleRate() != reader.getSampleRate()) { + System.out.printf( + "Expected sample rate: %d, given: %d\n", sd.getSampleRate(), reader.getSampleRate()); + return; + } + + // OfflineSpeakerDiarizationSegment[] segments = sd.process(reader.getSamples()); + // without callback is also ok + + // or you can use a callback to show the progress + OfflineSpeakerDiarizationSegment[] segments = + sd.processWithCallback( + reader.getSamples(), + (int numProcessedChunks, int numTotalChunks, long 
arg) -> { + float progress = 100.0f * numProcessedChunks / numTotalChunks; + System.out.printf("Progress: %.2f%%\n", progress); + + return 0; + }); + + for (OfflineSpeakerDiarizationSegment s : segments) { + System.out.printf("%.3f -- %.3f speaker_%02d\n", s.getStart(), s.getEnd(), s.getSpeaker()); + } + + sd.release(); + } +} diff --git a/java-api-examples/README.md b/java-api-examples/README.md index 697f0c876..779c1b254 100755 --- a/java-api-examples/README.md +++ b/java-api-examples/README.md @@ -4,6 +4,12 @@ This directory contains examples for the JAVA API of sherpa-onnx. # Usage +## Non-streaming speaker diarization + +```bash +./run-offline-speaker-diarization.sh +``` + ## Streaming Speech recognition ``` diff --git a/java-api-examples/run-offline-speaker-diarization.sh b/java-api-examples/run-offline-speaker-diarization.sh new file mode 100755 index 000000000..d5cd63b5f --- /dev/null +++ b/java-api-examples/run-offline-speaker-diarization.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +set -ex + +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then + mkdir -p ../build + pushd ../build + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib + popd +fi + +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then + pushd ../sherpa-onnx/java-api + make + popd +fi + +if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +fi + +if [ ! 
-f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx +fi + +if [ ! -f ./0-four-speakers-zh.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav +fi + +java \ + -Djava.library.path=$PWD/../build/lib \ + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ + ./OfflineSpeakerDiarizationDemo.java diff --git a/sherpa-onnx/java-api/Makefile b/sherpa-onnx/java-api/Makefile index 69c3631b4..6e4778ae7 100644 --- a/sherpa-onnx/java-api/Makefile +++ b/sherpa-onnx/java-api/Makefile @@ -68,6 +68,15 @@ java_files += KeywordSpotterConfig.java java_files += KeywordSpotterResult.java java_files += KeywordSpotter.java +java_files += OfflineSpeakerSegmentationPyannoteModelConfig.java +java_files += OfflineSpeakerSegmentationModelConfig.java +java_files += FastClusteringConfig.java +java_files += OfflineSpeakerDiarizationConfig.java +java_files += OfflineSpeakerDiarizationSegment.java +java_files += OfflineSpeakerDiarizationCallback.java +java_files += OfflineSpeakerDiarization.java + + class_files := $(java_files:%.java=%.class) java_files := $(addprefix src/$(package_dir)/,$(java_files)) diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/FastClusteringConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/FastClusteringConfig.java new file mode 100644 index 000000000..f2e957259 --- /dev/null +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/FastClusteringConfig.java @@ -0,0 +1,44 @@ +// Copyright 2024 Xiaomi Corporation + +package com.k2fsa.sherpa.onnx; + +public class FastClusteringConfig { + private final int numClusters; + private final float threshold; + + private FastClusteringConfig(Builder builder) { + this.numClusters = builder.numClusters; + this.threshold = builder.threshold; + } + + public static Builder 
builder() { + return new Builder(); + } + + public int getNumClusters() { + return numClusters; + } + + public float getThreshold() { + return threshold; + } + + public static class Builder { + private int numClusters = -1; + private float threshold = 0.5f; + + public FastClusteringConfig build() { + return new FastClusteringConfig(this); + } + + public Builder setNumClusters(int numClusters) { + this.numClusters = numClusters; + return this; + } + + public Builder setThreshold(float threshold) { + this.threshold = threshold; + return this; + } + } +} diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.java new file mode 100644 index 000000000..b75cd09ea --- /dev/null +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarization.java @@ -0,0 +1,61 @@ +// Copyright 2024 Xiaomi Corporation + +package com.k2fsa.sherpa.onnx; + +public class OfflineSpeakerDiarization { + static { + System.loadLibrary("sherpa-onnx-jni"); + } + + private long ptr = 0; + + public OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config) { + ptr = newFromFile(config); + } + + public int getSampleRate() { + return getSampleRate(ptr); + } + + // Only config.clustering is used. 
All other fields are ignored + public void setConfig(OfflineSpeakerDiarizationConfig config) { + setConfig(ptr, config); + } + + public OfflineSpeakerDiarizationSegment[] process(float[] samples) { + return process(ptr, samples); + } + + public OfflineSpeakerDiarizationSegment[] processWithCallback(float[] samples, OfflineSpeakerDiarizationCallback callback) { + return processWithCallback(ptr, samples, callback, 0); + } + + public OfflineSpeakerDiarizationSegment[] processWithCallback(float[] samples, OfflineSpeakerDiarizationCallback callback, long arg) { + return processWithCallback(ptr, samples, callback, arg); + } + + protected void finalize() throws Throwable { + release(); + } + + // You'd better call it manually if it is not used anymore + public void release() { + if (this.ptr == 0) { + return; + } + delete(this.ptr); + this.ptr = 0; + } + + private native int getSampleRate(long ptr); + + private native void delete(long ptr); + + private native long newFromFile(OfflineSpeakerDiarizationConfig config); + + private native void setConfig(long ptr, OfflineSpeakerDiarizationConfig config); + + private native OfflineSpeakerDiarizationSegment[] process(long ptr, float[] samples); + + private native OfflineSpeakerDiarizationSegment[] processWithCallback(long ptr, float[] samples, OfflineSpeakerDiarizationCallback callback, long arg); +} \ No newline at end of file diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationCallback.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationCallback.java new file mode 100644 index 000000000..7787386d3 --- /dev/null +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationCallback.java @@ -0,0 +1,8 @@ +// Copyright 2024 Xiaomi Corporation + +package com.k2fsa.sherpa.onnx; + +@FunctionalInterface +public interface OfflineSpeakerDiarizationCallback { + Integer invoke(int numProcessedChunks, int numTotalCunks, long arg); +} diff --git 
a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationConfig.java new file mode 100644 index 000000000..9965c5742 --- /dev/null +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationConfig.java @@ -0,0 +1,79 @@ +package com.k2fsa.sherpa.onnx; + +public class OfflineSpeakerDiarizationConfig { + private final OfflineSpeakerSegmentationModelConfig segmentation; + private final SpeakerEmbeddingExtractorConfig embedding; + private final FastClusteringConfig clustering; + private final float minDurationOn; + private final float minDurationOff; + + private OfflineSpeakerDiarizationConfig(Builder builder) { + this.segmentation = builder.segmentation; + this.embedding = builder.embedding; + this.clustering = builder.clustering; + this.minDurationOff = builder.minDurationOff; + this.minDurationOn = builder.minDurationOn; + } + + public static Builder builder() { + return new Builder(); + } + + public OfflineSpeakerSegmentationModelConfig getSegmentation() { + return segmentation; + } + + public SpeakerEmbeddingExtractorConfig getEmbedding() { + return embedding; + } + + public FastClusteringConfig getClustering() { + return clustering; + } + + public float getMinDurationOff() { + return minDurationOff; + } + + public float getMinDurationOn() { + return minDurationOn; + } + + public static class Builder { + private OfflineSpeakerSegmentationModelConfig segmentation = OfflineSpeakerSegmentationModelConfig.builder().build(); + private SpeakerEmbeddingExtractorConfig embedding = SpeakerEmbeddingExtractorConfig.builder().build(); + private FastClusteringConfig clustering = FastClusteringConfig.builder().build(); + private float minDurationOn = 0.2f; + private float minDurationOff = 0.5f; + + public OfflineSpeakerDiarizationConfig build() { + return new OfflineSpeakerDiarizationConfig(this); + } + + public Builder 
setSegmentation(OfflineSpeakerSegmentationModelConfig segmentation) { + this.segmentation = segmentation; + return this; + } + + public Builder setEmbedding(SpeakerEmbeddingExtractorConfig embedding) { + this.embedding = embedding; + return this; + } + + public Builder setClustering(FastClusteringConfig clustering) { + this.clustering = clustering; + return this; + } + + public Builder setMinDurationOff(float minDurationOff) { + this.minDurationOff = minDurationOff; + return this; + } + + public Builder setMinDurationOn(float minDurationOn) { + this.minDurationOn = minDurationOn; + return this; + } + } + +} diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationSegment.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationSegment.java new file mode 100644 index 000000000..1bb1a7635 --- /dev/null +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerDiarizationSegment.java @@ -0,0 +1,27 @@ +// Copyright 2024 Xiaomi Corporation + +package com.k2fsa.sherpa.onnx; + +public class OfflineSpeakerDiarizationSegment { + private final float start; + private final float end; + private final int speaker; + + public OfflineSpeakerDiarizationSegment(float start, float end, int speaker) { + this.start = start; + this.end = end; + this.speaker = speaker; + } + + public float getStart() { + return start; + } + + public float getEnd() { + return end; + } + + public int getSpeaker() { + return speaker; + } +} diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationModelConfig.java new file mode 100644 index 000000000..55df6c295 --- /dev/null +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationModelConfig.java @@ -0,0 +1,52 @@ +// Copyright 2024 Xiaomi Corporation + +package com.k2fsa.sherpa.onnx; + +public class OfflineSpeakerSegmentationModelConfig { + private final 
OfflineSpeakerSegmentationPyannoteModelConfig pyannote; + private final int numThreads; + private final boolean debug; + private final String provider; + + private OfflineSpeakerSegmentationModelConfig(Builder builder) { + this.pyannote = builder.pyannote; + this.numThreads = builder.numThreads; + this.debug = builder.debug; + this.provider = builder.provider; + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + private OfflineSpeakerSegmentationPyannoteModelConfig pyannote = OfflineSpeakerSegmentationPyannoteModelConfig.builder().build(); + private int numThreads = 1; + private boolean debug = true; + private String provider = "cpu"; + + public OfflineSpeakerSegmentationModelConfig build() { + return new OfflineSpeakerSegmentationModelConfig(this); + } + + public Builder setPyannote(OfflineSpeakerSegmentationPyannoteModelConfig pyannote) { + this.pyannote = pyannote; + return this; + } + + public Builder setNumThreads(int numThreads) { + this.numThreads = numThreads; + return this; + } + + public Builder setDebug(boolean debug) { + this.debug = debug; + return this; + } + + public Builder setProvider(String provider) { + this.provider = provider; + return this; + } + } +} \ No newline at end of file diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationPyannoteModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationPyannoteModelConfig.java new file mode 100644 index 000000000..51fd99874 --- /dev/null +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineSpeakerSegmentationPyannoteModelConfig.java @@ -0,0 +1,32 @@ +// Copyright 2024 Xiaomi Corporation + +package com.k2fsa.sherpa.onnx; + +public class OfflineSpeakerSegmentationPyannoteModelConfig { + private final String model; + + private OfflineSpeakerSegmentationPyannoteModelConfig(Builder builder) { + this.model = builder.model; + } + + public static Builder builder() { + return new 
Builder(); + } + + public String getModel() { + return model; + } + + public static class Builder { + private String model = ""; + + public OfflineSpeakerSegmentationPyannoteModelConfig build() { + return new OfflineSpeakerSegmentationPyannoteModelConfig(this); + } + + public Builder setModel(String model) { + this.model = model; + return this; + } + } +} diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsCallback.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsCallback.java index 396594a96..2fc1d45dd 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsCallback.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsCallback.java @@ -1,3 +1,5 @@ +// Copyright 2024 Xiaomi Corporation + package com.k2fsa.sherpa.onnx; @FunctionalInterface diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.java index ffc688f34..80f800cdc 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/SpeakerEmbeddingExtractorConfig.java @@ -50,5 +50,4 @@ public Builder setProvider(String provider) { return this; } } - } From 1ed803adc13a3b060a6b972253e3adfa81be8126 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 11 Oct 2024 21:17:41 +0800 Subject: [PATCH 195/201] Dart API for speaker diarization (#1418) --- .github/scripts/test-dart.sh | 5 + .github/workflows/test-dart.yaml | 1 + dart-api-examples/README.md | 1 + .../speaker-diarization/.gitignore | 3 + .../speaker-diarization/CHANGELOG.md | 3 + .../speaker-diarization/README.md | 7 + .../speaker-diarization/analysis_options.yaml | 30 ++ .../speaker-diarization/bin/init.dart | 1 + .../bin/speaker-diarization.dart | 100 +++++++ .../speaker-diarization/pubspec.yaml | 17 ++ dart-api-examples/speaker-diarization/run.sh | 21 ++ 
flutter/sherpa_onnx/example/example.md | 1 + flutter/sherpa_onnx/lib/sherpa_onnx.dart | 1 + .../lib/src/offline_speaker_diarization.dart | 243 ++++++++++++++++ .../lib/src/sherpa_onnx_bindings.dart | 263 +++++++++++++++++- flutter/sherpa_onnx/pubspec.yaml | 8 +- scripts/dart/speaker-diarization-pubspec.yaml | 16 ++ sherpa-onnx/c-api/c-api.cc | 16 ++ sherpa-onnx/c-api/c-api.h | 9 + ...ffline-speaker-diarization-pyannote-impl.h | 1 + .../jni/offline-speaker-diarization.cc | 3 +- 21 files changed, 733 insertions(+), 17 deletions(-) create mode 100644 dart-api-examples/speaker-diarization/.gitignore create mode 100644 dart-api-examples/speaker-diarization/CHANGELOG.md create mode 100644 dart-api-examples/speaker-diarization/README.md create mode 100644 dart-api-examples/speaker-diarization/analysis_options.yaml create mode 120000 dart-api-examples/speaker-diarization/bin/init.dart create mode 100644 dart-api-examples/speaker-diarization/bin/speaker-diarization.dart create mode 100644 dart-api-examples/speaker-diarization/pubspec.yaml create mode 100755 dart-api-examples/speaker-diarization/run.sh create mode 100644 flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart create mode 100644 scripts/dart/speaker-diarization-pubspec.yaml diff --git a/.github/scripts/test-dart.sh b/.github/scripts/test-dart.sh index 0aff2085e..27c21573a 100755 --- a/.github/scripts/test-dart.sh +++ b/.github/scripts/test-dart.sh @@ -4,6 +4,11 @@ set -ex cd dart-api-examples +pushd speaker-diarization +echo '----------speaker diarization----------' +./run.sh +popd + pushd speaker-identification echo '----------3d speaker----------' ./run-3d-speaker.sh diff --git a/.github/workflows/test-dart.yaml b/.github/workflows/test-dart.yaml index 58d505490..d9e27e86f 100644 --- a/.github/workflows/test-dart.yaml +++ b/.github/workflows/test-dart.yaml @@ -114,6 +114,7 @@ jobs: cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml cp 
scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml + cp scripts/dart/speaker-diarization-pubspec.yaml dart-api-examples/speaker-diarization/pubspec.yaml cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml diff --git a/dart-api-examples/README.md b/dart-api-examples/README.md index 9370372e7..3d66cb04e 100644 --- a/dart-api-examples/README.md +++ b/dart-api-examples/README.md @@ -9,6 +9,7 @@ https://pub.dev/packages/sherpa_onnx | Directory | Description | |-----------|-------------| +| [./speaker-diarization](./speaker-diarization)| Example for speaker diarization.| | [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.| | [./audio-tagging](./audio-tagging)| Example for audio tagging.| | [./keyword-spotter](./keyword-spotter)| Example for keyword spotting| diff --git a/dart-api-examples/speaker-diarization/.gitignore b/dart-api-examples/speaker-diarization/.gitignore new file mode 100644 index 000000000..3a8579040 --- /dev/null +++ b/dart-api-examples/speaker-diarization/.gitignore @@ -0,0 +1,3 @@ +# https://dart.dev/guides/libraries/private-files +# Created by `dart pub` +.dart_tool/ diff --git a/dart-api-examples/speaker-diarization/CHANGELOG.md b/dart-api-examples/speaker-diarization/CHANGELOG.md new file mode 100644 index 000000000..effe43c82 --- /dev/null +++ b/dart-api-examples/speaker-diarization/CHANGELOG.md @@ -0,0 +1,3 @@ +## 1.0.0 + +- Initial version. diff --git a/dart-api-examples/speaker-diarization/README.md b/dart-api-examples/speaker-diarization/README.md new file mode 100644 index 000000000..d4d8c4fd2 --- /dev/null +++ b/dart-api-examples/speaker-diarization/README.md @@ -0,0 +1,7 @@ +# Introduction + +This example shows how to use the Dart API from sherpa-onnx for speaker diarization. 
+ +# Usage + +Please see [./run.sh](./run.sh) diff --git a/dart-api-examples/speaker-diarization/analysis_options.yaml b/dart-api-examples/speaker-diarization/analysis_options.yaml new file mode 100644 index 000000000..dee8927aa --- /dev/null +++ b/dart-api-examples/speaker-diarization/analysis_options.yaml @@ -0,0 +1,30 @@ +# This file configures the static analysis results for your project (errors, +# warnings, and lints). +# +# This enables the 'recommended' set of lints from `package:lints`. +# This set helps identify many issues that may lead to problems when running +# or consuming Dart code, and enforces writing Dart using a single, idiomatic +# style and format. +# +# If you want a smaller set of lints you can change this to specify +# 'package:lints/core.yaml'. These are just the most critical lints +# (the recommended set includes the core lints). +# The core lints are also what is used by pub.dev for scoring packages. + +include: package:lints/recommended.yaml + +# Uncomment the following section to specify additional rules. 
+ +# linter: +# rules: +# - camel_case_types + +# analyzer: +# exclude: +# - path/to/excluded/files/** + +# For more information about the core and recommended set of lints, see +# https://dart.dev/go/core-lints + +# For additional information about configuring this file, see +# https://dart.dev/guides/language/analysis-options diff --git a/dart-api-examples/speaker-diarization/bin/init.dart b/dart-api-examples/speaker-diarization/bin/init.dart new file mode 120000 index 000000000..48508cfd3 --- /dev/null +++ b/dart-api-examples/speaker-diarization/bin/init.dart @@ -0,0 +1 @@ +../../vad/bin/init.dart \ No newline at end of file diff --git a/dart-api-examples/speaker-diarization/bin/speaker-diarization.dart b/dart-api-examples/speaker-diarization/bin/speaker-diarization.dart new file mode 100644 index 000000000..760adc868 --- /dev/null +++ b/dart-api-examples/speaker-diarization/bin/speaker-diarization.dart @@ -0,0 +1,100 @@ +// Copyright (c) 2024 Xiaomi Corporation +import 'dart:io'; +import 'dart:typed_data'; +import 'dart:ffi'; + +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; +import './init.dart'; + +void main(List arguments) async { + await initSherpaOnnx(); + + /* Please use the following commands to download files used in this file + Step 1: Download a speaker segmentation model + + Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models + for a list of available models. The following is an example + + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + + Step 2: Download a speaker embedding extractor model + + Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models + for a list of available models. 
The following is an example + + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + + Step 3. Download test wave files + + Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models + for a list of available test wave files. The following is an example + + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav + + Step 4. Run it + */ + + final segmentationModel = + "./sherpa-onnx-pyannote-segmentation-3-0/model.onnx"; + + final embeddingModel = + "./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"; + + final waveFilename = "./0-four-speakers-zh.wav"; + + final segmentationConfig = sherpa_onnx.OfflineSpeakerSegmentationModelConfig( + pyannote: sherpa_onnx.OfflineSpeakerSegmentationPyannoteModelConfig( + model: segmentationModel), + ); + + final embeddingConfig = + sherpa_onnx.SpeakerEmbeddingExtractorConfig(model: embeddingModel); + + // since we know there are 4 speakers in ./0-four-speakers-zh.wav, we set + // numClusters to 4. If you don't know the exact number, please set it to -1. + // in that case, you have to set threshold. A larger threshold leads to + // fewer clusters, i.e., fewer speakers. 
+ final clusteringConfig = + sherpa_onnx.FastClusteringConfig(numClusters: 4, threshold: 0.5); + + var config = sherpa_onnx.OfflineSpeakerDiarizationConfig( + segmentation: segmentationConfig, + embedding: embeddingConfig, + clustering: clusteringConfig, + minDurationOn: 0.2, + minDurationOff: 0.5); + + final sd = sherpa_onnx.OfflineSpeakerDiarization(config); + if (sd.ptr == nullptr) { + return; + } + + final waveData = sherpa_onnx.readWave(waveFilename); + if (sd.sampleRate != waveData.sampleRate) { + print( + 'Expected sample rate: ${sd.sampleRate}, given: ${waveData.sampleRate}'); + return; + } + + print('started'); + + // Use the following statement if you don't want to use a callback + // final segments = sd.process(samples: waveData.samples); + + final segments = sd.processWithCallback( + samples: waveData.samples, + callback: (int numProcessedChunk, int numTotalChunks) { + final progress = 100.0 * numProcessedChunk / numTotalChunks; + + print('Progress ${progress.toStringAsFixed(2)}%'); + + return 0; + }); + + for (int i = 0; i < segments.length; ++i) { + print( + '${segments[i].start.toStringAsFixed(3)} -- ${segments[i].end.toStringAsFixed(3)} speaker_${segments[i].speaker}'); + } +} diff --git a/dart-api-examples/speaker-diarization/pubspec.yaml b/dart-api-examples/speaker-diarization/pubspec.yaml new file mode 100644 index 000000000..28154a49c --- /dev/null +++ b/dart-api-examples/speaker-diarization/pubspec.yaml @@ -0,0 +1,17 @@ +name: speaker_diarization +description: > + This example demonstrates how to use the Dart API for speaker diarization. 
+ +version: 1.0.0 + +environment: + sdk: ">=3.0.0 <4.0.0" + +dependencies: + sherpa_onnx: ^1.10.27 + # sherpa_onnx: + # path: ../../flutter/sherpa_onnx + path: ^1.9.0 + +dev_dependencies: + lints: ^3.0.0 diff --git a/dart-api-examples/speaker-diarization/run.sh b/dart-api-examples/speaker-diarization/run.sh new file mode 100755 index 000000000..7717870dc --- /dev/null +++ b/dart-api-examples/speaker-diarization/run.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +set -ex + +dart pub get + +if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +fi + +if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx +fi + +if [ ! 
-f ./0-four-speakers-zh.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav +fi + +dart run ./bin/speaker-diarization.dart diff --git a/flutter/sherpa_onnx/example/example.md b/flutter/sherpa_onnx/example/example.md index 7e7e8031d..0c24a79b2 100644 --- a/flutter/sherpa_onnx/example/example.md +++ b/flutter/sherpa_onnx/example/example.md @@ -11,6 +11,7 @@ | Functions | URL | Supported Platforms| |---|---|---| +|Speaker diarization| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-diarization)| macOS, Windows, Linux| |Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/streaming-asr)| macOS, Windows, Linux| |Non-Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr)| macOS, Windows, Linux| |Text to speech| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts)| macOS, Windows, Linux| diff --git a/flutter/sherpa_onnx/lib/sherpa_onnx.dart b/flutter/sherpa_onnx/lib/sherpa_onnx.dart index b15e67532..9fcd2872f 100644 --- a/flutter/sherpa_onnx/lib/sherpa_onnx.dart +++ b/flutter/sherpa_onnx/lib/sherpa_onnx.dart @@ -6,6 +6,7 @@ export 'src/audio_tagging.dart'; export 'src/feature_config.dart'; export 'src/keyword_spotter.dart'; export 'src/offline_recognizer.dart'; +export 'src/offline_speaker_diarization.dart'; export 'src/offline_stream.dart'; export 'src/online_recognizer.dart'; export 'src/online_stream.dart'; diff --git a/flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart b/flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart new file mode 100644 index 000000000..5981e3c04 --- /dev/null +++ b/flutter/sherpa_onnx/lib/src/offline_speaker_diarization.dart @@ -0,0 +1,243 @@ +// Copyright (c) 2024 Xiaomi Corporation +import 'dart:ffi'; +import 'dart:typed_data'; + +import 
'package:ffi/ffi.dart'; + +import './sherpa_onnx_bindings.dart'; +import './speaker_identification.dart'; + +class OfflineSpeakerDiarizationSegment { + const OfflineSpeakerDiarizationSegment({ + required this.start, + required this.end, + required this.speaker, + }); + + @override + String toString() { + return 'OfflineSpeakerDiarizationSegment(start: $start, end: $end, speaker: $speaker)'; + } + + final double start; + final double end; + final int speaker; +} + +class OfflineSpeakerSegmentationPyannoteModelConfig { + const OfflineSpeakerSegmentationPyannoteModelConfig({ + this.model = '', + }); + + @override + String toString() { + return 'OfflineSpeakerSegmentationPyannoteModelConfig(model: $model)'; + } + + final String model; +} + +class OfflineSpeakerSegmentationModelConfig { + const OfflineSpeakerSegmentationModelConfig({ + this.pyannote = const OfflineSpeakerSegmentationPyannoteModelConfig(), + this.numThreads = 1, + this.debug = true, + this.provider = 'cpu', + }); + + @override + String toString() { + return 'OfflineSpeakerSegmentationModelConfig(pyannote: $pyannote, numThreads: $numThreads, debug: $debug, provider: $provider)'; + } + + final OfflineSpeakerSegmentationPyannoteModelConfig pyannote; + + final int numThreads; + final bool debug; + final String provider; +} + +class FastClusteringConfig { + const FastClusteringConfig({ + this.numClusters = -1, + this.threshold = 0.5, + }); + + @override + String toString() { + return 'FastClusteringConfig(numClusters: $numClusters, threshold: $threshold)'; + } + + final int numClusters; + final double threshold; +} + +class OfflineSpeakerDiarizationConfig { + const OfflineSpeakerDiarizationConfig({ + this.segmentation = const OfflineSpeakerSegmentationModelConfig(), + this.embedding = const SpeakerEmbeddingExtractorConfig(model: ''), + this.clustering = const FastClusteringConfig(), + this.minDurationOn = 0.2, + this.minDurationOff = 0.5, + }); + + @override + String toString() { + return 
'OfflineSpeakerDiarizationConfig(segmentation: $segmentation, embedding: $embedding, clustering: $clustering, minDurationOn: $minDurationOn, minDurationOff: $minDurationOff)'; + } + + final OfflineSpeakerSegmentationModelConfig segmentation; + final SpeakerEmbeddingExtractorConfig embedding; + final FastClusteringConfig clustering; + final double minDurationOff; // in seconds + final double minDurationOn; // in seconds +} + +class OfflineSpeakerDiarization { + OfflineSpeakerDiarization._( + {required this.ptr, required this.config, required this.sampleRate}); + + void free() { + SherpaOnnxBindings.sherpaOnnxDestroyOfflineSpeakerDiarization?.call(ptr); + ptr = nullptr; + } + + /// The user is responsible to call the OfflineSpeakerDiarization.free() + /// method of the returned instance to avoid memory leak. + factory OfflineSpeakerDiarization(OfflineSpeakerDiarizationConfig config) { + final c = calloc(); + + c.ref.segmentation.pyannote.model = + config.segmentation.pyannote.model.toNativeUtf8(); + c.ref.segmentation.numThreads = config.segmentation.numThreads; + c.ref.segmentation.debug = config.segmentation.debug ? 1 : 0; + c.ref.segmentation.provider = config.segmentation.provider.toNativeUtf8(); + + c.ref.embedding.model = config.embedding.model.toNativeUtf8(); + c.ref.embedding.numThreads = config.embedding.numThreads; + c.ref.embedding.debug = config.embedding.debug ? 1 : 0; + c.ref.embedding.provider = config.embedding.provider.toNativeUtf8(); + + c.ref.clustering.numClusters = config.clustering.numClusters; + c.ref.clustering.threshold = config.clustering.threshold; + + c.ref.minDurationOn = config.minDurationOn; + c.ref.minDurationOff = config.minDurationOff; + + final ptr = + SherpaOnnxBindings.sherpaOnnxCreateOfflineSpeakerDiarization?.call(c) ?? 
+ nullptr; + + calloc.free(c.ref.embedding.provider); + calloc.free(c.ref.embedding.model); + calloc.free(c.ref.segmentation.provider); + calloc.free(c.ref.segmentation.pyannote.model); + + int sampleRate = 0; + if (ptr != nullptr) { + sampleRate = SherpaOnnxBindings + .sherpaOnnxOfflineSpeakerDiarizationGetSampleRate + ?.call(ptr) ?? + 0; + } + return OfflineSpeakerDiarization._( + ptr: ptr, config: config, sampleRate: sampleRate); + } + + List process( + {required Float32List samples}) { + if (ptr == nullptr) { + return []; + } + + final n = samples.length; + final Pointer p = calloc(n); + + final pList = p.asTypedList(n); + pList.setAll(0, samples); + + final r = SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationProcess + ?.call(ptr, p, n) ?? + nullptr; + + final ans = _processImpl(r); + + SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult + ?.call(r); + + return ans; + } + + List processWithCallback({ + required Float32List samples, + required int Function(int numProcessedChunks, int numTotalChunks) callback, + }) { + if (ptr == nullptr) { + return []; + } + + final n = samples.length; + final Pointer p = calloc(n); + + final pList = p.asTypedList(n); + pList.setAll(0, samples); + + final wrapper = NativeCallable< + SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>.isolateLocal( + (int numProcessedChunks, int numTotalChunks) { + return callback(numProcessedChunks, numTotalChunks); + }, exceptionalReturn: 0); + + final r = SherpaOnnxBindings + .sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg + ?.call(ptr, p, n, wrapper.nativeFunction) ?? + nullptr; + + wrapper.close(); + + final ans = _processImpl(r); + + SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroyResult + ?.call(r); + + return ans; + } + + List _processImpl( + Pointer r) { + if (r == nullptr) { + return []; + } + + final numSegments = SherpaOnnxBindings + .sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments + ?.call(r) ?? 
+ 0; + final segments = SherpaOnnxBindings + .sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime + ?.call(r) ?? + nullptr; + + if (segments == nullptr) { + return []; + } + + final ans = []; + for (int i = 0; i != numSegments; ++i) { + final s = segments + i; + + final tmp = OfflineSpeakerDiarizationSegment( + start: s.ref.start, end: s.ref.end, speaker: s.ref.speaker); + ans.add(tmp); + } + + SherpaOnnxBindings.sherpaOnnxOfflineSpeakerDiarizationDestroySegment + ?.call(segments); + + return ans; + } + + Pointer ptr; + OfflineSpeakerDiarizationConfig config; + final int sampleRate; +} diff --git a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart index 42294c2d4..8a8817d63 100644 --- a/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart +++ b/flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart @@ -2,6 +2,66 @@ import 'dart:ffi'; import 'package:ffi/ffi.dart'; +final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct { + external Pointer model; + + @Int32() + external int numThreads; + + @Int32() + external int debug; + + external Pointer provider; +} + +final class SherpaOnnxOfflineSpeakerDiarizationSegment extends Struct { + @Float() + external double start; + + @Float() + external double end; + + @Int32() + external int speaker; +} + +final class SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig + extends Struct { + external Pointer model; +} + +final class SherpaOnnxOfflineSpeakerSegmentationModelConfig extends Struct { + external SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig pyannote; + + @Int32() + external int numThreads; + + @Int32() + external int debug; + + external Pointer provider; +} + +final class SherpaOnnxFastClusteringConfig extends Struct { + @Int32() + external int numClusters; + + @Float() + external double threshold; +} + +final class SherpaOnnxOfflineSpeakerDiarizationConfig extends Struct { + external 
SherpaOnnxOfflineSpeakerSegmentationModelConfig segmentation; + external SherpaOnnxSpeakerEmbeddingExtractorConfig embedding; + external SherpaOnnxFastClusteringConfig clustering; + + @Float() + external double minDurationOn; + + @Float() + external double minDurationOff; +} + final class SherpaOnnxOfflinePunctuationModelConfig extends Struct { external Pointer ctTransformer; @@ -341,18 +401,6 @@ final class SherpaOnnxWave extends Struct { external int numSamples; } -final class SherpaOnnxSpeakerEmbeddingExtractorConfig extends Struct { - external Pointer model; - - @Int32() - external int numThreads; - - @Int32() - external int debug; - - external Pointer provider; -} - final class SherpaOnnxKeywordSpotterConfig extends Struct { external SherpaOnnxFeatureConfig feat; @@ -402,10 +450,101 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} +final class SherpaOnnxOfflineSpeakerDiarization extends Opaque {} + +final class SherpaOnnxOfflineSpeakerDiarizationResult extends Opaque {} + +typedef SherpaOnnxCreateOfflineSpeakerDiarizationNative + = Pointer Function( + Pointer); + +typedef SherpaOnnxCreateOfflineSpeakerDiarization + = SherpaOnnxCreateOfflineSpeakerDiarizationNative; + +typedef SherpaOnnxDestroyOfflineSpeakerDiarizationNative = Void Function( + Pointer); + +typedef SherpaOnnxDestroyOfflineSpeakerDiarization = void Function( + Pointer); + typedef SherpaOnnxCreateOfflinePunctuationNative = Pointer Function( Pointer); +typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative = Int32 Function( + Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationGetSampleRate = int Function( + Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationSetConfigNative = Void Function( + Pointer, + Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative = Int32 + Function(Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers = int Function( + 
Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative = Int32 + Function(Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments = int Function( + Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative + = Pointer Function( + Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime + = SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative; + +typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative = Void Function( + Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationDestroySegment = void Function( + Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationProcessNative + = Pointer Function( + Pointer, Pointer, Int32); + +typedef SherpaOnnxOfflineSpeakerDiarizationProcess + = Pointer Function( + Pointer, Pointer, int); + +typedef SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative = Int32 + Function(Int32, Int32); + +typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative + = Pointer Function( + Pointer, + Pointer, + Int32, + Pointer< + NativeFunction< + SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>); + +typedef SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg + = Pointer Function( + Pointer, + Pointer, + int, + Pointer< + NativeFunction< + SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArgNative>>); + +typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative = Void Function( + Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationDestroyResult = void Function( + Pointer); + +typedef SherpaOnnxOfflineSpeakerDiarizationSetConfig = void Function( + Pointer, + Pointer); + typedef SherpaOnnxCreateOfflinePunctuation = SherpaOnnxCreateOfflinePunctuationNative; @@ -940,6 +1079,29 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer); typedef SherpaOnnxFreeWave = void Function(Pointer); class SherpaOnnxBindings { + static SherpaOnnxCreateOfflineSpeakerDiarization? 
+ sherpaOnnxCreateOfflineSpeakerDiarization; + static SherpaOnnxDestroyOfflineSpeakerDiarization? + sherpaOnnxDestroyOfflineSpeakerDiarization; + static SherpaOnnxOfflineSpeakerDiarizationGetSampleRate? + sherpaOnnxOfflineSpeakerDiarizationGetSampleRate; + static SherpaOnnxOfflineSpeakerDiarizationSetConfig? + sherpaOnnxOfflineSpeakerDiarizationSetConfig; + static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers? + sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers; + static SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments? + sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments; + static SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime? + sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime; + static SherpaOnnxOfflineSpeakerDiarizationDestroySegment? + sherpaOnnxOfflineSpeakerDiarizationDestroySegment; + static SherpaOnnxOfflineSpeakerDiarizationProcess? + sherpaOnnxOfflineSpeakerDiarizationProcess; + static SherpaOnnxOfflineSpeakerDiarizationDestroyResult? + sherpaOnnxOfflineSpeakerDiarizationDestroyResult; + static SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg? + sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg; + static SherpaOnnxCreateOfflinePunctuation? sherpaOnnxCreateOfflinePunctuation; static SherpaOnnxDestroyOfflinePunctuation? sherpaOnnxDestroyOfflinePunctuation; @@ -1107,6 +1269,83 @@ class SherpaOnnxBindings { static SherpaOnnxFreeWave? 
freeWave; static void init(DynamicLibrary dynamicLibrary) { + sherpaOnnxCreateOfflineSpeakerDiarization ??= dynamicLibrary + .lookup< + NativeFunction< + SherpaOnnxCreateOfflineSpeakerDiarizationNative>>( + 'SherpaOnnxCreateOfflineSpeakerDiarization') + .asFunction(); + + sherpaOnnxDestroyOfflineSpeakerDiarization ??= dynamicLibrary + .lookup< + NativeFunction< + SherpaOnnxDestroyOfflineSpeakerDiarizationNative>>( + 'SherpaOnnxDestroyOfflineSpeakerDiarization') + .asFunction(); + + sherpaOnnxOfflineSpeakerDiarizationGetSampleRate ??= dynamicLibrary + .lookup< + NativeFunction< + SherpaOnnxOfflineSpeakerDiarizationGetSampleRateNative>>( + 'SherpaOnnxOfflineSpeakerDiarizationGetSampleRate') + .asFunction(); + + sherpaOnnxOfflineSpeakerDiarizationSetConfig ??= dynamicLibrary + .lookup< + NativeFunction< + SherpaOnnxOfflineSpeakerDiarizationSetConfigNative>>( + 'SherpaOnnxOfflineSpeakerDiarizationSetConfig') + .asFunction(); + + sherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers ??= dynamicLibrary + .lookup< + NativeFunction< + SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakersNative>>( + 'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSpeakers') + .asFunction(); + + sherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments ??= dynamicLibrary + .lookup< + NativeFunction< + SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegmentsNative>>( + 'SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments') + .asFunction(); + + sherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime ??= dynamicLibrary + .lookup< + NativeFunction< + SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTimeNative>>( + 'SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime') + .asFunction(); + + sherpaOnnxOfflineSpeakerDiarizationDestroySegment ??= dynamicLibrary + .lookup< + NativeFunction< + SherpaOnnxOfflineSpeakerDiarizationDestroySegmentNative>>( + 'SherpaOnnxOfflineSpeakerDiarizationDestroySegment') + .asFunction(); + + sherpaOnnxOfflineSpeakerDiarizationProcess ??= dynamicLibrary + 
.lookup< + NativeFunction< + SherpaOnnxOfflineSpeakerDiarizationProcessNative>>( + 'SherpaOnnxOfflineSpeakerDiarizationProcess') + .asFunction(); + + sherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg ??= dynamicLibrary + .lookup< + NativeFunction< + SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArgNative>>( + 'SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg') + .asFunction(); + + sherpaOnnxOfflineSpeakerDiarizationDestroyResult ??= dynamicLibrary + .lookup< + NativeFunction< + SherpaOnnxOfflineSpeakerDiarizationDestroyResultNative>>( + 'SherpaOnnxOfflineSpeakerDiarizationDestroyResult') + .asFunction(); + sherpaOnnxCreateOfflinePunctuation ??= dynamicLibrary .lookup>( 'SherpaOnnxCreateOfflinePunctuation') diff --git a/flutter/sherpa_onnx/pubspec.yaml b/flutter/sherpa_onnx/pubspec.yaml index 5b693ef0b..e92071833 100644 --- a/flutter/sherpa_onnx/pubspec.yaml +++ b/flutter/sherpa_onnx/pubspec.yaml @@ -1,8 +1,8 @@ name: sherpa_onnx description: > - Speech recognition, speech synthesis, and speaker recognition using next-gen Kaldi - with onnxruntime without Internet connection. + Speech recognition, speech synthesis, speaker diarization, and speaker recognition + using next-gen Kaldi with onnxruntime without Internet connection. 
repository: https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter @@ -12,7 +12,7 @@ documentation: https://k2-fsa.github.io/sherpa/onnx/ topics: - speech-recognition - speech-synthesis - - speaker-identification + - speaker-diarization - audio-tagging - voice-activity-detection @@ -41,7 +41,7 @@ dependencies: sherpa_onnx_linux: ^1.10.27 # sherpa_onnx_linux: # path: ../sherpa_onnx_linux - # + sherpa_onnx_windows: ^1.10.27 # sherpa_onnx_windows: # path: ../sherpa_onnx_windows diff --git a/scripts/dart/speaker-diarization-pubspec.yaml b/scripts/dart/speaker-diarization-pubspec.yaml new file mode 100644 index 000000000..fec147e75 --- /dev/null +++ b/scripts/dart/speaker-diarization-pubspec.yaml @@ -0,0 +1,16 @@ +name: speaker_diarization +description: > + This example demonstrates how to use the Dart API for speaker diarization. + +version: 1.0.0 + +environment: + sdk: ">=3.0.0 <4.0.0" + +dependencies: + sherpa_onnx: + path: ../../flutter/sherpa_onnx + path: ^1.9.0 + +dev_dependencies: + lints: ^3.0.0 diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index abcfc5b82..4ba0a4a60 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -1828,4 +1828,20 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback( return ans; } +const SherpaOnnxOfflineSpeakerDiarizationResult * +SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg( + const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples, + int32_t n, + SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback) { + auto wrapper = [callback](int32_t num_processed_chunks, + int32_t num_total_chunks, void *) { + return callback(num_processed_chunks, num_total_chunks); + }; + + auto ans = new SherpaOnnxOfflineSpeakerDiarizationResult; + ans->impl = sd->impl->Process(samples, n, wrapper); + + return ans; +} + #endif diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index c9e7f9ee1..4b41a81a9 100644 --- a/sherpa-onnx/c-api/c-api.h +++ 
b/sherpa-onnx/c-api/c-api.h @@ -1485,6 +1485,9 @@ SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroySegment( typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallback)( int32_t num_processed_chunk, int32_t num_total_chunks, void *arg); +typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg)( + int32_t num_processed_chunk, int32_t num_total_chunks); + // The user has to invoke SherpaOnnxOfflineSpeakerDiarizationDestroyResult() // to free the returned pointer to avoid memory leak. SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult * @@ -1500,6 +1503,12 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback( int32_t n, SherpaOnnxOfflineSpeakerDiarizationProgressCallback callback, void *arg); +SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarizationResult * +SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg( + const SherpaOnnxOfflineSpeakerDiarization *sd, const float *samples, + int32_t n, + SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg callback); + SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult( const SherpaOnnxOfflineSpeakerDiarizationResult *r); diff --git a/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h b/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h index 8f669e27c..0c70f0bc6 100644 --- a/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h +++ b/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h @@ -5,6 +5,7 @@ #define SHERPA_ONNX_CSRC_OFFLINE_SPEAKER_DIARIZATION_PYANNOTE_IMPL_H_ #include +#include #include #include #include diff --git a/sherpa-onnx/jni/offline-speaker-diarization.cc b/sherpa-onnx/jni/offline-speaker-diarization.cc index a0eef8b9c..e82962c80 100644 --- a/sherpa-onnx/jni/offline-speaker-diarization.cc +++ b/sherpa-onnx/jni/offline-speaker-diarization.cc @@ -204,7 +204,8 @@ Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_processWithCallback( jfloat *p = env->GetFloatArrayElements(samples, 
nullptr); jsize n = env->GetArrayLength(samples); auto segments = - sd->Process(p, n, callback_wrapper, (void *)arg).SortByStartTime(); + sd->Process(p, n, callback_wrapper, reinterpret_cast(arg)) + .SortByStartTime(); env->ReleaseFloatArrayElements(samples, p, JNI_ABORT); return ProcessImpl(env, segments); From 5e273c5be44e349b8e65cb649bc6e7e05f4f5ba7 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 12 Oct 2024 12:28:38 +0800 Subject: [PATCH 196/201] Pascal API for speaker diarization (#1420) --- .github/workflows/pascal.yaml | 15 + pascal-api-examples/README.md | 1 + .../speaker-diarization/main.pas | 104 ++++++ .../speaker-diarization/run.sh | 49 +++ sherpa-onnx/pascal-api/sherpa_onnx.pas | 339 +++++++++++++++++- 5 files changed, 506 insertions(+), 2 deletions(-) create mode 100644 pascal-api-examples/speaker-diarization/main.pas create mode 100755 pascal-api-examples/speaker-diarization/run.sh diff --git a/.github/workflows/pascal.yaml b/.github/workflows/pascal.yaml index 2ed213184..ba9a73163 100644 --- a/.github/workflows/pascal.yaml +++ b/.github/workflows/pascal.yaml @@ -127,6 +127,21 @@ jobs: cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts fi + - name: Run Pascal test (Speaker diarization) + shell: bash + run: | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH + + cd ./pascal-api-examples + pushd speaker-diarization + + ./run.sh + rm -rfv *.onnx *.wav sherpa-onnx-* + ls -lh + echo "---" + + popd + - name: Run Pascal test (TTS) shell: bash run: | diff --git a/pascal-api-examples/README.md b/pascal-api-examples/README.md index 5475d825b..5e709cd7e 100644 --- a/pascal-api-examples/README.md +++ b/pascal-api-examples/README.md @@ -9,6 +9,7 @@ https://k2-fsa.github.io/sherpa/onnx/pascal-api/index.html |Directory| Description| |---------|------------| |[read-wav](./read-wav)|It shows how to read a wave file.| +|[speaker-diarization](./speaker-diarization)|It shows how to use Pascal API for speaker diarization.| 
|[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.| |[non-streaming-asr](./non-streaming-asr)| It shows how to use non-streaming models for speech recognition.| |[vad](./vad)| It shows how to use the voice activity detection API.| diff --git a/pascal-api-examples/speaker-diarization/main.pas b/pascal-api-examples/speaker-diarization/main.pas new file mode 100644 index 000000000..35d915d0b --- /dev/null +++ b/pascal-api-examples/speaker-diarization/main.pas @@ -0,0 +1,104 @@ +{ Copyright (c) 2024 Xiaomi Corporation } +{ +This file shows how to use the Pascal API from sherpa-onnx +for speaker diarization. + +Usage: + +Step 1: Download a speaker segmentation model + +Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models +for a list of available models. The following is an example + + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + +Step 2: Download a speaker embedding extractor model + +Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models +for a list of available models. The following is an example + + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + +Step 3. Download test wave files + +Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models +for a list of available test wave files. The following is an example + + wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav + +Step 4. 
Run it +} + +program main; + +{$mode delphi} + +uses + sherpa_onnx, + ctypes, + SysUtils; + +function ProgressCallback( + NumProcessedChunks: cint32; + NumTotalChunks: cint32): cint32; cdecl; +var + Progress: Single; +begin + Progress := 100.0 * NumProcessedChunks / NumTotalChunks; + WriteLn(Format('Progress: %.3f%%', [Progress])); + + Result := 0; +end; + +var + Wave: TSherpaOnnxWave; + Config: TSherpaOnnxOfflineSpeakerDiarizationConfig; + Sd: TSherpaOnnxOfflineSpeakerDiarization; + Segments: TSherpaOnnxOfflineSpeakerDiarizationSegmentArray; + I: Integer; +begin + Wave := SherpaOnnxReadWave('./0-four-speakers-zh.wav'); + + Config.Segmentation.Pyannote.Model := './sherpa-onnx-pyannote-segmentation-3-0/model.onnx'; + Config.Embedding.Model := './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx'; + + { + Since we know that there are 4 speakers in ./0-four-speakers-zh.wav, we + set NumClusters to 4 here. + If you don't have such information, please set NumClusters to -1. + In that case, you have to set Config.Clustering.Threshold. + A larger threshold leads to fewer clusters, i.e., fewer speakers. + } + Config.Clustering.NumClusters := 4; + Config.Segmentation.Debug := True; + Config.Embedding.Debug := True; + + Sd := TSherpaOnnxOfflineSpeakerDiarization.Create(Config); + if Sd.GetHandle = nil then + begin + WriteLn('Please check you config'); + Exit; + end; + + if Sd.GetSampleRate <> Wave.SampleRate then + begin + WriteLn(Format('Expected sample rate: %d, given: %d', [Sd.GetSampleRate, Wave.SampleRate])); + Exit; + end; + + { + // If you don't want to use a callback + Segments := Sd.Process(Wave.Samples); + } + Segments := Sd.Process(Wave.Samples, @ProgressCallback); + + for I := Low(Segments) to High(Segments) do + begin + WriteLn(Format('%.3f -- %.3f speaker_%d', + [Segments[I].Start, Segments[I].Stop, Segments[I].Speaker])); + end; + + FreeAndNil(Sd); +end. 
diff --git a/pascal-api-examples/speaker-diarization/run.sh b/pascal-api-examples/speaker-diarization/run.sh new file mode 100755 index 000000000..866dc63c9 --- /dev/null +++ b/pascal-api-examples/speaker-diarization/run.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +set -ex + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) + +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" + +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then + mkdir -p ../../build + pushd ../../build + cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + .. + + cmake --build . --target install --config Release + popd +fi + +fpc \ + -dSHERPA_ONNX_USE_SHARED_LIBS \ + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ + -Fl$SHERPA_ONNX_DIR/build/install/lib \ + ./main.pas + +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH + +if [ ! -f ./sherpa-onnx-pyannote-segmentation-3-0/model.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 + rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2 +fi + +if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx +fi + +if [ ! 
-f ./0-four-speakers-zh.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav +fi + +./main diff --git a/sherpa-onnx/pascal-api/sherpa_onnx.pas b/sherpa-onnx/pascal-api/sherpa_onnx.pas index 7f05793e1..1b24dec80 100644 --- a/sherpa-onnx/pascal-api/sherpa_onnx.pas +++ b/sherpa-onnx/pascal-api/sherpa_onnx.pas @@ -102,7 +102,7 @@ TSherpaOnnxOfflineTts = class function Generate(Text: AnsiString; SpeakerId: Integer; Speed: Single; - Callback:PSherpaOnnxGeneratedAudioCallbackWithArg; + Callback: PSherpaOnnxGeneratedAudioCallbackWithArg; Arg: Pointer ): TSherpaOnnxGeneratedAudio; overload; @@ -398,6 +398,78 @@ TSherpaOnnxVoiceActivityDetector = class property GetHandle: Pointer Read Handle; end; + + TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig = record + Model: AnsiString; + function ToString: AnsiString; + end; + + TSherpaOnnxOfflineSpeakerSegmentationModelConfig = record + Pyannote: TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig; + NumThreads: Integer; + Debug: Boolean; + Provider: AnsiString; + function ToString: AnsiString; + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerSegmentationModelConfig); + end; + + TSherpaOnnxFastClusteringConfig = record + NumClusters: Integer; + Threshold: Single; + function ToString: AnsiString; + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFastClusteringConfig); + end; + + TSherpaOnnxSpeakerEmbeddingExtractorConfig = record + Model: AnsiString; + NumThreads: Integer; + Debug: Boolean; + Provider: AnsiString; + function ToString: AnsiString; + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSpeakerEmbeddingExtractorConfig); + end; + + TSherpaOnnxOfflineSpeakerDiarizationConfig = record + Segmentation: TSherpaOnnxOfflineSpeakerSegmentationModelConfig; + Embedding: TSherpaOnnxSpeakerEmbeddingExtractorConfig; + Clustering: 
TSherpaOnnxFastClusteringConfig; + MinDurationOn: Single; + MinDurationOff: Single; + function ToString: AnsiString; + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerDiarizationConfig); + end; + + TSherpaOnnxOfflineSpeakerDiarizationSegment = record + Start: Single; + Stop: Single; + Speaker: Integer; + function ToString: AnsiString; + end; + + TSherpaOnnxOfflineSpeakerDiarizationSegmentArray = array of TSherpaOnnxOfflineSpeakerDiarizationSegment; + + PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg = ^TSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg; + + TSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg = function( + NumProcessChunks: cint32; + NumTotalChunks: cint32): cint32; cdecl; + + TSherpaOnnxOfflineSpeakerDiarization = class + private + Handle: Pointer; + SampleRate: Integer; + _Config: TSherpaOnnxOfflineSpeakerDiarizationConfig; + public + constructor Create(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig); + destructor Destroy; override; + procedure SetConfig(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig); + function Process(Samples: array of Single): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray; overload; + function Process(Samples: array of Single; Callback: PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray; overload; + property GetHandle: Pointer Read Handle; + property GetSampleRate: Integer Read SampleRate; + end; + + { It supports reading a single channel wave with 16-bit encoded samples. Samples are normalized to the range [-1, 1]. 
} @@ -656,6 +728,47 @@ SherpaOnnxResampleOut = record PSherpaOnnxResampleOut = ^SherpaOnnxResampleOut; + SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig = record + Model: PAnsiChar; + end; + + SherpaOnnxOfflineSpeakerSegmentationModelConfig = record + Pyannote: SherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig; + NumThreads: cint32; + Debug: cint32; + Provider: PAnsiChar; + end; + + SherpaOnnxFastClusteringConfig = record + NumClusters: cint32; + Threshold: cfloat; + end; + + SherpaOnnxSpeakerEmbeddingExtractorConfig = record + Model: PAnsiChar; + NumThreads: cint32; + Debug: cint32; + Provider: PAnsiChar; + end; + + SherpaOnnxOfflineSpeakerDiarizationConfig = record + Segmentation: SherpaOnnxOfflineSpeakerSegmentationModelConfig; + Embedding: SherpaOnnxSpeakerEmbeddingExtractorConfig; + Clustering: SherpaOnnxFastClusteringConfig; + MinDurationOn: cfloat; + MinDurationOff: cfloat; + end; + + SherpaOnnxOfflineSpeakerDiarizationSegment = record + Start: cfloat; + Stop: cfloat; + Speaker: cint32; + end; + + PSherpaOnnxOfflineSpeakerDiarizationSegment = ^SherpaOnnxOfflineSpeakerDiarizationSegment; + + PSherpaOnnxOfflineSpeakerDiarizationConfig = ^SherpaOnnxOfflineSpeakerDiarizationConfig; + function SherpaOnnxCreateLinearResampler(SampleRateInHz: cint32; SampleRateOutHz: cint32; FilterCutoffHz: cfloat; @@ -677,6 +790,37 @@ procedure SherpaOnnxLinearResamplerResampleFree(P: PSherpaOnnxResampleOut); cdec procedure SherpaOnnxLinearResamplerReset(P: Pointer); cdecl; external SherpaOnnxLibName; +function SherpaOnnxCreateOfflineSpeakerDiarization(Config: PSherpaOnnxOfflineSpeakerDiarizationConfig): Pointer; cdecl; + external SherpaOnnxLibName; + +procedure SherpaOnnxDestroyOfflineSpeakerDiarization(P: Pointer); cdecl; + external SherpaOnnxLibName; + +function SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(P: Pointer): cint32; cdecl; + external SherpaOnnxLibName; + +procedure SherpaOnnxOfflineSpeakerDiarizationSetConfig(P: Pointer; Config: 
PSherpaOnnxOfflineSpeakerDiarizationConfig); cdecl; + external SherpaOnnxLibName; + +function SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(P: Pointer): cint32; cdecl; + external SherpaOnnxLibName; + +function SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(P: Pointer): PSherpaOnnxOfflineSpeakerDiarizationSegment; cdecl; + external SherpaOnnxLibName; + +procedure SherpaOnnxOfflineSpeakerDiarizationDestroySegment(P: Pointer); cdecl; + external SherpaOnnxLibName; + +function SherpaOnnxOfflineSpeakerDiarizationProcess(P: Pointer; Samples: pcfloat; N: cint32): Pointer; cdecl; + external SherpaOnnxLibName; + +function SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(P: Pointer; + Samples: pcfloat; N: cint32; Callback: PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg): Pointer; cdecl; + external SherpaOnnxLibName; + +procedure SherpaOnnxOfflineSpeakerDiarizationDestroyResult(P: Pointer); cdecl; + external SherpaOnnxLibName; + function SherpaOnnxCreateOfflineTts(Config: PSherpaOnnxOfflineTtsConfig): Pointer; cdecl; external SherpaOnnxLibName; @@ -1773,7 +1917,7 @@ function TSherpaOnnxOfflineTts.Generate(Text: AnsiString; SpeakerId: Integer; function TSherpaOnnxOfflineTts.Generate(Text: AnsiString; SpeakerId: Integer; Speed: Single; - Callback:PSherpaOnnxGeneratedAudioCallbackWithArg; + Callback: PSherpaOnnxGeneratedAudioCallbackWithArg; Arg: Pointer ): TSherpaOnnxGeneratedAudio; var @@ -1847,4 +1991,195 @@ procedure TSherpaOnnxLinearResampler.Reset; SherpaOnnxLinearResamplerReset(Self.Handle); end; +function TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig.ToString: AnsiString; +begin + Result := Format('TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(' + + 'Model := %s)',[Self.Model]); +end; + +function TSherpaOnnxOfflineSpeakerSegmentationModelConfig.ToString: AnsiString; +begin + Result := Format('TSherpaOnnxOfflineSpeakerSegmentationPyannoteModelConfig(' + + 'Pyannote := %s, ' + + 'NumThreads := %d, ' + + 'Debug 
:= %s, ' + + 'Provider := %s)', + [Self.Pyannote.ToString, Self.NumThreads, + Self.Debug.ToString, Self.Provider]); +end; + +class operator TSherpaOnnxOfflineSpeakerSegmentationModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerSegmentationModelConfig); +begin + Dest.NumThreads := 1; + Dest.Debug := False; + Dest.Provider := 'cpu'; +end; + +function TSherpaOnnxFastClusteringConfig.ToString: AnsiString; +begin + Result := Format('TSherpaOnnxFastClusteringConfig(' + + 'NumClusters := %d, Threshold := %.3f)', + [Self.NumClusters, Self.Threshold]); +end; + +class operator TSherpaOnnxFastClusteringConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxFastClusteringConfig); +begin + Dest.NumClusters := -1; + Dest.Threshold := 0.5; +end; + +function TSherpaOnnxSpeakerEmbeddingExtractorConfig.ToString: AnsiString; +begin + Result := Format('TSherpaOnnxSpeakerEmbeddingExtractorConfig(' + + 'Model := %s, '+ + 'NumThreads := %d, '+ + 'Debug := %s, '+ + 'Provider := %s)', + [Self.Model, Self.NumThreads, Self.Debug.ToString, Self.Provider]); +end; + +class operator TSherpaOnnxSpeakerEmbeddingExtractorConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxSpeakerEmbeddingExtractorConfig); +begin + Dest.NumThreads := 1; + Dest.Debug := False; + Dest.Provider := 'cpu'; +end; + +function TSherpaOnnxOfflineSpeakerDiarizationConfig.ToString: AnsiString; +begin + Result := Format('TSherpaOnnxOfflineSpeakerDiarizationConfig(' + + 'Segmentation := %s, '+ + 'Embedding := %s, '+ + 'Clustering := %s, '+ + 'MinDurationOn := %.3f, '+ + 'MinDurationOff := %.3f)', + [Self.Segmentation.ToString, Self.Embedding.ToString, + Self.Clustering.ToString, Self.MinDurationOn, Self.MinDurationOff]); +end; + +class operator TSherpaOnnxOfflineSpeakerDiarizationConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineSpeakerDiarizationConfig); +begin + Dest.MinDurationOn := 0.2; + Dest.MinDurationOff := 0.5; +end; + 
+function TSherpaOnnxOfflineSpeakerDiarizationSegment.ToString: AnsiString; +begin + Result := Format('TSherpaOnnxOfflineSpeakerDiarizationSegment(' + + 'Start := %.3f, '+ + 'Stop := %.3f, '+ + 'Speaker := %d)', + [Self.Start, Self.Stop, Self.Speaker]); +end; + +constructor TSherpaOnnxOfflineSpeakerDiarization.Create(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig); +var + C: SherpaOnnxOfflineSpeakerDiarizationConfig; +begin + C := Default(SherpaOnnxOfflineSpeakerDiarizationConfig); + C.Segmentation.Pyannote.Model := PAnsiChar(Config.Segmentation.Pyannote.Model); + C.Segmentation.NumThreads := Config.Segmentation.NumThreads; + C.Segmentation.Debug := Ord(Config.Segmentation.Debug); + C.Segmentation.Provider := PAnsiChar(Config.Segmentation.Provider); + + C.Embedding.Model := PAnsiChar(Config.Embedding.Model); + C.Embedding.NumThreads := Config.Embedding.NumThreads; + C.Embedding.Debug := Ord(Config.Embedding.Debug); + C.Embedding.Provider := PAnsiChar(Config.Embedding.Provider); + + C.Clustering.NumClusters := Config.Clustering.NumClusters; + C.Clustering.Threshold := Config.Clustering.Threshold; + + C.MinDurationOn := Config.MinDurationOn; + C.MinDurationOff := Config.MinDurationOff; + + Self.Handle := SherpaOnnxCreateOfflineSpeakerDiarization(@C); + Self._Config := Config; + Self.SampleRate := 0; + + if Self.Handle <> nil then + begin + Self.SampleRate := SherpaOnnxOfflineSpeakerDiarizationGetSampleRate(Self.Handle); + end; +end; + +destructor TSherpaOnnxOfflineSpeakerDiarization.Destroy; +begin + SherpaOnnxDestroyOfflineSpeakerDiarization(Self.Handle); + Self.Handle := nil; +end; + +procedure TSherpaOnnxOfflineSpeakerDiarization.SetConfig(Config: TSherpaOnnxOfflineSpeakerDiarizationConfig); +var + C: SherpaOnnxOfflineSpeakerDiarizationConfig; +begin + C := Default(SherpaOnnxOfflineSpeakerDiarizationConfig); + + C.Clustering.NumClusters := Config.Clustering.NumClusters; + C.Clustering.Threshold := Config.Clustering.Threshold; + + 
SherpaOnnxOfflineSpeakerDiarizationSetConfig(Self.Handle, @C); +end; + +function TSherpaOnnxOfflineSpeakerDiarization.Process(Samples: array of Single): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray; +var + R: Pointer; + NumSegments: Integer; + I: Integer; + Segments: PSherpaOnnxOfflineSpeakerDiarizationSegment; +begin + Result := nil; + + R := SherpaOnnxOfflineSpeakerDiarizationProcess(Self.Handle, pcfloat(Samples), Length(Samples)); + if R = nil then + begin + Exit + end; + NumSegments := SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(R); + + Segments := SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(R); + + SetLength(Result, NumSegments); + for I := Low(Result) to High(Result) do + begin + Result[I].Start := Segments[I].Start; + Result[I].Stop := Segments[I].Stop; + Result[I].Speaker := Segments[I].Speaker; + end; + + SherpaOnnxOfflineSpeakerDiarizationDestroySegment(Segments); + SherpaOnnxOfflineSpeakerDiarizationDestroyResult(R); +end; + +function TSherpaOnnxOfflineSpeakerDiarization.Process(Samples: array of Single; + callback: PSherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg): TSherpaOnnxOfflineSpeakerDiarizationSegmentArray; +var + R: Pointer; + NumSegments: Integer; + I: Integer; + Segments: PSherpaOnnxOfflineSpeakerDiarizationSegment; +begin + Result := nil; + + R := SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(Self.Handle, pcfloat(Samples), Length(Samples), callback); + if R = nil then + begin + Exit + end; + NumSegments := SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(R); + + Segments := SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(R); + + SetLength(Result, NumSegments); + for I := Low(Result) to High(Result) do + begin + Result[I].Start := Segments[I].Start; + Result[I].Stop := Segments[I].Stop; + Result[I].Speaker := Segments[I].Speaker; + end; + + SherpaOnnxOfflineSpeakerDiarizationDestroySegment(Segments); + SherpaOnnxOfflineSpeakerDiarizationDestroyResult(R); +end; + end. 
From 94b26ff07c1b6275d1830cd2987081a0bdbedacb Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 12 Oct 2024 13:03:48 +0800 Subject: [PATCH 197/201] Android JNI support for speaker diarization (#1421) --- .../csrc/offline-speaker-diarization-impl.cc | 14 ++++++++++++++ .../csrc/offline-speaker-diarization-impl.h | 10 ++++++++++ ...ffline-speaker-diarization-pyannote-impl.h | 16 ++++++++++++++++ .../csrc/offline-speaker-diarization.cc | 6 ++++++ .../csrc/offline-speaker-diarization.h | 10 ++++++++++ ...ine-speaker-segmentation-pyannote-model.cc | 18 ++++++++++++++++++ ...line-speaker-segmentation-pyannote-model.h | 10 ++++++++++ .../sherpa-onnx-vad-microphone-offline-asr.cc | 2 +- sherpa-onnx/jni/audio-tagging.cc | 1 + sherpa-onnx/jni/keyword-spotter.cc | 2 ++ sherpa-onnx/jni/offline-punctuation.cc | 2 ++ sherpa-onnx/jni/offline-recognizer.cc | 2 ++ .../jni/offline-speaker-diarization.cc | 19 ++++++++++++++++++- sherpa-onnx/jni/offline-tts.cc | 1 + sherpa-onnx/jni/online-recognizer.cc | 1 + .../jni/speaker-embedding-extractor.cc | 1 + .../jni/spoken-language-identification.cc | 1 + sherpa-onnx/jni/voice-activity-detector.cc | 2 ++ 18 files changed, 116 insertions(+), 2 deletions(-) diff --git a/sherpa-onnx/csrc/offline-speaker-diarization-impl.cc b/sherpa-onnx/csrc/offline-speaker-diarization-impl.cc index e41a7767a..15c3a2eb4 100644 --- a/sherpa-onnx/csrc/offline-speaker-diarization-impl.cc +++ b/sherpa-onnx/csrc/offline-speaker-diarization-impl.cc @@ -23,4 +23,18 @@ OfflineSpeakerDiarizationImpl::Create( return nullptr; } +#if __ANDROID_API__ >= 9 +std::unique_ptr +OfflineSpeakerDiarizationImpl::Create( + AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config) { + if (!config.segmentation.pyannote.model.empty()) { + return std::make_unique(mgr, config); + } + + SHERPA_ONNX_LOGE("Please specify a speaker segmentation model."); + + return nullptr; +} +#endif + } // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/offline-speaker-diarization-impl.h 
b/sherpa-onnx/csrc/offline-speaker-diarization-impl.h index 3aed9d72f..41f0e1e2f 100644 --- a/sherpa-onnx/csrc/offline-speaker-diarization-impl.h +++ b/sherpa-onnx/csrc/offline-speaker-diarization-impl.h @@ -8,6 +8,11 @@ #include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + #include "sherpa-onnx/csrc/offline-speaker-diarization.h" namespace sherpa_onnx { @@ -16,6 +21,11 @@ class OfflineSpeakerDiarizationImpl { static std::unique_ptr Create( const OfflineSpeakerDiarizationConfig &config); +#if __ANDROID_API__ >= 9 + static std::unique_ptr Create( + AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config); +#endif + virtual ~OfflineSpeakerDiarizationImpl() = default; virtual int32_t SampleRate() const = 0; diff --git a/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h b/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h index 0c70f0bc6..aaedc3be0 100644 --- a/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h +++ b/sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h @@ -10,6 +10,11 @@ #include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + #include "Eigen/Dense" #include "sherpa-onnx/csrc/fast-clustering.h" #include "sherpa-onnx/csrc/math.h" @@ -65,6 +70,17 @@ class OfflineSpeakerDiarizationPyannoteImpl Init(); } +#if __ANDROID_API__ >= 9 + OfflineSpeakerDiarizationPyannoteImpl( + AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config) + : config_(config), + segmentation_model_(mgr, config_.segmentation), + embedding_extractor_(mgr, config_.embedding), + clustering_(std::make_unique(config_.clustering)) { + Init(); + } +#endif + int32_t SampleRate() const override { const auto &meta_data = segmentation_model_.GetModelMetaData(); diff --git a/sherpa-onnx/csrc/offline-speaker-diarization.cc b/sherpa-onnx/csrc/offline-speaker-diarization.cc index 
00733bfb2..f34ea4e0e 100644 --- a/sherpa-onnx/csrc/offline-speaker-diarization.cc +++ b/sherpa-onnx/csrc/offline-speaker-diarization.cc @@ -73,6 +73,12 @@ OfflineSpeakerDiarization::OfflineSpeakerDiarization( const OfflineSpeakerDiarizationConfig &config) : impl_(OfflineSpeakerDiarizationImpl::Create(config)) {} +#if __ANDROID_API__ >= 9 +OfflineSpeakerDiarization::OfflineSpeakerDiarization( + AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config) + : impl_(OfflineSpeakerDiarizationImpl::Create(mgr, config)) {} +#endif + OfflineSpeakerDiarization::~OfflineSpeakerDiarization() = default; int32_t OfflineSpeakerDiarization::SampleRate() const { diff --git a/sherpa-onnx/csrc/offline-speaker-diarization.h b/sherpa-onnx/csrc/offline-speaker-diarization.h index 376e5f975..4a517fbb2 100644 --- a/sherpa-onnx/csrc/offline-speaker-diarization.h +++ b/sherpa-onnx/csrc/offline-speaker-diarization.h @@ -9,6 +9,11 @@ #include #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + #include "sherpa-onnx/csrc/fast-clustering-config.h" #include "sherpa-onnx/csrc/offline-speaker-diarization-result.h" #include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h" @@ -57,6 +62,11 @@ class OfflineSpeakerDiarization { explicit OfflineSpeakerDiarization( const OfflineSpeakerDiarizationConfig &config); +#if __ANDROID_API__ >= 9 + OfflineSpeakerDiarization(AAssetManager *mgr, + const OfflineSpeakerDiarizationConfig &config); +#endif + ~OfflineSpeakerDiarization(); // Expected sample rate of the input audio samples diff --git a/sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.cc b/sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.cc index 3f3323698..e3768dcf4 100644 --- a/sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.cc +++ b/sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.cc @@ -24,6 +24,17 @@ class OfflineSpeakerSegmentationPyannoteModel::Impl { 
Init(buf.data(), buf.size()); } +#if __ANDROID_API__ >= 9 + Impl(AAssetManager *mgr, const OfflineSpeakerSegmentationModelConfig &config) + : config_(config), + env_(ORT_LOGGING_LEVEL_ERROR), + sess_opts_(GetSessionOptions(config)), + allocator_{} { + auto buf = ReadFile(mgr, config_.pyannote.model); + Init(buf.data(), buf.size()); + } +#endif + const OfflineSpeakerSegmentationPyannoteModelMetaData &GetModelMetaData() const { return meta_data_; @@ -92,6 +103,13 @@ OfflineSpeakerSegmentationPyannoteModel:: const OfflineSpeakerSegmentationModelConfig &config) : impl_(std::make_unique(config)) {} +#if __ANDROID_API__ >= 9 +OfflineSpeakerSegmentationPyannoteModel:: + OfflineSpeakerSegmentationPyannoteModel( + AAssetManager *mgr, const OfflineSpeakerSegmentationModelConfig &config) + : impl_(std::make_unique(mgr, config)) {} +#endif + OfflineSpeakerSegmentationPyannoteModel:: ~OfflineSpeakerSegmentationPyannoteModel() = default; diff --git a/sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.h b/sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.h index b504c373f..6b835763b 100644 --- a/sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.h +++ b/sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model.h @@ -6,6 +6,11 @@ #include +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + #include "onnxruntime_cxx_api.h" // NOLINT #include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h" #include "sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-meta-data.h" @@ -17,6 +22,11 @@ class OfflineSpeakerSegmentationPyannoteModel { explicit OfflineSpeakerSegmentationPyannoteModel( const OfflineSpeakerSegmentationModelConfig &config); +#if __ANDROID_API__ >= 9 + OfflineSpeakerSegmentationPyannoteModel( + AAssetManager *mgr, const OfflineSpeakerSegmentationModelConfig &config); +#endif + ~OfflineSpeakerSegmentationPyannoteModel(); const 
OfflineSpeakerSegmentationPyannoteModelMetaData &GetModelMetaData() diff --git a/sherpa-onnx/csrc/sherpa-onnx-vad-microphone-offline-asr.cc b/sherpa-onnx/csrc/sherpa-onnx-vad-microphone-offline-asr.cc index c90c29c52..df3e250a5 100644 --- a/sherpa-onnx/csrc/sherpa-onnx-vad-microphone-offline-asr.cc +++ b/sherpa-onnx/csrc/sherpa-onnx-vad-microphone-offline-asr.cc @@ -211,7 +211,7 @@ to download models for offline ASR. } while (!vad->Empty()) { - auto &segment = vad->Front(); + const auto &segment = vad->Front(); auto s = recognizer.CreateStream(); s->AcceptWaveform(sample_rate, segment.samples.data(), segment.samples.size()); diff --git a/sherpa-onnx/jni/audio-tagging.cc b/sherpa-onnx/jni/audio-tagging.cc index ff8db0089..7ad6e7d53 100644 --- a/sherpa-onnx/jni/audio-tagging.cc +++ b/sherpa-onnx/jni/audio-tagging.cc @@ -70,6 +70,7 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_AudioTagging_newFromAsset( AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); if (!mgr) { SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); + return 0; } #endif diff --git a/sherpa-onnx/jni/keyword-spotter.cc b/sherpa-onnx/jni/keyword-spotter.cc index ca0c229c2..4ac80a294 100644 --- a/sherpa-onnx/jni/keyword-spotter.cc +++ b/sherpa-onnx/jni/keyword-spotter.cc @@ -115,10 +115,12 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_KeywordSpotter_newFromAsset( AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); if (!mgr) { SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); + return 0; } #endif auto config = sherpa_onnx::GetKwsConfig(env, _config); SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); + auto kws = new sherpa_onnx::KeywordSpotter( #if __ANDROID_API__ >= 9 mgr, diff --git a/sherpa-onnx/jni/offline-punctuation.cc b/sherpa-onnx/jni/offline-punctuation.cc index 5056a3ac4..efe03cac0 100644 --- a/sherpa-onnx/jni/offline-punctuation.cc +++ b/sherpa-onnx/jni/offline-punctuation.cc @@ -53,10 +53,12 @@ 
Java_com_k2fsa_sherpa_onnx_OfflinePunctuation_newFromAsset( AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); if (!mgr) { SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); + return 0; } #endif auto config = sherpa_onnx::GetOfflinePunctuationConfig(env, _config); SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); + auto model = new sherpa_onnx::OfflinePunctuation( #if __ANDROID_API__ >= 9 mgr, diff --git a/sherpa-onnx/jni/offline-recognizer.cc b/sherpa-onnx/jni/offline-recognizer.cc index 8c1265bba..5e4b359b6 100644 --- a/sherpa-onnx/jni/offline-recognizer.cc +++ b/sherpa-onnx/jni/offline-recognizer.cc @@ -233,10 +233,12 @@ Java_com_k2fsa_sherpa_onnx_OfflineRecognizer_newFromAsset(JNIEnv *env, AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); if (!mgr) { SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); + return 0; } #endif auto config = sherpa_onnx::GetOfflineConfig(env, _config); SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); + auto model = new sherpa_onnx::OfflineRecognizer( #if __ANDROID_API__ >= 9 mgr, diff --git a/sherpa-onnx/jni/offline-speaker-diarization.cc b/sherpa-onnx/jni/offline-speaker-diarization.cc index e82962c80..ba4e14bc3 100644 --- a/sherpa-onnx/jni/offline-speaker-diarization.cc +++ b/sherpa-onnx/jni/offline-speaker-diarization.cc @@ -101,7 +101,24 @@ SHERPA_ONNX_EXTERN_C JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineSpeakerDiarization_newFromAsset( JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) { - return 0; +#if __ANDROID_API__ >= 9 + AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); + if (!mgr) { + SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); + return 0; + } +#endif + + auto config = sherpa_onnx::GetOfflineSpeakerDiarizationConfig(env, _config); + SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); + + auto sd = new sherpa_onnx::OfflineSpeakerDiarization( +#if __ANDROID_API__ >= 9 + mgr, +#endif + config); 
+ + return (jlong)sd; } SHERPA_ONNX_EXTERN_C diff --git a/sherpa-onnx/jni/offline-tts.cc b/sherpa-onnx/jni/offline-tts.cc index 43a93e0e0..4d67afc27 100644 --- a/sherpa-onnx/jni/offline-tts.cc +++ b/sherpa-onnx/jni/offline-tts.cc @@ -105,6 +105,7 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromAsset( AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); if (!mgr) { SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); + return 0; } #endif auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); diff --git a/sherpa-onnx/jni/online-recognizer.cc b/sherpa-onnx/jni/online-recognizer.cc index 1793cf73b..dbe205c4e 100644 --- a/sherpa-onnx/jni/online-recognizer.cc +++ b/sherpa-onnx/jni/online-recognizer.cc @@ -267,6 +267,7 @@ Java_com_k2fsa_sherpa_onnx_OnlineRecognizer_newFromAsset(JNIEnv *env, AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); if (!mgr) { SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); + return 0; } #endif auto config = sherpa_onnx::GetConfig(env, _config); diff --git a/sherpa-onnx/jni/speaker-embedding-extractor.cc b/sherpa-onnx/jni/speaker-embedding-extractor.cc index b1190bffc..33d630ee6 100644 --- a/sherpa-onnx/jni/speaker-embedding-extractor.cc +++ b/sherpa-onnx/jni/speaker-embedding-extractor.cc @@ -45,6 +45,7 @@ Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_newFromAsset( AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); if (!mgr) { SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); + return 0; } #endif auto config = sherpa_onnx::GetSpeakerEmbeddingExtractorConfig(env, _config); diff --git a/sherpa-onnx/jni/spoken-language-identification.cc b/sherpa-onnx/jni/spoken-language-identification.cc index 278c6adbf..fcb6f228a 100644 --- a/sherpa-onnx/jni/spoken-language-identification.cc +++ b/sherpa-onnx/jni/spoken-language-identification.cc @@ -62,6 +62,7 @@ Java_com_k2fsa_sherpa_onnx_SpokenLanguageIdentification_newFromAsset( AAssetManager *mgr = 
AAssetManager_fromJava(env, asset_manager); if (!mgr) { SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); + return 0; } #endif diff --git a/sherpa-onnx/jni/voice-activity-detector.cc b/sherpa-onnx/jni/voice-activity-detector.cc index 319edd09b..a30423f70 100644 --- a/sherpa-onnx/jni/voice-activity-detector.cc +++ b/sherpa-onnx/jni/voice-activity-detector.cc @@ -71,10 +71,12 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_Vad_newFromAsset( AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); if (!mgr) { SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); + return 0; } #endif auto config = sherpa_onnx::GetVadModelConfig(env, _config); SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); + auto model = new sherpa_onnx::VoiceActivityDetector( #if __ANDROID_API__ >= 9 mgr, From 5a22f74b2b0700b9f986bc9f01ae93b58b2117c9 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 13 Oct 2024 14:02:57 +0800 Subject: [PATCH 198/201] Android demo for speaker diarization (#1423) --- .../workflows/apk-speaker-diarization.yaml | 175 +++++++++++++++ .../workflows/apk-speaker-identification.yaml | 62 ++++++ .github/workflows/apk-vad.yaml | 2 +- README.md | 51 +++-- android/README.md | 2 + .../SherpaOnnxSpeakerDiarization/.gitignore | 15 ++ .../app/.gitignore | 1 + .../app/build.gradle.kts | 71 ++++++ .../app/proguard-rules.pro | 21 ++ .../diarization/ExampleInstrumentedTest.kt | 24 ++ .../app/src/main/AndroidManifest.xml | 32 +++ .../app/src/main/assets/.gitkeep | 0 .../onnx/speaker/diarization/BarItem.kt | 13 ++ .../onnx/speaker/diarization/MainActivity.kt | 132 +++++++++++ .../onnx/speaker/diarization/NavBarItems.kt | 20 ++ .../onnx/speaker/diarization/NavRoutes.kt | 6 + .../diarization/OfflineSpeakerDiarization.kt | 1 + .../onnx/speaker/diarization/ReadWaveFile.kt | 137 ++++++++++++ .../diarization/SpeakerDiarizationObject.kt | 66 ++++++ .../SpeakerEmbeddingExtractorConfig.kt | 1 + .../onnx/speaker/diarization/screens/Help.kt | 38 ++++ 
.../onnx/speaker/diarization/screens/Home.kt | 210 ++++++++++++++++++ .../speaker/diarization/ui/theme/Color.kt | 11 + .../speaker/diarization/ui/theme/Theme.kt | 58 +++++ .../onnx/speaker/diarization/ui/theme/Type.kt | 34 +++ .../app/src/main/jniLibs/arm64-v8a/.gitkeep | 0 .../app/src/main/jniLibs/armeabi-v7a/.gitkeep | 0 .../app/src/main/jniLibs/x86/.gitkeep | 0 .../app/src/main/jniLibs/x86_64/.gitkeep | 0 .../drawable-v24/ic_launcher_foreground.xml | 30 +++ .../res/drawable/ic_launcher_background.xml | 170 ++++++++++++++ .../res/mipmap-anydpi-v26/ic_launcher.xml | 6 + .../mipmap-anydpi-v26/ic_launcher_round.xml | 6 + .../src/main/res/mipmap-hdpi/ic_launcher.webp | Bin 0 -> 1404 bytes .../res/mipmap-hdpi/ic_launcher_round.webp | Bin 0 -> 2898 bytes .../src/main/res/mipmap-mdpi/ic_launcher.webp | Bin 0 -> 982 bytes .../res/mipmap-mdpi/ic_launcher_round.webp | Bin 0 -> 1772 bytes .../main/res/mipmap-xhdpi/ic_launcher.webp | Bin 0 -> 1900 bytes .../res/mipmap-xhdpi/ic_launcher_round.webp | Bin 0 -> 3918 bytes .../main/res/mipmap-xxhdpi/ic_launcher.webp | Bin 0 -> 2884 bytes .../res/mipmap-xxhdpi/ic_launcher_round.webp | Bin 0 -> 5914 bytes .../main/res/mipmap-xxxhdpi/ic_launcher.webp | Bin 0 -> 3844 bytes .../res/mipmap-xxxhdpi/ic_launcher_round.webp | Bin 0 -> 7778 bytes .../app/src/main/res/values/colors.xml | 10 + .../app/src/main/res/values/strings.xml | 3 + .../app/src/main/res/values/themes.xml | 5 + .../app/src/main/res/xml/backup_rules.xml | 13 ++ .../main/res/xml/data_extraction_rules.xml | 19 ++ .../speaker/diarization/ExampleUnitTest.kt | 17 ++ .../build.gradle.kts | 5 + .../gradle.properties | 23 ++ .../gradle/libs.versions.toml | 35 +++ .../gradle/wrapper/gradle-wrapper.jar | Bin 0 -> 59203 bytes .../gradle/wrapper/gradle-wrapper.properties | 6 + android/SherpaOnnxSpeakerDiarization/gradlew | 185 +++++++++++++++ .../SherpaOnnxSpeakerDiarization/gradlew.bat | 89 ++++++++ .../settings.gradle.kts | 23 ++ .../SpeakerEmbeddingExtractorConfig.kt | 1 + 
.../SpeakerEmbeddingExtractorConfig.kt | 1 + kotlin-api-examples/run.sh | 2 + scripts/apk/build-apk-speaker-diarization.sh | 73 ++++++ .../kotlin-api/OfflineSpeakerDiarization.kt | 11 +- sherpa-onnx/kotlin-api/Speaker.kt | 7 - .../SpeakerEmbeddingExtractorConfig.kt | 8 + 64 files changed, 1905 insertions(+), 26 deletions(-) create mode 100644 .github/workflows/apk-speaker-diarization.yaml create mode 100644 android/SherpaOnnxSpeakerDiarization/.gitignore create mode 100644 android/SherpaOnnxSpeakerDiarization/app/.gitignore create mode 100644 android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts create mode 100644 android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/assets/.gitkeep create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt create mode 120000 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt create mode 120000 
android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/arm64-v8a/.gitkeep create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/armeabi-v7a/.gitkeep create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86/.gitkeep create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86_64/.gitkeep create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp create mode 100644 
android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/backup_rules.xml create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/main/res/xml/data_extraction_rules.xml create mode 100644 android/SherpaOnnxSpeakerDiarization/app/src/test/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleUnitTest.kt create mode 100644 android/SherpaOnnxSpeakerDiarization/build.gradle.kts create mode 100644 android/SherpaOnnxSpeakerDiarization/gradle.properties create mode 100644 android/SherpaOnnxSpeakerDiarization/gradle/libs.versions.toml create mode 100644 android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.jar create mode 100644 android/SherpaOnnxSpeakerDiarization/gradle/wrapper/gradle-wrapper.properties create mode 100755 android/SherpaOnnxSpeakerDiarization/gradlew create mode 100644 android/SherpaOnnxSpeakerDiarization/gradlew.bat create mode 100644 android/SherpaOnnxSpeakerDiarization/settings.gradle.kts create mode 120000 
android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/SpeakerEmbeddingExtractorConfig.kt create mode 120000 kotlin-api-examples/SpeakerEmbeddingExtractorConfig.kt create mode 100755 scripts/apk/build-apk-speaker-diarization.sh create mode 100644 sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt diff --git a/.github/workflows/apk-speaker-diarization.yaml b/.github/workflows/apk-speaker-diarization.yaml new file mode 100644 index 000000000..19f0b99bc --- /dev/null +++ b/.github/workflows/apk-speaker-diarization.yaml @@ -0,0 +1,175 @@ +name: apk-speaker-diarization + +on: + push: + branches: + - apk + - android-demo-speaker-diarization-2 + + workflow_dispatch: + +concurrency: + group: apk-speaker-diarization-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + +jobs: + apk_speaker_identification: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + runs-on: ${{ matrix.os }} + name: apk for speaker diarization ${{ matrix.index }}/${{ matrix.total }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + total: ["1"] + index: ["0"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # https://github.com/actions/setup-java + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' # See 'Supported distributions' for available options + java-version: '21' + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-android + + - name: Display NDK HOME + shell: bash + run: | + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" + ls -lh ${ANDROID_NDK_LATEST_HOME} + + - name: Install Python dependencies + shell: bash + run: | + python3 -m pip install --upgrade pip jinja2 + + - name: Setup build tool version variable + shell: bash + run: | + echo "---" + ls -lh /usr/local/lib/android/ + echo "---" + + ls -lh /usr/local/lib/android/sdk + echo "---" + + ls -lh 
/usr/local/lib/android/sdk/build-tools + echo "---" + + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) + echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV + echo "Last build tool version is: $BUILD_TOOL_VERSION" + + - name: Generate build script + shell: bash + run: | + cd scripts/apk + + chmod +x build-apk-speaker-diarization.sh + mv -v ./build-apk-speaker-diarization.sh ../.. + + - name: build APK + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME + ./build-apk-speaker-diarization.sh + + - name: Display APK + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + # https://github.com/marketplace/actions/sign-android-release + - uses: r0adkll/sign-android-release@v1 + name: Sign app APK + with: + releaseDirectory: ./apks + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} + env: + BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} + + - name: Display APK after signing + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Rename APK after signing + shell: bash + run: | + cd apks + rm -fv signingKey.jks + rm -fv *.apk.idsig + rm -fv *-aligned.apk + + all_apks=$(ls -1 *-signed.apk) + echo "----" + echo $all_apks + echo "----" + for apk in ${all_apks[@]}; do + n=$(echo $apk | sed -e s/-signed//) + mv -v $apk $n + done + + cd .. + + ls -lh ./apks/ + du -h -d1 . + + - name: Display APK after rename + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . 
+ + - name: Publish to huggingface + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + uses: nick-fields/retry@v3 + with: + max_attempts: 20 + timeout_seconds: 200 + shell: bash + command: | + git config --global user.email "csukuangfj@gmail.com" + git config --global user.name "Fangjun Kuang" + + rm -rf huggingface + export GIT_LFS_SKIP_SMUDGE=1 + export GIT_CLONE_PROTECTION_ACTIVE=false + + SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" + + git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface + cd huggingface + git fetch + git pull + git merge -m "merge remote" --ff origin main + + d=speaker-diarization/$SHERPA_ONNX_VERSION + mkdir -p $d/ + cp -v ../apks/*.apk $d/ + git status + git lfs track "*.apk" + git add . + git commit -m "add more apks" + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main diff --git a/.github/workflows/apk-speaker-identification.yaml b/.github/workflows/apk-speaker-identification.yaml index ca89ec49f..e32ad3bc9 100644 --- a/.github/workflows/apk-speaker-identification.yaml +++ b/.github/workflows/apk-speaker-identification.yaml @@ -53,6 +53,23 @@ jobs: run: | python3 -m pip install --upgrade pip jinja2 + - name: Setup build tool version variable + shell: bash + run: | + echo "---" + ls -lh /usr/local/lib/android/ + echo "---" + + ls -lh /usr/local/lib/android/sdk + echo "---" + + ls -lh /usr/local/lib/android/sdk/build-tools + echo "---" + + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) + echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV + echo "Last build tool version is: $BUILD_TOOL_VERSION" + - name: Generate build script shell: bash run: | @@ -82,6 +99,51 @@ jobs: ls -lh ./apks/ du -h -d1 . 
+ # https://github.com/marketplace/actions/sign-android-release + - uses: r0adkll/sign-android-release@v1 + name: Sign app APK + with: + releaseDirectory: ./apks + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} + env: + BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} + + - name: Display APK after signing + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + + - name: Rename APK after signing + shell: bash + run: | + cd apks + rm -fv signingKey.jks + rm -fv *.apk.idsig + rm -fv *-aligned.apk + + all_apks=$(ls -1 *-signed.apk) + echo "----" + echo $all_apks + echo "----" + for apk in ${all_apks[@]}; do + n=$(echo $apk | sed -e s/-signed//) + mv -v $apk $n + done + + cd .. + + ls -lh ./apks/ + du -h -d1 . + + - name: Display APK after rename + shell: bash + run: | + ls -lh ./apks/ + du -h -d1 . + - name: Publish to huggingface env: HF_TOKEN: ${{ secrets.HF_TOKEN }} diff --git a/.github/workflows/apk-vad.yaml b/.github/workflows/apk-vad.yaml index 8253145b6..d9af75477 100644 --- a/.github/workflows/apk-vad.yaml +++ b/.github/workflows/apk-vad.yaml @@ -166,7 +166,7 @@ jobs: git pull git merge -m "merge remote" --ff origin main - d=vad/SHERPA_ONNX_VERSION + d=vad/$SHERPA_ONNX_VERSION mkdir -p $d cp -v ../apks/*.apk $d/ git status diff --git a/README.md b/README.md index 1828847e5..32d141f90 100644 --- a/README.md +++ b/README.md @@ -84,8 +84,9 @@ with the following APIs ### Links for Huggingface Spaces -You can visit the following Huggingface spaces to try `sherpa-onnx` without -installing anything. All you need is a browser. +
+You can visit the following Huggingface spaces to try sherpa-onnx without +installing anything. All you need is a browser. | Description | URL | |-------------------------------------------------------|------------------------------------| @@ -118,23 +119,34 @@ We also have spaces built using WebAssembly. They are listed below: |Speech synthesis (German) |[Click me][wasm-hf-tts-piper-de]| [地址][wasm-ms-tts-piper-de]| |Speaker diarization |[Click me][wasm-hf-speaker-diarization]|[地址][wasm-ms-speaker-diarization]| +
+ ### Links for pre-built Android APKs -| Description | URL | 中国用户 | -|----------------------------------------|------------------------------|-----------------------------| -| Streaming speech recognition | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn]| -| Text-to-speech | [Address][apk-tts] | [点此][apk-tts-cn] | -| Voice activity detection (VAD) | [Address][apk-vad] | [点此][apk-vad-cn] | -| VAD + non-streaming speech recognition | [Address][apk-vad-asr] | [点此][apk-vad-asr-cn] | -| Two-pass speech recognition | [Address][apk-2pass] | [点此][apk-2pass-cn] | -| Audio tagging | [Address][apk-at] | [点此][apk-at-cn] | -| Audio tagging (WearOS) | [Address][apk-at-wearos] | [点此][apk-at-wearos-cn] | -| Speaker identification | [Address][apk-sid] | [点此][apk-sid-cn] | -| Spoken language identification | [Address][apk-slid] | [点此][apk-slid-cn] | -| Keyword spotting | [Address][apk-kws] | [点此][apk-kws-cn] | +
+ +You can find pre-built Android APKs for this repository in the following table + +| Description | URL | 中国用户 | +|----------------------------------------|------------------------------------|-----------------------------------| +| Speaker diarization | [Address][apk-speaker-diarization] | [点此][apk-speaker-diarization-cn]| +| Streaming speech recognition | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn] | +| Text-to-speech | [Address][apk-tts] | [点此][apk-tts-cn] | +| Voice activity detection (VAD) | [Address][apk-vad] | [点此][apk-vad-cn] | +| VAD + non-streaming speech recognition | [Address][apk-vad-asr] | [点此][apk-vad-asr-cn] | +| Two-pass speech recognition | [Address][apk-2pass] | [点此][apk-2pass-cn] | +| Audio tagging | [Address][apk-at] | [点此][apk-at-cn] | +| Audio tagging (WearOS) | [Address][apk-at-wearos] | [点此][apk-at-wearos-cn] | +| Speaker identification | [Address][apk-sid] | [点此][apk-sid-cn] | +| Spoken language identification | [Address][apk-slid] | [点此][apk-slid-cn] | +| Keyword spotting | [Address][apk-kws] | [点此][apk-kws-cn] | + +
### Links for pre-built Flutter APPs +
+ #### Real-time speech recognition | Description | URL | 中国用户 | @@ -153,17 +165,24 @@ We also have spaces built using WebAssembly. They are listed below: > Note: You need to build from source for iOS. +
+ ### Links for pre-built Lazarus APPs +
+ #### Generating subtitles | Description | URL | 中国用户 | |--------------------------------|----------------------------|----------------------------| | Generate subtitles (生成字幕) | [Address][lazarus-subtitle]| [点此][lazarus-subtitle-cn]| +
### Links for pre-trained models +
+ | Description | URL | |---------------------------------------------|---------------------------------------------------------------------------------------| | Speech recognition (speech to text, ASR) | [Address][asr-models] | @@ -176,6 +195,8 @@ We also have spaces built using WebAssembly. They are listed below: | Punctuation | [Address][punct-models] | | Speaker segmentation | [Address][speaker-segmentation-models] | +
+ ### Useful links - Documentation: https://k2-fsa.github.io/sherpa/onnx/ @@ -265,6 +286,8 @@ Video demo in Chinese: [爆了!炫神教你开打字挂!真正影响胜率 [wasm-ms-tts-piper-de]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de [wasm-hf-speaker-diarization]: https://huggingface.co/spaces/k2-fsa/web-assembly-speaker-diarization-sherpa-onnx [wasm-ms-speaker-diarization]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-speaker-diarization-sherpa-onnx +[apk-speaker-diarization]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/apk.html +[apk-speaker-diarization-cn]: https://k2-fsa.github.io/sherpa/onnx/speaker-diarization/apk-cn.html [apk-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk.html [apk-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html [apk-tts]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html diff --git a/android/README.md b/android/README.md index 42b29e08f..bae335598 100644 --- a/android/README.md +++ b/android/README.md @@ -4,6 +4,8 @@ Please refer to https://k2-fsa.github.io/sherpa/onnx/android/index.html for usage. +- [SherpaOnnxSpeakerDiarization](./SherpaOnnxSpeakerDiarization) It is for speaker diarization. + - [SherpaOnnx](./SherpaOnnx) It uses a streaming ASR model. 
- [SherpaOnnx2Pass](./SherpaOnnx2Pass) It uses a streaming ASR model diff --git a/android/SherpaOnnxSpeakerDiarization/.gitignore b/android/SherpaOnnxSpeakerDiarization/.gitignore new file mode 100644 index 000000000..aa724b770 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/.gitignore @@ -0,0 +1,15 @@ +*.iml +.gradle +/local.properties +/.idea/caches +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +/.idea/navEditor.xml +/.idea/assetWizardSettings.xml +.DS_Store +/build +/captures +.externalNativeBuild +.cxx +local.properties diff --git a/android/SherpaOnnxSpeakerDiarization/app/.gitignore b/android/SherpaOnnxSpeakerDiarization/app/.gitignore new file mode 100644 index 000000000..42afabfd2 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts b/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts new file mode 100644 index 000000000..7a390ba42 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/build.gradle.kts @@ -0,0 +1,71 @@ +plugins { + alias(libs.plugins.android.application) + alias(libs.plugins.jetbrains.kotlin.android) +} + +android { + namespace = "com.k2fsa.sherpa.onnx.speaker.diarization" + compileSdk = 34 + + defaultConfig { + applicationId = "com.k2fsa.sherpa.onnx.speaker.diarization" + minSdk = 21 + targetSdk = 34 + versionCode = 1 + versionName = "1.0" + + testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" + vectorDrawables { + useSupportLibrary = true + } + } + + buildTypes { + release { + isMinifyEnabled = false + proguardFiles( + getDefaultProguardFile("proguard-android-optimize.txt"), + "proguard-rules.pro" + ) + } + } + compileOptions { + sourceCompatibility = JavaVersion.VERSION_1_8 + targetCompatibility = JavaVersion.VERSION_1_8 + } + kotlinOptions { + jvmTarget = "1.8" + } + buildFeatures { + compose = true + } + composeOptions { + 
kotlinCompilerExtensionVersion = "1.5.1" + } + packaging { + resources { + excludes += "/META-INF/{AL2.0,LGPL2.1}" + } + } +} + +dependencies { + + implementation(libs.androidx.core.ktx) + implementation(libs.androidx.lifecycle.runtime.ktx) + implementation(libs.androidx.activity.compose) + implementation(platform(libs.androidx.compose.bom)) + implementation(libs.androidx.ui) + implementation(libs.androidx.ui.graphics) + implementation(libs.androidx.ui.tooling.preview) + implementation(libs.androidx.material3) + implementation(libs.androidx.navigation.compose) + implementation(libs.androidx.documentfile) + testImplementation(libs.junit) + androidTestImplementation(libs.androidx.junit) + androidTestImplementation(libs.androidx.espresso.core) + androidTestImplementation(platform(libs.androidx.compose.bom)) + androidTestImplementation(libs.androidx.ui.test.junit4) + debugImplementation(libs.androidx.ui.tooling) + debugImplementation(libs.androidx.ui.test.manifest) +} \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro b/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro new file mode 100644 index 000000000..481bb4348 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. 
+#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. +#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt b/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt new file mode 100644 index 000000000..53d7af15f --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/androidTest/java/com/k2fsa/sherpa/onnx/speaker/diarization/ExampleInstrumentedTest.kt @@ -0,0 +1,24 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization + +import androidx.test.platform.app.InstrumentationRegistry +import androidx.test.ext.junit.runners.AndroidJUnit4 + +import org.junit.Test +import org.junit.runner.RunWith + +import org.junit.Assert.* + +/** + * Instrumented test, which will execute on an Android device. + * + * See [testing documentation](http://d.android.com/tools/testing). + */ +@RunWith(AndroidJUnit4::class) +class ExampleInstrumentedTest { + @Test + fun useAppContext() { + // Context of the app under test. 
+ val appContext = InstrumentationRegistry.getInstrumentation().targetContext + assertEquals("com.k2fsa.sherpa.onnx.speaker.diarization", appContext.packageName) + } +} \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml new file mode 100644 index 000000000..d58f7e8d7 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/AndroidManifest.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/assets/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/assets/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt new file mode 100644 index 000000000..0895cf52c --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/BarItem.kt @@ -0,0 +1,13 @@ +package com.k2fsa.sherpa.onnx.speaker.diarization + +import androidx.compose.ui.graphics.vector.ImageVector + +data class BarItem( + val title: String, + + // see https://www.composables.com/icons + // and + // https://developer.android.com/reference/kotlin/androidx/compose/material/icons/filled/package-summary + val image: ImageVector, + val route: String, +) \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt new file mode 100644 index 000000000..7a25d49b9 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/MainActivity.kt 
@@ -0,0 +1,147 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import android.os.Bundle
+import androidx.activity.ComponentActivity
+import androidx.activity.compose.setContent
+import androidx.activity.enableEdgeToEdge
+import androidx.compose.foundation.layout.Column
+import androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.padding
+import androidx.compose.material3.CenterAlignedTopAppBar
+import androidx.compose.material3.ExperimentalMaterial3Api
+import androidx.compose.material3.Icon
+import androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.NavigationBar
+import androidx.compose.material3.NavigationBarItem
+import androidx.compose.material3.Scaffold
+import androidx.compose.material3.Surface
+import androidx.compose.material3.Text
+import androidx.compose.material3.TopAppBarDefaults
+import androidx.compose.runtime.Composable
+import androidx.compose.runtime.getValue
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.text.font.FontWeight
+import androidx.compose.ui.tooling.preview.Preview
+import androidx.navigation.NavGraph.Companion.findStartDestination
+import androidx.navigation.NavHostController
+import androidx.navigation.compose.NavHost
+import androidx.navigation.compose.composable
+import androidx.navigation.compose.currentBackStackEntryAsState
+import androidx.navigation.compose.rememberNavController
+import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HelpScreen
+import com.k2fsa.sherpa.onnx.speaker.diarization.screens.HomeScreen
+import com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme.SherpaOnnxSpeakerDiarizationTheme
+
+/** Logcat tag used by the log calls of this app. */
+const val TAG = "sherpa-onnx-sd"
+
+/** Single activity: installs the Compose UI, then creates the diarization engine. */
+class MainActivity : ComponentActivity() {
+    override fun onCreate(savedInstanceState: Bundle?) {
+        super.onCreate(savedInstanceState)
+        enableEdgeToEdge()
+        setContent {
+            SherpaOnnxSpeakerDiarizationTheme {
+                // A surface container using the 'background' color from the theme
+                Surface(
+                    modifier = Modifier.fillMaxSize(),
+                    color = MaterialTheme.colorScheme.background
+                ) {
+                    MainScreen()
+                }
+            }
+        }
+        // Create the engine from the bundled assets; repeated calls are ignored by the callee.
+        SpeakerDiarizationObject.initSpeakerDiarization(this.assets)
+    }
+}
+
+/**
+ * App scaffold: a centered title bar, the navigation host as content and a bottom navigation bar.
+ *
+ * @param modifier applied to the root [Scaffold].
+ */
+@OptIn(ExperimentalMaterial3Api::class)
+@Composable
+fun MainScreen(modifier: Modifier = Modifier) {
+    val navController = rememberNavController()
+    Scaffold(
+        modifier = modifier,
+        topBar = {
+            CenterAlignedTopAppBar(
+                colors = TopAppBarDefaults.topAppBarColors(
+                    containerColor = MaterialTheme.colorScheme.primaryContainer,
+                    titleContentColor = MaterialTheme.colorScheme.primary,
+                ),
+                title = {
+                    Text(
+                        "Next-gen Kaldi: Speaker Diarization",
+                        fontWeight = FontWeight.Bold,
+                    )
+                },
+            )
+        },
+        content = { padding ->
+            Column(Modifier.padding(padding)) {
+                NavigationHost(navController = navController)
+
+            }
+        },
+        bottomBar = {
+            BottomNavigationBar(navController = navController)
+        }
+    )
+}
+
+/** Navigation graph with two destinations, Home (the start destination) and Help. */
+@Composable
+fun NavigationHost(navController: NavHostController) {
+    NavHost(navController = navController, startDestination = NavRoutes.Home.route) {
+        composable(NavRoutes.Home.route) {
+            HomeScreen()
+        }
+
+        composable(NavRoutes.Help.route) {
+            HelpScreen()
+        }
+    }
+}
+
+/** Bottom bar with one entry per [NavBarItems.BarItems] element; the current route is selected. */
+@Composable
+fun BottomNavigationBar(navController: NavHostController) {
+    NavigationBar {
+        val backStackEntry by navController.currentBackStackEntryAsState()
+        val currentRoute = backStackEntry?.destination?.route
+
+        NavBarItems.BarItems.forEach { navItem ->
+            NavigationBarItem(
+                selected = currentRoute == navItem.route,
+                onClick = {
+                    navController.navigate(navItem.route) {
+                        popUpTo(navController.graph.findStartDestination().id) {
+                            saveState = true
+                        }
+                        launchSingleTop = true
+                        restoreState = true
+                    }
+                },
+                icon = {
+                    Icon(imageVector = navItem.image, contentDescription = navItem.title)
+                },
+                label = {
+                    Text(text = navItem.title)
+                },
+            )
+        }
+    }
+}
+
+/** Design-time preview of [MainScreen] inside the app theme. */
+@Preview(showBackground = true)
+@Composable
+fun MainScreenPreview() {
+    SherpaOnnxSpeakerDiarizationTheme {
+        MainScreen()
+    }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt
new file mode 100644
index 000000000..65c737f97
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavBarItems.kt
@@ -0,0 +1,21 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import androidx.compose.material.icons.Icons
+import androidx.compose.material.icons.filled.Home
+import androidx.compose.material.icons.filled.Info
+
+/** Entries of the bottom navigation bar; routes come from [NavRoutes] so both stay in sync. */
+object NavBarItems {
+    val BarItems = listOf(
+        BarItem(
+            title = "Home",
+            image = Icons.Filled.Home,
+            route = NavRoutes.Home.route,
+        ),
+        BarItem(
+            title = "Help",
+            image = Icons.Filled.Info,
+            route = NavRoutes.Help.route,
+        ),
+    )
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt
new file mode 100644
index 000000000..2e1ae90b5
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/NavRoutes.kt
@@ -0,0 +1,7 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+/** Closed set of navigation destinations; [route] is the string registered with the NavHost. */
+sealed class NavRoutes(val route: String) {
+    object Home : NavRoutes("home")
+    object Help : NavRoutes("help")
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt
new file mode 120000
index 000000000..459cc22cc
---
/dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/OfflineSpeakerDiarization.kt
@@ -0,0 +1 @@
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineSpeakerDiarization.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt
new file mode 100644
index 000000000..940a2b643
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ReadWaveFile.kt
@@ -0,0 +1,135 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.screens
+
+import android.content.Context
+import android.media.AudioFormat
+import android.media.MediaCodec
+import android.media.MediaExtractor
+import android.media.MediaFormat
+import android.net.Uri
+import java.io.ByteArrayOutputStream
+
+/**
+ * Outcome of [readUri]: on success [sampleRate] and [samples] are set and [msg] is
+ * null; on failure only [msg] is set and describes the problem.
+ */
+data class WaveData(
+    val sampleRate: Int? = null,
+    val samples: FloatArray? = null,
+    val msg: String? = null
+)
+
+// Timeout, in microseconds, of every MediaCodec dequeue call below.
+private const val DEQUEUE_TIMEOUT_US = 10000L
+
+// It supports only 16-bit encoded wave files
+//
+// References
+// - https://gist.github.com/a-m-s/1991ab18fbcb0fcc2cf9
+// - https://github.com/taehwandev/MediaCodecExample/blob/master/app/src/main/java/tech/thdev/mediacodecexample/audio/AACAudioDecoderThread.kt
+/**
+ * Decodes the first audio track of [uri] into float samples in [-1, 1).
+ *
+ * @return the sample rate and samples on success; otherwise a [WaveData] whose
+ * only field is a message (track is not 16-bit PCM, no samples were decoded, or
+ * there is no audio track at all).
+ */
+fun readUri(context: Context, uri: Uri): WaveData {
+    val extractor = MediaExtractor()
+    try {
+        extractor.setDataSource(context, uri, null)
+
+        for (i in 0 until extractor.trackCount) {
+            val format = extractor.getTrackFormat(i)
+            val mime = format.getString(MediaFormat.KEY_MIME) ?: continue
+            if (!mime.startsWith("audio/")) {
+                continue
+            }
+            extractor.selectTrack(i)
+            return decodeTrack(extractor, format, mime)
+        }
+        return WaveData(msg = "not an audio file")
+    } finally {
+        // Runs on every path, including the early return from inside the loop.
+        extractor.release()
+    }
+}
+
+// Decodes the track that the caller has already selected on [extractor].
+private fun decodeTrack(extractor: MediaExtractor, format: MediaFormat, mime: String): WaveData {
+    val encoding = try {
+        format.getInteger(MediaFormat.KEY_PCM_ENCODING)
+    } catch (_: Exception) {
+        -1 // no usable encoding entry: rejected just below
+    }
+    if (encoding != AudioFormat.ENCODING_PCM_16BIT) {
+        return WaveData(msg = "We support only 16-bit encoded wave files")
+    }
+
+    val sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE)
+    val decoder = MediaCodec.createDecoderByType(mime)
+    try {
+        decoder.configure(format, null, null, 0)
+        decoder.start()
+
+        val pcm = ByteArrayOutputStream()
+        val info = MediaCodec.BufferInfo()
+        var inputDone = false
+        var outputDone = false
+        while (!outputDone) {
+            if (!inputDone) {
+                inputDone = feedInput(decoder, extractor)
+            }
+
+            val outIndex = decoder.dequeueOutputBuffer(info, DEQUEUE_TIMEOUT_US)
+            if (outIndex < 0) {
+                continue // negative values are status codes, not buffer indices
+            }
+            if (info.size > 0) {
+                val buffer = checkNotNull(decoder.getOutputBuffer(outIndex))
+                val bytes = ByteArray(info.size)
+                buffer.position(info.offset)
+                buffer.get(bytes)
+                pcm.write(bytes)
+            }
+            decoder.releaseOutputBuffer(outIndex, false)
+            // Test only the end-of-stream bit; the final buffer's data was kept above.
+            outputDone = (info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0
+        }
+        decoder.stop()
+
+        val samples = pcm16ToFloat(pcm.toByteArray())
+        if (samples.isEmpty()) {
+            return WaveData(msg = "Failed to read selected file")
+        }
+        return WaveData(sampleRate = sampleRate, samples = samples)
+    } finally {
+        decoder.release()
+    }
+}
+
+// Queues at most one input buffer; returns true once end-of-stream has been queued.
+private fun feedInput(decoder: MediaCodec, extractor: MediaExtractor): Boolean {
+    val index = decoder.dequeueInputBuffer(DEQUEUE_TIMEOUT_US)
+    if (index < 0) {
+        return false // no input buffer available yet
+    }
+    val buffer = checkNotNull(decoder.getInputBuffer(index))
+    val size = extractor.readSampleData(buffer, 0)
+    if (size < 0) {
+        decoder.queueInputBuffer(index, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
+        return true
+    }
+    decoder.queueInputBuffer(index, 0, size, extractor.sampleTime, 0)
+    extractor.advance()
+    return false
+}
+
+// Converts little-endian signed 16-bit PCM bytes to floats in [-1, 1).
+private fun pcm16ToFloat(bytes: ByteArray): FloatArray =
+    FloatArray(bytes.size / 2) { k ->
+        // Kotlin bytes are signed: mask the low byte so 0x80..0xFF is not
+        // sign-extended; the high byte keeps its sign and carries the sample's.
+        val lo = bytes[2 * k].toInt() and 0xff
+        val hi = bytes[2 * k + 1].toInt()
+        ((hi shl 8) or lo) / 32768.0f
+    }
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt
new file mode 100644
index 000000000..f4bc24554
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerDiarizationObject.kt
@@ -0,0 +1,66 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization
+
+import android.content.res.AssetManager
+import android.util.Log
+import com.k2fsa.sherpa.onnx.FastClusteringConfig
+import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarization
+import com.k2fsa.sherpa.onnx.OfflineSpeakerDiarizationConfig
+import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationModelConfig
+import com.k2fsa.sherpa.onnx.OfflineSpeakerSegmentationPyannoteModelConfig
+import com.k2fsa.sherpa.onnx.SpeakerEmbeddingExtractorConfig
+
+// Please download
+//
https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+// then unzip it, rename model.onnx to segmentation.onnx, and mv
+// segmentation.onnx to the assets folder
+val segmentationModel = "segmentation.onnx"
+
+// please download it from
+// https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+// and move it to the assets folder
+val embeddingModel = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
+
+// in the end, your assets folder should look like below
+/*
+(py38) fangjuns-MacBook-Pro:assets fangjun$ pwd
+/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxSpeakerDiarization/app/src/main/assets
+(py38) fangjuns-MacBook-Pro:assets fangjun$ ls -lh
+total 89048
+-rw-r--r-- 1 fangjun staff 38M Oct 12 20:28 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+-rw-r--r-- 1 fangjun staff 5.7M Oct 12 20:28 segmentation.onnx
+ */
+
+/** Singleton holder of the diarization engine; call [initSpeakerDiarization] before reading [sd]. */
+object SpeakerDiarizationObject {
+    // Assigned under the lock in initSpeakerDiarization(); volatile because the
+    // `sd` getter reads it without taking that lock.
+    @Volatile
+    var _sd: OfflineSpeakerDiarization? = null
+    val sd: OfflineSpeakerDiarization
+        get() = checkNotNull(_sd) { "initSpeakerDiarization() has not been called" }
+
+    // Creates the engine on the first call; later calls return without doing anything.
+    fun initSpeakerDiarization(assetManager: AssetManager? = null) {
+        synchronized(this) {
+            if (_sd != null) return
+            Log.i(TAG, "Initializing sherpa-onnx speaker diarization")
+            val config = OfflineSpeakerDiarizationConfig(
+                segmentation = OfflineSpeakerSegmentationModelConfig(
+                    pyannote = OfflineSpeakerSegmentationPyannoteModelConfig(
+                        segmentationModel
+                    ),
+                    debug = true,
+                ),
+                embedding = SpeakerEmbeddingExtractorConfig(
+                    model = embeddingModel,
+                    debug = true,
+                    numThreads = 2,
+                ),
+                clustering = FastClusteringConfig(numClusters = -1, threshold = 0.5f),
+                minDurationOn = 0.2f,
+                minDurationOff = 0.5f,
+            )
+            _sd = OfflineSpeakerDiarization(assetManager = assetManager, config = config)
+        }
+    }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt
new file mode 120000
index 000000000..9bab8fe88
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/SpeakerEmbeddingExtractorConfig.kt
@@ -0,0 +1 @@
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/SpeakerEmbeddingExtractorConfig.kt
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt
new file mode 100644
index 000000000..b3640b9e9
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Help.kt
@@ -0,0 +1,38 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.screens
+
+import androidx.compose.foundation.layout.Box
+import androidx.compose.foundation.layout.Column
+import androidx.compose.foundation.layout.Spacer
+import
androidx.compose.foundation.layout.fillMaxSize
+import androidx.compose.foundation.layout.height
+import androidx.compose.foundation.layout.padding
+import androidx.compose.material3.Text
+import androidx.compose.runtime.Composable
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.unit.dp
+import androidx.compose.ui.unit.sp
+
+// Static help page: accepted input format, the two tunables and where the models come from.
+@Composable
+fun HelpScreen() {
+    val usage = "This app accepts only 16kHz 16-bit 1-channel *.wav files. " +
+            "It has two arguments: Number of speakers and clustering threshold. " +
+            "If you know the actual number of speakers in the file, please set it. " +
+            "Otherwise, please set it to 0. In that case, you have to set the threshold. " +
+            "A larger threshold leads to fewer segmented speakers."
+    val credits = "The speaker segmentation model is from " +
+            "pyannote-audio (https://huggingface.co/pyannote/segmentation-3.0), " +
+            "whereas the embedding extractor model is from 3D-Speaker (https://github.com/modelscope/3D-Speaker)"
+    val gap = Modifier.height(5.dp)
+    Box(modifier = Modifier.fillMaxSize()) {
+        Column(modifier = Modifier.padding(8.dp)) {
+            Text(usage)
+            Spacer(modifier = gap)
+            Text(credits)
+            Spacer(modifier = gap)
+            Text("Please see http://github.com/k2-fsa/sherpa-onnx ")
+            Spacer(modifier = gap)
+            Text("Everything is open-sourced!", fontSize = 20.sp)
+        }
+    }
+}
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt
new file mode 100644
index 000000000..a5a9cd31c
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/screens/Home.kt
@@ -0,0 +1,210 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.screens
+
+import android.util.Log
+import androidx.activity.compose.rememberLauncherForActivityResult
+import androidx.activity.result.contract.ActivityResultContracts
+import
androidx.compose.foundation.layout.Arrangement
+import androidx.compose.foundation.layout.Column
+import androidx.compose.foundation.layout.Row
+import androidx.compose.foundation.layout.Spacer
+import androidx.compose.foundation.layout.fillMaxWidth
+import androidx.compose.foundation.layout.padding
+import androidx.compose.foundation.layout.size
+import androidx.compose.foundation.rememberScrollState
+import androidx.compose.foundation.verticalScroll
+import androidx.compose.material3.Button
+import androidx.compose.material3.OutlinedTextField
+import androidx.compose.material3.Text
+import androidx.compose.runtime.Composable
+import androidx.compose.runtime.getValue
+import androidx.compose.runtime.mutableStateOf
+import androidx.compose.runtime.remember
+import androidx.compose.runtime.setValue
+import androidx.compose.ui.Alignment
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.platform.LocalClipboardManager
+import androidx.compose.ui.platform.LocalContext
+import androidx.compose.ui.text.AnnotatedString
+import androidx.compose.ui.unit.dp
+import androidx.compose.ui.unit.sp
+import androidx.documentfile.provider.DocumentFile
+import com.k2fsa.sherpa.onnx.speaker.diarization.SpeakerDiarizationObject
+import com.k2fsa.sherpa.onnx.speaker.diarization.TAG
+import kotlin.concurrent.thread
+
+// Samples of the most recently loaded file; a plain top-level variable, not Compose state.
+private var samples: FloatArray? = null
+
+/**
+ * Main screen: pick a wave file, choose the number of speakers and the clustering
+ * threshold, run diarization on a background thread, then show or copy the result.
+ */
+@Composable
+fun HomeScreen() {
+    val context = LocalContext.current
+    val clipboardManager = LocalClipboardManager.current
+
+    var filename by remember { mutableStateOf("") }
+    var status by remember { mutableStateOf("") }
+    var progress by remember { mutableStateOf("") }
+    var done by remember { mutableStateOf(false) }
+    var fileIsOk by remember { mutableStateOf(false) }
+    var started by remember { mutableStateOf(false) }
+    var numSpeakers by remember { mutableStateOf(0) }
+    var threshold by remember { mutableStateOf(0.5f) }
+
+    // Progress callback handed to processWithCallback(); it always returns 0.
+    val callback = { numProcessedChunks: Int, numTotalChunks: Int, _: Long ->
+        val percent = 100.0 * numProcessedChunks / numTotalChunks
+        progress = "%.2f%%".format(percent)
+        Log.i(TAG, progress)
+        0
+    }
+
+    // Receives the picked document's Uri, or null when nothing was picked.
+    val launcher = rememberLauncherForActivityResult(ActivityResultContracts.OpenDocument()) { uri ->
+        if (uri != null) {
+            filename = DocumentFile.fromSingleUri(context, uri)?.name ?: ""
+
+            progress = ""
+            done = false
+            fileIsOk = false
+
+            if (filename.isNotEmpty()) {
+                val data = readUri(context, uri)
+                Log.i(TAG, "sample rate: ${data.sampleRate}")
+                Log.i(TAG, "numSamples: ${data.samples?.size ?: 0}")
+                if (data.msg != null) {
+                    Log.i(TAG, "failed to read $filename")
+                    status = data.msg
+                } else if (data.sampleRate != SpeakerDiarizationObject.sd.sampleRate()) {
+                    status =
+                        "Expected sample rate: ${SpeakerDiarizationObject.sd.sampleRate()}. Given wave file with sample rate: ${data.sampleRate}"
+                } else {
+                    // Enable "Start" only when there really is something to process.
+                    samples = data.samples
+                    fileIsOk = data.samples != null
+                }
+            }
+        }
+    }
+
+    Column(
+        modifier = Modifier.padding(10.dp),
+        verticalArrangement = Arrangement.Top,
+    ) {
+        Row(
+            modifier = Modifier.fillMaxWidth(),
+            horizontalArrangement = Arrangement.SpaceEvenly,
+            verticalAlignment = Alignment.CenterVertically
+        ) {
+            Button(onClick = {
+                launcher.launch(arrayOf("audio/*"))
+            }) {
+                Text("Select a .wav file")
+            }
+
+            Button(
+                enabled = fileIsOk && !started,
+                onClick = {
+                    // Snapshot the input so that picking another file cannot swap it mid-run.
+                    val input = samples
+                    if (input != null) {
+                        Log.i(TAG, "started")
+                        Log.i(TAG, "num samples: ${input.size}")
+                        started = true
+                        done = false
+                        progress = ""
+                        status = "Started! Please wait"
+
+                        // Apply the values currently shown in the two text fields.
+                        val config = SpeakerDiarizationObject.sd.config
+                        config.clustering.numClusters = numSpeakers
+                        config.clustering.threshold = threshold
+                        SpeakerDiarizationObject.sd.setConfig(config)
+
+                        thread(true) {
+                            val segments = SpeakerDiarizationObject.sd.processWithCallback(
+                                input,
+                                callback = callback,
+                            )
+                            // Assemble the report first, then publish it with one state write.
+                            val report = buildString {
+                                for (s in segments) {
+                                    val start = "%.2f".format(s.start)
+                                    val end = "%.2f".format(s.end)
+                                    val speaker = "speaker_%02d".format(s.speaker)
+                                    append("$start -- $end $speaker\n")
+                                    Log.i(TAG, "$start -- $end $speaker")
+                                }
+                            }
+                            status = report
+                            done = true
+                            started = false
+                            Log.i(TAG, report)
+                        }
+                    }
+                },
+            ) {
+                Text("Start")
+            }
+            if (progress.isNotEmpty()) {
+                Text(progress, fontSize = 25.sp)
+            }
+        }
+
+        Row(
+            modifier = Modifier.fillMaxWidth(),
+            horizontalArrangement = Arrangement.SpaceEvenly,
+            verticalAlignment = Alignment.CenterVertically
+        ) {
+            OutlinedTextField(
+                value = numSpeakers.toString(),
+                // Blank or non-numeric input falls back to 0.
+                onValueChange = { text -> numSpeakers = text.toIntOrNull() ?: 0 },
+                label = {
+                    Text("Number of Speakers")
+                },
+            )
+        }
+
+        Row(
+            modifier = Modifier.fillMaxWidth(),
+            horizontalArrangement = Arrangement.SpaceEvenly,
+            verticalAlignment = Alignment.CenterVertically
+        ) {
+            OutlinedTextField(
+                value = threshold.toString(),
+                // Blank or non-numeric input falls back to 0.5.
+                onValueChange = { text -> threshold = text.toFloatOrNull() ?: 0.5f },
+                label = {
+                    Text("Clustering threshold")
+                },
+            )
+        }
+
+        if (filename.isNotEmpty()) {
+            Text(text = "Selected $filename")
+            Spacer(Modifier.size(20.dp))
+        }
+
+        // The copy button appears once a run has finished.
+        if (done) {
+            Button(onClick = {
+                clipboardManager.setText(AnnotatedString(status))
+                progress = "Copied!"
+            }) {
+                Text("Copy result")
+            }
+            Spacer(Modifier.size(20.dp))
+        }
+
+        // Error text, the "please wait" notice or the final segment list.
+        if (status.isNotEmpty()) {
+            Text(
+                status,
+                modifier = Modifier.verticalScroll(rememberScrollState()),
+            )
+        }
+    }
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt
new file mode 100644
index 000000000..a96515d3d
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Color.kt
@@ -0,0 +1,11 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
+
+import androidx.compose.ui.graphics.Color
+
+val Purple80 = Color(0xFFD0BCFF)
+val PurpleGrey80 = Color(0xFFCCC2DC)
+val Pink80 = Color(0xFFEFB8C8)
+
+val Purple40 = Color(0xFF6650a4)
+val PurpleGrey40 = Color(0xFF625b71)
+val Pink40 = Color(0xFF7D5260)
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt
new file mode 100644
index 000000000..5dbbe7e59
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Theme.kt
@@ -0,0 +1,58 @@
+package com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
+
+import android.app.Activity
+import android.os.Build
+import androidx.compose.foundation.isSystemInDarkTheme
+import
androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.darkColorScheme
+import androidx.compose.material3.dynamicDarkColorScheme
+import androidx.compose.material3.dynamicLightColorScheme
+import androidx.compose.material3.lightColorScheme
+import androidx.compose.runtime.Composable
+import androidx.compose.ui.platform.LocalContext
+
+private val DarkColorScheme = darkColorScheme(
+    primary = Purple80,
+    secondary = PurpleGrey80,
+    tertiary = Pink80
+)
+
+private val LightColorScheme = lightColorScheme(
+    primary = Purple40,
+    secondary = PurpleGrey40,
+    tertiary = Pink40
+
+    /* Other default colors to override
+    background = Color(0xFFFFFBFE),
+    surface = Color(0xFFFFFBFE),
+    onPrimary = Color.White,
+    onSecondary = Color.White,
+    onTertiary = Color.White,
+    onBackground = Color(0xFF1C1B1F),
+    onSurface = Color(0xFF1C1B1F),
+    */
+)
+
+/**
+ * App theme: dynamic color schemes when [dynamicColor] is set and the device runs
+ * Android 12 (API level S) or newer, otherwise the static schemes defined above.
+ */
+@Composable
+fun SherpaOnnxSpeakerDiarizationTheme(
+    darkTheme: Boolean = isSystemInDarkTheme(),
+    // Dynamic color is available on Android 12+
+    dynamicColor: Boolean = true,
+    content: @Composable () -> Unit
+) {
+    val supportsDynamic = Build.VERSION.SDK_INT >= Build.VERSION_CODES.S
+    val palette = if (dynamicColor && supportsDynamic) {
+        val ctx = LocalContext.current
+        if (darkTheme) dynamicDarkColorScheme(ctx) else dynamicLightColorScheme(ctx)
+    } else if (darkTheme) {
+        DarkColorScheme
+    } else {
+        LightColorScheme
+    }
+
+    MaterialTheme(colorScheme = palette, typography = Typography, content = content)
+}
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt
new file mode 100644
index 000000000..39a81b941
--- /dev/null
+++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/diarization/ui/theme/Type.kt
@@ -0,0 +1,34 @@
+package
com.k2fsa.sherpa.onnx.speaker.diarization.ui.theme
+
+import androidx.compose.material3.Typography
+import androidx.compose.ui.text.TextStyle
+import androidx.compose.ui.text.font.FontFamily
+import androidx.compose.ui.text.font.FontWeight
+import androidx.compose.ui.unit.sp
+
+// Material typography passed to MaterialTheme; only bodyLarge is overridden so far.
+val Typography = Typography(
+    bodyLarge = TextStyle(
+        fontFamily = FontFamily.Default,
+        fontWeight = FontWeight.Normal,
+        fontSize = 16.sp,
+        lineHeight = 24.sp,
+        letterSpacing = 0.5.sp
+    )
+    /* Other default text styles to override
+    titleLarge = TextStyle(
+        fontFamily = FontFamily.Default,
+        fontWeight = FontWeight.Normal,
+        fontSize = 22.sp,
+        lineHeight = 28.sp,
+        letterSpacing = 0.sp
+    ),
+    labelSmall = TextStyle(
+        fontFamily = FontFamily.Default,
+        fontWeight = FontWeight.Medium,
+        fontSize = 11.sp,
+        lineHeight = 16.sp,
+        letterSpacing = 0.5.sp
+    )
+    */
+)
\ No newline at end of file
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/arm64-v8a/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/arm64-v8a/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/armeabi-v7a/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/armeabi-v7a/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86_64/.gitkeep b/android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/x86_64/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
new
file mode 100644 index 000000000..2b068d114 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 000000000..07d5da9cb --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml new file mode 100644 index 000000000..6f3b755bf --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml new file mode 100644 index 000000000..6f3b755bf --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher.webp new file mode 100644 index 0000000000000000000000000000000000000000..c209e78ecd372343283f4157dcfd918ec5165bb3 GIT binary patch literal 1404 zcmV-?1%vuhNk&F=1pok7MM6+kP&il$0000G0000-002h-06|PpNX!5L00Dqw+t%{r zzW2vH!KF=w&cMnnN@{whkTw+#mAh0SV?YL=)3MimFYCWp#fpdtz~8$hD5VPuQgtcN zXl<@<#Cme5f5yr2h%@8TWh?)bSK`O 
z^Z@d={gn7J{iyxL_y_%J|L>ep{dUxUP8a{byupH&!UNR*OutO~0{*T4q5R6@ApLF! z5{w?Z150gC7#>(VHFJZ-^6O@PYp{t!jH(_Z*nzTK4 zkc{fLE4Q3|mA2`CWQ3{8;gxGizgM!zccbdQoOLZc8hThi-IhN90RFT|zlxh3Ty&VG z?Fe{#9RrRnxzsu|Lg2ddugg7k%>0JeD+{XZ7>Z~{=|M+sh1MF7~ zz>To~`~LVQe1nNoR-gEzkpe{Ak^7{{ZBk2i_<+`Bq<^GB!RYG+z)h;Y3+<{zlMUYd zrd*W4w&jZ0%kBuDZ1EW&KLpyR7r2=}fF2%0VwHM4pUs}ZI2egi#DRMYZPek*^H9YK zay4Iy3WXFG(F14xYsoDA|KXgGc5%2DhmQ1gFCkrgHBm!lXG8I5h*uf{rn48Z!_@ z4Bk6TJAB2CKYqPjiX&mWoW>OPFGd$wqroa($ne7EUK;#3VYkXaew%Kh^3OrMhtjYN?XEoY`tRPQsAkH-DSL^QqyN0>^ zmC>{#F14jz4GeW{pJoRpLFa_*GI{?T93^rX7SPQgT@LbLqpNA}<@2wH;q493)G=1Y z#-sCiRNX~qf3KgiFzB3I>4Z%AfS(3$`-aMIBU+6?gbgDb!)L~A)je+;fR0jWLL-Fu z4)P{c7{B4Hp91&%??2$v9iRSFnuckHUm}or9seH6 z>%NbT+5*@L5(I9j@06@(!{ZI?U0=pKn8uwIg&L{JV14+8s2hnvbRrU|hZCd}IJu7*;;ECgO%8_*W Kmw_-CKmY()leWbG literal 0 HcmV?d00001 diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp new file mode 100644 index 0000000000000000000000000000000000000000..b2dfe3d1ba5cf3ee31b3ecc1ced89044a1f3b7a9 GIT binary patch literal 2898 zcmV-Y3$650Nk&FW3jhFDMM6+kP&il$0000G0000-002h-06|PpNWB9900E$G+qN-D z+81ABX7q?;bwx%xBg?kcwr$(C-Tex-ZCkHUw(Y9#+`E5-zuONG5fgw~E2WDng@Bc@ z24xy+R1n%~6xI#u9vJ8zREI)sb<&Il(016}Z~V1n^PU3-_H17A*Bf^o)&{_uBv}Py zulRfeE8g(g6HFhk_?o_;0@tz?1I+l+Y#Q*;RVC?(ud`_cU-~n|AX-b`JHrOIqn(-t&rOg-o`#C zh0LPxmbOAEb;zHTu!R3LDh1QO zZTf-|lJNUxi-PpcbRjw3n~n-pG;$+dIF6eqM5+L();B2O2tQ~|p{PlpNcvDbd1l%c zLtXn%lu(3!aNK!V#+HNn_D3lp z2%l+hK-nsj|Bi9;V*WIcQRTt5j90A<=am+cc`J zTYIN|PsYAhJ|=&h*4wI4ebv-C=Be#u>}%m;a{IGmJDU`0snWS&$9zdrT(z8#{OZ_Y zxwJx!ZClUi%YJjD6Xz@OP8{ieyJB=tn?>zaI-4JN;rr`JQbb%y5h2O-?_V@7pG_+y z(lqAsqYr!NyVb0C^|uclHaeecG)Sz;WV?rtoqOdAAN{j%?Uo%owya(F&qps@Id|Of zo@~Y-(YmfB+chv^%*3g4k3R0WqvuYUIA+8^SGJ{2Bl$X&X&v02>+0$4?di(34{pt* zG=f#yMs@Y|b&=HyH3k4yP&goF2LJ#tBLJNNDo6lG06r}ghC-pC4Q*=x3;|+W04zte zAl>l4kzUBQFYF(E`KJy?ZXd1tnfbH+Z~SMmA21KokJNs#eqcXWKUIC>{TuoKe^vhF 
z);H)o`t9j~`$h1D`#bxe@E`oE`cM9w(@)5Bp8BNukIwM>wZHfd0S;5bcXA*5KT3bj zc&_~`&{z7u{Et!Z_k78H75gXf4g8<_ul!H$eVspPeU3j&&Au=2R*Zp#M9$9s;fqwgzfiX=E_?BwVcfx3tG9Q-+<5fw z%Hs64z)@Q*%s3_Xd5>S4dg$s>@rN^ixeVj*tqu3ZV)biDcFf&l?lGwsa zWj3rvK}?43c{IruV2L`hUU0t^MemAn3U~x3$4mFDxj=Byowu^Q+#wKRPrWywLjIAp z9*n}eQ9-gZmnd9Y0WHtwi2sn6n~?i#n9VN1B*074_VbZZ=WrpkMYr{RsI ztM_8X1)J*DZejxkjOTRJ&a*lrvMKBQURNP#K)a5wIitfu(CFYV4FT?LUB$jVwJSZz zNBFTWg->Yk0j&h3e*a5>B=-xM7dE`IuOQna!u$OoxLlE;WdrNlN)1 z7**de7-hZ!(%_ZllHBLg`Ir#|t>2$*xVOZ-ADZKTN?{(NUeLU9GbuG-+Axf*AZ-P1 z0ZZ*fx+ck4{XtFsbcc%GRStht@q!m*ImssGwuK+P@%gEK!f5dHymg<9nSCXsB6 zQ*{<`%^bxB($Z@5286^-A(tR;r+p7B%^%$N5h%lb*Vlz-?DL9x;!j<5>~kmXP$E}m zQV|7uv4SwFs0jUervsxVUm>&9Y3DBIzc1XW|CUZrUdb<&{@D5yuLe%Xniw^x&{A2s z0q1+owDSfc3Gs?ht;3jw49c#mmrViUfX-yvc_B*wY|Lo7; zGh!t2R#BHx{1wFXReX*~`NS-LpSX z#TV*miO^~B9PF%O0huw!1Zv>^d0G3$^8dsC6VI!$oKDKiXdJt{mGkyA`+Gwd4D-^1qtNTUK)`N*=NTG-6}=5k6suNfdLt*dt8D| z%H#$k)z#ZRcf|zDWB|pn<3+7Nz>?WW9WdkO5(a^m+D4WRJ9{wc>Y}IN)2Kbgn;_O? zGqdr&9~|$Y0tP=N(k7^Eu;iO*w+f%W`20BNo)=Xa@M_)+o$4LXJyiw{F?a633SC{B zl~9FH%?^Rm*LVz`lkULs)%idDX^O)SxQol(3jDRyBVR!7d`;ar+D7do)jQ}m`g$TevUD5@?*P8)voa?kEe@_hl{_h8j&5eB-5FrYW&*FHVt$ z$kRF9Nstj%KRzpjdd_9wO=4zO8ritN*NPk_9avYrsF(!4))tm{Ga#OY z(r{0buexOzu7+rw8E08Gxd`LTOID{*AC1m*6Nw@osfB%0oBF5sf<~wH1kL;sd zo)k6^VyRFU`)dt*iX^9&QtWbo6yE8XXH?`ztvpiOLgI3R+=MOBQ9=rMVgi<*CU%+d1PQQ0a1U=&b0vkF207%xU0ssI2 literal 0 HcmV?d00001 diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-mdpi/ic_launcher.webp new file mode 100644 index 0000000000000000000000000000000000000000..4f0f1d64e58ba64d180ce43ee13bf9a17835fbca GIT binary patch literal 982 zcmV;{11bDcNk&G_0{{S5MM6+kP&il$0000G0000l001ul06|PpNU8t;00Dqo+t#w^ z^1csucXz7-Qrhzl9HuHB%l>&>1tG2^vb*E&k^T3$FG1eQZ51g$uv4V+kI`0<^1Z@N zk?Jjh$olyC%l>)Xq;7!>{iBj&BjJ`P&$fsCfpve_epJOBkTF?nu-B7D!hO=2ZR}

C%4 zc_9eOXvPbC4kzU8YowIA8cW~Uv|eB&yYwAObSwL2vY~UYI7NXPvf3b+c^?wcs~_t{ ze_m66-0)^{JdOMKPwjpQ@Sna!*?$wTZ~su*tNv7o!gXT!GRgivP}ec?5>l1!7<(rT zds|8x(qGc673zrvYIz;J23FG{9nHMnAuP}NpAED^laz3mAN1sy+NXK)!6v1FxQ;lh zOBLA>$~P3r4b*NcqR;y6pwyhZ3_PiDb|%n1gGjl3ZU}ujInlP{eks-#oA6>rh&g+!f`hv#_%JrgYPu z(U^&XLW^QX7F9Z*SRPpQl{B%x)_AMp^}_v~?j7 zapvHMKxSf*Mtyx8I}-<*UGn3)oHd(nn=)BZ`d$lDBwq_GL($_TPaS{UeevT(AJ`p0 z9%+hQb6z)U9qjbuXjg|dExCLjpS8$VKQ55VsIC%@{N5t{NsW)=hNGI`J=x97_kbz@ E0Of=7!TQj4N+cqN`nQhxvX7dAV-`K|Ub$-q+H-5I?Tx0g9jWxd@A|?POE8`3b8fO$T))xP* z(X?&brZw({`)WU&rdAs1iTa0x6F@PIxJ&&L|dpySV!ID|iUhjCcKz(@mE z!x@~W#3H<)4Ae(4eQJRk`Iz3<1)6^m)0b_4_TRZ+cz#eD3f8V;2r-1fE!F}W zEi0MEkTTx}8i1{`l_6vo0(Vuh0HD$I4SjZ=?^?k82R51bC)2D_{y8mi_?X^=U?2|F{Vr7s!k(AZC$O#ZMyavHhlQ7 zUR~QXuH~#o#>(b$u4?s~HLF*3IcF7023AlwAYudn0FV~|odGH^05AYPEfR)8p`i{n zwg3zPVp{+wOsxKc>)(pMupKF!Y2HoUqQ3|Yu|8lwR=?5zZuhG6J?H`bSNk_wPoM{u zSL{c@pY7+c2kck>`^q1^^gR0QB7Y?KUD{vz-uVX~;V-rW)PDcI)$_UjgVV?S?=oLR zf4}zz{#*R_{LkiJ#0RdQLNC^2Vp%JPEUvG9ra2BVZ92(p9h7Ka@!yf9(lj#}>+|u* z;^_?KWdzkM`6gqPo9;;r6&JEa)}R3X{(CWv?NvgLeOTq$cZXqf7|sPImi-7cS8DCN zGf;DVt3Am`>hH3{4-WzH43Ftx)SofNe^-#|0HdCo<+8Qs!}TZP{HH8~z5n`ExcHuT zDL1m&|DVpIy=xsLO>8k92HcmfSKhflQ0H~9=^-{#!I1g(;+44xw~=* zxvNz35vfsQE)@)Zsp*6_GjYD};Squ83<_?^SbALb{a`j<0Gn%6JY!zhp=Fg}Ga2|8 z52e1WU%^L1}15Ex0fF$e@eCT(()_P zvV?CA%#Sy08_U6VPt4EtmVQraWJX` zh=N|WQ>LgrvF~R&qOfB$!%D3cGv?;Xh_z$z7k&s4N)$WYf*k=|*jCEkO19{h_(%W4 zPuOqbCw`SeAX*R}UUsbVsgtuG?xs(#Ikx9`JZoQFz0n*7ZG@Fv@kZk`gzO$HoA9kN z8U5{-yY zvV{`&WKU2$mZeoBmiJrEdzUZAv1sRxpePdg1)F*X^Y)zp^Y*R;;z~vOv-z&)&G)JQ{m!C9cmziu1^nHA z`#`0c>@PnQ9CJKgC5NjJD8HM3|KC(g5nnCq$n0Gsu_DXk36@ql%npEye|?%RmG)

FJ$wK}0tWNB{uH;AM~i literal 0 HcmV?d00001 diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher.webp new file mode 100644 index 0000000000000000000000000000000000000000..948a3070fe34c611c42c0d3ad3013a0dce358be0 GIT binary patch literal 1900 zcmV-y2b1_xNk&Fw2LJ$9MM6+kP&il$0000G0001A003VA06|PpNH75a00DqwTbm-~ zullQTcXxO9ki!OCRx^i?oR|n!<8G0=kI^!JSjFi-LL*`V;ET0H2IXfU0*i>o6o6Gy zRq6Ap5(_{XLdXcL-MzlN`ugSdZY_`jXhcENAu)N_0?GhF))9R;E`!bo9p?g?SRgw_ zEXHhFG$0{qYOqhdX<(wE4N@es3VIo$%il%6xP9gjiBri+2pI6aY4 zJbgh-Ud|V%3O!IcHKQx1FQH(_*TK;1>FQWbt^$K1zNn^cczkBs=QHCYZ8b&l!UV{K z{L0$KCf_&KR^}&2Fe|L&?1I7~pBENnCtCuH3sjcx6$c zwqkNkru);ie``q+_QI;IYLD9OV0ZxkuyBz|5<$1BH|vtey$> z5oto4=l-R-Aaq`Dk0}o9N0VrkqW_#;!u{!bJLDq%0092{Ghe=F;(kn} z+sQ@1=UlX30+2nWjkL$B^b!H2^QYO@iFc0{(-~yXj2TWz?VG{v`Jg zg}WyYnwGgn>{HFaG7E~pt=)sOO}*yd(UU-D(E&x{xKEl6OcU?pl)K%#U$dn1mDF19 zSw@l8G!GNFB3c3VVK0?uyqN&utT-D5%NM4g-3@Sii9tSXKtwce~uF zS&Jn746EW^wV~8zdQ1XC28~kXu8+Yo9p!<8h&(Q({J*4DBglPdpe4M_mD8AguZFn~ ztiuO~{6Bx?SfO~_ZV(GIboeR9~hAym{{fV|VM=77MxDrbW6`ujX z<3HF(>Zr;#*uCvC*bpoSr~C$h?_%nXps@A)=l_;({Fo#6Y1+Zv`!T5HB+)#^-Ud_; zBwftPN=d8Vx)*O1Mj+0oO=mZ+NVH*ptNDC-&zZ7Hwho6UQ#l-yNvc0Cm+2$$6YUk2D2t#vdZX-u3>-Be1u9gtTBiMB^xwWQ_rgvGpZ6(C@e23c!^K=>ai-Rqu zhqT`ZQof;9Bu!AD(i^PCbYV%yha9zuoKMp`U^z;3!+&d@Hud&_iy!O-$b9ZLcSRh? 
z)R|826w}TU!J#X6P%@Zh=La$I6zXa#h!B;{qfug}O%z@K{EZECu6zl)7CiNi%xti0 zB{OKfAj83~iJvmpTU|&q1^?^cIMn2RQ?jeSB95l}{DrEPTW{_gmU_pqTc)h@4T>~& zluq3)GM=xa(#^VU5}@FNqpc$?#SbVsX!~RH*5p0p@w z;~v{QMX0^bFT1!cXGM8K9FP+=9~-d~#TK#ZE{4umGT=;dfvWi?rYj;^l_Zxywze`W z^Cr{55U@*BalS}K%Czii_80e0#0#Zkhlij4-~I@}`-JFJ7$5{>LnoJSs??J8kWVl6|8A}RCGAu9^rAsfCE=2}tHwl93t0C?#+jMpvr7O3`2=tr{Hg$=HlnjVG^ewm|Js0J*kfPa6*GhtB>`fN!m#9J(sU!?(OSfzY*zS(FJ<-Vb zfAIg+`U)YaXv#sY(c--|X zEB+TVyZ%Ie4L$gi#Fc++`h6%vzsS$pjz9aLt+ZL(g;n$Dzy5=m=_TV(3H8^C{r0xd zp#a%}ht55dOq?yhwYPrtp-m1xXp;4X;)NhxxUpgP%XTLmO zcjaFva^}dP3$&sfFTIR_jC=2pHh9kpI@2(6V*GQo7Ws)`j)hd+tr@P~gR*2gO@+1? zG<`_tB+LJuF|SZ9tIec;h%}}6WClT`L>HSW?E{Hp1h^+mlbf_$9zA>!ug>NALJsO{ mU%z=YwVD?}XMya)Bp;vlyE5&E_6!fzx9pwrdz474!~g(M6R?N? literal 0 HcmV?d00001 diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp new file mode 100644 index 0000000000000000000000000000000000000000..1b9a6956b3acdc11f40ce2bb3f6efbd845cc243f GIT binary patch literal 3918 zcmV-U53%r4Nk&FS4*&pHMM6+kP&il$0000G0001A003VA06|PpNSy@$00HoY|G(*G z+qV7x14$dSO^Re!iqt-AAIE9iwr$(CZQJL$blA4B`>;C3fBY6Q8_YSjb2%a=fc}4E zrSzssacq<^nmW|Rs93PJni30R<8w<(bK_$LO4L?!_OxLl$}K$MUEllnMK|rg=f3;y z*?;3j|Nh>)p0JQ3A~rf(MibH2r+)3cyV1qF&;8m{w-S*y+0mM){KTK^M5}ksc`qX3 zy>rf^b>~l>SSHds8(I@hz3&PD@LmEs4&prkT=BjsBCXTMhN$_)+kvnl0bLKW5rEsj z*d#KXGDB4P&>etx0X+`R19yC=LS)j!mgs5M0L~+o-T~Jl!p!AJxnGAhV%~rhYUL4hlWhgES3Kb5oA&X z{}?3OBSS-{!v$nCIGj->(-TAG)8LR{htr41^gxsT8yqt2@DEG6Yl`Uma3Nd4;YUoW zTbkYl3CMU5ypMF3EIkYmWL|*BknM`0+Kq6CpvO(y$#j94e+q{vI{Zp8cV_6RK!`&C zob$*5Q|$IZ09dW=L!V zw@#2wviu|<#3lgGE8GEhcx+zBt`} zOwP8j9X%^f7i_bth4PiJ$LYtFJSCN$3xwDN;8mr*B;CJwBP2G0TMq0uNt7S^DO_wE zepk!Wrn#Z#03j{`c*Rf~y3o7?J}w?tEELRUR2cgxB*Y{LzA#pxHgf}q?u5idu>077 zd^=p)`nA}6e`|@`p?u}YU66PP_MA}Zqqe!c{nK&z%Jwq1N4e_q<#4g^xaz=ao;u|6 zwpRcW2Lax=ZGbx=Q*HhlJ`Ns#Y*r0*%!T?P*TTiX;rb)$CGLz=rSUum$)3Qyv{BL2 
zO*=OI2|%(Yz~`pNEOnLp>+?T@glq-DujlIp?hdJeZ7ctP4_OKx|5@EOps3rr(pWzg zK4d3&oN-X2qN(d_MkfwB4I)_)!I_6nj2iA9u^pQ{;GckGLxBGrJUM2Wdda!k)Y>lq zmjws>dVQ*vW9lvEMkiN3wE-__6OWD0txS&Qn0n22cyj4Q*8(nG4!G{6OOwNvsrPIL zCl-$W9UwkEUVuLwyD%|inbOF*xMODZ4VMEVAq_zUxZ+K#Gdqf!DW$5f)?7UNOFMz! zrB~tuu=6X2FE(p^iqgxr+?ZK;=yz`e;C$#_@D9Lj-+TDVOrva>(#*PVbaHO>A)mhl z07OJWCqYC60518$!&c`eNBcBW%GnfaQ*$eazV^2_AW?j)h;J1nUjN(I9=0+!RVx~% z3@Tf!P0TE+98jA?WceK-}A1% zW!K)lyKcGqy#M~})315-A#2NXQ`?6NR#Apo=S!oF=JfpX>iR*49ec{7AN$xxpK{D$ z2d%Fz&rdfSqourN$~Y^NFIMV1CZ?J*bMx~H3k&meGtH@q9ra2vZxmA$S(#jaaj-g4 ztJmxG+DLV<*q<|sDXPp$X>E)#S}Vm&sRaO5P&goh2><}FEdZSXDqsL$06sAkh(e+v zAsBhKSRexgwg6tIy~GFJzaTxXD(}|+0eOwFDA%rn`X;MVwDHT9=4=g%OaJ9s%3b9>9EUTnnp0t;2Zpa{*>mk~hZqItE_!dQ zOtC>8`$l|mV43Jbudf0N6&&X;{=z}Zi}d1`2qmJ}i|0*GsulD3>GgQXHN)pkR6sf1 z?5ZU%&xtL}oH;YiAA)d*^Ndw2T$+Mjuzyzz@-SM`9df7LqTxLuIwC~S0092~+=qYv z@*ja;?Wt!T!{U?c*Z0YtGe)XbI&y-?B&G2$`JDM)(dIV9G`Sc#6?sI60de6kv+)Qb zUW~2|WjvJq3TA8`0+sWA3zRhY9a~ow)O~&StBkG2{*{TGiY~S8ep{V&Vo2l<6LWsu z^#p0-v*t2?3&aA1)ozu|%efSR=XnpX$lvTeRdKlvM!@|pM5p2w3u-6 zU>}t2xiYLS+{|%C65AzX+23Mtlq?BS&YdYcYsVjoiE&rT>;Necn6l^K)T^lmE`5u{ zm1i+-a-gc;Z&v-{;8r)z6NYfBUv+=_L}ef}qa9FX01)+Aaf+;xj(mL6|JUzGJR1|fnanb%?BPPIp>SCjP|8qE5qJ{=n5ZGw?81z3(k;pzH%1CtlX50{E7h)$h{qGKfzC`e2o`*IqA#tjA z`Fz&^%$b9F*N`)U-#6>a)Z`55`$Dd0cfcs0$d13^ONrdCu9xcv_=n#WQo8stcz3jP9|2EvdI-RhJM3%Q%oM&!OlShM|0 z?gz?wHZSnm45njLtsz8PVT1S&jAlbKg5kVam$p16=EK@Sj4EP0OtH zmJDmdc^v)x>56Qg_wmYHz6h)>kl_h$>0@J!ypv%APmjZTAQVLy6Fu50RGY&JAVNhx zrF_qG6`x9MkT;1SFWo$)l{M$;3qUDn9JwE}z zRl#E_bDRJFii61kPgBybIgp8dNW!Cc1b*^YYk-#oWLJvtM_v^hQx~9?8LD4VFFxBF z3MlrsSC%f9Oupn*ctPL0U1fwfX?`tRhPD{PSLFPQOmIt$mDy0SgpNVvHS+f#Do>h1Gn?LZU9(KaN>Q_=Y*_T zvtD7%_u^^+{g`0VGzg(VZrpVQ6Ub5M=tI_p7T93R8@3Zulu3|#{iNcu!oiHxZ4Rf*( zfmiN$$ru(*_Zqn=`Gq#OuHRTSwp7uH_SokR&|)RuW5yo=Z|_4?qU-JU+tpt>!B&Is z@N(=SG;bpVc;AO@zbmMM zScqq1)b-ZQIrs={oD}|?6y{$HNB1U0^LsBh8JI&3!GBZxOXI<}&5-$lgkAaYqhOTb z?2vEnZ$-kk;*M_17(upJF3%+iH*s0-r{vttXVB2OUwI1s^+G(Ft(U8gYFXC}#P&E^ 
z>T@C^tS`Z7{6HT4_nF~n>JlZtk5&qDBl6r|^kzQYe`wq!C)n@$c>WOPA61NDFj<<6 zGW71NMMhwAl!U-yqrq2xrSFqRCI8acw7?}3j;ynxo*-b7Co;g5r%^j=H@9({PXXBf z@r>U>>N;E)81wx`B4f%{PB~MHka_);%kBCb(d|Jy5!MqJ%2p`t&@L)4$T2j&-WHvG zv3(uyA_gwqNu(k?jQTtv3dgPKRZoH8prxe7>pQBW5L&dpumS&5Ld2?(sCpJjvc4L5 zEnh&?91WVm)ZdTj=fjJ$pPDdgAttLXuke+?KdKxu*;kTC(r!tQk6;gxj4h%FdHAt(^M3YvYj(!tOeN)+Hvj6+< zzyJRG?^lZfWuR#t!tUKP&(?%3v&Zd$R2YN>lB(Lq`OInY48%4%yTv2 zYe1{G`3)(PDEio5Y@-I5tUf`c%%OCJMtSW56g3iEg%3`$7XSJJHyA z<|7&N)5Xrlgv~%BO24eFd;Hd;uiK%D`EdK|quUeRZDqbh9l)%j%J#0lfrZumvA<_w zu&=AVvdChf6}eqh(bUz`(`Ue*p01{fBAcTgKyDYLs_I+YyJEk+rM@avU~>fB$n)HS zM7pfJydu`i%gfS<{PF94kZDv$t>06sAkheDzu40NJ$5CMW%n^Lls?8^p^QGWURbKu3ZduZQZ((s2? zzE`}<{;Zt7<$C|9R8A~DJ~@%x>TfP zF>TX8)@v|t)q4GjRt<}5s6hLHwRel7>V@&r-O|Av(yh;Q1A{E>Ir>p+%dHD|=l+lT zpr(Dg&>#Nu=!)6bCLr-ZS%|;h)Ij$+e@r8_{qO19QvDe=&1tmpY*0lcA^Cc-#{9fQ z<~$*<&P$Q<_jy#<$40PMofM7aQ}C=jphI`4kLg}Z7CIN#26D{-4v-_CA-LiE@(%{y!BzsU%gG`Q?sjLUf%qFSl0y)2#ae*+EI>s|i`d^V$Dn)qmzqRq6VJRY|{4ujsIU%#bnqU6MR&-1I_43=|5(6Jr;Jvert) zE?S|Tmn}Tv<-??sxV5@9t}3D=>YZ0JrQe$CO~|EY=Lj9RM&4svQHPQL6%pV5fPFiH zfXDx;l@~et{*{U*#c#Dvzu)|znDO7$#CRx)Z&yp-}SrD{&|(MQtfUz~n35@RLfUy=aqrhCX0M}J_r5QsK~NmRCR|Nm&L z41UdsLjWxSUlL41r^0K&nCCK>fdR-!MYjFg(z9_mF^C|#ZQw?`)f6uVzF^`bRnVY& zo}@M06J&_+>w9@jpaO4snmU;0t-(zYW1qVBHtuD!d?%?AtN7Plp><-1Y8Rqb20ZaP zTCgn*-Sri4Q8Xn>=gNaWQ57%!D35UkA@ksOlPB*Dvw}t02ENAqw|kFhn%ZyyW%+t{ zNdM!uqEM^;2}f+tECHbwLmH*!nZVrb$-az%t50Y2pg(HqhvY-^-lb}>^6l{$jOI6} zo_kBzj%8aX|6H5M0Y<)7pzz_wLkIpRm!;PzY)9+24wk2&TT{w--phDGDCOz{cN_ca zpnm7`$oDy=HX%0i-`769*0M6(e5j-?(?24%)<)&46y0e&6@HCDZAm9W6Ib#Y#BF6- z=30crHGg+RRTe%VBC>T00OV6F+gQDAK38Ne3N9bm|62tPccBJi)5{B z4zc^Db72XiBd}v$CF|yU{Z=M|DZ%-(XarYNclODlb1Kz1_EKLy(NSLCN`eUl(rBCL zT*jx@wNvze0|TSqgE(QArOZU)_?qH(sj#TwzElLs9q)(0u!_P|R%Cy_0JFQxgGV>1 zz4?_uq<8_gM0`c*Hh|;UMz~vrg1gQXp{ufg`hM_qU;U>+zmvc5blCLSq@PrEBSGR# z&8=2Z4uXN`F3p73ueD1l{s{k$WipAvSh5W7ABe?4)t;r@V?y`bNB5FvBuE|0VRTb< zM1Hn^?DSsJY+sX@T5xW=#>T9VEV|?<(=6|ge$X6Sb05!LFdjDcoq*gM(Zq=t;_)Le&jyt(&9jzR73noru`a# 
zN*<`KwGa^gZU3-)MSLF0aFag#f0<>E(bYTeHmtdbns#|I)-$)mJ`q9ctQ8g0=ET?| zdO}eZ*b_p>ygRTtR^5Ggdam=Zb5wmd{}np+Jn1d_=M`~P=M67jj})fH4ztb5yQqQW z^C|C&^LHAK-u+ooIK)yM)QM?t;|<{P;;{`p=BclzAN#JzL4jCwXkQB1Dy{=^KR`=~ zTrr)y7eiYBzSNs_DvO=4A6#EgGS-zY%Vi)N*Yb`U;6o}KR}dq{r9pT5wqZ@3NOE8- z9-(}D|Nc5732CSYQbL)!gPQ#RbD8BhK3dl{sUuPvei0tkvnJBxDEAYTesU8H$)g(Plra{VH(v3u^CO1~(+ zU0O7#)jaS4{NcwA+LuSm&VBcX2#Im3xg)W}ySNw%->orn1taZ&+d)}8gJTqA!u|5P z{yv?zol_3|(1(%M(EVU=cp?L`{Pi|ixk{U)*guFML3P!OSlz;zGA#T+E@8@cgQ_mv1o7RSU=Zo_82F?&&2r;WE z@wk}JHYEZ9nYUc(Vv~iTCa3u8e4q(yq<29VoNbKk|`mq%I6u)My=gPIDuUb&lzf4`MEA9^g8u z)vp8|$$HE9m_BTV?lOosIGa4jud=jIbw)O2eCMfyw2*S8?hjWw^nqws$O*M$3I1)x zR0PWFb3$ySOcGTe1dz%N0l;RPc`x%05FtT^f^j{YCP}*Q=lvp4$ZXrTZQHhO+w%wJn3c8j%+5C3UAFD&%8dBl_qi9D5g8fry}6Ev z2_Q~)5^N$!IU`BPh1O|=BxQ#*C5*}`lluC515$lxc-vNC)IgW=K|=z7o%cWFpndn= zX}f{`!VK02_kU+Q5a3m37J;c} zTzbxteE{GNf?yLt5X=Bzc-mio^Up0nunMCgp*ZJ;%MJvPM3QK)BryP(_v@ei4UvHr z6+sbCifQaOkL6-;5fL8$W($zZ_;CZp305C;~$hhRquZr-r)jjd1z z31%ZK{-(`P#|Um_Sivn@p$-vz46uqT>QG0B1w9znfS9A8PB2LaHdzA|_)yjXVR*l{ zkcu3@vEf7bxH0nkh`q?8FmoO_Ucui*>_a~P?qQrlZ9@+D7%MTpSnztpylXrt5!-k8_QPB?YL8Kx_On8WD zgT+111d(Op$^$&KLAN5+@?>f7F4~wFi(8TL8+szgVmcMDTp5l&k6~=rA{Dt}!gb^r zSWY<)M7D|Z2P0cEodj6E42PV>&>DFmQpgt)E-|#sSUU@uKed+F680H@<;-x{p|nuH4!_mn85rx>wz;0mPi2ZkL#k6;sznu?cXh!T0S>{w6 zL^gvR05NY64l*<+_L>On$rjx9!US;l;LX6@z}yi#2XHh)F@Oo+l)h%fq$v}DNmF2> zfs^_t0)3N-W<9-N?uedVv{)-J0W5mh#29QM5R5h&KuiRM=0Zvnf#lF=K#WlCgc#9c zS;qvh(P$!_a8JwyhI^ZJV2k+B6Z^64?w|1?5gyo6y{}923CRZfYVe1#?F% z7h2SUiNO3;T#JUOyovSs@@C1GtwipycA=*x5{BpIZ_#GCMuV8XK=x;qCNy{d7?wA~ zC+=vjls;ci&zW=6$H~4^K%v{p}Ab?U%C6Z4p%eC<3ExqU$XR<}LLF67A$Sr20DR_pJ3yeBa~ z^sw{V0FI5;UpwXsScYuhbqGQ`YQ25;6p6W^+tgL&;Ml;>S3CGpSZ>VrTn0m1$y$HU z&65)I!c?oREz};c=nLCliriqQX->4uivHTgd${GqeAlf*!P^B|jkU|*IdNP(&6C>4 zqOW$)Nw9nvjy^&`?E|gotDV{JmJ9Q~vuhy<`^C4XIUDt|j4o6rK^e8_(=YqC zuaR6TRVf@tUFHB079o4MBIh{M~4>WwnGgesQH*3?w(RA%hCZ*7)b!aNV=yOQ%o_Y=Lt0Sl*(9^jfRnC210Om$=y>*o|3z} zAR&vAdrB#mWoaB0fJSw9xw|Am$fzK>rx-~R#7IFSAwdu_EI|SRfB*yl0w8oX09H^q 
zAjl2?0I)v*odGJ40FVGaF&2qJq9Gv`>V>2r0|c`GX8h>CX8eHcOy>S0@<;M3<_6UM z7yCEpug5NZL!H_0>Hg_HasQGxR`rY&Z{geOy?N92Z z{lER^um|$*?*G63*njwc(R?NT)Bei*3jVzR>FWUDb^gKhtL4A=kE_1p-%Fo2`!8M} z(0AjuCiS;G{?*^1tB-uY%=)SRx&D)pK4u@>f6@KPe3}2j_har$>HqzH;UCR^ssFD0 z7h+VLO4o@_Yt>>AeaZKUxqyvxWCAjKB>qjQ30UA)#w z&=RmdwlT`7a8J8Yae=7*c8XL|{@%wA8uvCqfsNX^?UZsS>wX}QD{K}ad4y~iO*p%4 z_cS{u7Ek%?WV6em2(U9#d8(&JDirb^u~7wK4+xP$iiI6IlD|a&S)6o=kG;59N|>K1 zn(0mUqbG3YIY7dQd+*4~)`!S9m7H6HP6YcKHhBc#b%1L}VIisp%;TckEkcu0>lo@u995$<*Em;XNodjTiCdC%R+TX|_ZR#|1`RR|`^@Teh zl#w@8fI1FTx2Dy+{blUT{`^kY*V-AZUd?ZZqCS4gW(kY5?retkLbF=>p=59Nl|=sf zo1Pc|{{N4>5nt#627ylGF`3n>X%`w%bw-Y~zWM_{Si$dc82|=YhISal{N7OY?O`C4 zD|qb}6nLWJ`hUyL+E>-;ricg9J@ZNYP(x(Sct&OI$Y!QWr*=^VN;G3#i>^1n4e#Je zOVhbFbLpXVu*16enDM+ic;97@R~u&kh__kgP#!R`*rQEnA+_dLkNP~L`0alC|J;c; zeiK=s8;BsLE)KbG3BD&Br@(Ha@SBT&$?xX`=$;eeel=|R_dIr6-Ro?=HEjnsJ_b`1 zK6Yg^-6;^2aW!xeTK)A~3Rm|L^FCHB_I>jIju7ZGo&N_1*QHkxH2!!%@o4iZ?vntS;&zJdPe1dH#04YD93A44o-MpfD zP{rn_aq>U%RDvC2+bp;xPlsOzauIi3*Lf42`jVKKZCRuKdYhi>FDuL2l=v{$BCN#Q6796s%r-AG$Q^t(3c@ zD?w0UhYr11@feiyl9kY_@H8~|xlmO<8PfQmj1!$@WieW@VxR@Psxfe-v9WCi1+f>F4VL?0O~K7T?m4-u|pSkBpUJZZe*16_wAp zSYZ@;k`3;W3UHKUWc8QeI}0jH5Ly=cGWQPw(Kr2fm=-5L(d`lcXofy8tJY3@Tuadz zYWXR{mW7XT!RF#RVCe%}=tM*O6!AD3^(!8un~opNI%Uko7$5t@<8+?; zTxDys(MyyGsUjtSu9$+|_-t!U3fVb1dkK?l`17<+jfl=hrBHnDSV>^R1=TnQeyqbW z>ov#l%!1|S!1>8UUxIdhQq`_klcHVx0{?#>K3#$4GlXncwldt!g17TcvKq-jo_996 z>oA=tH9CqRl6Yw?Uc`am!V?lHJbizOJaVaScf1UP5e7Dbgabq=b!B~T&_F6?ooU>w%x0A zH~&MHJ=q`fCH{U<7MDXE4SD32cDZA)WJeWkllJ`UspWaS#eDe^kg^oU_A14UE9zG-a^g{xaXf$})Wik>gT zl#dkzGr(;h0JZDuFn(+k8wNq?PZ5grQ<+sM?wBGt@JnH6v0#or-5wBQWKU~(S_> zkE!tc*ZJ1Y&*p(xX84POb3cClRMd!^qJ#CAZfIepEj-<`VURS_yCz0(?*Ixcj4 z-!zV1_QZhpm=0<;*(nm+F>T=)o?ep@CK5I%g^VAA+RB25ab?7)A~z~egru=I1S|@v zH7tXV!0wmGS^qj#e+MY;C5eUjEAp$Y?LDkS^QPZ}8WN85?r$u<-Epi;yZ1|J2J`se z$D6DpH~2F=eI0B&=UFAUnJvZAmClJlK)sutJ?M>xpZiWV&0=G4MZP+x+p>EX=HbCz zxls%Mw?*u^;LbHWIWCyq+yi)`GmFn9J112CZda_u@YIP%i;srFg_paU02Ifij*7}l z&CF-(3|>*a|+vbNR`^RP=9G?ymEJ0Z~)d&c*UE$UMepZ 
zcITr{0WqhxkjUnM15js_gW=e3Uh|y6ZReaXHIz-=p`x5VvB&rH9y>Amv@^WmXFEw) zQXYrk3feir=a{jMQ+wDIkkFnZ$k{sJakHn*?u za%4b!00ev8NVLM1TY=cl?KB&55BY_MU-sg?c>=Dbz_W{(Z~c?HJi*XpYL)C6Bd8WH zt+v-#0&o~@t4qESi*)+eW%@VD0|o^yF)n0hME$UtXF$*Lvh}7sso{`|pn*JDIy5^Fm3s$5*zEE=?u5<=l8FJc3r%+H} zdfoNl2J0^~!-*mOL5o-x32|e0Im*E!yY7F7E5N)W3>+v_LBydlEx?4$RL5f2oYRD# zaR0wv(-p~wO0eLDl3K=%`{5+0Gd$ktO=W)gWlGZJ0`K z$_RNA=ckrfa;H0KA~dR^p�(p-{x$&=IACIfoAR!za)F-^da-t3#0Dycnp zwO~NVXwXCl;jE<}>%@xz|=8fIJAB?>+E{7)|4l${4ngA3G|=r z2Dyv;VVWSgZx9Wj>qUjleGl3Ei9K4>h!(lPS%8VOG>Xu0%6VDz^O=bjJmuP7>DeUv zrbI}MlHB^^d?{zv6d=@_ZD2lg1&G7UjnVN{1}9WkaM3H~btX0GtSzB+tZ^qRgWo4m z!GmimlG$=wgXCnr6j@m<1gAL46#T~5Bnm=2{^@>|t&`9mkEPddj zAvG~@Tv~TAm2i%VW}R-g(Z0)z-Y|szHr@rk>4MAyG*Ma*7Yh#H7(!-5>DZ@8r;_dx z{prSe<>~099F8vsYd2xff7uAS%7{S)f(|@me3t2$iy&NEc7OUEchp@9A|X;;IA>8!oX+y(BKJ$EzV* znR$z;!L$s7uy@{OT~nG#B!NRraT8(X##Ho!0r_o@gg0CA-9H^;-uE&?$2$nHv_00o z%cbuUc-tCx$Uh&EZ4Nf4Zgqv)Y6>usG3>GeQnxx_Z6+PcbX-+ysbt1hQ`K1LDpOE? zrAhIZhSN9yVIAOa22gn577tbc&i3|3V8NWy&!tw##`}9*x}gtI^h1DzZRA>UuaJG) zaZ7j)dq!O}{?#8Y7~7i6fHh4{`pL?>-18|p!S75Y#^DM>-S3)vuZG+Q7l@ek zQP~#cBpWgg#mApc_sPYjpw8odQuRokmTkzcNl`^CcKB7e&;zViV;{Y{o^Y$%7i0m# z62%#1Lq!RC?}lK>%mp}T!3Xv;L*0v*>USLm``N%>w>@fwC+#T&Tx2bN4w(20JB}oU zuSa6v^kXi0xPs?pbaOHnyiqq6By1EZY9OZ^^QA>{q-Hsd&m`pbQ%8121aWG-F5xf zlZ%;B{;C>X19|`^_?dVyCq>n+41w7|!tUS!{9rHlbhX=SZO5CQ^;!Du_E7*`GiR^Q w)2!4MKjfSAeNo!9>IaV6aUZ*?W>} zs4%E?srLW`CJh0GCIK@hTkrW7A15Iu%N&?Q^$0+!{Tv&|t^Y@u%!L zglTg&?Q5q#ijZ;&HBQ?FNPp;k3J5!&{^+SGq?AX~SiOM9jJMRpyP?RCr@z38AQyy&WRMaC;n4una$~nJKSp?q|s8F00c9?Q! 
zY_ovvjTFm+DeQM^LXJ#v0}6HRt3R1%5PT*}W!k8BEM;Jrj8dIceFo2fhzTqaB3KKk zGlCLI)gU25(#u6ch6GeB1k@eHq7l{EHXv0n6xE#ws#ri}08kkCf8hUt{|Ejb`2YW* zvg}0nSSX1m=76s?sZhRY$K=3dpJ+y*eDULGnL2}4>4nvW^7_<~wIM_5fjvwt4h1|g z)g0Z6ZFq9j<~9~b8((~TN{Z?ZQfw|is&Xp~AC61sj;xItKyCHdI|tCMC_LbXF>~vR z=w6V3^H=W4CbAgR4#xw}ETTwu2guW~=Crl@SMXv85jQ=%y!s^?m4PI0My7MWICO;- z175jm%&PcPWh8QdOU(#8bp4!N7ET-+)N}N2zk2)8ch|4Q&lPFNQgT-thu053`r*h3 z_8dI@G;`zn;lH$zX3RzIk`E8~`J=BBdR}qD%n@vVG1834)!pS1Y?zVkJGtsa(sB~y zNfMYKsOJb%5J(0ivK8d+l2D2y&5X!cg3BG!AJ}910|_${nF}sC1QF^nLIhzXk-Y#x z0)&1iK!O;Og0Ky!;`b~v%b$`S4E&fB)1NB4v@8wr( z&+NX4e^&o)ecb=)dd~C!{(1e6t?&9j{l8%U*k4)?`(L3;Qjw z#w7FS+U(94MaJKS!J9O8^$)36_J8;thW#2$y9i{bB{?M{QS_inZIJ!jwqAbfXYVd$ zQ5fC$6Nc9hFi8m^;oI-%C#BS|c8vy+@{jx6hFcf^_;2VRgkoN(0h!_VSGmgNPRsxI z8$rTo0LaYq-H5i&gtj81=&xU?H-Y2==G@uQV7E`@+2E9XQW@{&j`?EOktk|Ho{HU>ZqDzvgjwBmdex z&uZNd2C1h{{}2k6Ys9$*nFP3;K%u!MhW`uZy7Sn`1M1zs@Es&;z*Z>Gsh@-3Fe6pE zQD2@cqF((NrRevgvLsvM_8;;iNyJ5nyPyy?e!kvKjGj`6diRFBEe49Oa7wwkJFV7Z z$YT&DWloYu-H?3<0BKn9L&JYDT-SK~*6c5pi18P26$JESKRYj{T7Zk6KiRJcbvOO*{P56Q6s8msbeI3>|j>K9}Q9UBeq*inXKemCm`-<5|-$ZyN4u$(3 z&HcvqehFD%5Yrmykg-^d`=BSa8(i=>ZoC77^mWY{evp(km@aHqhUECBz76YiR+VYK zY_avFC~V3$=`6C4JhfHAQ@DZtUOwH`L;oYX6zK0-uI^?hS$ALfq}A7evR;ohJHij} zHSZdW?EKv9U1s4oD*<(0oQ*;MaQ6@cvGL zuHCPgm_NhVsgp^sfr*ia^Db}swo1?O(_Q2)y+S$CBm+g=9wCOUPbz(x)_GbaKa@A7 zuI&!ynLiZRT#V%_y_-D`0Z5lT*auoe{(U5NylTzFSJW()W-#F6*&A`LNO1bV#Y;QJ zSbLBnp|B^dtK|KIWC|No>JjWBWE@n7O)x{&^E(WMeMvp57#qA8m* zeTow*U@_86B#Fm*rxyYu5PRWaWHx8y> z*qmHEp(AMDl0v)ij(AY8fnH=~ZwwjVAbu*m5;xPfidh@ov6d8g zfJsi&!QyK53Es%sC39ts;54V68koALD4b|%tNHW0bIkZAJKa=W&FomJSEDT>W1xIX z1x%Z>AvNIsSPLcn3RTcHXb@KB?cuM)=x6fcIx>&(GxqZ8w3p#jJ(GVgc*`c0HG}dv zIop&Qim!K1NFwic%07KcjWgHBPUkq7f~lj;TPqVGTiT#cUeim>;nY`>h@a*S{qQex zQ`z62WK|Mj)Y{tfF{;T4P;c8$Q|KU?Joh zIkA^z%X7z|r>4aTh@|StTi!-r1D!g=zb#3d#{{&K3CqE$Iz-UH<%37c zRfkO`&uM%#AD3PHv`g5t0e^O%nVL0d{Xlx^EjEC3#skF@`zl-7PF^0oxW)1!C!JxR zWvuAHH?)61FKA1QeT*_sY7;_Id#!GmV4n`MO{~sv}VLSK` 
zXRw=Y=Clz*00B(5y^K;gCZMAzjT5+c3IC=)l(9VIDdatpxj3y89WwI|bH&$!ZEvp` zPR!T@#!(|KfI-w?!&+7$N3F6>tD{YO4Qg$d_`nNEdfVCha9vaPn0jI0`)`@*72hq! zpU5ND^P*RoEkbD5o#az(-g=Y)L>HH>Oc%}$ zT3Rs_ih0;4+Lv4Y;@Iv(;fUbQ=i-G(#>vghec~*j(I#r|5mqFiJBpzi&hzEcD{u$< zRsm0BVYn=pT;0>R(itW|*D&;O%bOc7et9ACaH#J>z3A1A~6fdP>pmbM%xzm4>|;c_?B+%sl;Qs2{t!60$^u zH1t@9^6>;?!FuusnISi$f5CL&;z?EqJN$FBuWDA#D5`cy_UvCFIVvf{c?4N0teh;d zET$7aVbj08KTQS!x?Nd1Is8q8qFzs}a=!@nJ;7FSfCY^T@D-gpw`w<6e#X3+;O}1h z$%I!M)0bg|EKUA04Qjn@+x{Rj8vt6Wn!R|3A92z}^$KfF5(#CWr4y#~re1CN4i4w0 z#GsypBR{xA3Er7sgAi(|}1-W?s~n$7?K|9WL8kpVfw-;#b9 z+mn;=ep!162U5R>_t}fOt~tE?s#m( zO-S$7>Ay6*hHdZ)7_oU915WYYCIX;hFI-U2EWYX!pllONr@Q--2o~`!isi6vTPLJ4@(|o=%NHYjo0_S&q*UQIROw@*N-By@PaQ&;YxFZ0aR zX&}LeOEz);#m~Hwm^VAY8DK}b$F4bo{jMN?d!lxKPhNklzr^Cd`0f4oJr^z=I|l`* zm8AHm*fPV`0=lF3Pnnp}&J0N1X@}-D94YvmUabFrLGSnTz7Mu^21F#O5tN#CuY9Vh zUZBH=ez%h*wkf0hBtXJh1SN3d+IF{gzT7lp)j}n?03lt;XSQRAh7qd&v;RwTYDuQ# zbI2*r<>?x-G0@hM{;%{VBD7nLKt~D`T~-HAt5;h%i0_=Ifs=yHma5dhJ+QMG?Ux(a z|E?1CMy1!~oA`FP!k~iG=t&5#>bVdz=peT8HMB6Y)#7PpETtNryT^+Rv3vpJaF^zP z{H}0-LyV9Fu21ID%wO9f1IKlFr1p4c{o-?03vyB-tr5duk^&L$;m_|f$vs`^Sl{j2 z95}oY{LlY+=ZS%J+tZoXCd0*sSU7w^gjovXn+g7uyra5{cU49@yHf#Z^Jl-$9cIfo z+AJuxH$VLb=#+uBbVmUjnx zxb1pZ@-O9=AIk4@S)m6fJ2?{HrNYwwnL3a45muuNjr;6$O`bGEM0T4A2_S$t=86*- zcO+0mywg*j#A4mU}enR_!cGmIYQ;qwfchWtFEXL)AK%*;=j znYne+hS4EMy3S)C*mZ1KI>!+)0V@9!N6H$Y}~MJ{rYuf zz^KljIWvFi-?#?V@LPR&c6Nn{!=XM z>}-h$S76;$H{E{Y%@^zlmOl^efBwa%UU+jJD9UVukQ3ti_kH-?H*RC0?M1W%FCvMB zM_+v6fk$6X2sx)-p~B3&Kl{nscK}pNLM*qjtpaf9>AU{-iPKQZR8yCg!TY}Qg*(;) z)gdvCcB%kppZc$VdvsK@)3l1{&DG!d_6OHOS`y=ITLEVu`unSKA2E%JD*DVX{LJ}K z9l>hMRDqxQh0lnpGHpVYneX}eA3Pt|2v%=q;rt)``R|#bDyB)OXY&vI_@|*}h}G?^ z@aZ4_!7cQPX`!fW_?{oT1NTwHs#l5L-0`E|y@48<3Q^HFf8=Idi zpJYD%1MkII!~|7I^WGo)IF=?{>ACnjJ_WUi39C}!Q{QnheVJqeKKqq5^o5CBde(g9 zvw$X6^jz_^E2$wSw4!q5*RG(C2_^XO$HBn_55vbl44OnTTRwRaePP0vo{K)U1#99& z<>rq7V&V(<&@I%MFoN5zrY}sz=(*-L&}1QQ*a%`u25h{cFj===17eB_uGuzG&byQ< zrm8BJZl4r_E$3k|Wo6FW0-6M7>qac5uFQsQcmkLWGfeH74S3Z_rJ!jgN++!@i=HW8 
zkyjI(oPH-+-N#Qc^-mpNO`bc6r=2-<%&Wy5K1vfFJB(L_IkpS6fY^NmuL8qsgj>MD zn~BHH9WM~32_3vd=W&B)k7F9q%stJx+b_L_X-4zr^LVUMCmyCTA3sWtkvsmME?Xiy z?xOSfB=_$oY06~J-HcCq&)qcW{j;uP;?Dm}=hkq?zh&n!;m((-G-u_t|6x399Q;>A zgNpxoJNj{u|MFDH7Rhq@FCAl0dE|ddnl!oh9{Lq?@JDoR6L;C941IK`ISfdE$4S zE0AUQ8+2|Ncl_q5QkSp#AODp~(^mfP&%Au@@|TBQwoP`UU+V{6u8|)6ZA{~uKmQ*M zmrMTDU8S~8Eqi{^v0Ug&5Upcm#y7Z1(RbgZAG8jB$eRwCspQ)>5;U)oGZ&E5aeR*K z8Yt`Y0$G))Yd(Y3KH}tA4`-_QmNke5hU_|nq=xtyjwW(_o?itz>B>WM&^63bNdQ)k@-IgDHW*RW$Xo9#RzrTrCn7L2H{9Amq|qNg@#eZY=|P zCoI?2s+L)zsM%WX(NbVEY^`C>lFjIBYmJ6@DKJ0ZT4&F&WHW!dwa%QzOG!?jY_2(S zDcEzZbz*2Q!43|z))9yOP9X1Xt%DXzwY(3tl-TR=Qb_MbZYRrooh;dYYmS!U_as1(=YVB?Q_A|tNu5Ut&_q3jbfDM zoFxT^uEuH`nX3*sB%K?GuHUkweYReBwnHqh3P)~`+s3+Tj!rDA1e)8vuBv5J*IsxC zkd^~b(aGzArj08{>cnzOuy04C+C`}gb|Yz-1avxeWzev3NzcHbz_&4W@QCr$z3~w=8Ua- z`;vfG1~BP8CyLb=F7t1am~ph_#|O%$khSJ9%Vtcn)YmpgQxF?xM^_Vb+5fnpB^W0I`f%X8gb9#X{Q-yJG0{Z56aWeI&zPxnf5pdJA38bM`cYnS#x)% z`n1tFf$i)W-hGm(f9mde^=X@NcV_lFb=P`4&CI&H=IArijGwdCk&X@uQ$5xmj!~^? z#$ROCI)V-~t%L%GS#wo@U27ddR`4`3)WoB{R-4snfNrfee|kI8^bu#yDgYqOwas9# zmcb`3!kRJ`Cr=_tq)8aMt{aGtUZsqwVlj6DgCGre>AEt&x8H_in!x@uwgExIh|-mA zjdaC(29~CTVSaaF7HPbql&*9Uo8P@f)>LqCXclr}peS7_1BQ28u9PO8Eq1@`l3q9o zkfKCaO2?T?ZyA6loW<#9_c^O=m<&h}CA!ineAD@=(gbq`vyT|tiJ6#^B1$P;;qax` z55k&Q?wEh#87niLo*+n4L@65J(Nz~=Ya%7^(miLb(E>A3B@|Jjl;FU&D>o|9#7PJH z?|ago!o;WC^h=|T7PVBg(DAB}72cyUS zb(f>Bwbr!F1eTCO5fpj<{PqhY5>143p?~5ZA5H40);=@M#MYvrB6gqHbU_!GSY??i z%s=>-ciA4*zOOZHds0a(kWewZ4h(k8h(ua7HX)Au&mY~H8KY6(_cb$_&fA@QjIW-*heP3%$d!m5^AdnT}`12qA^c@!g3DOwZ5WwE2?)-yU z!)Vx#Mtxt?FzFTwK!77sy7)sMzUd->w4^bxtpM2j!b1pjgyk zGKwWGeb4)^zjy{9Es&PU1}gwg?|J#L$KJB7ett9@4M%-nGtIQr0>Fl@8-yh`-+1ed zS6r}(MeSvgSoFmH*_WPu@i?}!AB~2?;i&IxrkNg~cQ9Som98tcq)k^|eeER|Zl77t za-TVUc;DNvzVXJ%w52+#weN?+;i#{f#!Oc&z?81*N>^e~ltRS%ZI@lR{rs()HmqG! 
zx*}ZrI-EZ}ckJMiy>A^oofwDfC~IH)z8{VHKGT@#E5I(Ll&+MnMCl>~AV7+>Gi%mF zkU1QlKASdR0B80!YhP<$Ywi0?W2Ux45oPfxv9QolWzJPD^weBfvo4SONxP35106sAmh(e+vAs0GboFD@PvNs)jNPvarhW}0YliZEg{Gazv z+JDIpoojRVPr<*C|BTq<`6ga{5q^8^!|0cxe=rZ!zxH3%f5ZO0cQ*Z<^$Yt2{|Ek0 zyT|*F+CO@K;(owBKtGg!S^xj-Z~rga2m6nxKl9J=fBSuNKW_dLKWhJKeg^-Xe`^1? z`TyJj)8E!#>_3Y?uKrwqq3LJ#SGU>AzUO|6`nR^u&3FNN_jGOc zw)Nw`wr3yIKhgcee6IaN=ws>M{6677%)hPwx&HzC(f&u~&)6@b2kNRzBDQAP0*H73 zq%McOmRk{B3i47qRe=DA*$&odrbEJZ*pV9XXa&p@wlW~@Yfs>V{yiTtplMhgM*-Bz zsSnlq&pG;z0OUN%$~$3=g1UF+G*>+17eRbBf3=y79J}KR8owon@$1Z7MIrvvWWH)34nK2SD)GsrJ{l z1Cl#oVo3A8qY3e=aF)qzms~FG#2$LzT=gs&aVMOj>(%{y<&O0cG!nCiESl~x=^dF{ zKvj8F1K8Ng171wwM5Fh4KoQw`_c6#y$(5cAm7e}~nJ#A*fx+c9;y#&W!#VukR)ugk zKp3=+;Ut+IYn%m+r4d*<`L2h%aDnX5}^!5R|H;(34AoVWjRx(msBZvk;rCI*|~ zdOijqI@9Z{Vu!~jvHW{lBa$rnl4+!s_5sfK3bCGk-B%iDe&@-}+%fOKU|(9?V1 zHE8&@4z)Kx!RAvAs z!Wic9=o#(bg?kc-G68-m(jZ`^=XGUXb)}t(%&~sjFnV^sEX%hSy6UKC4iOhgV=BHV z2w`4g7Y=s#Vu2B_?#VQ|hP39@eArgfX>-0S+dd&^mx0*wp}>)x;c4RUgxz%;oNe?& z-7-lJ@Y^2^C;=qJsxx5|xF)*pTGhch2B&kxtn;f!7=gznk}I3}Dh}(CoMXgA5-p&kS202!l?!fT3t|HG*rIP~mS* z$Wjo}jq3}z$Qq!9yrtd3fM0N629ZM?LU$nv@Tv9b7I;D|;0H2dsA~g7Z7zp1| zB)XmrkMgF6OQr|R)HHD^TE{Y#j!~SR?b`Xt3Qs`B+x<hxexYeAjMUWdZ-*n9%(1)Wb(n2U<><7&9dwGJmrob)4%H? 
zlQ%z+L-^$dFhhH|@u$%97Qz?*Ynh2VG@q|?8vY&L74&fs&_b&3$x&Oyjl~LQDRRap zJU4U*R+(2Dd!G+lh8!V{pT_UJn+^1Qg6$` zqkNm(a#hWyc6SP+p5=C4HL8-m`pO`5o~`-LI?_h5CsH?F_%?nDodmz&pWR20WTpJE z?N|wSzLjMUK8E)a2tI}Lf;+;*M|h3Y(U#>)g1>zk9|Hd}oZAa2 zLYBWBoSW!Ts!RwXr^8h+U*@{9{zqS^iH)Op<;r`Uw~nc}<^$V~_i%$GFjaG?X1@E|M`h)nekvFKt`Dh-f>@|0-`Xoq)o` zx;JmzDfOV9qCx|EVpogEe0LK~tGS?5$$L_i6P$P6wIsCQaP_;d{{N=iV@+8LI}o#( zvo*Ejy=IIn{rdIQh1&q-{EuohpVOjJ^Q3lD*YTp37$^RRgn8ihpdu5{Ct%5-KO!VL zcNB6dUajXI9jkm-P|i3~GB-A(X`P1Oqqb$tcku)UJw0w3GeUijb__#QT4j%64z%EeB7S?jlWwx_7&+EEvB|6N=kV}DwnyAlX=?j`) zmU#!$*^@NIu#n_d7;WoJV@*Fbv9|yJO4;n|BNF2xy(54RyB>t~8lUOUW$&2%Nwi1y zx6JxW88>U2$#qhl^6KUbtmg9}D0o5vYDT7kWJthLGkpGnN4T>{St^_EU>4;DmLF9o zr|LqsA8_MoNLQ=}w?8u!ziSZ@PC#Y<#9uJFo-ozVo6D;<8j^1$c|qAE3ZTE5i~zmE z$BU5lw6l=EWsg^y^;8>r9qH{xfL|~PZYK#md$zZ0?o11gV<*WSW~cgy2GYGQir%wf zt4iW8D+;s*;RGrmd(-T<@2&j(Cb9xhV*l-x`TpK`xq|7p?5R%5*s!69?2c!cC*VY* z2DE^9pvOPLU!1e}wA8S8opcTJ3`NB>hY=JQnL~QFXR4K8A$BqJnoEB$wn-%u@E6Mh zCfMF4kusv3N!(aHC}4)Xs^xoOwXd%e^6pi5|DZo=Q25j+6HlJ^7FodH6y1bMROR^q zGu6)fopS`h%Sw<;ZH%TEPf+#81-#_v+@8nlR0jLcIDKQtLleOC)6yLZgC!D9X3GgS zohwU{v$jl=quD#Go^hB{`@Qw*a%`(^jyT~=q^bWgGzRj;|12J55HWdCWV}EB|K=%N z3Nq-qxJJ`>^|1MNN+q}zTB&ooE3j==AgK@^UW<^oSbeALa2peF)Th6{@sj0KyMNHZ zksk1+MXN2tv+22A%cQOGpS9)77(uP9mh+!5T5ERLvF@b}$+WvXM45Z?-kCa)fb~f1 znVbTD$Gx-0Zxc`0D@YgHakge6SL0H`-vN_x?AP0>iGH0_EE&=v83hMJgaKAI0jJXm zVxVz;X<$v6WW7}fxROO7vr#YLP;;lij5VrX{;>7kK6TtOH&6|Ar^xo>00%+u$C4@# z>!jOt6*3><171+WxoZnKDTzJtDRw+T030;yI}~uV@9fCnei^I*j>Bp&mzP2d=FPb_ zCM*l_+$LDR3B*a!A$g#>xsrZvw0lckxmMg>0aQd7tPyN=t{dgXb;Ie+T8{fZH=gdu zM7Rg9c(kg(Jg0?ARRRl=AONFKrvFj)lTY$KfT%6^6s`mk*ABGhsce*LsoD>K{z_M2 ziPpnu+lw22PfF!CoId^6n*G4H(Ix+#+N{C(da7t1BYMGEaE#PdpOLxsVD5riQXHp@OX;`S`8VnpM~)I920w~<3|mo0 zf8~Az`*?2?H&gZ&*K&bRkV@qzvMlRHXys8*Ze2+1c?5o!^+$&MHxB@4Ee5cke52R! 
zmn7AZtY6ST%ixgU5)%$%QcwHj7Es-Qu^kLAPwy%7pGBw_4Q9#da^W2$}axNHr03)_nw z5?yuNmXrI5HgS46)c5&}B)Tts49oU92>3xBLLy}FMUW=84DQbVq^;7_e7|(Sdz|&J z73N+M`rc2rt*oSWu#7S{*s~nH6HRHJS1SmzeXk|;CA)FI4bat3<%}nkB%;;?=F>B7ms9QSxv#@+69;@>QaR?REYX4&)=itG>rM{<{A79Rmk)`5ON#GL`*KX%}Ihk3w(RtM-WLt z?f&FLF}4N^yE!(pZ&Yj&Bc`~K0@4_}*0Om?wN|}4WJ>WL;G^H2*QpgEkGA~OET-Km zkwz|5{6dnz1U<2Pe9DNL>3g5FEIvp1jzP&2K#z~j%g6!7B;^zF+o95?fV{3mnB8*RMhCDNp>Am-3e@jNfMj?jHV$MWjk!DDKP zkAz$Y?Sr)!GUOX}qTQ5aMh|wq1uq}~joWyKl=b_LboM#wi{CMuz5x6BKlA-qy++cM01D3b7`uD z#l6M4pI;JCypO8JZ6?U&wNxR!{4oB_ zlV!x9+-&Qy6{%MQ{~yoZGkKiTSC`YS_j22~G;xUV855g2&C(zm^V!(wpcm@zn{%!g z4}JGo(sGZ1O~to-}le

UmY2RIYtNPVDpE$%vda+HD#3m z&VuXJ{BK&Qe+rBa7eq}Q(bq|tn(RrJAk|ztj2(i{d>nmQnM?;HF2k&9sA6up5tmjl z7lySlzMbifH17-m-Lwa_F&e7nOH?ESi3#ckR3tsM+jsck3`oG!uMS}|eAwVXv>}qxwq?QY%QJ0}r@^;fhuUA9W z*BVl>TGo&N004@xSiwDUXUvp51sVmqO3m)=B55aPwf@0=e}cN+$-BdKxY`YrT_4)0 z_d10#i44Q*rFr8MC>*)v$EJvz``(pb{e&*6k+b zsMz%($|1+8hn8c2?P(l@;Rb&CsZeYoCI3?2!LqjbwPXW3z4G$Qfj=cT5Yb%vY0(AX oeb?AaKtwrnc|$|zzw9vfvn^aJJ!zd)XFXqqy0000001=f@-~a#s literal 0 HcmV?d00001 diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml new file mode 100644 index 000000000..f8c6127d3 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/colors.xml @@ -0,0 +1,10 @@ + + + #FFBB86FC + #FF6200EE + #FF3700B3 + #FF03DAC5 + #FF018786 + #FF000000 + #FFFFFFFF + \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml new file mode 100644 index 000000000..05f2df090 --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/strings.xml @@ -0,0 +1,3 @@ + + SherpaOnnxSpeakerDiarization + \ No newline at end of file diff --git a/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml new file mode 100644 index 000000000..34d1d96ed --- /dev/null +++ b/android/SherpaOnnxSpeakerDiarization/app/src/main/res/values/themes.xml @@ -0,0 +1,5 @@ + + + +