From 80094dfaafcf4917748124c276d9b68c8d667606 Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Fri, 24 Nov 2023 17:29:56 -0800 Subject: [PATCH 1/2] update versions --- .github/workflows/react-codestyle.yml | 8 ++++++++ .github/workflows/react-demos.yml | 8 ++++++++ .github/workflows/react.yml | 4 ++++ binding/react/package.json | 4 ++-- binding/react/yarn.lock | 8 ++++---- demo/react/package.json | 4 ++-- demo/react/scripts/run_demo.js | 2 +- demo/react/src/VoiceWidget.tsx | 19 +++++++++++-------- demo/react/yarn.lock | 16 +++++++--------- 9 files changed, 47 insertions(+), 26 deletions(-) diff --git a/.github/workflows/react-codestyle.yml b/.github/workflows/react-codestyle.yml index e877f1d9..96ff5d91 100644 --- a/.github/workflows/react-codestyle.yml +++ b/.github/workflows/react-codestyle.yml @@ -34,6 +34,14 @@ jobs: - name: Pre-build dependencies run: npm install yarn + - name: Build Local Web SDK + run: yarn && yarn copywasm && yarn build + working-directory: binding/web + + - name: Build Local React SDK + run: yarn && yarn build + working-directory: binding/react + - name: Run Binding Linter run: yarn && yarn lint working-directory: binding/react diff --git a/.github/workflows/react-demos.yml b/.github/workflows/react-demos.yml index 6dfc22b7..770f5fd3 100644 --- a/.github/workflows/react-demos.yml +++ b/.github/workflows/react-demos.yml @@ -38,6 +38,14 @@ jobs: - name: Pre-build dependencies run: npm install yarn + - name: Build Local Web SDK + run: yarn && yarn copywasm && yarn build + working-directory: binding/web + + - name: Build Local React SDK + run: yarn && yarn build + working-directory: binding/react + - name: Install dependencies run: yarn install diff --git a/.github/workflows/react.yml b/.github/workflows/react.yml index 1eed6171..61c95f59 100644 --- a/.github/workflows/react.yml +++ b/.github/workflows/react.yml @@ -46,6 +46,10 @@ jobs: - name: Pre-build dependencies run: npm install yarn + - name: Build Local Web SDK + run: yarn && yarn copywasm && yarn build + working-directory: binding/web + - name: Install dependencies run: yarn install diff --git a/binding/react/package.json b/binding/react/package.json index 9665b657..67a1205c 100644 --- a/binding/react/package.json +++ b/binding/react/package.json @@ -1,6 +1,6 @@ { "name": "@picovoice/leopard-react", - "version": "1.2.0", + "version": "2.0.0", "description": "React hook for Leopard Web SDK", "entry": "src/index.ts", "module": "dist/esm/index.js", @@ -35,7 +35,7 @@ "test": "cypress run --component" }, "dependencies": { - "@picovoice/leopard-web": "=1.2.2" + "@picovoice/leopard-web": "=2.0.0" }, "devDependencies": { "@babel/core": "^7.21.3", diff --git a/binding/react/yarn.lock b/binding/react/yarn.lock index 5dd16d7f..18f87aa0 100644 --- a/binding/react/yarn.lock +++ b/binding/react/yarn.lock @@ -1110,10 +1110,10 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@picovoice/leopard-web@=1.2.2": - version "1.2.2" - resolved "https://registry.npmjs.org/@picovoice/leopard-web/-/leopard-web-1.2.2.tgz" - integrity sha512-/YzFi5O+j+qx4+3vVXPA8hpjaN/Z3ozUkv50GN2NgmVb7VXaLqrAs3Yr+zgFRzejJSzJ8Uu8edn6f4GHp6+n7g== +"@picovoice/leopard-web@=2.0.0": + version "2.0.0" + resolved "https://registry.yarnpkg.com/@picovoice/leopard-web/-/leopard-web-2.0.0.tgz#6452d1e49f983542db73a3c8cdf24208c30af234" + integrity sha512-kfJtUCopyvmN028wiaH6hdEHEhOx7QTruiaL5ykVZcpMq1lV/O02hwr29i3U7FqoH6VQQYZl7pRIvlPjDaj+eg== dependencies: "@picovoice/web-utils" "=1.3.1" diff --git a/demo/react/package.json b/demo/react/package.json index 003ac05f..24f89a06 100644 --- a/demo/react/package.json +++ b/demo/react/package.json @@ -1,10 +1,10 @@ { "name": "leopard-react-demo", - "version": "1.0.0", + "version": "2.0.0", "private": true, "description": "Leopard React demo (made with Create React App)", "dependencies": { - "@picovoice/leopard-react": "~1.2.0", + "@picovoice/leopard-react": "file:../../binding/react", "@picovoice/web-voice-processor": "~4.0.8", "@types/node": "^18.11.9", "@types/react": "^18.0.17", diff --git a/demo/react/scripts/run_demo.js b/demo/react/scripts/run_demo.js index 126acc8c..90b9a563 100644 --- a/demo/react/scripts/run_demo.js +++ b/demo/react/scripts/run_demo.js @@ -3,7 +3,7 @@ const fs = require("fs"); const path = require("path"); const testData = require("../../../resources/.test/test_data.json"); -const availableLanguages = testData["tests"]["parameters"].map( +const availableLanguages = testData["tests"]["language_tests"].map( (x) => x["language"] ); diff --git a/demo/react/src/VoiceWidget.tsx b/demo/react/src/VoiceWidget.tsx index 38993e68..5c17c4ee 100644 --- a/demo/react/src/VoiceWidget.tsx +++ b/demo/react/src/VoiceWidget.tsx @@ -26,13 +26,10 @@ export default function VoiceWidget() { } setIsBusy(true); - await init( - accessKeyRef.current, - leopardModel, - { - enableAutomaticPunctuation: true, - } - ); + await init(accessKeyRef.current, leopardModel, { + enableAutomaticPunctuation: true, + enableDiarization: true, + }); setIsBusy(false); }, [init]); @@ -106,7 +103,11 @@ export default function VoiceWidget() { {recordingElapsedSec}s

-

Transcript:

@@ -119,6 +120,7 @@ export default function VoiceWidget() { startSec endSec confidence + speakerTag @@ -128,6 +130,7 @@ export default function VoiceWidget() { {obj.startSec.toFixed(3)} {obj.endSec.toFixed(3)} {obj.confidence.toFixed(3)} + {obj.speakerTag} ))} diff --git a/demo/react/yarn.lock b/demo/react/yarn.lock index cd10dd57..e1a583ef 100644 --- a/demo/react/yarn.lock +++ b/demo/react/yarn.lock @@ -1608,17 +1608,15 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@picovoice/leopard-react@~1.2.0": - version "1.2.0" - resolved "https://registry.yarnpkg.com/@picovoice/leopard-react/-/leopard-react-1.2.0.tgz#2c6319cf0d3d62453a3ea2cf9207f237a799ffcb" - integrity sha512-OPmFGF+XMVoGIl7FAbYWnz8ZJC1KX/0Ly7WFzjBSN5K6XOJ1mh8qf5OUauiyVm52ldsjos2IH9sGr3+59vhZaA== +"@picovoice/leopard-react@file:../../binding/react": + version "2.0.0" dependencies: - "@picovoice/leopard-web" "=1.2.2" + "@picovoice/leopard-web" "=2.0.0" -"@picovoice/leopard-web@=1.2.2": - version "1.2.2" - resolved "https://registry.yarnpkg.com/@picovoice/leopard-web/-/leopard-web-1.2.2.tgz#6759ca4333a28d356b6a9fa4cb7c964bd8928363" - integrity sha512-/YzFi5O+j+qx4+3vVXPA8hpjaN/Z3ozUkv50GN2NgmVb7VXaLqrAs3Yr+zgFRzejJSzJ8Uu8edn6f4GHp6+n7g== +"@picovoice/leopard-web@=2.0.0": + version "2.0.0" + resolved "https://registry.yarnpkg.com/@picovoice/leopard-web/-/leopard-web-2.0.0.tgz#6452d1e49f983542db73a3c8cdf24208c30af234" + integrity sha512-kfJtUCopyvmN028wiaH6hdEHEhOx7QTruiaL5ykVZcpMq1lV/O02hwr29i3U7FqoH6VQQYZl7pRIvlPjDaj+eg== dependencies: "@picovoice/web-utils" "=1.3.1" From e85e537535f0933c291f364f7dffd08eb58c265c Mon Sep 17 00:00:00 2001 From: Albert Ho Date: Mon, 27 Nov 2023 13:40:51 -0800 Subject: [PATCH 2/2] fix tests --- binding/react/cypress/support/commands.ts | 10 +- binding/react/cypress/support/index.ts | 2 +- binding/react/test/use_leopard.test.ts | 302 +++++++++++++++++----- 3 files changed, 247 insertions(+), 67 deletions(-) diff --git a/binding/react/cypress/support/commands.ts b/binding/react/cypress/support/commands.ts index 3d8ae700..0f35bcc0 100644 --- a/binding/react/cypress/support/commands.ts +++ b/binding/react/cypress/support/commands.ts @@ -1,6 +1,8 @@ import { WebVoiceProcessor } from '@picovoice/web-voice-processor'; import { act } from '@testing-library/react-hooks/dom'; +const WAV_HEADER_SIZE = 44; + Cypress.Commands.add('wrapHook', (fn: () => Promise) => cy.wrap(null).then(async () => { await act(async () => { @@ -15,7 +17,7 @@ Cypress.Commands.add('getFileObj', (path: string) => { .then(blob => new File([blob], 'test_audio')); }); -Cypress.Commands.add('mockRecording', (path: string, delayMs = 1000) => { +Cypress.Commands.add('mockRecording', (path: string) => { // @ts-ignore const instance = WebVoiceProcessor.instance(); @@ -26,12 +28,12 @@ Cypress.Commands.add('mockRecording', (path: string, delayMs = 1000) => { cy.fixture(path, 'base64') .then(Cypress.Blob.base64StringToBlob) .then(async blob => { - const data = new Int16Array(await blob.arrayBuffer()); + let data = new Int16Array(await blob.arrayBuffer()); + data = data.slice(WAV_HEADER_SIZE / Int16Array.BYTES_PER_ELEMENT); for (let i = 0; i < data.length; i += 512) { instance.recorderCallback(data.slice(i, i + 512)); } - }) - .wait(delayMs); + }); instance._microphoneStream?.getAudioTracks().forEach((track: any) => { track.enabled = true; diff --git a/binding/react/cypress/support/index.ts b/binding/react/cypress/support/index.ts index 904b6543..7ff62d3e 100644 --- a/binding/react/cypress/support/index.ts +++ b/binding/react/cypress/support/index.ts @@ -5,7 +5,7 @@ declare global { interface Chainable { wrapHook(fn: () => Promise): Chainable; getFileObj(path: string): Chainable; - mockRecording(path: string, delayMs?: number): Chainable; + mockRecording(path: string): Chainable; } } } diff --git a/binding/react/test/use_leopard.test.ts b/binding/react/test/use_leopard.test.ts index 008bafb9..be204df0 100644 --- a/binding/react/test/use_leopard.test.ts +++ b/binding/react/test/use_leopard.test.ts @@ -1,5 +1,5 @@ import { renderHook } from '@testing-library/react-hooks/dom'; - +import { LeopardWord } from '@picovoice/leopard-web'; import { useLeopard } from '../src'; // @ts-ignore @@ -10,6 +10,124 @@ import testData from './test_data.json'; const ACCESS_KEY = Cypress.env('ACCESS_KEY'); +const levenshteinDistance = (words1: string[], words2: string[]) => { + const res = Array.from( + Array(words1.length + 1), + () => new Array(words2.length + 1) + ); + for (let i = 0; i <= words1.length; i++) { + res[i][0] = i; + } + for (let j = 0; j <= words2.length; j++) { + res[0][j] = j; + } + for (let i = 1; i <= words1.length; i++) { + for (let j = 1; j <= words2.length; j++) { + res[i][j] = Math.min( + res[i - 1][j] + 1, + res[i][j - 1] + 1, + res[i - 1][j - 1] + + (words1[i - 1].toUpperCase() === words2[j - 1].toUpperCase() ? 0 : 1) + ); + } + } + return res[words1.length][words2.length]; +}; + +const wordErrorRate = ( + reference: string, + hypothesis: string, + useCER = false +): number => { + const splitter = useCER ? '' : ' '; + const ed = levenshteinDistance( + reference.split(splitter), + hypothesis.split(splitter) + ); + return ed / reference.length; +}; + +const validateMetadata = ( + words: LeopardWord[], + expectedWords: LeopardWord[], + enableDiarization: boolean +) => { + expect(words.length).to.be.eq(expectedWords.length); + for (let i = 0; i < words.length; i += 1) { + expect(words[i].word).to.be.eq(expectedWords[i].word); + expect(words[i].startSec).to.be.closeTo(expectedWords[i].startSec, 0.1); + expect(words[i].endSec).to.be.closeTo(expectedWords[i].endSec, 0.1); + expect(words[i].confidence).to.be.closeTo(expectedWords[i].confidence, 0.1); + if (enableDiarization) { + expect(words[i].speakerTag).to.be.eq(expectedWords[i].speakerTag); + } else { + expect(words[i].speakerTag).to.be.eq(-1); + } + } +}; + +const runProcTest = async ( + file: File, + expectedTranscript: string, + expectedWords: Record[], + expectedErrorRate: number, + params: { + accessKey?: string; + model?: Record; + enableAutomaticPunctuation?: boolean; + enableDiarization?: boolean; + useCER?: boolean; + } = {} +) => { + const { + accessKey = ACCESS_KEY, + model = { publicPath: '/test/leopard_params.pv', forceWrite: true }, + enableAutomaticPunctuation = false, + enableDiarization = false, + useCER = false, + } = params; + const { result } = renderHook(() => useLeopard()); + + cy.wrapHook(() => + result.current.init(accessKey, model, { + enableAutomaticPunctuation: enableAutomaticPunctuation, + enableDiarization: enableDiarization, + }) + ).then(() => { + expect( + result.current.isLoaded, + `Failed to load '${model.publicPath}' with ${result.current.error}` + ).to.be.true; + }); + + cy.wrapHook(() => result.current.processFile(file)).then(() => { + const transcript = result.current.result!.transcript; + expect(transcript).to.eq(expectedTranscript); + + validateMetadata( + result.current.result!.words, + expectedWords.map((w: any) => ({ + word: w.word, + startSec: w.start_sec, + endSec: w.end_sec, + confidence: w.confidence, + speakerTag: w.speaker_tag, + })), + enableDiarization + ); + + const errorRate = wordErrorRate(transcript, expectedTranscript, useCER); + expect(errorRate).to.be.lt(expectedErrorRate); + }); + + cy.wrapHook(result.current.release).then(() => { + expect( + result.current.isLoaded, + `Failed to release leopard with ${result.current.error}` + ).to.be.false; + }); +}; + describe('Leopard binding', () => { it('should be able to init via public path', () => { const { result } = renderHook(() => useLeopard()); @@ -25,13 +143,6 @@ describe('Leopard binding', () => { `Failed to load 'leopard_params.pv' with ${result.current.error}` ).to.be.true; }); - - cy.wrapHook(result.current.release).then(() => { - expect( - result.current.isLoaded, - `Failed to release leopard with ${result.current.error}` - ).to.be.false; - }); }); it('should be able to init via base64', () => { @@ -80,51 +191,130 @@ describe('Leopard binding', () => { }); }); - for (const testInfo of testData.tests.parameters) { - it(`should be able to process audio file (${testInfo.language})`, () => { + for (const testParam of testData.tests.language_tests) { + const suffix = testParam.language === 'en' ? '' : `_${testParam.language}`; + + it(`should be able to process (${testParam.language})`, () => { + cy.getFileObj(`audio_samples/${testParam.audio_file}`).then( + async file => { + await runProcTest( + file, + testParam.transcript, + testParam.words, + testParam.error_rate, + { + model: { + publicPath: `/test/leopard_params${suffix}.pv`, + forceWrite: true, + }, + } + ); + } + ); + }); + + it(`should be able to process with punctuation (${testParam.language})`, () => { + cy.getFileObj(`audio_samples/${testParam.audio_file}`).then( + async file => { + await runProcTest( + file, + testParam.transcript_with_punctuation, + testParam.words, + testParam.error_rate, + { + enableAutomaticPunctuation: true, + model: { + publicPath: `/test/leopard_params${suffix}.pv`, + forceWrite: true, + }, + } + ); + } + ); + }); + + it(`should be able to process with diarization (${testParam.language})`, () => { + cy.getFileObj(`audio_samples/${testParam.audio_file}`).then( + async file => { + await runProcTest( + file, + testParam.transcript, + testParam.words, + testParam.error_rate, + { + enableDiarization: true, + model: { + publicPath: `/test/leopard_params${suffix}.pv`, + forceWrite: true, + }, + } + ); + } + ); + }); + + it(`should be able to process audio recording (${testParam.language})`, () => { const { result } = renderHook(() => useLeopard()); cy.wrapHook(() => - result.current.init( - ACCESS_KEY, - { - publicPath: - testInfo.language === 'en' - ? `/test/leopard_params.pv` - : `/test/leopard_params_${testInfo.language}.pv`, - forceWrite: true, - }, - { - enableAutomaticPunctuation: true, - } - ) + result.current.init(ACCESS_KEY, { + publicPath: + testParam.language === 'en' + ? `/test/leopard_params.pv` + : `/test/leopard_params_${testParam.language}.pv`, + forceWrite: true, + }) ).then(() => { expect( result.current.isLoaded, - `Failed to load ${testInfo.audio_file} (${testInfo.language}) with ${result.current.error}` + `Failed to load ${testParam.audio_file} (${testParam.language}) with ${result.current.error}` ).to.be.true; }); - cy.getFileObj(`audio_samples/${testInfo.audio_file}`).then(file => { - cy.wrapHook(() => result.current.processFile(file)).then(() => { - const transcript = result.current.result?.transcript; - expect(transcript).to.be.eq(testInfo.transcript); - result.current.result?.words.forEach( - ({ word, startSec, endSec, confidence }) => { - const wordRegex = new RegExp(`${word}`, 'i'); - expect(transcript).to.match(wordRegex); - expect(startSec).to.be.gt(0); - expect(endSec).to.be.gt(0); - expect(confidence).to.be.gt(0).and.lt(1); - } + cy.wrapHook(result.current.startRecording).then(() => { + expect(result.current.isRecording).to.be.true; + }); + + cy.mockRecording(`audio_samples/${testParam.audio_file}`).then(() => { + cy.wrapHook(result.current.stopRecording).then(() => { + expect(result.current.isRecording).to.be.false; + + const { + transcript: expectedTranscript, + words: expectedWords, + error_rate: expectedErrorRate, + language, + } = testParam; + const useCER = language === 'ja'; + + const transcript = result.current.result!.transcript; + expect(transcript).to.eq(expectedTranscript); + + validateMetadata( + result.current.result!.words, + expectedWords.map((w: any) => ({ + word: w.word, + startSec: w.start_sec, + endSec: w.end_sec, + confidence: w.confidence, + speakerTag: w.speaker_tag, + })), + false ); + + const errorRate = wordErrorRate( + transcript, + expectedTranscript, + useCER + ); + expect(errorRate).to.be.lt(expectedErrorRate); }); }); }); } - for (const testInfo of testData.tests.parameters) { - it(`should be able to process audio recording (${testInfo.language})`, () => { + for (const testParam of testData.tests.diarization_tests) { + it(`should be able to process diarization multiple speakers (${testParam.language})`, () => { const { result } = renderHook(() => useLeopard()); cy.wrapHook(() => @@ -132,40 +322,28 @@ describe('Leopard binding', () => { ACCESS_KEY, { publicPath: - testInfo.language === 'en' + testParam.language === 'en' ? `/test/leopard_params.pv` - : `/test/leopard_params_${testInfo.language}.pv`, + : `/test/leopard_params_${testParam.language}.pv`, forceWrite: true, }, - { - enableAutomaticPunctuation: true, - } + { enableDiarization: true } ) ).then(() => { expect( result.current.isLoaded, - `Failed to load ${testInfo.audio_file} (${testInfo.language}) with ${result.current.error}` + `Failed to load ${testParam.audio_file} (${testParam.language}) with ${result.current.error}` ).to.be.true; }); - cy.wrapHook(result.current.startRecording).then(() => { - expect(result.current.isRecording).to.be.true; - }); - - cy.mockRecording(`audio_samples/${testInfo.audio_file}`).then(() => { - cy.wrapHook(result.current.stopRecording).then(() => { - const transcript = result.current.result?.transcript; - expect(transcript).to.be.eq(testInfo.transcript); - expect(result.current.isRecording).to.be.false; - result.current.result?.words.forEach( - ({ word, startSec, endSec, confidence }) => { - const wordRegex = new RegExp(`${word}`, 'i'); - expect(transcript).to.match(wordRegex); - expect(startSec).to.be.gt(0); - expect(endSec).to.be.gt(0); - expect(confidence).to.be.gt(0).and.lt(1); - } - ); + cy.getFileObj(`audio_samples/${testParam.audio_file}`).then(file => { + cy.wrapHook(() => result.current.processFile(file)).then(() => { + const words = result.current.result!.words; + expect(words.length).to.eq(testParam.words.length); + for (let i = 0; i < words.length; i++) { + expect(words[i].word).to.eq(testParam.words[i].word); + expect(words[i].speakerTag).to.eq(testParam.words[i].speaker_tag); + } }); }); });