From 80094dfaafcf4917748124c276d9b68c8d667606 Mon Sep 17 00:00:00 2001
From: Albert Ho <albert@picovoice.ai>
Date: Fri, 24 Nov 2023 17:29:56 -0800
Subject: [PATCH 1/2] update versions

---
 .github/workflows/react-codestyle.yml |  8 ++++++++
 .github/workflows/react-demos.yml     |  8 ++++++++
 .github/workflows/react.yml           |  4 ++++
 binding/react/package.json            |  4 ++--
 binding/react/yarn.lock               |  8 ++++----
 demo/react/package.json               |  4 ++--
 demo/react/scripts/run_demo.js        |  2 +-
 demo/react/src/VoiceWidget.tsx        | 19 +++++++++++--------
 demo/react/yarn.lock                  | 16 +++++++---------
 9 files changed, 47 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/react-codestyle.yml b/.github/workflows/react-codestyle.yml
index e877f1d9..96ff5d91 100644
--- a/.github/workflows/react-codestyle.yml
+++ b/.github/workflows/react-codestyle.yml
@@ -34,6 +34,14 @@ jobs:
       - name: Pre-build dependencies
         run: npm install yarn
 
+      - name: Build Local Web SDK
+        run: yarn && yarn copywasm && yarn build
+        working-directory: binding/web
+
+      - name: Build Local React SDK
+        run: yarn && yarn build
+        working-directory: binding/react
+
       - name: Run Binding Linter
         run: yarn && yarn lint
         working-directory: binding/react
diff --git a/.github/workflows/react-demos.yml b/.github/workflows/react-demos.yml
index 6dfc22b7..770f5fd3 100644
--- a/.github/workflows/react-demos.yml
+++ b/.github/workflows/react-demos.yml
@@ -38,6 +38,14 @@ jobs:
       - name: Pre-build dependencies
         run: npm install yarn
 
+      - name: Build Local Web SDK
+        run: yarn && yarn copywasm && yarn build
+        working-directory: binding/web
+
+      - name: Build Local React SDK
+        run: yarn && yarn build
+        working-directory: binding/react
+
       - name: Install dependencies
         run: yarn install
 
diff --git a/.github/workflows/react.yml b/.github/workflows/react.yml
index 1eed6171..61c95f59 100644
--- a/.github/workflows/react.yml
+++ b/.github/workflows/react.yml
@@ -46,6 +46,10 @@ jobs:
       - name: Pre-build dependencies
         run: npm install yarn
 
+      - name: Build Local Web SDK
+        run: yarn && yarn copywasm && yarn build
+        working-directory: binding/web
+
       - name: Install dependencies
         run: yarn install
 
diff --git a/binding/react/package.json b/binding/react/package.json
index 9665b657..67a1205c 100644
--- a/binding/react/package.json
+++ b/binding/react/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@picovoice/leopard-react",
-  "version": "1.2.0",
+  "version": "2.0.0",
   "description": "React hook for Leopard Web SDK",
   "entry": "src/index.ts",
   "module": "dist/esm/index.js",
@@ -35,7 +35,7 @@
     "test": "cypress run --component"
   },
   "dependencies": {
-    "@picovoice/leopard-web": "=1.2.2"
+    "@picovoice/leopard-web": "=2.0.0"
   },
   "devDependencies": {
     "@babel/core": "^7.21.3",
diff --git a/binding/react/yarn.lock b/binding/react/yarn.lock
index 5dd16d7f..18f87aa0 100644
--- a/binding/react/yarn.lock
+++ b/binding/react/yarn.lock
@@ -1110,10 +1110,10 @@
     "@nodelib/fs.scandir" "2.1.5"
     fastq "^1.6.0"
 
-"@picovoice/leopard-web@=1.2.2":
-  version "1.2.2"
-  resolved "https://registry.npmjs.org/@picovoice/leopard-web/-/leopard-web-1.2.2.tgz"
-  integrity sha512-/YzFi5O+j+qx4+3vVXPA8hpjaN/Z3ozUkv50GN2NgmVb7VXaLqrAs3Yr+zgFRzejJSzJ8Uu8edn6f4GHp6+n7g==
+"@picovoice/leopard-web@=2.0.0":
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/@picovoice/leopard-web/-/leopard-web-2.0.0.tgz#6452d1e49f983542db73a3c8cdf24208c30af234"
+  integrity sha512-kfJtUCopyvmN028wiaH6hdEHEhOx7QTruiaL5ykVZcpMq1lV/O02hwr29i3U7FqoH6VQQYZl7pRIvlPjDaj+eg==
   dependencies:
     "@picovoice/web-utils" "=1.3.1"
 
diff --git a/demo/react/package.json b/demo/react/package.json
index 003ac05f..24f89a06 100644
--- a/demo/react/package.json
+++ b/demo/react/package.json
@@ -1,10 +1,10 @@
 {
   "name": "leopard-react-demo",
-  "version": "1.0.0",
+  "version": "2.0.0",
   "private": true,
   "description": "Leopard React demo (made with Create React App)",
   "dependencies": {
-    "@picovoice/leopard-react": "~1.2.0",
+    "@picovoice/leopard-react": "file:../../binding/react",
     "@picovoice/web-voice-processor": "~4.0.8",
     "@types/node": "^18.11.9",
     "@types/react": "^18.0.17",
diff --git a/demo/react/scripts/run_demo.js b/demo/react/scripts/run_demo.js
index 126acc8c..90b9a563 100644
--- a/demo/react/scripts/run_demo.js
+++ b/demo/react/scripts/run_demo.js
@@ -3,7 +3,7 @@ const fs = require("fs");
 const path = require("path");
 const testData = require("../../../resources/.test/test_data.json");
 
-const availableLanguages = testData["tests"]["parameters"].map(
+const availableLanguages = testData["tests"]["language_tests"].map(
   (x) => x["language"]
 );
 
diff --git a/demo/react/src/VoiceWidget.tsx b/demo/react/src/VoiceWidget.tsx
index 38993e68..5c17c4ee 100644
--- a/demo/react/src/VoiceWidget.tsx
+++ b/demo/react/src/VoiceWidget.tsx
@@ -26,13 +26,10 @@ export default function VoiceWidget() {
     }
 
     setIsBusy(true);
-    await init(
-      accessKeyRef.current,
-      leopardModel,
-      {
-        enableAutomaticPunctuation: true,
-      }
-    );
+    await init(accessKeyRef.current, leopardModel, {
+      enableAutomaticPunctuation: true,
+      enableDiarization: true,
+    });
     setIsBusy(false);
   }, [init]);
 
@@ -106,7 +103,11 @@ export default function VoiceWidget() {
       <span>{recordingElapsedSec}s</span>
       <br />
       <br />
-      <button id="record-audio" onClick={toggleRecord} disabled={!isLoaded || isBusy}>
+      <button
+        id="record-audio"
+        onClick={toggleRecord}
+        disabled={!isLoaded || isBusy}
+      >
         {isRecording ? "Stop Recording" : "Start Recording"}
       </button>
       <h3>Transcript:</h3>
@@ -119,6 +120,7 @@ export default function VoiceWidget() {
               <th>startSec</th>
               <th>endSec</th>
               <th>confidence</th>
+              <th>speakerTag</th>
             </tr>
           </thead>
           <tbody>
@@ -128,6 +130,7 @@ export default function VoiceWidget() {
                 <td>{obj.startSec.toFixed(3)}</td>
                 <td>{obj.endSec.toFixed(3)}</td>
                 <td>{obj.confidence.toFixed(3)}</td>
+                <td>{obj.speakerTag}</td>
               </tr>
             ))}
           </tbody>
diff --git a/demo/react/yarn.lock b/demo/react/yarn.lock
index cd10dd57..e1a583ef 100644
--- a/demo/react/yarn.lock
+++ b/demo/react/yarn.lock
@@ -1608,17 +1608,15 @@
     "@nodelib/fs.scandir" "2.1.5"
     fastq "^1.6.0"
 
-"@picovoice/leopard-react@~1.2.0":
-  version "1.2.0"
-  resolved "https://registry.yarnpkg.com/@picovoice/leopard-react/-/leopard-react-1.2.0.tgz#2c6319cf0d3d62453a3ea2cf9207f237a799ffcb"
-  integrity sha512-OPmFGF+XMVoGIl7FAbYWnz8ZJC1KX/0Ly7WFzjBSN5K6XOJ1mh8qf5OUauiyVm52ldsjos2IH9sGr3+59vhZaA==
+"@picovoice/leopard-react@file:../../binding/react":
+  version "2.0.0"
   dependencies:
-    "@picovoice/leopard-web" "=1.2.2"
+    "@picovoice/leopard-web" "=2.0.0"
 
-"@picovoice/leopard-web@=1.2.2":
-  version "1.2.2"
-  resolved "https://registry.yarnpkg.com/@picovoice/leopard-web/-/leopard-web-1.2.2.tgz#6759ca4333a28d356b6a9fa4cb7c964bd8928363"
-  integrity sha512-/YzFi5O+j+qx4+3vVXPA8hpjaN/Z3ozUkv50GN2NgmVb7VXaLqrAs3Yr+zgFRzejJSzJ8Uu8edn6f4GHp6+n7g==
+"@picovoice/leopard-web@=2.0.0":
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/@picovoice/leopard-web/-/leopard-web-2.0.0.tgz#6452d1e49f983542db73a3c8cdf24208c30af234"
+  integrity sha512-kfJtUCopyvmN028wiaH6hdEHEhOx7QTruiaL5ykVZcpMq1lV/O02hwr29i3U7FqoH6VQQYZl7pRIvlPjDaj+eg==
   dependencies:
     "@picovoice/web-utils" "=1.3.1"
 

From e85e537535f0933c291f364f7dffd08eb58c265c Mon Sep 17 00:00:00 2001
From: Albert Ho <albert@picovoice.ai>
Date: Mon, 27 Nov 2023 13:40:51 -0800
Subject: [PATCH 2/2] fix tests

---
 binding/react/cypress/support/commands.ts |  10 +-
 binding/react/cypress/support/index.ts    |   2 +-
 binding/react/test/use_leopard.test.ts    | 302 +++++++++++++++++-----
 3 files changed, 247 insertions(+), 67 deletions(-)

diff --git a/binding/react/cypress/support/commands.ts b/binding/react/cypress/support/commands.ts
index 3d8ae700..0f35bcc0 100644
--- a/binding/react/cypress/support/commands.ts
+++ b/binding/react/cypress/support/commands.ts
@@ -1,6 +1,8 @@
 import { WebVoiceProcessor } from '@picovoice/web-voice-processor';
 import { act } from '@testing-library/react-hooks/dom';
 
+const WAV_HEADER_SIZE = 44;
+
 Cypress.Commands.add('wrapHook', (fn: () => Promise<void>) =>
   cy.wrap(null).then(async () => {
     await act(async () => {
@@ -15,7 +17,7 @@ Cypress.Commands.add('getFileObj', (path: string) => {
     .then(blob => new File([blob], 'test_audio'));
 });
 
-Cypress.Commands.add('mockRecording', (path: string, delayMs = 1000) => {
+Cypress.Commands.add('mockRecording', (path: string) => {
   // @ts-ignore
   const instance = WebVoiceProcessor.instance();
 
@@ -26,12 +28,12 @@ Cypress.Commands.add('mockRecording', (path: string, delayMs = 1000) => {
   cy.fixture(path, 'base64')
     .then(Cypress.Blob.base64StringToBlob)
     .then(async blob => {
-      const data = new Int16Array(await blob.arrayBuffer());
+      let data = new Int16Array(await blob.arrayBuffer());
+      data = data.slice(WAV_HEADER_SIZE / Int16Array.BYTES_PER_ELEMENT);
       for (let i = 0; i < data.length; i += 512) {
         instance.recorderCallback(data.slice(i, i + 512));
       }
-    })
-    .wait(delayMs);
+    });
 
   instance._microphoneStream?.getAudioTracks().forEach((track: any) => {
     track.enabled = true;
diff --git a/binding/react/cypress/support/index.ts b/binding/react/cypress/support/index.ts
index 904b6543..7ff62d3e 100644
--- a/binding/react/cypress/support/index.ts
+++ b/binding/react/cypress/support/index.ts
@@ -5,7 +5,7 @@ declare global {
     interface Chainable {
       wrapHook(fn: () => Promise<any>): Chainable<void>;
       getFileObj(path: string): Chainable<File>;
-      mockRecording(path: string, delayMs?: number): Chainable<void>;
+      mockRecording(path: string): Chainable<void>;
     }
   }
 }
diff --git a/binding/react/test/use_leopard.test.ts b/binding/react/test/use_leopard.test.ts
index 008bafb9..be204df0 100644
--- a/binding/react/test/use_leopard.test.ts
+++ b/binding/react/test/use_leopard.test.ts
@@ -1,5 +1,5 @@
 import { renderHook } from '@testing-library/react-hooks/dom';
-
+import { LeopardWord } from '@picovoice/leopard-web';
 import { useLeopard } from '../src';
 
 // @ts-ignore
@@ -10,6 +10,124 @@ import testData from './test_data.json';
 
 const ACCESS_KEY = Cypress.env('ACCESS_KEY');
 
+// Case-insensitive edit distance between two token lists (unit costs).
+const levenshteinDistance = (words1: string[], words2: string[]) => {
+  const [rows, cols] = [words1.length, words2.length];
+  // dist[r][c]: cost of turning the first r tokens into the first c tokens.
+  const dist = Array.from({ length: rows + 1 }, (_, r) => {
+    const row = new Array(cols + 1).fill(0);
+    row[0] = r;
+    return row;
+  });
+  dist[0] = dist[0].map((_, c) => c);
+  for (let r = 1; r <= rows; r++) {
+    const source = words1[r - 1].toUpperCase();
+    for (let c = 1; c <= cols; c++) {
+      const substitution = source === words2[c - 1].toUpperCase() ? 0 : 1;
+      dist[r][c] = Math.min(
+        dist[r - 1][c] + 1,
+        dist[r][c - 1] + 1,
+        dist[r - 1][c - 1] + substitution
+      );
+    }
+  }
+  return dist[rows][cols];
+};
+
+const wordErrorRate = (
+  reference: string,
+  hypothesis: string,
+  useCER = false
+): number => {
+  // Tokenize into words, or into characters when a CER is requested.
+  const splitter = useCER ? '' : ' ';
+  const refTokens = reference.split(splitter);
+  const ed = levenshteinDistance(refTokens, hypothesis.split(splitter));
+  // Normalize by the reference token count, not the string's char length.
+  return ed / refTokens.length;
+};
+
+const validateMetadata = (
+  words: LeopardWord[],
+  expectedWords: LeopardWord[],
+  enableDiarization: boolean
+) => {
+  // Word-by-word comparison; timings and confidence get a 0.1 tolerance.
+  expect(words.length).to.be.eq(expectedWords.length);
+  words.forEach((actual, idx) => {
+    const expected = expectedWords[idx];
+    // With diarization off, every word's speakerTag is asserted to be -1.
+    const expectedTag = enableDiarization ? expected.speakerTag : -1;
+    expect(actual.word).to.be.eq(expected.word);
+    expect(actual.startSec).to.be.closeTo(expected.startSec, 0.1);
+    expect(actual.endSec).to.be.closeTo(expected.endSec, 0.1);
+    expect(actual.confidence).to.be.closeTo(expected.confidence, 0.1);
+    expect(actual.speakerTag).to.be.eq(expectedTag);
+  });
+};
+
+const runProcTest = async (
+  file: File,
+  expectedTranscript: string,
+  expectedWords: Record<string, string | number>[],
+  expectedErrorRate: number,
+  params: {
+    accessKey?: string;
+    model?: Record<string, string | boolean>;
+    enableAutomaticPunctuation?: boolean;
+    enableDiarization?: boolean;
+    useCER?: boolean;
+  } = {}
+) => {
+  const {
+    accessKey = ACCESS_KEY,
+    model = { publicPath: '/test/leopard_params.pv', forceWrite: true },
+    enableAutomaticPunctuation = false,
+    enableDiarization = false,
+    useCER = false,
+  } = params;
+  const { result } = renderHook(() => useLeopard());
+  // Three chained steps follow: init the hook, process the file, release.
+  cy.wrapHook(() =>
+    result.current.init(accessKey, model, {
+      enableAutomaticPunctuation,
+      enableDiarization,
+    })
+  ).then(() => {
+    expect(
+      result.current.isLoaded,
+      `Failed to load '${model.publicPath}' with ${result.current.error}`
+    ).to.be.true;
+  });
+  // Transcribe the file, then check transcript, word metadata, error rate.
+  cy.wrapHook(() => result.current.processFile(file)).then(() => {
+    const transcript = result.current.result!.transcript;
+    expect(transcript).to.eq(expectedTranscript);
+
+    validateMetadata(
+      result.current.result!.words,
+      expectedWords.map((w: any) => ({
+        word: w.word,
+        startSec: w.start_sec,
+        endSec: w.end_sec,
+        confidence: w.confidence,
+        speakerTag: w.speaker_tag,
+      })),
+      enableDiarization
+    );
+    // wordErrorRate(reference, hypothesis): expected text is the reference.
+    const errorRate = wordErrorRate(expectedTranscript, transcript, useCER);
+    expect(errorRate).to.be.lt(expectedErrorRate);
+  });
+  // Release the engine and check that the hook no longer reports isLoaded.
+  cy.wrapHook(result.current.release).then(() => {
+    expect(
+      result.current.isLoaded,
+      `Failed to release leopard with ${result.current.error}`
+    ).to.be.false;
+  });
+};
+
 describe('Leopard binding', () => {
   it('should be able to init via public path', () => {
     const { result } = renderHook(() => useLeopard());
@@ -25,13 +143,6 @@ describe('Leopard binding', () => {
         `Failed to load 'leopard_params.pv' with ${result.current.error}`
       ).to.be.true;
     });
-
-    cy.wrapHook(result.current.release).then(() => {
-      expect(
-        result.current.isLoaded,
-        `Failed to release leopard with ${result.current.error}`
-      ).to.be.false;
-    });
   });
 
   it('should be able to init via base64', () => {
@@ -80,51 +191,130 @@ describe('Leopard binding', () => {
     });
   });
 
-  for (const testInfo of testData.tests.parameters) {
-    it(`should be able to process audio file (${testInfo.language})`, () => {
+  for (const testParam of testData.tests.language_tests) {
+    const suffix = testParam.language === 'en' ? '' : `_${testParam.language}`;
+
+    it(`should be able to process (${testParam.language})`, () => {
+      cy.getFileObj(`audio_samples/${testParam.audio_file}`).then(
+        async file => {
+          await runProcTest(
+            file,
+            testParam.transcript,
+            testParam.words,
+            testParam.error_rate,
+            {
+              model: {
+                publicPath: `/test/leopard_params${suffix}.pv`,
+                forceWrite: true,
+              },
+            }
+          );
+        }
+      );
+    });
+
+    it(`should be able to process with punctuation (${testParam.language})`, () => {
+      cy.getFileObj(`audio_samples/${testParam.audio_file}`).then(
+        async file => {
+          await runProcTest(
+            file,
+            testParam.transcript_with_punctuation,
+            testParam.words,
+            testParam.error_rate,
+            {
+              enableAutomaticPunctuation: true,
+              model: {
+                publicPath: `/test/leopard_params${suffix}.pv`,
+                forceWrite: true,
+              },
+            }
+          );
+        }
+      );
+    });
+
+    it(`should be able to process with diarization (${testParam.language})`, () => {
+      cy.getFileObj(`audio_samples/${testParam.audio_file}`).then(
+        async file => {
+          await runProcTest(
+            file,
+            testParam.transcript,
+            testParam.words,
+            testParam.error_rate,
+            {
+              enableDiarization: true,
+              model: {
+                publicPath: `/test/leopard_params${suffix}.pv`,
+                forceWrite: true,
+              },
+            }
+          );
+        }
+      );
+    });
+
+    it(`should be able to process audio recording (${testParam.language})`, () => {
       const { result } = renderHook(() => useLeopard());
 
       cy.wrapHook(() =>
-        result.current.init(
-          ACCESS_KEY,
-          {
-            publicPath:
-              testInfo.language === 'en'
-                ? `/test/leopard_params.pv`
-                : `/test/leopard_params_${testInfo.language}.pv`,
-            forceWrite: true,
-          },
-          {
-            enableAutomaticPunctuation: true,
-          }
-        )
+        result.current.init(ACCESS_KEY, {
+          publicPath:
+            testParam.language === 'en'
+              ? `/test/leopard_params.pv`
+              : `/test/leopard_params_${testParam.language}.pv`,
+          forceWrite: true,
+        })
       ).then(() => {
         expect(
           result.current.isLoaded,
-          `Failed to load ${testInfo.audio_file} (${testInfo.language}) with ${result.current.error}`
+          `Failed to load ${testParam.audio_file} (${testParam.language}) with ${result.current.error}`
         ).to.be.true;
       });
 
-      cy.getFileObj(`audio_samples/${testInfo.audio_file}`).then(file => {
-        cy.wrapHook(() => result.current.processFile(file)).then(() => {
-          const transcript = result.current.result?.transcript;
-          expect(transcript).to.be.eq(testInfo.transcript);
-          result.current.result?.words.forEach(
-            ({ word, startSec, endSec, confidence }) => {
-              const wordRegex = new RegExp(`${word}`, 'i');
-              expect(transcript).to.match(wordRegex);
-              expect(startSec).to.be.gt(0);
-              expect(endSec).to.be.gt(0);
-              expect(confidence).to.be.gt(0).and.lt(1);
-            }
+      cy.wrapHook(result.current.startRecording).then(() => {
+        expect(result.current.isRecording).to.be.true;
+      });
+
+      cy.mockRecording(`audio_samples/${testParam.audio_file}`).then(() => {
+        cy.wrapHook(result.current.stopRecording).then(() => {
+          expect(result.current.isRecording).to.be.false;
+
+          const {
+            transcript: expectedTranscript,
+            words: expectedWords,
+            error_rate: expectedErrorRate,
+            language,
+          } = testParam;
+          const useCER = language === 'ja';
+
+          const transcript = result.current.result!.transcript;
+          expect(transcript).to.eq(expectedTranscript);
+
+          validateMetadata(
+            result.current.result!.words,
+            expectedWords.map((w: any) => ({
+              word: w.word,
+              startSec: w.start_sec,
+              endSec: w.end_sec,
+              confidence: w.confidence,
+              speakerTag: w.speaker_tag,
+            })),
+            false
           );
+
+          // wordErrorRate(reference, hypothesis): the expected transcript is
+          // the reference, the engine output is the hypothesis being scored.
+          const reference = expectedTranscript;
+          const hypothesis = transcript;
+          const errorRate = wordErrorRate(reference, hypothesis, useCER);
+          expect(errorRate).to.be.lt(expectedErrorRate);
         });
       });
     });
   }
 
-  for (const testInfo of testData.tests.parameters) {
-    it(`should be able to process audio recording (${testInfo.language})`, () => {
+  for (const testParam of testData.tests.diarization_tests) {
+    it(`should be able to process diarization multiple speakers (${testParam.language})`, () => {
       const { result } = renderHook(() => useLeopard());
 
       cy.wrapHook(() =>
@@ -132,40 +322,28 @@ describe('Leopard binding', () => {
           ACCESS_KEY,
           {
             publicPath:
-              testInfo.language === 'en'
+              testParam.language === 'en'
                 ? `/test/leopard_params.pv`
-                : `/test/leopard_params_${testInfo.language}.pv`,
+                : `/test/leopard_params_${testParam.language}.pv`,
             forceWrite: true,
           },
-          {
-            enableAutomaticPunctuation: true,
-          }
+          { enableDiarization: true }
         )
       ).then(() => {
         expect(
           result.current.isLoaded,
-          `Failed to load ${testInfo.audio_file} (${testInfo.language}) with ${result.current.error}`
+          `Failed to load ${testParam.audio_file} (${testParam.language}) with ${result.current.error}`
         ).to.be.true;
       });
 
-      cy.wrapHook(result.current.startRecording).then(() => {
-        expect(result.current.isRecording).to.be.true;
-      });
-
-      cy.mockRecording(`audio_samples/${testInfo.audio_file}`).then(() => {
-        cy.wrapHook(result.current.stopRecording).then(() => {
-          const transcript = result.current.result?.transcript;
-          expect(transcript).to.be.eq(testInfo.transcript);
-          expect(result.current.isRecording).to.be.false;
-          result.current.result?.words.forEach(
-            ({ word, startSec, endSec, confidence }) => {
-              const wordRegex = new RegExp(`${word}`, 'i');
-              expect(transcript).to.match(wordRegex);
-              expect(startSec).to.be.gt(0);
-              expect(endSec).to.be.gt(0);
-              expect(confidence).to.be.gt(0).and.lt(1);
-            }
-          );
+      cy.getFileObj(`audio_samples/${testParam.audio_file}`).then(file => {
+        cy.wrapHook(() => result.current.processFile(file)).then(() => {
+          const words = result.current.result!.words;
+          expect(words.length).to.eq(testParam.words.length);
+          for (let i = 0; i < words.length; i++) {
+            expect(words[i].word).to.eq(testParam.words[i].word);
+            expect(words[i].speakerTag).to.eq(testParam.words[i].speaker_tag);
+          }
         });
       });
     });