diff --git a/.github/workflows/dotnet-demos.yml b/.github/workflows/dotnet-demos.yml
index 8a3d9b83..cdf6b939 100644
--- a/.github/workflows/dotnet-demos.yml
+++ b/.github/workflows/dotnet-demos.yml
@@ -38,6 +38,15 @@ jobs:
- name: Package restore
run: dotnet restore
+ # *********** REMOVE AFTER RELEASE **********************
+ - name: Pack binding for local ref
+ run: dotnet pack -c Release
+ working-directory: binding/dotnet
+
+ - name: Add binding to demo
+ run: dotnet add package -s ../../../binding/dotnet/Leopard/bin/Release Leopard
+ # ******************************************************
+
- name: Dotnet build micdemo
run: dotnet build -c MicDemo.Release
@@ -60,6 +69,15 @@ jobs:
- name: Package restore
run: dotnet restore
+ # *********** REMOVE AFTER RELEASE **********************
+ - name: Pack binding for local ref
+ run: dotnet pack -c Release
+ working-directory: binding/dotnet
+
+ - name: Add binding to demo
+ run: dotnet add package -s ../../../binding/dotnet/Leopard/bin/Release Leopard
+ # ******************************************************
+
- name: Dotnet build micdemo
run: dotnet build -c MicDemo.Release
diff --git a/binding/dotnet/Leopard/Leopard.cs b/binding/dotnet/Leopard/Leopard.cs
index beaf8629..75d0877d 100644
--- a/binding/dotnet/Leopard/Leopard.cs
+++ b/binding/dotnet/Leopard/Leopard.cs
@@ -10,7 +10,6 @@ specific language governing permissions and limitations under the License.
*/
using System;
-using System.Collections.Generic;
using System.IO;
using System.Reflection;
using System.Runtime.InteropServices;
@@ -42,9 +41,9 @@ public enum PvStatus
public class Leopard : IDisposable
{
private const string LIBRARY = "libpv_leopard";
- private IntPtr _libraryPointer = IntPtr.Zero;
public static readonly string DEFAULT_MODEL_PATH;
+ private IntPtr _libraryPointer;
static Leopard()
{
@@ -62,7 +61,7 @@ static Leopard()
private static IntPtr ImportResolver(string libraryName, Assembly assembly, DllImportSearchPath? searchPath)
{
- IntPtr libHandle = IntPtr.Zero;
+ IntPtr libHandle;
NativeLibrary.TryLoad(Utils.PvLibraryPath(libraryName), out libHandle);
return libHandle;
}
@@ -73,12 +72,10 @@ private static IntPtr ImportResolver(string libraryName, Assembly assembly, DllI
private static extern PvStatus pv_leopard_init(
IntPtr accessKey,
IntPtr modelPath,
- bool enable_automatic_punctuation,
+ bool enableAutomaticPunctuation,
+ bool enableDiarization,
out IntPtr handle);
- [DllImport(LIBRARY, CallingConvention = CallingConvention.Cdecl)]
- private static extern Int32 pv_sample_rate();
-
[DllImport(LIBRARY, CallingConvention = CallingConvention.Cdecl)]
private static extern void pv_leopard_delete(IntPtr handle);
@@ -108,16 +105,29 @@ private static extern PvStatus pv_leopard_process_file(
[DllImport(LIBRARY, CallingConvention = CallingConvention.Cdecl)]
private static extern IntPtr pv_leopard_version();
+ [DllImport(LIBRARY, CallingConvention = CallingConvention.Cdecl)]
+ private static extern Int32 pv_sample_rate();
+
+ [DllImport(LIBRARY, CallingConvention = CallingConvention.Cdecl)]
+ private static extern void pv_set_sdk(string sdk);
+
+ [DllImport(LIBRARY, CallingConvention = CallingConvention.Cdecl)]
+ private static extern PvStatus pv_get_error_stack(out IntPtr messageStack, out int messageStackDepth);
+
+ [DllImport(LIBRARY, CallingConvention = CallingConvention.Cdecl)]
+ private static extern void pv_free_error_stack(IntPtr messageStack);
+
///
/// C Struct for storing word metadata
///
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
private struct CWord
{
- public IntPtr wordPtr;
- public float startSec;
- public float endSec;
- public float confidence;
+ public readonly IntPtr wordPtr;
+ public readonly float startSec;
+ public readonly float endSec;
+ public readonly float confidence;
+ public readonly Int32 speakerTag;
}
///
@@ -131,10 +141,22 @@ private struct CWord
///
/// Set to `true` to enable automatic punctuation insertion.
///
+ ///
+ /// Set to `true` to enable speaker diarization, which allows Leopard to differentiate speakers as
+ /// part of the transcription process. Word metadata will include a `SpeakerTag` to identify unique speakers.
+ ///
/// An instance of Leopard Speech-to-Text engine.
- public static Leopard Create(string accessKey, string modelPath = null, bool enableAutomaticPunctuation = false)
+ public static Leopard Create(
+ string accessKey,
+ string modelPath = null,
+ bool enableAutomaticPunctuation = false,
+ bool enableDiarization = false)
{
- return new Leopard(accessKey, modelPath ?? DEFAULT_MODEL_PATH, enableAutomaticPunctuation);
+ return new Leopard(
+ accessKey,
+ modelPath ?? DEFAULT_MODEL_PATH,
+ enableAutomaticPunctuation,
+ enableDiarization);
}
///
@@ -148,10 +170,15 @@ public static Leopard Create(string accessKey, string modelPath = null, bool ena
///
/// Set to `true` to enable automatic punctuation insertion.
///
+ ///
+ /// Set to `true` to enable speaker diarization, which allows Leopard to differentiate speakers as
+ /// part of the transcription process. Word metadata will include a `SpeakerTag` to identify unique speakers.
+ ///
private Leopard(
string accessKey,
string modelPath,
- bool enableAutomaticPunctuation)
+ bool enableAutomaticPunctuation,
+ bool enableDiarization)
{
if (string.IsNullOrEmpty(accessKey))
{
@@ -166,10 +193,13 @@ private Leopard(
IntPtr accessKeyPtr = Utils.GetPtrFromUtf8String(accessKey);
IntPtr modelPathPtr = Utils.GetPtrFromUtf8String(modelPath);
+ pv_set_sdk("dotnet");
+
PvStatus status = pv_leopard_init(
accessKeyPtr,
modelPathPtr,
enableAutomaticPunctuation,
+ enableDiarization,
out _libraryPointer);
Marshal.FreeHGlobal(accessKeyPtr);
@@ -177,7 +207,8 @@ private Leopard(
if (status != PvStatus.SUCCESS)
{
- throw PvStatusToException(status);
+ string[] messageStack = GetMessageStack();
+ throw PvStatusToException(status, "Leopard init failed", messageStack);
}
Version = Utils.GetUtf8StringFromPtr(pv_leopard_version());
@@ -188,47 +219,54 @@ private Leopard(
/// Processes a given audio data and returns its transcription.
///
///
- /// Audio data. The audio needs to have a sample rate equal to `.SampleRate` and be 16-bit linearly-encoded. This function operates on single-channel audio.
+ /// Audio data. The audio needs to have a sample rate equal to `.SampleRate` and be 16-bit linearly-encoded. This
+ /// function operates on single-channel audio.
///
///
/// LeopardTranscript object which contains the transcription results of the engine.
///
public LeopardTranscript Process(Int16[] pcm)
{
- if (pcm.Length == 0 | pcm == null)
+ if (pcm == null || pcm.Length == 0)
{
throw new LeopardInvalidArgumentException("Input audio frame is empty");
}
- IntPtr transcriptPtr = IntPtr.Zero;
- Int32 numWords = 0;
- IntPtr wordsPtr = IntPtr.Zero;
+ IntPtr transcriptPtr;
+ Int32 numWords;
+ IntPtr wordsPtr;
PvStatus status = pv_leopard_process
(_libraryPointer,
pcm,
- (Int32)pcm.Length,
+ pcm.Length,
out transcriptPtr,
out numWords,
out wordsPtr);
if (status != PvStatus.SUCCESS)
{
- throw PvStatusToException(status, "Leopard failed to process the audio frame.");
+ string[] messageStack = GetMessageStack();
+ throw PvStatusToException(status, "Leopard process failed", messageStack);
}
string transcript = Utils.GetUtf8StringFromPtr(transcriptPtr);
pv_leopard_transcript_delete(transcriptPtr);
- List wordsList = new List();
+
+ LeopardWord[] wordsList = new LeopardWord[numWords];
IntPtr orgWordsPtr = wordsPtr;
for (int i = 0; i < numWords; i++)
{
CWord cword = (CWord)Marshal.PtrToStructure(wordsPtr, typeof(CWord));
- string word = Utils.GetUtf8StringFromPtr(cword.wordPtr);
- wordsList.Add(new LeopardWord(word, cword.confidence, cword.startSec, cword.endSec));
+ wordsList[i] = new LeopardWord(
+ Utils.GetUtf8StringFromPtr(cword.wordPtr),
+ cword.confidence,
+ cword.startSec,
+ cword.endSec,
+ cword.speakerTag);
wordsPtr += Marshal.SizeOf(typeof(CWord));
}
pv_leopard_words_delete(orgWordsPtr);
- return new LeopardTranscript(transcript, wordsList.ToArray());
+ return new LeopardTranscript(transcript, wordsList);
}
///
@@ -243,6 +281,11 @@ public LeopardTranscript Process(Int16[] pcm)
///
public LeopardTranscript ProcessFile(string audioPath)
{
+ if (String.IsNullOrEmpty(audioPath))
+ {
+ throw new LeopardInvalidArgumentException("Audio file path was empty");
+ }
+
if (!File.Exists(audioPath))
{
throw new LeopardIOException($"Couldn't find audio file at '{audioPath}'");
@@ -250,9 +293,9 @@ public LeopardTranscript ProcessFile(string audioPath)
IntPtr audioPathPtr = Utils.GetPtrFromUtf8String(audioPath);
- IntPtr transcriptPtr = IntPtr.Zero;
- Int32 numWords = 0;
- IntPtr wordsPtr = IntPtr.Zero;
+ IntPtr transcriptPtr;
+ Int32 numWords;
+ IntPtr wordsPtr;
PvStatus status = pv_leopard_process_file(
_libraryPointer,
audioPathPtr,
@@ -264,70 +307,86 @@ public LeopardTranscript ProcessFile(string audioPath)
if (status != PvStatus.SUCCESS)
{
- throw PvStatusToException(status, "Leopard failed to process the audio file.");
+ string[] messageStack = GetMessageStack();
+ throw PvStatusToException(status, "Leopard process file failed", messageStack);
}
string transcript = Utils.GetUtf8StringFromPtr(transcriptPtr);
pv_leopard_transcript_delete(transcriptPtr);
+
+ LeopardWord[] wordsList = new LeopardWord[numWords];
IntPtr orgWordsPtr = wordsPtr;
- List wordsList = new List();
for (int i = 0; i < numWords; i++)
{
CWord cword = (CWord)Marshal.PtrToStructure(wordsPtr, typeof(CWord));
- string word = Utils.GetUtf8StringFromPtr(cword.wordPtr);
- wordsList.Add(new LeopardWord(word, cword.confidence, cword.startSec, cword.endSec));
+ wordsList[i] = new LeopardWord(
+ Utils.GetUtf8StringFromPtr(cword.wordPtr),
+ cword.confidence,
+ cword.startSec,
+ cword.endSec,
+ cword.speakerTag);
wordsPtr += Marshal.SizeOf(typeof(CWord));
}
pv_leopard_words_delete(orgWordsPtr);
- return new LeopardTranscript(transcript, wordsList.ToArray());
+ return new LeopardTranscript(transcript, wordsList);
}
///
/// Gets the version number of the Leopard library.
///
/// Version of Leopard
- public string Version { get; private set; }
+ public string Version { get; }
///
/// Get the audio sample rate required by Leopard
///
/// Required sample rate.
- public Int32 SampleRate { get; private set; }
+ public Int32 SampleRate { get; }
///
-        /// Coverts status codes to relevant .NET exceptions
+        /// Converts status codes to relevant .NET exceptions
///
/// Picovoice library status code.
+ /// Default error message.
+ /// Error stack returned from Picovoice library.
/// .NET exception
- private static Exception PvStatusToException(PvStatus status, string message = "")
+ private static Exception PvStatusToException(
+ PvStatus status,
+ string message = "",
+ string[] messageStack = null)
{
+ if (messageStack == null)
+ {
+ messageStack = new string[] { };
+ }
+
switch (status)
{
case PvStatus.OUT_OF_MEMORY:
- return new LeopardMemoryException(message);
+ return new LeopardMemoryException(message, messageStack);
case PvStatus.IO_ERROR:
- return new LeopardIOException(message);
+ return new LeopardIOException(message, messageStack);
case PvStatus.INVALID_ARGUMENT:
- return new LeopardInvalidArgumentException(message);
+ return new LeopardInvalidArgumentException(message, messageStack);
case PvStatus.STOP_ITERATION:
- return new LeopardStopIterationException(message);
+ return new LeopardStopIterationException(message, messageStack);
case PvStatus.KEY_ERROR:
- return new LeopardKeyException(message);
+ return new LeopardKeyException(message, messageStack);
case PvStatus.INVALID_STATE:
- return new LeopardInvalidStateException(message);
+ return new LeopardInvalidStateException(message, messageStack);
case PvStatus.RUNTIME_ERROR:
- return new LeopardRuntimeException(message);
+ return new LeopardRuntimeException(message, messageStack);
case PvStatus.ACTIVATION_ERROR:
- return new LeopardActivationException(message);
+ return new LeopardActivationException(message, messageStack);
case PvStatus.ACTIVATION_LIMIT_REACHED:
- return new LeopardActivationLimitException(message);
+ return new LeopardActivationLimitException(message, messageStack);
case PvStatus.ACTIVATION_THROTTLED:
- return new LeopardActivationThrottledException(message);
+ return new LeopardActivationThrottledException(message, messageStack);
case PvStatus.ACTIVATION_REFUSED:
- return new LeopardActivationRefusedException(message);
+ return new LeopardActivationRefusedException(message, messageStack);
default:
- return new LeopardException("Unmapped error code returned from Leopard.");
+ return new LeopardException("Unmapped error code returned from Leopard.", messageStack);
}
}
@@ -350,5 +409,29 @@ public void Dispose()
{
Dispose();
}
+
+        // Copies the error stack reported by the native library into managed strings.
+        // Returns one string per entry reported by pv_get_error_stack (possibly an empty array).
+        // Throws the exception mapped by PvStatusToException when the stack cannot be retrieved.
+        private string[] GetMessageStack()
+        {
+            Int32 messageStackDepth;
+            IntPtr messageStackRef;
+
+            PvStatus status = pv_get_error_stack(out messageStackRef, out messageStackDepth);
+            if (status != PvStatus.SUCCESS)
+            {
+                throw PvStatusToException(status, "Unable to get Leopard error state");
+            }
+
+            try
+            {
+                // The native stack is read as a contiguous array of string pointers.
+                int elementSize = Marshal.SizeOf(typeof(IntPtr));
+                string[] messageStack = new string[messageStackDepth];
+
+                for (int i = 0; i < messageStackDepth; i++)
+                {
+                    IntPtr messagePtr = Marshal.ReadIntPtr(messageStackRef, i * elementSize);
+
+                    // Decode with the same UTF-8 helper used for the other native strings in this
+                    // class instead of the platform ANSI code page.
+                    // NOTE(review): assumes error messages are UTF-8 like the transcript/version strings - confirm.
+                    messageStack[i] = messagePtr == IntPtr.Zero
+                        ? null
+                        : Utils.GetUtf8StringFromPtr(messagePtr);
+                }
+
+                return messageStack;
+            }
+            finally
+            {
+                // Release the native stack even if decoding one of the entries throws.
+                pv_free_error_stack(messageStackRef);
+            }
+        }
}
}
\ No newline at end of file
diff --git a/binding/dotnet/Leopard/Leopard.csproj b/binding/dotnet/Leopard/Leopard.csproj
index f6158107..b1a6b2f2 100644
--- a/binding/dotnet/Leopard/Leopard.csproj
+++ b/binding/dotnet/Leopard/Leopard.csproj
@@ -2,16 +2,16 @@
net6.0;netcoreapp3.0;netstandard2.0
- 1.2.1
+ 2.0.0
Picovoice
-
+
Leopard Speech-to-Text Engine
Apache-2.0
https://github.com/Picovoice/leopard
https://github.com/Picovoice/leopard.git
git
Speech-to-Text, Speech Recognition, Voice Recognition, ASR
- See https://github.com/Picovoice/leopard/
+ See https://github.com/Picovoice/leopard/
Picovoice 2022-2023
Leopard is an on-device speech-to-text engine. Leopard is:
@@ -24,6 +24,8 @@
true
pv_circle_512.png
+
+
@@ -33,20 +35,15 @@
- build/netcoreapp3.0;
- build/net6.0;
-
- PreserveNewest
-
-
-
- build/netstandard2.0/lib/common/leopard_params.pv;
- build/netcoreapp3.0/lib/common/leopard_params.pv;
- build/net6.0/lib/common/leopard_params.pv;
+ build/netcoreapp3.0/Leopard.targets;
+ build/net6.0/Leopard.targets;
- lib\common\leopard_params.pv
PreserveNewest
+
+
+
+
build/netstandard2.0/libpv_leopard.dll;
@@ -78,8 +75,10 @@
false
+
+
-
+
build/netcoreapp3.0/lib/raspberry-pi;
build/net6.0/lib/raspberry-pi;
@@ -98,6 +97,8 @@
false
+
+
@@ -108,8 +109,22 @@
false
+
+
+
+
+
+ build/netstandard2.0/lib/common/leopard_params.pv;
+ build/netcoreapp3.0/lib/common/leopard_params.pv;
+ build/net6.0/lib/common/leopard_params.pv;
+
+ lib\common\leopard_params.pv
+ PreserveNewest
+
+
+
-
+
diff --git a/binding/dotnet/Leopard/LeopardException.cs b/binding/dotnet/Leopard/LeopardException.cs
index b8e33f13..4d5ccd70 100644
--- a/binding/dotnet/Leopard/LeopardException.cs
+++ b/binding/dotnet/Leopard/LeopardException.cs
@@ -1,5 +1,5 @@
/*
- Copyright 2022 Picovoice Inc.
+ Copyright 2022-2023 Picovoice Inc.
You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
file accompanying this source.
@@ -15,10 +15,35 @@ namespace Pv
{
public class LeopardException : Exception
{
+ private readonly string[] _messageStack;
+
public LeopardException() { }
public LeopardException(string message) : base(message) { }
+ public LeopardException(string message, string[] messageStack) : base(ModifyMessages(message, messageStack))
+ {
+ this._messageStack = messageStack;
+ }
+
+        // Error stack supplied when the exception was created.
+        // Never null: exceptions created without a stack expose an empty array, so callers can
+        // enumerate the result without a null check.
+        public string[] MessageStack
+        {
+            get => _messageStack ?? Array.Empty<string>();
+        }
+
+        // Builds the exception text: the base message followed by one indexed line per stack entry.
+        // A null or empty stack leaves the message unchanged.
+        private static string ModifyMessages(string message, string[] messageStack)
+        {
+            // This is reached from a public constructor, so a null stack must not crash
+            // while the exception itself is being constructed.
+            if (messageStack == null || messageStack.Length == 0)
+            {
+                return message;
+            }
+
+            System.Text.StringBuilder messageBuilder = new System.Text.StringBuilder(message);
+            messageBuilder.Append(":");
+            for (int i = 0; i < messageStack.Length; i++)
+            {
+                messageBuilder.Append($"\n [{i}] {messageStack[i]}");
+            }
+            return messageBuilder.ToString();
+        }
}
public class LeopardMemoryException : LeopardException
@@ -26,6 +51,8 @@ public class LeopardMemoryException : LeopardException
public LeopardMemoryException() { }
public LeopardMemoryException(string message) : base(message) { }
+
+ public LeopardMemoryException(string message, string[] messageStack) : base(message, messageStack) { }
}
public class LeopardIOException : LeopardException
@@ -33,6 +60,8 @@ public class LeopardIOException : LeopardException
public LeopardIOException() { }
public LeopardIOException(string message) : base(message) { }
+
+ public LeopardIOException(string message, string[] messageStack) : base(message, messageStack) { }
}
public class LeopardInvalidArgumentException : LeopardException
@@ -40,6 +69,8 @@ public class LeopardInvalidArgumentException : LeopardException
public LeopardInvalidArgumentException() { }
public LeopardInvalidArgumentException(string message) : base(message) { }
+
+ public LeopardInvalidArgumentException(string message, string[] messageStack) : base(message, messageStack) { }
}
public class LeopardStopIterationException : LeopardException
@@ -47,6 +78,8 @@ public class LeopardStopIterationException : LeopardException
public LeopardStopIterationException() { }
public LeopardStopIterationException(string message) : base(message) { }
+
+ public LeopardStopIterationException(string message, string[] messageStack) : base(message, messageStack) { }
}
public class LeopardKeyException : LeopardException
@@ -54,6 +87,8 @@ public class LeopardKeyException : LeopardException
public LeopardKeyException() { }
public LeopardKeyException(string message) : base(message) { }
+
+ public LeopardKeyException(string message, string[] messageStack) : base(message, messageStack) { }
}
public class LeopardInvalidStateException : LeopardException
@@ -61,6 +96,8 @@ public class LeopardInvalidStateException : LeopardException
public LeopardInvalidStateException() { }
public LeopardInvalidStateException(string message) : base(message) { }
+
+ public LeopardInvalidStateException(string message, string[] messageStack) : base(message, messageStack) { }
}
public class LeopardRuntimeException : LeopardException
@@ -68,6 +105,8 @@ public class LeopardRuntimeException : LeopardException
public LeopardRuntimeException() { }
public LeopardRuntimeException(string message) : base(message) { }
+
+ public LeopardRuntimeException(string message, string[] messageStack) : base(message, messageStack) { }
}
public class LeopardActivationException : LeopardException
@@ -75,6 +114,8 @@ public class LeopardActivationException : LeopardException
public LeopardActivationException() { }
public LeopardActivationException(string message) : base(message) { }
+
+ public LeopardActivationException(string message, string[] messageStack) : base(message, messageStack) { }
}
public class LeopardActivationLimitException : LeopardException
@@ -82,6 +123,8 @@ public class LeopardActivationLimitException : LeopardException
public LeopardActivationLimitException() { }
public LeopardActivationLimitException(string message) : base(message) { }
+
+ public LeopardActivationLimitException(string message, string[] messageStack) : base(message, messageStack) { }
}
public class LeopardActivationThrottledException : LeopardException
@@ -89,6 +132,8 @@ public class LeopardActivationThrottledException : LeopardException
public LeopardActivationThrottledException() { }
public LeopardActivationThrottledException(string message) : base(message) { }
+
+ public LeopardActivationThrottledException(string message, string[] messageStack) : base(message, messageStack) { }
}
public class LeopardActivationRefusedException : LeopardException
@@ -96,6 +141,8 @@ public class LeopardActivationRefusedException : LeopardException
public LeopardActivationRefusedException() { }
public LeopardActivationRefusedException(string message) : base(message) { }
+
+ public LeopardActivationRefusedException(string message, string[] messageStack) : base(message, messageStack) { }
}
}
\ No newline at end of file
diff --git a/binding/dotnet/Leopard/LeopardTranscript.cs b/binding/dotnet/Leopard/LeopardTranscript.cs
index f67623e9..b2d2155a 100644
--- a/binding/dotnet/Leopard/LeopardTranscript.cs
+++ b/binding/dotnet/Leopard/LeopardTranscript.cs
@@ -1,5 +1,5 @@
/*
- Copyright 2022 Picovoice Inc.
+ Copyright 2022-2023 Picovoice Inc.
You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
file accompanying this source.
@@ -9,6 +9,8 @@ file accompanying this source.
specific language governing permissions and limitations under the License.
*/
+using System;
+
namespace Pv
{
///
@@ -19,18 +21,26 @@ public class LeopardWord
///
/// Constructor.
///
- /// Transcribed word.
- /// Transcription confidence. It is a number within [0, 1].
- /// Start of word in seconds.
- /// End of word in seconds.
- ///
- public LeopardWord(string word, float confidence, float startSec, float endSec)
+ /// Transcribed word.
+ /// Transcription confidence. It is a number within [0, 1].
+ /// Start of word in seconds.
+ /// End of word in seconds.
+ ///
+ /// The speaker tag is `-1` if diarization is not enabled during initialization; otherwise,
+ /// it's a non-negative integer identifying unique speakers, with `0` reserved for unknown speakers.
+ ///
+ public LeopardWord(
+ string word,
+ float confidence,
+ float startSec,
+ float endSec,
+ Int32 speakerTag)
{
Word = word;
Confidence = confidence;
-
StartSec = startSec;
EndSec = endSec;
+ SpeakerTag = speakerTag;
}
///
@@ -52,6 +62,11 @@ public LeopardWord(string word, float confidence, float startSec, float endSec)
/// Getter for endSec.
///
public float EndSec { get; }
+
+ ///
+ /// Getter for speakerTag.
+ ///
+ public Int32 SpeakerTag { get; }
}
///
@@ -63,7 +78,7 @@ public class LeopardTranscript
/// Constructor.
///
///
- /// transcript String transcript returned from Leopard.
+ /// Transcript returned from Leopard.
///
///
/// Transcribed words and their associated metadata.
diff --git a/binding/dotnet/LeopardTest/MainTest.cs b/binding/dotnet/LeopardTest/MainTest.cs
index bc39852e..a5b1551f 100644
--- a/binding/dotnet/LeopardTest/MainTest.cs
+++ b/binding/dotnet/LeopardTest/MainTest.cs
@@ -13,6 +13,7 @@ specific language governing permissions and limitations under the License.
using System.Collections.Generic;
using System.IO;
using System.Reflection;
+using System.Runtime.InteropServices;
using Fastenshtein;
@@ -36,110 +37,170 @@ public static void ClassInitialize(TestContext _)
_accessKey = Environment.GetEnvironmentVariable("ACCESS_KEY");
}
- private static List GetPcmFromFile(string audioFilePath, int expectedSampleRate)
+ [Serializable]
+ private class TestJson
{
- List data = new List();
- using (BinaryReader reader = new BinaryReader(File.Open(audioFilePath, FileMode.Open)))
- {
- reader.ReadBytes(24); // skip over part of the header
- Assert.AreEqual(reader.ReadInt32(), expectedSampleRate, "Specified sample rate did not match test file.");
- reader.ReadBytes(16); // skip over the rest of the header
-
- while (reader.BaseStream.Position != reader.BaseStream.Length)
- {
- data.Add(reader.ReadInt16());
- }
- }
-
- return data;
+ public LanguageTestJson[] language_tests { get; set; }
+ public DiarizationTestJson[] diarization_tests { get; set; }
}
-
- private static JObject LoadJsonTestData()
+ [Serializable]
+ private class LanguageTestJson
{
- string content = File.ReadAllText(Path.Combine(ROOT_DIR, "resources/.test/test_data.json"));
- return JObject.Parse(content);
+ public string language { get; set; }
+ public string audio_file { get; set; }
+ public string transcript { get; set; }
+
+ public string transcript_with_punctuation { get; set; }
+ public float error_rate { get; set; }
+ public WordJson[] words { get; set; }
}
[Serializable]
- private class TestParameterJson
+ private class DiarizationTestJson
{
public string language { get; set; }
public string audio_file { get; set; }
- public string transcript { get; set; }
+ public WordJson[] words { get; set; }
+ }
- public string[] punctuations { get; set; }
+ [Serializable]
+ private class WordJson
+ {
+ public string word { get; set; }
+ public float start_sec { get; set; }
+ public float end_sec { get; set; }
+ public float confidence { get; set; }
+ public Int32 speaker_tag { get; set; }
+ }
- public float error_rate { get; set; }
+ private static TestJson LoadJsonTestData()
+ {
+ string content = File.ReadAllText(Path.Combine(ROOT_DIR, "resources/.test/test_data.json"));
+ return JObject.Parse(content)["tests"].ToObject();
}
- public static IEnumerable
/// Required argument. Absolute path to input audio file.
- /// AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).
- /// Absolute path to the file containing model parameters. If not set it will be set to the default location.
+ ///
+ /// AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).
+ ///
+ ///
+ /// Absolute path to the file containing model parameters.
+ /// If not set it will be set to the default location.
///
/// Set to `true` to enable automatic punctuation insertion.
///
+ ///
+ /// Set to `true` to enable speaker diarization, which allows Leopard to differentiate speakers as
+ /// part of the transcription process. Word metadata will include a `SpeakerTag` to identify unique speakers.
+ ///
///
/// Enable verbose logging.
///
- ///
public static void RunDemo(
string accessKey,
string inputAudioPath,
string modelPath,
bool enableAutomaticPunctuation,
- bool verbose
- )
+ bool enableDiarization,
+ bool verbose)
{
// init Leopard speech-to-text engine
using (Leopard leopard = Leopard.Create(
accessKey: accessKey,
modelPath: modelPath,
- enableAutomaticPunctuation: enableAutomaticPunctuation))
+ enableAutomaticPunctuation: enableAutomaticPunctuation,
+ enableDiarization: enableDiarization))
{
try
@@ -56,11 +65,25 @@ bool verbose
Console.WriteLine(result.TranscriptString);
if (verbose)
{
- Console.WriteLine(String.Format("\n|{0,-15}|{1,-10:0.00}|{2,-10:0.00}|{3,-10:0.00}|\n", "word", "Confidence", "StartSec", "EndSec"));
+ Console.WriteLine(
+ string.Format(
+ "\n|{0,-15}|{1,11:0.00}|{2,10:0.00}|{3,10:0.00}|{4,11}|\n",
+ "Word",
+ "Confidence",
+ "StartSec",
+ "EndSec",
+ "SpeakerTag"));
for (int i = 0; i < result.WordArray.Length; i++)
{
LeopardWord word = result.WordArray[i];
- Console.WriteLine(String.Format("|{0,-15}|{1,10:0.00}|{2,10:0.00}|{3,10:0.00}|", word.Word, word.Confidence, word.StartSec, word.EndSec));
+ Console.WriteLine(
+ string.Format(
+ "|{0,-15}|{1,11:0.00}|{2,10:0.00}|{3,10:0.00}|{4,11}|",
+ word.Word,
+ word.Confidence,
+ word.StartSec,
+ word.EndSec,
+ word.SpeakerTag));
}
}
}
@@ -85,7 +108,8 @@ public static void Main(string[] args)
string accessKey = null;
string modelPath = null;
bool enableAutomaticPunctuation = true;
- bool verbose = true;
+ bool enableDiarization = true;
+ bool verbose = false;
bool showHelp = false;
// parse command line arguments
@@ -118,6 +142,11 @@ public static void Main(string[] args)
enableAutomaticPunctuation = false;
argIndex++;
}
+ else if (args[argIndex] == "--disable_speaker_diarization")
+ {
+ enableDiarization = false;
+ argIndex++;
+ }
else if (args[argIndex] == "--verbose")
{
verbose = true;
@@ -149,7 +178,9 @@ public static void Main(string[] args)
}
if (!File.Exists(inputAudioPath))
{
- throw new ArgumentException($"Audio file at path {inputAudioPath} does not exist", "--input_audio_path");
+ throw new ArgumentException(
+ $"Audio file at path {inputAudioPath} does not exist",
+ "input_audio_path");
}
RunDemo(
@@ -157,6 +188,7 @@ public static void Main(string[] args)
inputAudioPath,
modelPath,
enableAutomaticPunctuation,
+ enableDiarization,
verbose);
}
@@ -172,6 +204,7 @@ private static void OnUnhandledException(object sender, UnhandledExceptionEventA
"\t--access_key (required): AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)\n" +
"\t--model_path: Absolute path to the file containing model parameters.\n" +
"\t--disable_automatic_punctuation: Disable automatic punctuation.\n" +
- "\t--verbose: Enable verbose logging";
+ "\t--disable_speaker_diarization: Disable speaker diarization.\n" +
+ "\t--verbose: Enable verbose output. Prints Leopard word metadata.";
}
}
\ No newline at end of file
diff --git a/demo/dotnet/LeopardDemo/MicDemo.cs b/demo/dotnet/LeopardDemo/MicDemo.cs
index 4e1df02e..6fb481d0 100644
--- a/demo/dotnet/LeopardDemo/MicDemo.cs
+++ b/demo/dotnet/LeopardDemo/MicDemo.cs
@@ -19,38 +19,49 @@ specific language governing permissions and limitations under the License.
namespace LeopardDemo
{
///
- /// Microphone Demo for Leopard Speech-to-Text engine. It creates an input audio stream from a microphone.
+ /// Microphone Demo for Leopard Speech-to-Text engine.
+ /// It creates an input audio stream from a microphone and processes it with Leopard.
///
public class MicDemo
{
-
private static readonly int PV_RECORDER_FRAME_LENGTH = 2048;
///
/// Creates an input audio stream and instantiates an instance of Leopard object.
///
- /// AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).
- /// Absolute path to the file containing model parameters. If not set it will be set to the default location.
+ ///
+ /// AccessKey obtained from Picovoice Console (https://console.picovoice.ai/).
+ ///
+ ///
+ /// Absolute path to the file containing model parameters.
+ /// If not set it will be set to the default location.
+ ///
///
/// Set to `true` to enable automatic punctuation insertion.
///
- ///
- /// Enable verbose logging.
+ ///
+ /// Set to `true` to enable speaker diarization, which allows Leopard to differentiate speakers as
+ /// part of the transcription process. Word metadata will include a `SpeakerTag` to identify unique speakers.
+ ///
+ /// Enable verbose logging.
+ ///
+ /// Optional argument. If provided, audio is recorded from this input device.
+ /// Otherwise, the default audio input device is used.
///
- /// Optional argument. If provided, audio is recorded from this input device. Otherwise, the default audio input device is used.
- public static void RunDemo(
+ private static void RunDemo(
string accessKey,
string modelPath,
bool enableAutomaticPunctuation,
+ bool enableDiarization,
bool verbose,
int audioDeviceIndex)
{
using (Leopard leopard = Leopard.Create(
accessKey: accessKey,
modelPath: modelPath,
- enableAutomaticPunctuation: enableAutomaticPunctuation))
+ enableAutomaticPunctuation: enableAutomaticPunctuation,
+ enableDiarization: enableDiarization))
{
-
using (PvRecorder recorder = PvRecorder.Create(PV_RECORDER_FRAME_LENGTH, audioDeviceIndex))
{
Console.WriteLine($"Using device: {recorder.SelectedDevice}");
@@ -98,11 +109,25 @@ public static void RunDemo(
Console.WriteLine(result.TranscriptString);
if (verbose)
{
- Console.WriteLine(string.Format("\n|{0,-15}|{1,-10:0.00}|{2,-10:0.00}|{3,-10:0.00}|\n", "word", "Confidence", "StartSec", "EndSec"));
+ Console.WriteLine(
+ string.Format(
+ "\n|{0,-15}|{1,11:0.00}|{2,10:0.00}|{3,10:0.00}|{4,11}|\n",
+ "Word",
+ "Confidence",
+ "StartSec",
+ "EndSec",
+ "SpeakerTag"));
for (int i = 0; i < result.WordArray.Length; i++)
{
LeopardWord word = result.WordArray[i];
- Console.WriteLine(string.Format("|{0,-15}|{1,10:0.00}|{2,10:0.00}|{3,10:0.00}|", word.Word, word.Confidence, word.StartSec, word.EndSec));
+ Console.WriteLine(
+ string.Format(
+ "|{0,-15}|{1,11:0.00}|{2,10:0.00}|{3,10:0.00}|{4,11}|",
+ word.Word,
+ word.Confidence,
+ word.StartSec,
+ word.EndSec,
+ word.SpeakerTag));
}
Console.WriteLine();
}
@@ -119,7 +144,7 @@ public static void RunDemo(
///
/// Lists available audio input devices.
///
- public static void ShowAudioDevices()
+ private static void ShowAudioDevices()
{
string[] devices = PvRecorder.GetAvailableDevices();
for (int i = 0; i < devices.Length; i++)
@@ -141,7 +166,8 @@ public static void Main(string[] args)
string accessKey = null;
string modelPath = null;
bool enableAutomaticPunctuation = true;
- bool verbose = true;
+ bool enableDiarization = true;
+ bool verbose = false;
int audioDeviceIndex = -1;
bool showAudioDevices = false;
bool showHelp = false;
@@ -169,6 +195,11 @@ public static void Main(string[] args)
enableAutomaticPunctuation = false;
argIndex++;
}
+ else if (args[argIndex] == "--disable_speaker_diarization")
+ {
+ enableDiarization = false;
+ argIndex++;
+ }
else if (args[argIndex] == "--verbose")
{
verbose = true;
@@ -219,6 +250,7 @@ public static void Main(string[] args)
accessKey,
modelPath,
enableAutomaticPunctuation,
+ enableDiarization,
verbose,
audioDeviceIndex);
}
@@ -236,6 +268,7 @@ private static void OnUnhandledException(object sender, UnhandledExceptionEventA
"\t--audio_device_index: Index of input audio device.\n" +
"\t--show_audio_devices: Print available recording devices.\n" +
"\t--disable_automatic_punctuation: Disable automatic punctuation.\n" +
- "\t--verbose: Enable verbose logging";
+ "\t--disable_speaker_diarization: Disable speaker diarization.\n" +
+ "\t--verbose: Enable verbose output. Prints Leopard word metadata.";
}
}
\ No newline at end of file