Speech GA – auto_punctuation & model selection (#179)
* Speech-GA: add speech_transcribe_auto_punctuation, speech_transcribe_enhanced_model, and speech_transcribe_model_selection samples

* Speech-GA: fix review issues.
nirupa-kumar authored and Ace Nassri committed Nov 17, 2022
1 parent d6d43d9 commit 877ef00
Showing 4 changed files with 330 additions and 329 deletions.
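The three new samples differ from the existing sync sample only in the RecognitionConfig they build: model selection sets model, auto punctuation sets enableAutomaticPunctuation, and the enhanced-model sample sets useEnhanced together with model: 'phone_call'. As an orientation aid before the full diff, here is a minimal sketch of the model-selection request, condensed with async/await; the function name, file path, and parameter values are illustrative placeholders, not part of the commit.

// Sketch only: condenses the speech_transcribe_model_selection sample in the
// diff below, assuming the same v1p1beta1 client and a local audio file.
const speech = require('@google-cloud/speech').v1p1beta1;
const fs = require('fs');

async function transcribeWithModel(filename, model) {
  const client = new speech.SpeechClient();

  const request = {
    config: {
      encoding: 'LINEAR16', // placeholder; match your audio file
      sampleRateHertz: 16000, // placeholder
      languageCode: 'en-US',
      model: model, // e.g. 'phone_call', 'video', 'default'
    },
    audio: {content: fs.readFileSync(filename).toString('base64')},
  };

  // recognize() resolves to an array whose first element is the response
  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log(`Transcription: ${transcription}`);
}

// Hypothetical invocation:
// transcribeWithModel('./resources/Google_Gnome.wav', 'video').catch(console.error);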
290 changes: 289 additions & 1 deletion speech/recognize.js
@@ -453,7 +453,237 @@ function streamingMicRecognize(encoding, sampleRateHertz, languageCode) {
// [END speech_transcribe_streaming_mic]
}

require(`yargs`)
function syncRecognizeModelSelection(
  filename,
  model,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_model_selection]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;
  const fs = require('fs');

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
  // const model = 'Model to use, e.g. phone_call, video, default';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const config = {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
    model: model,
  };
  const audio = {
    content: fs.readFileSync(filename).toString('base64'),
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(data => {
      const response = data[0];
      const transcription = response.results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
      console.log(`Transcription: `, transcription);
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_model_selection]
}

function syncRecognizeModelSelectionGCS(
  gcsUri,
  model,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_model_selection_gcs]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const gcsUri = 'gs://my-bucket/audio.raw';
  // const model = 'Model to use, e.g. phone_call, video, default';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const config = {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
    model: model,
  };
  const audio = {
    uri: gcsUri,
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(data => {
      const response = data[0];
      const transcription = response.results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
      console.log(`Transcription: `, transcription);
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_model_selection_gcs]
}

function syncRecognizeWithAutoPunctuation(
  filename,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_auto_punctuation]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;
  const fs = require('fs');

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const config = {
    encoding: encoding,
    languageCode: languageCode,
    enableAutomaticPunctuation: true,
  };
  const audio = {
    content: fs.readFileSync(filename).toString('base64'),
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(data => {
      const response = data[0];
      const transcription = response.results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
      console.log(`Transcription: `, transcription);
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_auto_punctuation]
}

function syncRecognizeWithEnhancedModel(
  filename,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_enhanced_model]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;
  const fs = require('fs');

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const config = {
    encoding: encoding,
    languageCode: languageCode,
    useEnhanced: true,
    model: 'phone_call',
  };
  const audio = {
    content: fs.readFileSync(filename).toString('base64'),
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(data => {
      const response = data[0];
      response.results.forEach(result => {
        const alternative = result.alternatives[0];
        console.log(alternative.transcript);
      });
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_enhanced_model]
}

require(`yargs`) // eslint-disable-line
  .demand(1)
  .command(
    `sync <filename>`,
@@ -550,6 +780,56 @@ require(`yargs`)
        opts.languageCode
      )
  )
  .command(
    `sync-model <filename> <model>`,
    `Detects speech in a local audio file using provided model.`,
    {},
    opts =>
      syncRecognizeModelSelection(
        opts.filename,
        opts.model,
        opts.encoding,
        opts.sampleRateHertz,
        opts.languageCode
      )
  )
  .command(
    `sync-model-gcs <gcsUri> <model>`,
    `Detects speech in an audio file located in a Google Cloud Storage bucket using provided model.`,
    {},
    opts =>
      syncRecognizeModelSelectionGCS(
        opts.gcsUri,
        opts.model,
        opts.encoding,
        opts.sampleRateHertz,
        opts.languageCode
      )
  )
  .command(
    `sync-auto-punctuation <filename>`,
    `Detects speech in a local audio file with auto punctuation.`,
    {},
    opts =>
      syncRecognizeWithAutoPunctuation(
        opts.filename,
        opts.encoding,
        opts.sampleRateHertz,
        opts.languageCode
      )
  )
  .command(
    `sync-enhanced-model <filename>`,
    `Detects speech in a local audio file using an enhanced model.`,
    {},
    opts =>
      syncRecognizeWithEnhancedModel(
        opts.filename,
        opts.encoding,
        opts.sampleRateHertz,
        opts.languageCode
      )
  )
  .options({
    encoding: {
      alias: 'e',
@@ -577,6 +857,14 @@ require(`yargs`)
  .example(`node $0 async-gcs gs://gcs-test-data/vr.flac -e FLAC -r 16000`)
  .example(`node $0 stream ./resources/audio.raw -e LINEAR16 -r 16000`)
  .example(`node $0 listen`)
  .example(
    `node $0 sync-model ./resources/Google_Gnome.wav video -e LINEAR16 -r 16000`
  )
  .example(
    `node $0 sync-model-gcs gs://gcs-test-data/Google_Gnome.wav phone_call -e LINEAR16 -r 16000`
  )
  .example(`node $0 sync-auto-punctuation ./resources/commercial_mono.wav`)
  .example(`node $0 sync-enhanced-model ./resources/commercial_mono.wav`)
  .wrap(120)
  .recommendCommands()
  .epilogue(`For more information, see https://cloud.google.com/speech/docs`)
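For completeness, the other two features added in this commit, automatic punctuation and the enhanced phone_call model, are plain fields on the same RecognitionConfig. Below is a minimal sketch that combines them in one request; combining the two is only an illustration of how the fields compose, not something the commit's samples themselves do, and the file path is a placeholder.

// Sketch only: shows the auto-punctuation and enhanced-model config fields
// from the samples above in a single request, using the same v1p1beta1 client.
const speech = require('@google-cloud/speech').v1p1beta1;
const fs = require('fs');

async function transcribeEnhancedWithPunctuation(filename) {
  const client = new speech.SpeechClient();

  const [response] = await client.recognize({
    config: {
      encoding: 'LINEAR16', // placeholder; match your audio file
      languageCode: 'en-US',
      enableAutomaticPunctuation: true, // speech_transcribe_auto_punctuation
      useEnhanced: true, // speech_transcribe_enhanced_model
      model: 'phone_call', // enhanced variant of the phone_call model
    },
    audio: {content: fs.readFileSync(filename).toString('base64')},
  });

  // Print each result's top alternative, mirroring the enhanced-model sample
  response.results.forEach(result => {
    console.log(result.alternatives[0].transcript);
  });
}

// Hypothetical usage:
// transcribeEnhancedWithPunctuation('./resources/commercial_mono.wav').catch(console.error);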