Speech GA – auto_punctuation & model selection (#179)
* Speech-GA: add speech_transcribe_auto_punctuation, speech_transcribe_enhanced_model, and speech_transcribe_model_selection samples

* Speech-GA: fix review issues.
nirupa-kumar authored and Ace Nassri committed Nov 17, 2022
1 parent d6d43d9 commit 877ef00
Showing 4 changed files with 330 additions and 329 deletions.
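The three new samples differ from the existing sync sample only in the RecognitionConfig they build: model selection sets model, auto punctuation sets enableAutomaticPunctuation, and the enhanced-model sample sets useEnhanced together with model: 'phone_call'. As an orientation aid before the full diff, here is a minimal sketch of the model-selection request, condensed with async/await; the function name, file path, and parameter values are illustrative placeholders, not part of the commit.

// Sketch only: condenses the speech_transcribe_model_selection sample in the
// diff below, assuming the same v1p1beta1 client and a local audio file.
const speech = require('@google-cloud/speech').v1p1beta1;
const fs = require('fs');

async function transcribeWithModel(filename, model) {
  const client = new speech.SpeechClient();

  const request = {
    config: {
      encoding: 'LINEAR16', // placeholder; match your audio file
      sampleRateHertz: 16000, // placeholder
      languageCode: 'en-US',
      model: model, // e.g. 'phone_call', 'video', 'default'
    },
    audio: {content: fs.readFileSync(filename).toString('base64')},
  };

  // recognize() resolves to an array whose first element is the response
  const [response] = await client.recognize(request);
  const transcription = response.results
    .map(result => result.alternatives[0].transcript)
    .join('\n');
  console.log(`Transcription: ${transcription}`);
}

// Hypothetical invocation:
// transcribeWithModel('./resources/Google_Gnome.wav', 'video').catch(console.error);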
290 changes: 289 additions & 1 deletion speech/recognize.js
@@ -453,7 +453,237 @@ function streamingMicRecognize(encoding, sampleRateHertz, languageCode) {
// [END speech_transcribe_streaming_mic]
}

require(`yargs`)
function syncRecognizeModelSelection(
  filename,
  model,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_model_selection]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;
  const fs = require('fs');

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
  // const model = 'Model to use, e.g. phone_call, video, default';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const config = {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
    model: model,
  };
  const audio = {
    content: fs.readFileSync(filename).toString('base64'),
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(data => {
      const response = data[0];
      const transcription = response.results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
      console.log(`Transcription: `, transcription);
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_model_selection]
}

function syncRecognizeModelSelectionGCS(
  gcsUri,
  model,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_model_selection_gcs]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const gcsUri = 'gs://my-bucket/audio.raw';
  // const model = 'Model to use, e.g. phone_call, video, default';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const config = {
    encoding: encoding,
    sampleRateHertz: sampleRateHertz,
    languageCode: languageCode,
    model: model,
  };
  const audio = {
    uri: gcsUri,
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(data => {
      const response = data[0];
      const transcription = response.results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
      console.log(`Transcription: `, transcription);
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_model_selection_gcs]
}

function syncRecognizeWithAutoPunctuation(
  filename,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_auto_punctuation]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;
  const fs = require('fs');

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const config = {
    encoding: encoding,
    languageCode: languageCode,
    enableAutomaticPunctuation: true,
  };
  const audio = {
    content: fs.readFileSync(filename).toString('base64'),
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(data => {
      const response = data[0];
      const transcription = response.results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
      console.log(`Transcription: `, transcription);
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_auto_punctuation]
}

function syncRecognizeWithEnhancedModel(
  filename,
  encoding,
  sampleRateHertz,
  languageCode
) {
  // [START speech_transcribe_enhanced_model]
  // Imports the Google Cloud client library for Beta API
  /**
   * TODO(developer): Update client library import to use new
   * version of API when desired features become available
   */
  const speech = require('@google-cloud/speech').v1p1beta1;
  const fs = require('fs');

  // Creates a client
  const client = new speech.SpeechClient();

  /**
   * TODO(developer): Uncomment the following lines before running the sample.
   */
  // const filename = 'Local path to audio file, e.g. /path/to/audio.raw';
  // const encoding = 'Encoding of the audio file, e.g. LINEAR16';
  // const sampleRateHertz = 16000;
  // const languageCode = 'BCP-47 language code, e.g. en-US';

  const config = {
    encoding: encoding,
    languageCode: languageCode,
    useEnhanced: true,
    model: 'phone_call',
  };
  const audio = {
    content: fs.readFileSync(filename).toString('base64'),
  };

  const request = {
    config: config,
    audio: audio,
  };

  // Detects speech in the audio file
  client
    .recognize(request)
    .then(data => {
      const response = data[0];
      response.results.forEach(result => {
        const alternative = result.alternatives[0];
        console.log(alternative.transcript);
      });
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
  // [END speech_transcribe_enhanced_model]
}

require(`yargs`) // eslint-disable-line
  .demand(1)
  .command(
    `sync <filename>`,
@@ -550,6 +780,56 @@ require(`yargs`)
        opts.languageCode
      )
  )
  .command(
    `sync-model <filename> <model>`,
    `Detects speech in a local audio file using provided model.`,
    {},
    opts =>
      syncRecognizeModelSelection(
        opts.filename,
        opts.model,
        opts.encoding,
        opts.sampleRateHertz,
        opts.languageCode
      )
  )
  .command(
    `sync-model-gcs <gcsUri> <model>`,
    `Detects speech in an audio file located in a Google Cloud Storage bucket using provided model.`,
    {},
    opts =>
      syncRecognizeModelSelectionGCS(
        opts.gcsUri,
        opts.model,
        opts.encoding,
        opts.sampleRateHertz,
        opts.languageCode
      )
  )
  .command(
    `sync-auto-punctuation <filename>`,
    `Detects speech in a local audio file with auto punctuation.`,
    {},
    opts =>
      syncRecognizeWithAutoPunctuation(
        opts.filename,
        opts.encoding,
        opts.sampleRateHertz,
        opts.languageCode
      )
  )
  .command(
    `sync-enhanced-model <filename>`,
    `Detects speech in a local audio file using an enhanced model.`,
    {},
    opts =>
      syncRecognizeWithEnhancedModel(
        opts.filename,
        opts.encoding,
        opts.sampleRateHertz,
        opts.languageCode
      )
  )
  .options({
    encoding: {
      alias: 'e',
@@ -577,6 +857,14 @@ require(`yargs`)
  .example(`node $0 async-gcs gs://gcs-test-data/vr.flac -e FLAC -r 16000`)
  .example(`node $0 stream ./resources/audio.raw -e LINEAR16 -r 16000`)
  .example(`node $0 listen`)
  .example(
    `node $0 sync-model ./resources/Google_Gnome.wav video -e LINEAR16 -r 16000`
  )
  .example(
    `node $0 sync-model-gcs gs://gcs-test-data/Google_Gnome.wav phone_call -e LINEAR16 -r 16000`
  )
  .example(`node $0 sync-auto-punctuation ./resources/commercial_mono.wav`)
  .example(`node $0 sync-enhanced-model ./resources/commercial_mono.wav`)
  .wrap(120)
  .recommendCommands()
  .epilogue(`For more information, see https://cloud.google.com/speech/docs`)
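For completeness, the other two features added in this commit, automatic punctuation and the enhanced phone_call model, are plain fields on the same RecognitionConfig. Below is a minimal sketch that combines them in one request; combining the two is only an illustration of how the fields compose, not something the commit's samples themselves do, and the file path is a placeholder.

// Sketch only: shows the auto-punctuation and enhanced-model config fields
// from the samples above in a single request, using the same v1p1beta1 client.
const speech = require('@google-cloud/speech').v1p1beta1;
const fs = require('fs');

async function transcribeEnhancedWithPunctuation(filename) {
  const client = new speech.SpeechClient();

  const [response] = await client.recognize({
    config: {
      encoding: 'LINEAR16', // placeholder; match your audio file
      languageCode: 'en-US',
      enableAutomaticPunctuation: true, // speech_transcribe_auto_punctuation
      useEnhanced: true, // speech_transcribe_enhanced_model
      model: 'phone_call', // enhanced variant of the phone_call model
    },
    audio: {content: fs.readFileSync(filename).toString('base64')},
  });

  // Print each result's top alternative, mirroring the enhanced-model sample
  response.results.forEach(result => {
    console.log(result.alternatives[0].transcript);
  });
}

// Hypothetical usage:
// transcribeEnhancedWithPunctuation('./resources/commercial_mono.wav').catch(console.error);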