diff --git a/speech/package.json b/speech/package.json index 523fb7323d..913d1cb737 100644 --- a/speech/package.json +++ b/speech/package.json @@ -11,7 +11,7 @@ "dependencies": { "async": "^1.5.2", "google-auto-auth": "^0.2.4", - "google-proto-files": "^0.2.4", + "google-proto-files": "^0.3.0", "googleapis": "^7.1.0", "grpc": "^0.15.0" } diff --git a/speech/recognize.js b/speech/recognize.js index 86d513f481..5aab9d5309 100644 --- a/speech/recognize.js +++ b/speech/recognize.js @@ -20,13 +20,8 @@ var async = require('async'); var fs = require('fs'); // [END import_libraries] -// Url to discovery doc file -// [START discovery_doc] -var url = 'https://speech.googleapis.com/$discovery/rest'; -// [END discovery_doc] - // [START authenticating] -function getSpeechService (callback) { +function getSpeechService (host, callback) { // Acquire credentials google.auth.getApplicationDefault(function (err, authClient) { if (err) { @@ -49,9 +44,16 @@ function getSpeechService (callback) { // Load the speach service using acquired credentials console.log('Loading speech service...'); + + // Url to discovery doc file + // [START discovery_doc] + host = host || 'speech.googleapis.com'; + var url = 'https://' + host + '/$discovery/rest'; + // [END discovery_doc] + google.discoverAPI({ url: url, - version: 'v1', + version: 'v1beta1', auth: authClient }, function (err, speechService) { if (err) { @@ -72,11 +74,11 @@ function prepareRequest (inputFile, callback) { console.log('Got audio file!'); var encoded = new Buffer(audioFile).toString('base64'); var payload = { - initialRequest: { + config: { encoding: 'LINEAR16', sampleRate: 16000 }, - audioRequest: { + audio: { content: encoded } }; @@ -85,7 +87,7 @@ function prepareRequest (inputFile, callback) { } // [END construct_request] -function main (inputFile, callback) { +function main (inputFile, host, callback) { var requestPayload; async.waterfall([ @@ -94,12 +96,12 @@ function main (inputFile, callback) { }, function (payload, cb) { requestPayload = payload; - getSpeechService(cb); + getSpeechService(host, cb); }, // [START send_request] function sendRequest (speechService, authClient, cb) { console.log('Analyzing speech...'); - speechService.speech.recognize({ + speechService.speech.syncrecognize({ auth: authClient, resource: requestPayload }, function (err, result) { @@ -117,11 +119,12 @@ function main (inputFile, callback) { // [START run_application] if (module === require.main) { if (process.argv.length < 3) { - console.log('Usage: node recognize '); + console.log('Usage: node recognize [speech_api_host]'); process.exit(); } var inputFile = process.argv[2]; - main(inputFile, console.log); + var host = process.argv[3]; + main(inputFile, host || 'speech.googleapis.com', console.log); } // [END run_application] // [END app] diff --git a/speech/recognize_streaming.js b/speech/recognize_streaming.js index 9bf67f6350..cf4afd59ad 100644 --- a/speech/recognize_streaming.js +++ b/speech/recognize_streaming.js @@ -19,23 +19,23 @@ var path = require('path'); var grpc = require('grpc'); var googleProtoFiles = require('google-proto-files'); var googleAuth = require('google-auto-auth'); +var Transform = require('stream').Transform; // [START proto] var PROTO_ROOT_DIR = googleProtoFiles('..'); -var PROTO_FILE_PATH = googleProtoFiles('cloud', 'speech', 'v1', 'cloud_speech.proto'); var protoDescriptor = grpc.load({ root: PROTO_ROOT_DIR, - file: path.relative(PROTO_ROOT_DIR, PROTO_FILE_PATH) + file: path.relative(PROTO_ROOT_DIR, googleProtoFiles.speech.v1beta1) }, 'proto', { binaryAsBase64: true, convertFieldsToCamelCase: true }); -var speechProto = protoDescriptor.google.cloud.speech.v1; +var speechProto = protoDescriptor.google.cloud.speech.v1beta1; // [END proto] // [START authenticating] -function getSpeechService (callback) { +function getSpeechService (host, callback) { var googleAuthClient = googleAuth({ scopes: [ 'https://www.googleapis.com/auth/cloud-platform' @@ -53,46 +53,31 @@ function getSpeechService (callback) { ); console.log('Loading speech service...'); - var stub = new speechProto.Speech('speech.googleapis.com', credentials); + var stub = new speechProto.Speech(host, credentials); return callback(null, stub); }); } // [END authenticating] -// [START construct_request] -function getAudioFile (inputFile, callback) { - fs.readFile(inputFile, function (err, audioFile) { - if (err) { - return callback(err); - } - console.log('Got audio file!'); - return callback(null, audioFile); - }); -} -// [END construct_request] - -function main (inputFile, callback) { - var audioFile; - +function main (inputFile, host, callback) { async.waterfall([ function (cb) { - getAudioFile(inputFile, cb); - }, - function (_audioFile, cb) { - audioFile = _audioFile; - getSpeechService(cb); + getSpeechService(host, cb); }, // [START send_request] function sendRequest (speechService, cb) { console.log('Analyzing speech...'); var responses = []; - var call = speechService.recognize(); + var call = speechService.streamingRecognize(); // Listen for various responses call.on('error', cb); call.on('data', function (recognizeResponse) { if (recognizeResponse) { responses.push(recognizeResponse); + if (recognizeResponse.results && recognizeResponse.results.length) { + console.log(JSON.stringify(recognizeResponse.results, null, 2)); + } } }); call.on('end', function () { @@ -100,25 +85,28 @@ function main (inputFile, callback) { }); // Write the initial recognize reqeust - call.write(new speechProto.RecognizeRequest({ - initialRequest: new speechProto.InitialRecognizeRequest({ - encoding: 'LINEAR16', - sampleRate: 16000, + call.write({ + streamingConfig: { + config: { + encoding: 'LINEAR16', + sampleRate: 16000 + }, interimResults: false, - continuous: false, - enableEndpointerEvents: false - }) - })); - - // Write an audio request - call.write(new speechProto.RecognizeRequest({ - audioRequest: new speechProto.AudioRequest({ - content: audioFile - }) - })); + singleUtterance: false + } + }); - // Signal that we're done writing - call.end(); + var toRecognizeRequest = new Transform({ objectMode: true }); + toRecognizeRequest._transform = function (chunk, encoding, done) { + done(null, { + audioContent: chunk + }); + }; + + // Stream the audio to the Speech API + fs.createReadStream(inputFile) + .pipe(toRecognizeRequest) + .pipe(call); } // [END send_request] ], callback); @@ -127,11 +115,12 @@ function main (inputFile, callback) { // [START run_application] if (module === require.main) { if (process.argv.length < 3) { - console.log('Usage: node recognize_streaming '); + console.log('Usage: node recognize_streaming [speech_api_host]'); process.exit(); } var inputFile = process.argv[2]; - main(inputFile, console.log); + var host = process.argv[3]; + main(inputFile, host || 'speech.googleapis.com', console.log); } // [END run_application] diff --git a/test/speech/recognize.test.js b/test/speech/recognize.test.js index 76b10a5057..a1ffaedc09 100644 --- a/test/speech/recognize.test.js +++ b/test/speech/recognize.test.js @@ -17,15 +17,16 @@ var path = require('path'); var recognizeExample = require('../../speech/recognize'); describe('speech:recognize', function () { - it('should list entries', function (done) { + it('should recognize speech in audio', function (done) { recognizeExample.main( path.join(__dirname, '../../speech/resources/audio.raw'), + process.env.SPEECH_API_HOST || 'speech.googleapis.com', function (err, result) { assert(!err); assert(result); - assert(result.responses); - assert(result.responses.length === 1); - assert(result.responses[0].results); + assert(Array.isArray(result.results)); + assert(result.results.length === 1); + assert(result.results[0].alternatives); assert(console.log.calledWith('Got audio file!')); assert(console.log.calledWith('Loading speech service...')); assert(console.log.calledWith('Analyzing speech...')); diff --git a/test/speech/recognize_streaming.test.js b/test/speech/recognize_streaming.test.js index f3dff00f52..0ae242210f 100644 --- a/test/speech/recognize_streaming.test.js +++ b/test/speech/recognize_streaming.test.js @@ -17,15 +17,18 @@ var path = require('path'); var recognizeExample = require('../../speech/recognize_streaming'); describe('speech:recognize_streaming', function () { - it('should list entries', function (done) { + it('should recognize audio', function (done) { recognizeExample.main( path.join(__dirname, '../../speech/resources/audio.raw'), + process.env.SPEECH_API_HOST || 'speech.googleapis.com', function (err, results) { assert(!err); assert(results); - assert(results.length === 1); + assert(results.length === 3); assert(results[0].results); - assert(console.log.calledWith('Got audio file!')); + assert(results[1].results); + assert(results[2].results); + assert(results[2].results.length === 1); assert(console.log.calledWith('Loading speech service...')); assert(console.log.calledWith('Analyzing speech...')); done();