diff --git a/packages/google-cloud-speech/README.md b/packages/google-cloud-speech/README.md
new file mode 100644
index 00000000000..61626e2fd8a
--- /dev/null
+++ b/packages/google-cloud-speech/README.md
@@ -0,0 +1,116 @@
+# @google-cloud/speech
+> Google Cloud Speech Client Library for Node.js
+
+> **This is a Beta release of Google Cloud Speech.** This feature is not covered by any SLA or deprecation policy and may be subject to backward-incompatible changes.
+
+*Looking for more Google APIs than just Speech? You might want to check out [`google-cloud`][google-cloud].*
+
+- [API Documentation][gcloud-speech-docs]
+- [Official Documentation][cloud-speech-docs]
+
+
+```sh
+$ npm install --save @google-cloud/speech
+```
+```js
+var fs = require('fs');
+
+var speech = require('@google-cloud/speech')({
+  projectId: 'grape-spaceship-123',
+  keyFilename: '/path/to/keyfile.json'
+});
+
+// Detect the speech in an audio file.
+speech.recognize('./audio.raw', {
+  encoding: 'LINEAR16',
+  sampleRate: 16000
+}, function(err, transcript) {
+  // transcript = 'how old is the Brooklyn Bridge'
+});
+
+// Detect the speech in an audio file stream.
+fs.createReadStream('./audio.raw')
+  .on('error', console.error)
+  .pipe(speech.createRecognizeStream({
+    config: {
+      encoding: 'LINEAR16',
+      sampleRate: 16000
+    },
+    singleUtterance: false,
+    interimResults: false
+  }))
+  .on('error', console.error)
+  .on('data', function(data) {
+    // The first "data" event emitted might look like:
+    // data = {
+    //   endpointerType: Speech.endpointerTypes.START_OF_SPEECH,
+    //   ...
+    // }
+    //
+    // A later "data" event emitted might look like:
+    // data = {
+    //   endpointerType: Speech.endpointerTypes.END_OF_AUDIO,
+    //   ...
+    // }
+    //
+    // A final "data" event emitted might look like:
+    // data = {
+    //   endpointerType: Speech.endpointerTypes.ENDPOINTER_EVENT_UNSPECIFIED,
+    //   results: "how old is the Brooklyn Bridge",
+    //   ...
+    // }
+  });
+```
+
+
+## Authentication
+
+It's incredibly easy to get authenticated and start using Google's APIs. You can set your credentials on a global basis as well as on a per-API basis. See each individual API section below to see how you can authenticate on a per-API basis. This is useful if you want to use different accounts for different Google Cloud services.
+
+### On Google Compute Engine
+
+If you are running this client on Google Compute Engine, we handle authentication for you with no configuration. You just need to make sure that when you [set up the GCE instance][gce-how-to], you add the correct scopes for the APIs you want to access.
+
+``` js
+// Authenticating on a global basis.
+var projectId = process.env.GCLOUD_PROJECT; // E.g. 'grape-spaceship-123'
+
+var speech = require('@google-cloud/speech')({
+  projectId: projectId
+});
+
+// ...you're good to go!
+```
+
+### Elsewhere
+
+If you are not running this client on Google Compute Engine, you need a Google Developers service account. To create a service account:
+
+1. Visit the [Google Developers Console][dev-console].
+2. Create a new project or click on an existing project.
+3. Navigate to **APIs & auth** > **APIs section** and turn on the following APIs (you may need to enable billing in order to use these services):
+  * Google Cloud Speech API
+4. Navigate to **APIs & auth** > **Credentials** and then:
+  * If you want to use a new service account, click on **Create new Client ID** and select **Service account**. After the account is created, you will be prompted to download the JSON key file that the library uses to authenticate your requests.
+  * If you want to generate a new key for an existing service account, click on **Generate new JSON key** and download the JSON key file.
+
+``` js
+var projectId = process.env.GCLOUD_PROJECT; // E.g. 'grape-spaceship-123'
+
+var speech = require('@google-cloud/speech')({
+  projectId: projectId,
+
+  // The path to your key file:
+  keyFilename: '/path/to/keyfile.json',
+
+  // Or the contents of the key file:
+  credentials: require('./path/to/keyfile.json')
+});
+
+// ...you're good to go!
+```
+
+
+[google-cloud]: https://github.com/GoogleCloudPlatform/google-cloud-node/
+[gce-how-to]: https://cloud.google.com/compute/docs/authentication#using
+[dev-console]: https://console.developers.google.com/project
+[gcloud-speech-docs]: https://googlecloudplatform.github.io/google-cloud-node/#/docs/speech
+[cloud-speech-docs]: https://cloud.google.com/speech
diff --git a/packages/google-cloud-speech/package.json b/packages/google-cloud-speech/package.json
new file mode 100644
index 00000000000..443028489db
--- /dev/null
+++ b/packages/google-cloud-speech/package.json
@@ -0,0 +1,89 @@
+{
+  "name": "@google-cloud/speech",
+  "version": "0.1.0",
+  "author": "Google Inc.",
+  "description": "Google Cloud Speech Client Library for Node.js",
+  "contributors": [
+    {
+      "name": "Burcu Dogan",
+      "email": "jbd@google.com"
+    },
+    {
+      "name": "Jason Dobry",
+      "email": "jason.dobry@gmail.com"
+    },
+    {
+      "name": "Johan Euphrosine",
+      "email": "proppy@google.com"
+    },
+    {
+      "name": "Patrick Costello",
+      "email": "pcostell@google.com"
+    },
+    {
+      "name": "Ryan Seys",
+      "email": "ryan@ryanseys.com"
+    },
+    {
+      "name": "Silvano Luciani",
+      "email": "silvano@google.com"
+    },
+    {
+      "name": "Stephen Sawchuk",
+      "email": "sawchuk@gmail.com"
+    }
+  ],
+  "main": "./src/index.js",
+  "files": [
+    "./src/*",
+    "AUTHORS",
+    "CONTRIBUTORS",
+    "COPYING"
+  ],
+  "repository": "googlecloudplatform/google-cloud-node",
+  "keywords": [
+    "google apis client",
+    "google api client",
+    "google apis",
+    "google api",
+    "google",
+    "google cloud platform",
+    "google cloud",
+    "cloud",
+    "google speech",
+    "speech"
+  ],
+  "dependencies": {
+    "@google-cloud/common": "^0.5.0",
+    "events-intercept": "^2.0.0",
+    "extend": "^3.0.0",
+    "google-proto-files": "^0.7.0",
+    "is": "^3.1.0",
+    "modelo": "^4.2.0",
+    "propprop": "^0.3.1",
+    "pumpify": "^1.3.5",
+    "request": "^2.74.0",
+    "stream-events": "^1.0.1",
+    "string-format-obj": "^1.1.0",
+    "through2": "^2.0.1"
+  },
+  "devDependencies": {
+    "@google-cloud/storage": "*",
+    "async": "^2.0.1",
+    "methmeth": "^1.1.0",
+    "mocha": "^3.0.2",
+    "node-uuid": "^1.4.7",
+    "proxyquire": "^1.7.10",
+    "sinon": "^1.17.5",
+    "tmp": "0.0.28"
+  },
+  "scripts": {
+    "publish-module": "node ../../scripts/publish.js speech",
+    "test": "mocha test/*.js",
+    "system-test": "mocha system-test/*.js --no-timeouts --bail"
+  },
+  "license": "Apache-2.0",
+  "engines": {
+    "node": ">=0.12.0"
+  }
+}
diff --git a/packages/google-cloud-speech/src/index.js b/packages/google-cloud-speech/src/index.js
new file mode 100644
index 00000000000..7697ea3d904
--- /dev/null
+++ b/packages/google-cloud-speech/src/index.js
@@ -0,0 +1,829 @@
+/*!
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * @module speech + */ + +'use strict'; + +var common = require('@google-cloud/common'); +var eventsIntercept = require('events-intercept'); +var extend = require('extend'); +var format = require('string-format-obj'); +var fs = require('fs'); +var googleProtoFiles = require('google-proto-files'); +var is = require('is'); +var path = require('path'); +var prop = require('propprop'); +var pumpify = require('pumpify'); +var request = require('request'); +var streamEvents = require('stream-events'); +var through = require('through2'); +var util = require('util'); + +/** + * The [Cloud Speech API](https://cloud.google.com/speech/docs) enables easy + * integration of Google speech recognition technologies into developer + * applications. Send audio and receive a text transcription from the Cloud + * Speech API service. + * + * @constructor + * @alias module:speech + * + * @classdesc + *

+ * **This is a Beta release of Google Cloud Speech.** This feature is not + * covered by any SLA or deprecation policy and may be subject to + * backward-incompatible changes. + *

+ * + * To learn more about the Speech API, see the + * [Getting Started guide](https://cloud.google.com/speech/docs/getting-started). + * + * @resource [Getting Started]{@link https://cloud.google.com/speech/docs/getting-started} + * @resource [Speech Best Practices]{@link https://cloud.google.com/speech/docs/best-practices} + * + * @param {object} options - [Configuration object](#/docs). + */ +function Speech(options) { + if (!(this instanceof Speech)) { + options = common.util.normalizeArguments(this, options); + return new Speech(options); + } + + var config = { + baseUrl: 'speech.googleapis.com', + projectIdRequired: false, + service: 'speech', + protoServices: { + Speech: { + path: googleProtoFiles.speech.v1beta1, + service: 'cloud.speech', + apiVersion: 'v1beta1' + }, + Operations: { + path: googleProtoFiles('longrunning', 'operations.proto'), + service: 'longrunning' + } + }, + scopes: [ + 'https://www.googleapis.com/auth/cloud-platform' + ], + packageJson: require('../package.json') + }; + + common.GrpcService.call(this, config, options); +} + +util.inherits(Speech, common.GrpcService); + +/** + * The endpointer types that the Speech API will return while processing a + * {module:speech#createRecognizeStream} request. You can track the progress of + * audio recognition by comparing the `data.endpointerType` property with these + * values. + * + * - `Speech.endpointerTypes.ENDPOINTER_EVENT_UNSPECIFIED`: No endpointer + * event specified. + * - `Speech.endpointerTypes.START_OF_SPEECH`: Speech has been detected in the + * audio stream. + * - `Speech.endpointerTypes.END_OF_SPEECH`: Speech has ceased to be detected + * in the audio stream. + * - `Speech.endpointerTypes.END_OF_AUDIO`: The end of the audio stream has + * been reached and it is being processed. + * - `Speech.endpointerTypes.END_OF_UTTERANCE`: This event is only sent when + * `config.singleUtterance` passed to {module:speech#createRecognizeStream} + * is `true`. It indicates that the server has detected the end of the + * user's speech utterance and expects no additional speech. Therefore, the + * server will not process additional audio. The client should stop sending + * additional audio data. + * + * @type {object} + */ +Speech.endpointerTypes = +Speech.prototype.endpointerTypes = { + END_OF_AUDIO: 'END_OF_AUDIO', + END_OF_SPEECH: 'END_OF_SPEECH', + END_OF_UTTERANCE: 'END_OF_UTTERANCE', + ENDPOINTER_EVENT_UNSPECIFIED: 'ENDPOINTER_EVENT_UNSPECIFIED', + START_OF_SPEECH: 'START_OF_SPEECH' +}; + +/** + * Guess the audio encoding from the file's extension. + * + * @resource [AudioEncoding API Documentation]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#audioencoding} + * @private + * + * @throws {Error} If an encoding type could not be determined from the file's + * extension. + * + * @param {string} filename - The name of the file. + * @returns {string} The audio encoding. + */ +Speech.detectEncoding_ = function(filename) { + if (!is.string(filename)) { + return; + } + + switch (path.extname(filename).toLowerCase()) { + case '.raw': { + return 'LINEAR16'; + } + case '.amr': { + return 'AMR'; + } + case '.awb': { + return 'AMR_WB'; + } + case '.flac': { + return 'FLAC'; + } + case '.au': + case '.wav': { + return 'MULAW'; + } + default: { + throw new Error('Encoding could not be determined for file: ' + filename); + } + } +}; + +/** + * Determine the type of file the user is asking to be processed. If a + * {module:storage/file}, convert to its "gs://{bucket}/{file}" URL. 
If a remote + * URL, read the contents. If a file path, load the file. + * + * @private + */ +Speech.findFile_ = function(file, callback) { + if (common.util.isCustomType(file, 'storage/file')) { + // File is an instance of module:storage/file. + callback(null, { + uri: format('gs://{bucketName}/{fileName}', { + bucketName: file.bucket.name, + fileName: file.name + }) + }); + return; + } + + if (is.string(file) && file.indexOf('gs://') === 0) { + // File is a Google Cloud Storage URI. + callback(null, { + uri: file + }); + return; + } + + if (/^http/.test(file)) { + // File is a URL. + request({ + uri: file, + encoding: null + }, function(err, resp, body) { + if (err) { + callback(err); + return; + } + + callback(null, { + content: body + }); + }); + return; + } + + if (is.object(file)) { + // This might be a RecognitionAudio object. + if (!file.content && !file.uri) { + var errorMsg = 'RecognitionAudio requires a "content" or "uri" property.'; + callback(new Error(errorMsg)); + } else { + callback(null, file); + } + return; + } + + // File exists on disk. + fs.readFile(file, function(err, contents) { + if (err) { + callback(err); + return; + } + + callback(null, { + content: contents + }); + }); +}; + +/** + * Simplify the transcription results from the API. + * + * @resource [SpeechRecognitionResult API Documentation]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.SpeechRecognitionResult} + * @resource [StreamingRecognitionResult API Documentation]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.StreamingRecognitionResult} + * + * @private + * + * @param {object} resultSets - A `SpeechRecognitionResult` or + * `StreamingRecognitionResult` object. + * @param {boolean} verbose - Whether to use verbose mode. + * @return {object} - The simplified results. + * + * @example + * var resultSets = [ + * { + * alternatives: [ + * { + * transcript: 'Result 1a', + * confidence: 0.70 + * }, + * { + * transcript: 'Result 1b', + * confidence: 0.60 + * }, + * ... + * ] + * }, + * { + * alternatives: [ + * { + * transcript: 'Result 2a', + * confidence: 0.90 + * }, + * { + * transcript: 'Result 2b', + * confidence: 0.80 + * }, + * ... + * ] + * } + * ]; + * + * //- + * // Default output. + * //- + * Speech.formatResults_(resultSets); + * // 'Result 1a Result 2a' + * + * //- + * // Verbose output. + * //- + * Speech.formatResults_(resultSets, true); + * // [ + * // { + * // transcript: 'Result 1a', + * // confidence: 70, + * // alternatives: [ + * // { + * // transcript: 'Result 1b', + * // confidence: 60 + * // }, + * // ... + * // ] + * // }, + * // { + * // transcript: 'Result 2a', + * // confidence: 90, + * // alternatives: [ + * // { + * // transcript: 'Result 2b', + * // confidence: 80 + * // }, + * // ... 
+ * // ] + * // } + * // ] + */ +Speech.formatResults_ = function(resultSets, verboseMode) { + function multiplyScores(result) { + if (is.defined(result.confidence)) { + result.confidence *= 100; + } + + if (is.defined(result.stability)) { + result.stability *= 100; + } + + return result; + } + + var verboseResultSets = resultSets + .map(function(resultSet) { + resultSet = extend(true, {}, resultSet); + + var mostProbableResult = multiplyScores(resultSet.alternatives.shift()); + + resultSet.transcript = mostProbableResult.transcript; + + if (is.defined(mostProbableResult.confidence)) { + resultSet.confidence = mostProbableResult.confidence; + } + + if (is.defined(mostProbableResult.stability)) { + resultSet.stability = mostProbableResult.stability; + } + + resultSet.alternatives = resultSet.alternatives.map(multiplyScores); + + return resultSet; + }); + + if (!verboseMode) { + return verboseResultSets.map(prop('transcript')).join(' '); + } + + return verboseResultSets; +}; + +/** + * Perform bidirectional streaming speech-recognition: receive results while + * sending audio. + * + * Each emitted `data` event is a + * [`StreamingRecognizeResponse`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognizeresponse) + * object, containing these properties: + * + * - **`endpointerType`** See {module:speech#endpointerTypes}. + * - **`results`** By default, a combined string of transcripts. When + * `config.verbose` is enabled, this is an object including a `transcript` + * property, a `confidence` score from `0` - `100`, and an `alternatives` + * array consisting of other transcription possibilities. + * + * @resource [StreamingRecognize API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.Speech.StreamingRecognize} + * @resource [StreamingRecognizeRequest API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.StreamingRecognizeRequest} + * + * @param {object} config - A `StreamingRecognitionConfig` object. See + * [`StreamingRecognitionConfig`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.StreamingRecognitionConfig). + * @param {boolean=} config.verbose - Enable verbose mode for a more detailed + * response. See the examples below. Default: `false`. + * + * @example + * var fs = require('fs'); + * + * //- + * // See + * // `StreamingRecognizeRequest` for all of the available configuration + * // options. + * //- + * var request = { + * config: { + * encoding: 'LINEAR16', + * sampleRate: 16000 + * }, + * singleUtterance: false, + * interimResults: false + * }; + * + * fs.createReadStream('./bridge.raw') + * .on('error', console.error) + * .pipe(speech.createRecognizeStream(request)) + * .on('error', console.error) + * .on('data', function(data) { + * // The first "data" event emitted might look like: + * // data = { + * // endpointerType: Speech.endpointerTypes.START_OF_SPEECH, + * // results: "", + * // ... + * // } + * + * // A later "data" event emitted might look like: + * // data = { + * // endpointerType: Speech.endpointerTypes.END_OF_AUDIO, + * // results: "", + * // ... + * // } + * + * // A final "data" event emitted might look like: + * // data = { + * // endpointerType: + * // Speech.endpointerTypes.ENDPOINTER_EVENT_UNSPECIFIED, + * // results: "how old is the Brooklyn Bridge", + * // ... 
+ * // } + * }); + * + * //- + * // Enable verbose mode for more detailed results. + * //- + * var request = { + * config: { + * encoding: 'LINEAR16', + * sampleRate: 16000 + * }, + * singleUtterance: false, + * interimResults: false, + * verbose: true + * }; + * + * fs.createReadStream('./system-test/data/bridge.raw') + * .on('error', console.error) + * .pipe(speech.createRecognizeStream(request)) + * .on('error', console.error) + * .on('data', function(data) { + * // The first "data" event emitted might look like: + * // data = { + * // endpointerType: Speech.endpointerTypes.START_OF_SPEECH, + * // results: [], + * // ... + * // } + * + * // A later "data" event emitted might look like: + * // data = { + * // endpointerType: Speech.endpointerTypes.END_OF_AUDIO, + * // results: [], + * // ... + * // } + * + * // A final "data" event emitted might look like: + * // data = { + * // endpointerType: + * // Speech.endpointerTypes.ENDPOINTER_EVENT_UNSPECIFIED, + * // results: [ + * // { + * // transcript: "how old is the Brooklyn Bridge", + * // confidence: 88.15 + * // } + * // ], + * // ... + * // } + * }); + */ +Speech.prototype.createRecognizeStream = function(config) { + var self = this; + + var protoOpts = { + service: 'Speech', + method: 'streamingRecognize' + }; + + var verboseMode = config.verbose === true; + delete config.verbose; + + var recognizeStream = streamEvents(pumpify.obj()); + + recognizeStream.once('writing', function() { + var requestStream = self.requestWritableStream(protoOpts); + + requestStream.on('response', function(response) { + recognizeStream.emit('response', response); + }); + + requestStream.write({ + streamingConfig: config + }); + + this.setPipeline([ + // Format the user's input. + through.obj(function(obj, _, next) { + next(null, { + audioContent: obj + }); + }), + + requestStream, + + // Format the results. + through.obj(function(obj, _, next) { + obj.results = Speech.formatResults_(obj.results, verboseMode); + next(null, obj); + }) + ]); + }); + + return recognizeStream; +}; + +/*! Developer Documentation + * + * @returns {module:common/grpcOperation} + */ +/** + * Get a reference to an existing operation. + * + * @throws {Error} If a name is not provided. + * + * @param {string} name - The name of the operation. + * + * @example + * var operation = speech.operation('68850831366825'); + */ +Speech.prototype.operation = function(name) { + if (!name) { + throw new Error('A name must be specified for an operation.'); + } + + return new common.GrpcOperation(this, name); +}; + +/** + * Perform synchronous speech recognition and receive results after all audio + * has been sent and processed. This is ideal for files 1 MB or below. For + * larger files, you will need to use {module:speech#startRecognition} or + * {module:speech#createRecognizeStream}. + * + * @resource [SyncRecognize API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.Speech.SyncRecognize} + * @resource [SyncRecognizeRequest API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.SyncRecognizeRequest} + * + * @param {string|object|module:storage/file} file - The source file to run the + * detection on. 
It can be either a local file path, a remote file URL, a + * Cloud Storage URI, a Cloud Storage File object, or a + * [`RecognitionAudio`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.RecognitionAudio) + * object. + * @param {object} config - A `RecognitionConfig` object. See + * [`RecognitionConfig`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.RecognitionConfig). + * @param {boolean=} config.verbose - Enable verbose mode for a more detailed + * response. See the examples below. Default: `false`. + * @param {function} callback - The callback function. + * @param {?error} callback.err - An error returned while making this request. + * @param {string|object[]} callback.results - By default, this will be a string + * comprised of all of the transcriptions recognized from the audio. If + * `config.verbose` is enabled, this is an object including a `transcript` + * property, a `confidence` score from `0` - `100`, and an `alternatives` + * array consisting of other transcription possibilities. See the examples + * below for more. + * @param {object} callback.apiResponse - Raw API response. See + * [`SyncRecognizeResponse`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#syncrecognizeresponse). + * + * @example + * var config = { + * encoding: 'LINEAR16', + * sampleRate: 16000 + * }; + * + * function callback(err, transcript, apiResponse) { + * if (err) { + * // Error handling omitted. + * } + * + * // transcript = "how old is the Brooklyn Bridge" + * } + * + * //- + * // Run speech detection over a local file. + * //- + * speech.recognize('./bridge.raw', config, callback); + * + * //- + * // Run speech recognition over a file in Cloud Storage. + * //- + * speech.recognize('gs://your-bucket-name/bridge.raw', config, callback); + * + * //- + * // Run speech recognition over raw file contents. + * //- + * speech.recognize({ + * content: fs.readFileSync('./bridge.raw') + * }, config, callback); + * + * //- + * // Run speech recognition over a remote file. + * // + * // Note: This is not an officially supported feature of the Speech API. + * // This library will make a request to the URL given and send the file + * // contents to the upstream API. + * //- + * speech.recognize('https://example.com/files/bridge.raw', config, callback); + * + * //- + * // Enable verbose mode for more detailed results. + * //- + * var config = { + * encoding: 'LINEAR16', + * sampleRate: 16000, + * verbose: true + * }; + * + * speech.recognize('./bridge.raw', config, function(err, results) { + * if (err) { + * // Error handling omitted. 
+ * } + * + * // results = [ + * // { + * // transcript: "how old is the Brooklyn Bridge", + * // confidence: 88.15, + * // alternatives: [ + * // { + * // transcript: "how old is the Brooklyn brim", + * // confidence: 22.39 + * // } + * // ] + * // } + * // ] + * }); + */ +Speech.prototype.recognize = function(file, config, callback) { + var self = this; + + var protoOpts = { + service: 'Speech', + method: 'syncRecognize' + }; + + config = extend({}, config); + + if (!config.encoding) { + config.encoding = Speech.detectEncoding_(file); + } + + var verboseMode = config.verbose === true; + delete config.verbose; + + Speech.findFile_(file, function(err, foundFile) { + if (err) { + callback(err); + return; + } + + var reqOpts = { + audio: foundFile, + config: config + }; + + self.request(protoOpts, reqOpts, function(err, apiResponse) { + if (err) { + callback(err, null, apiResponse); + return; + } + + var response = new self.protos.Speech.SyncRecognizeResponse(apiResponse); + var results = Speech.formatResults_(response.results, verboseMode); + + callback(null, results, apiResponse); + }); + }); +}; + +/** + * Perform asynchronous speech recognition. + * + * This method sends audio to the Speech API, which immediately responds with an + * Operation object. Register event handlers for the "error" and "complete" + * events to see how the operation finishes. Follow along with the examples + * below. + * + * @resource [AsyncRecognize API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.Speech.AsyncRecognize} + * @resource [AsyncRecognizeRequest API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.AsyncRecognizeRequest} + * @resource [AsyncRecognizeResponse API Reference]{@link https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.AsyncRecognizeResponse} + * + * @param {string|object|module:storage/file} file - The source file to run the + * detection on. It can be either a local file path, a remote file URL, a + * Cloud Storage URI, a Cloud Storage File object, or a + * [`RecognitionAudio`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.RecognitionAudio) + * object. + * @param {object} config - A `RecognitionConfig` object. See + * [`RecognitionConfig`](https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#google.cloud.speech.v1beta1.RecognitionConfig). + * @param {boolean=} config.verbose - Enable verbose mode for a more detailed + * response. See the examples below. Default: `false`. + * @param {function} callback - The callback function. + * @param {?error} callback.err - An error returned while making this request. + * @param {module:speech/operation} callback.operation - An operation object + * that can be used to check the status of the request. + * @param {object} callback.apiResponse - Raw API response. + * + * @example + * var config = { + * encoding: 'LINEAR16', + * sampleRate: 16000 + * }; + * + * function callback(err, operation, apiResponse) { + * if (err) { + * // Error handling omitted. + * } + * + * operation + * .on('error', function(err) {}) + * .on('complete', function(transcript) { + * // transcript = "how old is the Brooklyn Bridge" + * }); + * } + * + * //- + * // Run speech detection over a local file. 
+ * //- + * speech.startRecognition('./bridge.raw', config, callback); + * + * //- + * // Run speech detection over a file in Cloud Storage. + * //- + * var file = 'gs://your-bucket-name/bridge.raw'; + * speech.startRecognition(file, config, callback); + * + * //- + * // Run speech detection over raw file contents. + * //- + * speech.startRecognition({ + * content: fs.readFileSync('./bridge.raw') + * }, config, callback); + * + * //- + * // Run speech detection over a remote file. + * // + * // Note: This is not an officially supported feature of the Speech API. + * // This library will make a request to the URL given and send the file + * // contents to the upstream API. + * //- + * var file = 'https://example.com/files/bridge.raw'; + * + * speech.startRecognition(file, config, callback); + * + * //- + * // Enable verbose mode for more detailed results. + * //- + * var config = { + * encoding: 'LINEAR16', + * sampleRate: 16000, + * verbose: true + * }; + * + * speech.startRecognition('./bridge.raw', config, function(err, operation) { + * if (err) { + * // Error handling omitted. + * } + * + * operation + * .on('error', function(err) {}) + * .on('complete', function(results) { + * // results = [ + * // { + * // transcript: "how old is the Brooklyn Bridge", + * // confidence: 88.15 + * // } + * // ] + * }); + * }); + */ +Speech.prototype.startRecognition = function(file, config, callback) { + var self = this; + + var protoOpts = { + service: 'Speech', + method: 'asyncRecognize' + }; + + config = extend({}, config); + + if (!config.encoding) { + config.encoding = Speech.detectEncoding_(file); + } + + var verboseMode = config.verbose === true; + delete config.verbose; + + Speech.findFile_(file, function(err, foundFile) { + if (err) { + callback(err); + return; + } + + var reqOpts = { + audio: foundFile, + config: config + }; + + self.request(protoOpts, reqOpts, function(err, apiResponse) { + if (err) { + callback(err, null, apiResponse); + return; + } + + var operation = self.operation(apiResponse.name); + operation.metadata = apiResponse; + + // Intercept the "complete" event to decode and format the results of the + // operation for the user. 
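+      // ("events-intercept" adds an `intercept` method to the operation,
+      // letting us rewrite the event's payload before any user-registered
+      // "complete" listeners see it.)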
+ eventsIntercept.patch(operation); + operation.intercept('complete', function(metadata, callback) { + var response = metadata.response; + + if (response && is.string(response.value)) { + var value = response.value; + response = self.protos.Speech.AsyncRecognizeResponse.decode(value); + } + + callback(null, Speech.formatResults_(response.results, verboseMode)); + }); + + callback(null, operation, apiResponse); + }); + }); +}; + +module.exports = Speech; diff --git a/packages/google-cloud-speech/system-test/data/bridge.raw b/packages/google-cloud-speech/system-test/data/bridge.raw new file mode 100644 index 00000000000..5ebf79d3c9c Binary files /dev/null and b/packages/google-cloud-speech/system-test/data/bridge.raw differ diff --git a/packages/google-cloud-speech/system-test/data/quit.raw b/packages/google-cloud-speech/system-test/data/quit.raw new file mode 100644 index 00000000000..a01dfc45a59 Binary files /dev/null and b/packages/google-cloud-speech/system-test/data/quit.raw differ diff --git a/packages/google-cloud-speech/system-test/data/spain.raw b/packages/google-cloud-speech/system-test/data/spain.raw new file mode 100644 index 00000000000..35413b78817 Binary files /dev/null and b/packages/google-cloud-speech/system-test/data/spain.raw differ diff --git a/packages/google-cloud-speech/system-test/speech.js b/packages/google-cloud-speech/system-test/speech.js new file mode 100644 index 00000000000..9bc85892c0a --- /dev/null +++ b/packages/google-cloud-speech/system-test/speech.js @@ -0,0 +1,349 @@ +/*! + * Copyright 2016 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +'use strict'; + +var assert = require('assert'); +var async = require('async'); +var exec = require('methmeth'); +var extend = require('extend'); +var fs = require('fs'); +var path = require('path'); +var uuid = require('node-uuid'); + +var env = require('../../../system-test/env.js'); +var Speech = require('../'); +var storage = require('@google-cloud/storage')(env); + +var FILENAMES = ['bridge', 'spain', 'quit']; +var AUDIO_FILES = {}; +var BUCKET_NAME = 'gcloud-test-bucket-temp-' + uuid.v1(); + +FILENAMES.forEach(function(filename) { + var name = filename + '.raw'; + + AUDIO_FILES[filename] = { + name: name, + path: path.join(__dirname, 'data/' + name), + gcsUri: 'gs://' + BUCKET_NAME + '/' + name, + httpUri: 'https://storage.googleapis.com/' + BUCKET_NAME + '/' + name + }; +}); + +describe('Speech', function() { + var speech = new Speech(env); + var bucket = storage.bucket(BUCKET_NAME); + + var OPTIONS = { + encoding: 'LINEAR16', + sampleRate: 16000 + }; + + var OPTIONS_VERBOSE = extend({}, OPTIONS, { + verbose: true + }); + + before(function(done) { + async.waterfall([ + function(next) { + bucket.create(next); + }, + + function(_, apiResponse, next) { + async.map(FILENAMES, function(filename, onComplete) { + fs.readFile(AUDIO_FILES[filename].path, onComplete); + }, next); + }, + + function(files, next) { + FILENAMES.forEach(function(filename, i) { + AUDIO_FILES[filename].content = files[i]; + }); + + async.map(FILENAMES, function(filename, onComplete) { + var file = bucket.file(AUDIO_FILES[filename].name); + + file.save(AUDIO_FILES[filename].content, function(err) { + onComplete(err, file); + }); + }, next); + }, + + function(files, next) { + async.map(files, exec('makePublic'), next); + } + ], done); + }); + + after(function(done) { + bucket.deleteFiles({ + force: true + }, function(err) { + if (err) { + done(err); + return; + } + + bucket.delete(done); + }); + }); + + describe('recognize', function() { + it('recognizes speech from raw audio', function(done) { + fs.readFile(AUDIO_FILES.bridge.path, function(err, audioFile) { + assert.ifError(err); + + speech.recognize({ + content: audioFile + }, OPTIONS, assertSimplifiedResponse(done)); + }); + }); + + it('recognizes speech in verbose mode', function(done) { + fs.readFile(AUDIO_FILES.bridge.path, function(err, audioFile) { + assert.ifError(err); + + speech.recognize({ + content: audioFile + }, OPTIONS_VERBOSE, assertVerboseResponse(done)); + }); + }); + + it('recognizes speech from local file', function(done) { + speech.recognize(AUDIO_FILES.bridge.path, { + // encoding should be automatically detected + sampleRate: 16000 + }, assertSimplifiedResponse(done)); + }); + + it('recognizes speech from remote GCS audio file', function(done) { + var uri = AUDIO_FILES.bridge.gcsUri; + + speech.recognize(uri, OPTIONS, assertSimplifiedResponse(done)); + }); + + it('recognizes speech from remote audio file', function(done) { + var uri = AUDIO_FILES.bridge.httpUri; + + speech.recognize(uri, OPTIONS, assertSimplifiedResponse(done)); + }); + }); + + describe('startRecognition', function() { + it('recognizes speech from raw audio', function(done) { + fs.readFile(AUDIO_FILES.bridge.path, function(err, audioFile) { + assert.ifError(err); + + speech.startRecognition({ + content: audioFile + }, OPTIONS, function(err, operation) { + assert.ifError(err); + + operation + .on('error', done) + .on('complete', assertSimplifiedResponseOperation(done)); + }); + }); + }); + + it('recognizes speech from raw audio in verbose mode', function(done) { + 
fs.readFile(AUDIO_FILES.bridge.path, function(err, audioFile) { + assert.ifError(err); + + speech.startRecognition({ + content: audioFile + }, OPTIONS_VERBOSE, function(err, operation) { + assert.ifError(err); + + operation + .on('error', done) + .on('complete', assertVerboseResponseOperation(done)); + }); + }); + }); + + it('recognizes speech from local file', function(done) { + var options = { + // encoding should be automatically detected + sampleRate: 16000 + }; + + var path = AUDIO_FILES.bridge.path; + + speech.startRecognition(path, options, function(err, operation) { + assert.ifError(err); + + operation + .on('error', done) + .on('complete', assertSimplifiedResponseOperation(done)); + }); + }); + + it('recognizes speech from remote GCS audio file', function(done) { + var uri = AUDIO_FILES.bridge.gcsUri; + + speech.startRecognition(uri, OPTIONS, function(err, operation) { + assert.ifError(err); + + operation + .on('error', done) + .on('complete', assertSimplifiedResponseOperation(done)); + }); + }); + + it('recognizes speech from remote audio file', function(done) { + var uri = AUDIO_FILES.bridge.httpUri; + + speech.startRecognition(uri, OPTIONS, function(err, operation) { + assert.ifError(err); + + operation + .on('error', done) + .on('complete', assertSimplifiedResponseOperation(done)); + }); + }); + }); + + describe('createRecognizeStream', function() { + it('recognizes speech from raw audio', function(done) { + var correctDetectionsEmitted = 0; + var responseEmitted = false; + + fs.createReadStream(AUDIO_FILES.bridge.path) + .on('error', done) + .pipe(speech.createRecognizeStream({ + config: OPTIONS, + interimResults: false, + singleUtterance: false + })) + .on('error', done) + .on('response', function() { + responseEmitted = true; + }) + .on('data', function(data) { + switch (data.endpointerType) { + case Speech.endpointerTypes.START_OF_SPEECH: { + if (data.results.length === 0) { + correctDetectionsEmitted++; + } + return; + } + + case Speech.endpointerTypes.END_OF_AUDIO: { + if (data.results.length === 0) { + correctDetectionsEmitted++; + } + return; + } + + case Speech.endpointerTypes.ENDPOINTER_EVENT_UNSPECIFIED: { + var transcript = data.results; + if (transcript === 'how old is the Brooklyn Bridge') { + correctDetectionsEmitted++; + } + return; + } + } + }) + .on('end', function() { + setTimeout(function() { + assert.strictEqual(responseEmitted, true); + assert.strictEqual(correctDetectionsEmitted, 3); + done(); + }, 1500); + }); + }); + + it('recognizes speech from raw audio in verbose mode', function(done) { + var correctDetectionsEmitted = 0; + var responseEmitted = false; + + fs.createReadStream(AUDIO_FILES.bridge.path) + .on('error', done) + .pipe(speech.createRecognizeStream({ + config: OPTIONS, + interimResults: false, + singleUtterance: false, + verbose: true + })) + .on('error', done) + .on('response', function() { + responseEmitted = true; + }) + .on('data', function(data) { + switch (data.endpointerType) { + case Speech.endpointerTypes.START_OF_SPEECH: { + if (data.results.length === 0) { + correctDetectionsEmitted++; + } + return; + } + + case Speech.endpointerTypes.END_OF_AUDIO: { + if (data.results.length === 0) { + correctDetectionsEmitted++; + } + return; + } + + case Speech.endpointerTypes.ENDPOINTER_EVENT_UNSPECIFIED: { + var transcript = data.results[0].transcript; + if (transcript === 'how old is the Brooklyn Bridge') { + correctDetectionsEmitted++; + } + return; + } + } + }) + .on('end', function() { + setTimeout(function() { + 
assert.strictEqual(responseEmitted, true); + assert.strictEqual(correctDetectionsEmitted, 3); + done(); + }, 1500); + }); + }); + }); + + function assertSimplifiedResponse(done) { + return function(err, transcript) { + assert.ifError(err); + assert.strictEqual(transcript, 'how old is the Brooklyn Bridge'); + done(); + }; + } + + function assertVerboseResponse(done) { + return function(err, results) { + assert.ifError(err); + + assert(results.length > 0); + + var transcript = results[0].transcript; + assert.strictEqual(transcript, 'how old is the Brooklyn Bridge'); + + done(); + }; + } + + function assertSimplifiedResponseOperation(done) { + return assertSimplifiedResponse(done).bind(null, null); + } + + function assertVerboseResponseOperation(done) { + return assertVerboseResponse(done).bind(null, null); + } +}); diff --git a/packages/google-cloud-speech/test/index.js b/packages/google-cloud-speech/test/index.js new file mode 100644 index 00000000000..137b75c5ef8 --- /dev/null +++ b/packages/google-cloud-speech/test/index.js @@ -0,0 +1,997 @@ +/** + * Copyright 2016 Google Inc. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; + +var assert = require('assert'); +var async = require('async'); +var extend = require('extend'); +var fs = require('fs'); +var googleProtoFiles = require('google-proto-files'); +var proxyquire = require('proxyquire'); +var through = require('through2'); +var tmp = require('tmp'); + +var util = require('@google-cloud/common').util; + +var fakeUtil = extend({}, util); + +function FakeGrpcOperation() { + this.calledWith_ = arguments; +} + +function FakeGrpcService() { + this.calledWith_ = arguments; +} + +var requestOverride = null; +var fakeRequest = function() { + return (requestOverride || util.noop).apply(this, arguments); +}; + +describe('Speech', function() { + var PROJECT_ID = 'project-id'; + + var Speech; + var speech; + + var originalStaticMembers; + + before(function() { + Speech = proxyquire('../', { + '@google-cloud/common': { + GrpcOperation: FakeGrpcOperation, + GrpcService: FakeGrpcService, + util: fakeUtil + }, + request: fakeRequest + }); + + originalStaticMembers = Object.keys(Speech).reduce(function(statics, key) { + statics[key] = Speech[key]; + return statics; + }, {}); + }); + + beforeEach(function() { + requestOverride = null; + + speech = new Speech({ + projectId: PROJECT_ID + }); + + extend(Speech, originalStaticMembers); + }); + + describe('instantiation', function() { + it('should normalize the arguments', function() { + var normalizeArguments = fakeUtil.normalizeArguments; + var normalizeArgumentsCalled = false; + var fakeOptions = { projectId: PROJECT_ID }; + var fakeContext = {}; + + fakeUtil.normalizeArguments = function(context, options) { + normalizeArgumentsCalled = true; + assert.strictEqual(context, fakeContext); + assert.strictEqual(options, fakeOptions); + return options; + }; + + Speech.call(fakeContext, fakeOptions); + assert(normalizeArgumentsCalled); + + 
fakeUtil.normalizeArguments = normalizeArguments; + }); + + it('should inherit from GrpcService', function() { + assert(speech instanceof FakeGrpcService); + + var calledWith = speech.calledWith_[0]; + + assert.deepEqual(calledWith, { + baseUrl: 'speech.googleapis.com', + projectIdRequired: false, + service: 'speech', + protoServices: { + Speech: { + path: googleProtoFiles.speech.v1beta1, + service: 'cloud.speech', + apiVersion: 'v1beta1' + }, + Operations: { + path: googleProtoFiles('longrunning', 'operations.proto'), + service: 'longrunning' + } + }, + scopes: [ + 'https://www.googleapis.com/auth/cloud-platform' + ], + packageJson: require('../package.json') + }); + }); + }); + + describe('endpointerTypes', function() { + var ENDPOINTER_TYPES = { + END_OF_AUDIO: 'END_OF_AUDIO', + END_OF_SPEECH: 'END_OF_SPEECH', + END_OF_UTTERANCE: 'END_OF_UTTERANCE', + ENDPOINTER_EVENT_UNSPECIFIED: 'ENDPOINTER_EVENT_UNSPECIFIED', + START_OF_SPEECH: 'START_OF_SPEECH' + }; + + it('should export static endpointerTypes', function() { + assert.deepEqual(Speech.endpointerTypes, ENDPOINTER_TYPES); + }); + + it('should export instance endpointerTypes', function() { + assert.deepEqual(speech.endpointerTypes, ENDPOINTER_TYPES); + }); + }); + + describe('detectEncoding_', function() { + it('should detect encoding', function() { + assert.equal(Speech.detectEncoding_('foo.raw'), 'LINEAR16'); + assert.equal(Speech.detectEncoding_('foo.amr'), 'AMR'); + assert.equal(Speech.detectEncoding_('foo.awb'), 'AMR_WB'); + assert.equal(Speech.detectEncoding_('foo.flac'), 'FLAC'); + assert.equal(Speech.detectEncoding_('foo.fLAc'), 'FLAC'); + assert.equal(Speech.detectEncoding_('foo.wav'), 'MULAW'); + assert.equal(Speech.detectEncoding_('foo.au'), 'MULAW'); + }); + + it('should throw if a supported encoding is not detected', function() { + assert.throws(function() { + Speech.detectEncoding_('blah.mp3'); + }, /Encoding could not be determined for file: blah\.mp3/); + }); + }); + + describe('findFile_', function() { + it('should convert a File object', function(done) { + var file = { + bucket: { + name: 'bucket-name' + }, + name: 'file-name' + }; + + var isCustomTypeCalled = false; + var isCustomType = fakeUtil.isCustomType; + + fakeUtil.isCustomType = function(obj, module) { + isCustomTypeCalled = true; + fakeUtil.isCustomType = isCustomType; + assert.strictEqual(obj, file); + assert.strictEqual(module, 'storage/file'); + return true; + }; + + Speech.findFile_(file, function(err, foundFile) { + assert.ifError(err); + + assert.deepEqual(foundFile, { + uri: 'gs://' + file.bucket.name + '/' + file.name + }); + + assert.strictEqual(isCustomTypeCalled, true); + + done(); + }); + }); + + it('should detect a gs:// path', function(done) { + var file = 'gs://your-bucket-name/audio.raw'; + + Speech.findFile_(file, function(err, foundFile) { + assert.ifError(err); + + assert.deepEqual(foundFile, { + uri: file + }); + + done(); + }); + }); + + it('should get a file from a URL', function(done) { + var fileUri = 'http://www.google.com/audio.raw'; + var body = 'body'; + + requestOverride = function(reqOpts, callback) { + assert.strictEqual(reqOpts.uri, fileUri); + assert.strictEqual(reqOpts.encoding, null); + + var response = { + body: new Buffer(body) + }; + + callback(null, response, response.body); + }; + + Speech.findFile_(fileUri, function(err, foundFile) { + assert.ifError(err); + assert.deepEqual(foundFile, { + content: new Buffer(body) + }); + done(); + }); + }); + + it('should return an error from reading a URL', function(done) { + var 
fileUri = 'http://www.google.com/audio.raw'; + var error = new Error('Error.'); + + requestOverride = function(options, callback) { + callback(error); + }; + + Speech.findFile_(fileUri, function(err) { + assert.strictEqual(err, error); + done(); + }); + }); + + it('should validate RecognitionAudio object', function(done) { + var file = {}; + + Speech.findFile_(file, function(err) { + assert.strictEqual( + err.message, + 'RecognitionAudio requires a "content" or "uri" property.' + ); + + done(); + }); + }); + + it('should accept RecognitionAudio object', function(done) { + var file = { + content: 'aGk=' + }; + + Speech.findFile_(file, function(err, foundFile) { + assert.ifError(err); + assert.strictEqual(foundFile, file); + done(); + }); + }); + + it('should read from a file path', function(done) { + tmp.setGracefulCleanup(); + + tmp.file(function tempFileCreated_(err, tmpFilePath) { + assert.ifError(err); + + var contents = 'abcdef'; + + function writeFile(callback) { + fs.writeFile(tmpFilePath, contents, callback); + } + + function convertFile(callback) { + Speech.findFile_(tmpFilePath, callback); + } + + async.waterfall([writeFile, convertFile], function(err, foundFile) { + assert.ifError(err); + + assert.deepEqual(foundFile, { + content: new Buffer(contents) + }); + + done(); + }); + }); + }); + + it('should return an error when file cannot be found', function(done) { + Speech.findFile_('./not-real-file.raw', function(err) { + assert.strictEqual(err.code, 'ENOENT'); + done(); + }); + }); + }); + + describe('formatResults_', function() { + describe('SpeechRecognitionResult', function() { + var SPEECH_RECOGNITION = { + original: [ + { + alternatives: [ + { + transcript: 'Result 1a', + confidence: 0.70, + stability: 0.1 + }, + { + transcript: 'Result 1b', + confidence: 0.60, + stability: 0.1 + } + ] + }, + { + alternatives: [ + { + transcript: 'Result 2a', + confidence: 0.90, + stability: 0.1 + }, + { + transcript: 'Result 2b', + confidence: 0.80, + stability: 0.1 + } + ] + } + ], + + expectedDefault: 'Result 1a Result 2a', + + expectedVerbose: [ + { + transcript: 'Result 1a', + confidence: 70, + stability: 10, + alternatives: [ + { + transcript: 'Result 1b', + confidence: 60, + stability: 10, + } + ] + }, + { + transcript: 'Result 2a', + confidence: 90, + stability: 10, + alternatives: [ + { + transcript: 'Result 2b', + confidence: 80, + stability: 10 + } + ] + } + ] + }; + + it('should simplify the results', function() { + assert.deepEqual( + Speech.formatResults_(SPEECH_RECOGNITION.original), + SPEECH_RECOGNITION.expectedDefault + ); + }); + + it('should simplify the results in verbose mode', function() { + assert.deepEqual( + Speech.formatResults_(SPEECH_RECOGNITION.original, true), + SPEECH_RECOGNITION.expectedVerbose + ); + }); + }); + }); + + describe('createRecognizeStream', function() { + var CONFIG = {}; + var stream; + var requestStream; + + beforeEach(function() { + stream = speech.createRecognizeStream(CONFIG); + + stream.setPipeline = util.noop; + + speech.requestWritableStream = function() { + requestStream = through.obj(); + return requestStream; + }; + }); + + it('should make the correct request once writing started', function(done) { + speech.requestWritableStream = function(protoOpts) { + assert.deepEqual(protoOpts, { + service: 'Speech', + method: 'streamingRecognize' + }); + + setImmediate(done); + + return through.obj(); + }; + + stream.emit('writing'); + }); + + it('should emit the response event on the user stream', function(done) { + var response = {}; + + 
stream.on('response', function(response_) { + assert.strictEqual(response_, response); + done(); + }); + + speech.requestWritableStream = function() { + var requestStream = through.obj(); + + setImmediate(function() { + requestStream.emit('response', response); + }); + + return requestStream; + }; + + stream.emit('writing'); + }); + + it('should send the initial write to the request stream', function(done) { + speech.requestWritableStream = function() { + var requestStream = through.obj(); + + requestStream.once('data', function(data) { + assert.deepEqual(data, { + streamingConfig: CONFIG + }); + done(); + }); + + return requestStream; + }; + + stream.emit('writing'); + }); + + it('should format the incoming data into a duplex stream', function(done) { + stream.setPipeline = function(streams) { + var formatStream = streams[0]; + assert.strictEqual(streams[1], requestStream); + + var chunk = {}; + formatStream.once('data', function(data) { + assert.deepEqual(data, { + audioContent: chunk + }); + done(); + }); + + formatStream.end(chunk); + }; + + stream.emit('writing'); + }); + + it('should format the results from the API', function(done) { + stream.setPipeline = function(streams) { + var formatStream = streams[2]; + + var streamingRecognizeResponse = { + results: [] + }; + + var formattedResults = []; + + Speech.formatResults_ = function(results, verboseMode) { + assert.strictEqual(results, streamingRecognizeResponse.results); + assert.strictEqual(verboseMode, false); + return formattedResults; + }; + + formatStream.once('data', function(data) { + assert.strictEqual(data, streamingRecognizeResponse); + assert.deepEqual(data.results, formattedResults); + done(); + }); + + formatStream.end(streamingRecognizeResponse); + }; + + stream.emit('writing'); + }); + + it('should format results from the API in verbose mode', function(done) { + var stream = speech.createRecognizeStream({ + verbose: true + }); + + speech.requestWritableStream = function() { + return through.obj(); + }; + + stream.setPipeline = function(streams) { + var formatStream = streams[2]; + + Speech.formatResults_ = function(results, verboseMode) { + assert.strictEqual(verboseMode, true); + done(); + }; + + formatStream.end({}); + }; + + stream.emit('writing'); + }); + + it('should delete verbose option from request object', function(done) { + var stream = speech.createRecognizeStream({ + verbose: true + }); + + speech.requestWritableStream = function() { + var stream = through.obj(); + + stream.on('data', function(data) { + assert.deepEqual(data, { + streamingConfig: {} // No `verbose` property. 
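+            // (`verbose` is consumed client-side for formatting; it must
+            // never be forwarded to the API in the streaming config.)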
+ }); + done(); + }); + + return stream; + }; + + stream.emit('writing'); + }); + }); + + describe('operation', function() { + var NAME = 'op-name'; + + it('should throw if a name is not provided', function() { + assert.throws(function() { + speech.operation(); + }, /A name must be specified for an operation\./); + }); + + it('should return an Operation object', function() { + var operation = speech.operation(NAME); + assert(operation instanceof FakeGrpcOperation); + assert.strictEqual(operation.calledWith_[0], speech); + assert.strictEqual(operation.calledWith_[1], NAME); + }); + }); + + describe('recognize', function() { + var FILE = {}; + var FOUND_FILE = {}; + var CONFIG = { a: 'b' }; + var DETECTED_ENCODING = 'LINEAR16'; + + beforeEach(function() { + Speech.detectEncoding_ = function() { + return DETECTED_ENCODING; + }; + + Speech.findFile_ = function(files, callback) { + callback(null, FOUND_FILE); + }; + }); + + it('should find the files', function(done) { + Speech.findFile_ = function(file) { + assert.strictEqual(file, FILE); + done(); + }; + + speech.recognize(FILE, CONFIG, assert.ifError); + }); + + it('should make the correct request', function(done) { + speech.request = function(protoOpts, reqOpts) { + assert.deepEqual(protoOpts, { + service: 'Speech', + method: 'syncRecognize' + }); + + assert.deepEqual(reqOpts, { + config: extend({}, CONFIG, { encoding: DETECTED_ENCODING }), + audio: FOUND_FILE + }); + + done(); + }; + + speech.recognize(FILE, CONFIG, assert.ifError); + }); + + it('should respect the provided encoding', function(done) { + var config = { + encoding: 'LINEAR32' + }; + + Speech.detectEncoding_ = function() { + done(); // Will cause test to fail. + }; + + speech.request = function(protoOpts, reqOpts) { + assert.strictEqual(reqOpts.config.encoding, config.encoding); + done(); + }; + + speech.recognize(FILE, config, assert.ifError); + }); + + it('should guess the encoding if it is not specified', function(done) { + var expectedEncoding = 'LINEAR16'; + + Speech.detectEncoding_ = function(file) { + assert.strictEqual(file, FILE); + return expectedEncoding; + }; + + speech.request = function(protoOpts, reqOpts) { + assert.strictEqual(reqOpts.config.encoding, expectedEncoding); + done(); + }; + + speech.recognize(FILE, {}, assert.ifError); + }); + + it('should return an error from findFile_', function(done) { + var error = new Error('Error.'); + + Speech.findFile_ = function(files, callback) { + callback(error); + }; + + speech.recognize(FILE, CONFIG, function(err) { + assert.strictEqual(err, error); + done(); + }); + }); + + describe('error', function() { + var error = new Error('Error.'); + var apiResponse = {}; + + beforeEach(function() { + speech.request = function(protoOpts, reqOpts, callback) { + callback(error, apiResponse); + }; + }); + + it('should return the error & API response', function(done) { + speech.recognize(FILE, CONFIG, function(err, results, apiResponse_) { + assert.strictEqual(err, error); + assert.strictEqual(results, null); + assert.strictEqual(apiResponse_, apiResponse); + done(); + }); + }); + }); + + describe('success', function() { + var apiResponse = { + results: [] + }; + var decodedResponse = { + results: [] + }; + var formattedResults = []; + + beforeEach(function() { + speech.protos = { + Speech: { + SyncRecognizeResponse: function() { + return decodedResponse; + } + } + }; + + Speech.formatResults_ = function() { + return formattedResults; + }; + + speech.request = function(protoOpts, reqOpts, callback) { + callback(null, apiResponse); 
+        };
+      });
+
+      it('should return the detections & API response', function(done) {
+        speech.protos = {
+          Speech: {
+            SyncRecognizeResponse: function(response) {
+              assert.strictEqual(response, apiResponse);
+              return decodedResponse;
+            }
+          }
+        };
+
+        Speech.formatResults_ = function(results, verboseMode) {
+          assert.strictEqual(results, decodedResponse.results);
+          assert.strictEqual(verboseMode, false);
+          return formattedResults;
+        };
+
+        speech.recognize(FILE, CONFIG, function(err, results, apiResponse_) {
+          assert.ifError(err);
+          assert.strictEqual(results, formattedResults);
+          assert.strictEqual(apiResponse_, apiResponse);
+          done();
+        });
+      });
+
+      it('should return the detections in verbose mode', function(done) {
+        Speech.formatResults_ = function(results, verboseMode) {
+          assert.strictEqual(verboseMode, true);
+          done();
+        };
+
+        var config = extend({}, CONFIG, {
+          verbose: true
+        });
+
+        speech.recognize(FILE, config, assert.ifError);
+      });
+
+      it('should delete verbose option from request object', function(done) {
+        speech.request = function(protoOpts, reqOpts) {
+          assert.strictEqual(reqOpts.config.verbose, undefined);
+          done();
+        };
+
+        var config = extend({}, CONFIG, {
+          verbose: true
+        });
+
+        speech.recognize(FILE, config, assert.ifError);
+      });
+    });
+  });
+
+  describe('startRecognition', function() {
+    var FILE = {};
+    var FOUND_FILE = {};
+    var CONFIG = { a: 'b' };
+    var DETECTED_ENCODING = 'LINEAR16';
+
+    beforeEach(function() {
+      Speech.detectEncoding_ = function() {
+        return DETECTED_ENCODING;
+      };
+
+      Speech.findFile_ = function(files, callback) {
+        callback(null, FOUND_FILE);
+      };
+    });
+
+    it('should find the files', function(done) {
+      Speech.findFile_ = function(file) {
+        assert.strictEqual(file, FILE);
+        done();
+      };
+
+      speech.startRecognition(FILE, CONFIG, assert.ifError);
+    });
+
+    it('should make the correct request', function(done) {
+      speech.request = function(protoOpts, reqOpts) {
+        assert.deepEqual(protoOpts, {
+          service: 'Speech',
+          method: 'asyncRecognize'
+        });
+
+        assert.deepEqual(reqOpts, {
+          config: extend({}, CONFIG, { encoding: DETECTED_ENCODING }),
+          audio: FOUND_FILE
+        });
+
+        done();
+      };
+
+      speech.startRecognition(FILE, CONFIG, assert.ifError);
+    });
+
+    it('should respect the provided encoding', function(done) {
+      var config = {
+        encoding: 'LINEAR32'
+      };
+
+      Speech.detectEncoding_ = function() {
+        done(); // Will cause test to fail.
+ }; + + speech.request = function(protoOpts, reqOpts) { + assert.strictEqual(reqOpts.config.encoding, config.encoding); + done(); + }; + + speech.startRecognition(FILE, config, assert.ifError); + }); + + it('should guess the encoding if it is not specified', function(done) { + var expectedEncoding = 'LINEAR16'; + + Speech.detectEncoding_ = function(file) { + assert.strictEqual(file, FILE); + return expectedEncoding; + }; + + speech.request = function(protoOpts, reqOpts) { + assert.strictEqual(reqOpts.config.encoding, expectedEncoding); + done(); + }; + + speech.startRecognition(FILE, {}, assert.ifError); + }); + + it('should return an error from findFile_', function(done) { + var error = new Error('Error.'); + + Speech.findFile_ = function(files, callback) { + callback(error); + }; + + speech.startRecognition(FILE, CONFIG, function(err) { + assert.strictEqual(err, error); + done(); + }); + }); + + describe('error', function() { + var error = new Error('Error.'); + var apiResponse = {}; + + beforeEach(function() { + speech.request = function(protoOpts, reqOpts, callback) { + callback(error, apiResponse); + }; + }); + + it('should return the error & API response', function(done) { + speech.startRecognition(FILE, CONFIG, function(err, op, apiResponse_) { + assert.strictEqual(err, error); + assert.strictEqual(op, null); + assert.strictEqual(apiResponse_, apiResponse); + done(); + }); + }); + }); + + describe('success', function() { + var apiResponse = { + name: 'operation-name', + response: { + value: 'value string to be decoded' + } + }; + + var decodedResponse = { + results: [] + }; + + beforeEach(function() { + speech.protos = { + Speech: { + AsyncRecognizeResponse: { + decode: function() { + return decodedResponse; + } + } + } + }; + + Speech.formatResults_ = util.noop; + + speech.operation = function() { + return through.obj(); + }; + + speech.request = function(protoOpts, reqOpts, callback) { + callback(null, apiResponse); + }; + }); + + it('should return an operation & API response', function(done) { + var fakeOperation = through.obj(); + + speech.operation = function(name) { + assert.strictEqual(name, apiResponse.name); + return fakeOperation; + }; + + speech.startRecognition(FILE, CONFIG, function(err, op, apiResponse_) { + assert.ifError(err); + + assert.strictEqual(op, fakeOperation); + assert.strictEqual(op.metadata, apiResponse); + + assert.strictEqual(apiResponse_, apiResponse); + + done(); + }); + }); + + it('should format the results', function(done) { + var fakeOperation = through.obj(); + speech.operation = function() { + return fakeOperation; + }; + + speech.protos = { + Speech: { + AsyncRecognizeResponse: { + decode: function(value) { + assert.strictEqual(value, apiResponse.response.value); + return decodedResponse; + } + } + } + }; + + var formattedResults = []; + Speech.formatResults_ = function(results, verboseMode) { + assert.strictEqual(results, decodedResponse.results); + assert.strictEqual(verboseMode, false); + return formattedResults; + }; + + speech.startRecognition(FILE, CONFIG, function(err, op) { + assert.ifError(err); + + op.on('complete', function(results) { + assert.strictEqual(results, formattedResults); + done(); + }); + + op.emit('complete', apiResponse); + }); + }); + + it('should format results in verbose mode', function(done) { + Speech.formatResults_ = function(results, verboseMode) { + assert.strictEqual(verboseMode, true); + done(); + }; + + var config = extend({}, CONFIG, { + verbose: true + }); + + speech.startRecognition(FILE, config, 
function(err, op) { + assert.ifError(err); + op.emit('complete', apiResponse); + }); + }); + + it('should delete verbose option from request object', function(done) { + speech.request = function(protoOpts, reqOpts) { + assert.strictEqual(reqOpts.config.verbose, undefined); + done(); + }; + + var config = extend({}, CONFIG, { + verbose: true + }); + + speech.startRecognition(FILE, config, assert.ifError); + }); + }); + }); +});
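For anyone trying this branch end to end, here is a minimal usage sketch of the new async flow. It assumes a hypothetical service-account key at `./keyfile.json` (with the Speech API enabled) and a LINEAR16, 16000 Hz sample at `./bridge.raw`, mirroring the system-test fixture:

```js
var speech = require('@google-cloud/speech')({
  projectId: 'your-project-id', // hypothetical
  keyFilename: './keyfile.json' // hypothetical
});

// startRecognition() responds immediately with a long-running operation;
// the decoded, formatted transcript arrives on its "complete" event.
speech.startRecognition('./bridge.raw', {
  // `encoding` could be omitted here: detectEncoding_() infers LINEAR16
  // from the ".raw" extension.
  encoding: 'LINEAR16',
  sampleRate: 16000,
  verbose: true
}, function(err, operation) {
  if (err) {
    throw err;
  }

  operation
    .on('error', console.error)
    .on('complete', function(results) {
      // With `verbose: true`, results is an array like:
      // [ { transcript: '...', confidence: 88.15, alternatives: [...] } ]
      console.log(results[0].transcript);
    });
});
```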