Skip to content

Commit

Permalink
Merge pull request #113 from GoogleCloudPlatform/text
Browse files Browse the repository at this point in the history
Add Cloud Vision text detection sample.
  • Loading branch information
jmdobry committed May 23, 2016
2 parents 18d8e96 + ca02977 commit f5c0f0e
Show file tree
Hide file tree
Showing 13 changed files with 325 additions and 6 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ __Other Examples__
- Face detection - [Source code][vision_1] | [Documentation][vision_2]
- Label detection - [Source code][vision_3] | [Documentation][vision_4]
- Landmark detection - [Source code][vision_5] | [Documentation][vision_6]
- Text detection - [Source code][vision_7] | [Documentation][vision_8]

## Google Prediction API

Expand Down Expand Up @@ -328,6 +329,8 @@ See [LICENSE](https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/ma
[vision_4]: https://cloud.google.com/vision/docs/label-tutorial
[vision_5]: https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/master/vision/landmarkDetection.js
[vision_6]: https://cloud.google.com/vision/docs/landmark-tutorial
[vision_7]: https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/master/vision/textDetection.js
[vision_8]: https://cloud.google.com/vision/docs/text-tutorial

[predictionapi_1]: https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/master/prediction/hostedmodels.js
[predictionapi_2]: https://cloud.google.com/prediction/docs/developer-guide#predictionfromappengine
Expand Down
38 changes: 38 additions & 0 deletions test/vision/textDetection.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2016, Google, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

var test = require('ava');
var path = require('path');
var inputDir = path.resolve('../../vision/resources');
var textDetectionSample = require('../../vision/textDetection');

// End-to-end check: analyze the sample images directory, then query the index
// for a few words and expect at least one hit for the first word.
test.cb('should detect texts', function (t) {
  var searchWords = ['the', 'sunbeams', 'in'];

  // Verifies the lookup results and finishes the callback-style test.
  function onLookup(lookupErr, hits) {
    t.ifError(lookupErr);
    t.ok(hits.length > 0);
    t.ok(hits[0].length > 0);
    t.end();
  }

  // Verifies the analysis produced a response, then runs the lookup.
  function onAnalyzed(analyzeErr, textResponse) {
    t.ifError(analyzeErr);
    t.ok(Object.keys(textResponse).length > 0);
    textDetectionSample.lookup(searchWords, onLookup);
  }

  textDetectionSample.main(inputDir, onAnalyzed);
});
7 changes: 7 additions & 0 deletions vision/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,10 @@ Execute the sample:
Execute the sample:

node landmarkDetection "https://cloud-samples-tests.storage.googleapis.com/vision/water.jpg"

### Text detection sample

Execute the sample:

node textDetection analyze resources
node textDetection lookup the sunbeams in
2 changes: 2 additions & 0 deletions vision/not-a-meme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
I am not a meme. Don't fail if you accidentally include me in your Vision API
request, please.
10 changes: 4 additions & 6 deletions vision/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
"engines": {
"node": ">=0.10.x"
},
"scripts": {
"faceDetection": "node faceDetection.js",
"labelDetection": "node labelDetection.js",
"landmarkDetection": "node landmarkDetection.js"
},
"dependencies": {
"async": "^1.5.0",
"canvas": "^1.3.15",
"gcloud": "^0.32.0",
"canvas": "^1.3.15"
"natural": "^0.4.0",
"redis": "^2.6.0-2"
}
}
Binary file added vision/resources/bonito.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vision/resources/mountain.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vision/resources/no-text.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vision/resources/sabertooth.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vision/resources/succulents.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vision/resources/sunbeamkitties.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added vision/resources/wakeupcat.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
271 changes: 271 additions & 0 deletions vision/textDetection.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
// Copyright 2016, Google, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

// [START app]
// [START import_libraries]
var async = require('async');
var fs = require('fs');
var path = require('path');
var gcloud = require('gcloud')();
var natural = require('natural');
var redis = require('redis');
// Get a reference to the vision component
var vision = gcloud.vision();
// [END import_libraries]

/**
 * A small inverted index stored in two Redis databases.
 *
 * - `tokenClient` (db 0) maps each token to the set of filenames containing it.
 * - `docsClient` (db 1) maps each filename to the text extracted from it; an
 *   empty string marks a file that was checked and contained no text.
 *
 * The server address is read from REDIS_HOST / REDIS_PORT, falling back to
 * 127.0.0.1:6379. Connection errors are logged, not thrown.
 */
function Index() {
  // Connect to a redis server.
  var TOKEN_DB = 0;
  var DOCS_DB = 1;
  var PORT = process.env.REDIS_PORT || '6379';
  var HOST = process.env.REDIS_HOST || '127.0.0.1';

  function logRedisError(err) {
    console.error('ERR:REDIS: ' + err);
  }

  this.tokenClient = redis.createClient(PORT, HOST, {
    db: TOKEN_DB
  }).on('error', logRedisError);
  this.docsClient = redis.createClient(PORT, HOST, {
    db: DOCS_DB
  }).on('error', logRedisError);
}

/**
 * Closes both Redis connections.
 */
Index.prototype.quit = function () {
  this.tokenClient.quit();
  this.docsClient.quit();
};

/**
 * Indexes a document: records the filename under every token of the text and
 * stores the full text under the filename.
 *
 * @param {string} filename Name of the file the text came from.
 * @param {string} document Text extracted from the file.
 * @param {function} callback Called with (err, results) once every write has
 *     completed or the first one has failed.
 */
Index.prototype.add = function (filename, document, callback) {
  var self = this;
  var PUNCTUATION = ['.', ',', ':', ''];
  var tokenizer = new natural.WordTokenizer();
  var tokens = tokenizer.tokenize(document);

  // TODO: Remove stop words

  var tasks = tokens.filter(function (token) {
    return PUNCTUATION.indexOf(token) === -1;
  }).map(function (token) {
    // lookup() lowercases its query words, so tokens must be stored
    // lowercased as well or capitalized words could never be found.
    var key = token.toLowerCase();
    return function (cb) {
      self.tokenClient.sadd(key, filename, cb);
    };
  });

  tasks.push(function (cb) {
    // The document text belongs in the docs database: that is where
    // documentIsProcessed() looks to decide whether a file can be skipped.
    self.docsClient.set(filename, document, cb);
  });

  async.parallel(tasks, callback);
};

/**
 * Finds the files containing each of the given words.
 *
 * @param {string[]} words Words to search for; matched case-insensitively.
 * @param {function} callback Called with (err, hits), where hits[i] is the
 *     list of filenames recorded for words[i].
 */
Index.prototype.lookup = function (words, callback) {
  var self = this;
  var tasks = words.map(function (word) {
    word = word.toLowerCase();
    return function (cb) {
      self.tokenClient.smembers(word, cb);
    };
  });
  async.parallel(tasks, callback);
};

/**
 * Reports whether a file has already been analyzed.
 *
 * @param {string} filename Name of the file to check.
 * @param {function} callback Called with (err, processed). `processed` is
 *     true when the docs database holds an entry for the file, including the
 *     empty-string marker for "no text found", and false otherwise.
 */
Index.prototype.documentIsProcessed = function (filename, callback) {
  this.docsClient.GET(filename, function (err, value) {
    if (err) {
      return callback(err);
    }
    if (value) {
      console.log(filename + ' already added to index.');
      callback(null, true);
    } else if (value === '') {
      // An empty string is the marker written by setContainsNoText().
      console.log(filename + ' was already checked, and contains no text.');
      callback(null, true);
    } else {
      callback(null, false);
    }
  });
};

/**
 * Marks a file as analyzed but containing no text, by storing an empty
 * string under its name in the docs database.
 *
 * @param {string} filename Name of the file to mark.
 * @param {function} callback Called with the result of the Redis write.
 */
Index.prototype.setContainsNoText = function (filename, callback) {
  this.docsClient.set(filename, '', callback);
};

/**
 * Looks up each word in a freshly opened index, prints the matching
 * filenames, and passes the raw hits on to the caller.
 *
 * @param {string[]} words Words to search for.
 * @param {function} callback Called with (err) on failure, or with
 *     (null, hits) where hits[i] holds the filenames found for words[i].
 */
function lookup(words, callback) {
  var index = new Index();

  // Handles the index response; the connections are closed before anything
  // else happens, whether or not the lookup succeeded.
  function report(err, hits) {
    index.quit();
    if (err) {
      callback(err);
      return;
    }
    words.forEach(function (word, position) {
      var filenames = hits[position].join(', ');
      console.log('hits for \"' + word + '\":', filenames);
    });
    callback(null, hits);
  }

  index.lookup(words, report);
}

// [START extract_descrs]
/**
 * Joins the `desc` property of every entry into a single string.
 * Entries with a missing or empty `desc` contribute nothing.
 *
 * @param {object[]} texts Entries to read `desc` from.
 * @returns {string} The concatenated descriptions, in order.
 */
function extractDescription(texts) {
  return texts.reduce(function (document, text) {
    return document + (text.desc || '');
  }, '');
}

/**
 * Records the outcome of text detection for one file in the index.
 *
 * When `texts` has a non-zero length its descriptions are concatenated and
 * added to the index; otherwise the file is marked as containing no text.
 *
 * @param {string} filename Name of the analyzed file.
 * @param {object} index Index exposing add() and setContainsNoText().
 * @param {object[]} texts Detection results for the file.
 * @param {function} callback Passed through to the index operation.
 */
function extractDescriptions(filename, index, texts, callback) {
  if (!texts.length) {
    console.log(filename + ' had no discernable text.');
    index.setContainsNoText(filename, callback);
    return;
  }
  index.add(filename, extractDescription(texts), callback);
}
// [END extract_descrs]

// [START get_text]
/**
 * Runs Vision API text detection on a batch of files and indexes the results.
 *
 * @param {object} index Index that receives the extracted text.
 * @param {string[]} inputFiles Paths of the files to analyze.
 * @param {function} callback Called with (err) on failure, or with
 *     (null, textResponse), where textResponse maps each filename that did
 *     not report an API error to 1 (its detection result was an array) or 0.
 */
function getTextFromFiles(index, inputFiles, callback) {
  // Make a call to the Vision API to detect text
  vision.detectText(inputFiles, { verbose: true }, function (err, detections) {
    if (err) {
      callback(err);
      return;
    }

    var textResponse = {};
    var indexingTasks = [];

    inputFiles.forEach(function (filename, position) {
      var detection = detections[position];
      if (detection.error) {
        // Files the API rejected are reported and left out of the result.
        console.log('API Error for ' + filename, detection.error);
        return;
      }
      textResponse[filename] = Array.isArray(detection) ? 1 : 0;
      indexingTasks.push(function (done) {
        extractDescriptions(filename, index, detection, done);
      });
    });

    async.parallel(indexingTasks, function (indexingErr) {
      if (indexingErr) {
        callback(indexingErr);
        return;
      }
      callback(null, textResponse);
    });
  });
}

// Run the example
/**
 * Runs the example: scans a directory, skips sub-directories and files the
 * index already knows about, and sends the remaining files for text
 * detection and indexing.
 *
 * @param {string} inputDir Directory containing the files to analyze.
 * @param {function} callback Called with (err, result). On success `result`
 *     is the object produced by getTextFromFiles(), or an empty object when
 *     there was nothing left to process.
 */
function main(inputDir, callback) {
  var index = new Index();

  async.waterfall([
    // Scan the specified directory for files
    function (cb) {
      fs.readdir(inputDir, cb);
    },
    // Separate directories from files
    function (files, cb) {
      async.parallel(files.map(function (file) {
        var filename = path.join(inputDir, file);
        return function (cb) {
          fs.stat(filename, function (err, stats) {
            if (err) {
              return cb(err);
            }
            if (!stats.isDirectory()) {
              return cb(null, filename);
            }
            // Directories yield undefined and are filtered out in the next step.
            cb();
          });
        };
      }), cb);
    },
    // Figure out which files have already been processed
    function (allImageFiles, cb) {
      var tasks = allImageFiles.filter(function (filename) {
        return filename;
      }).map(function (filename) {
        return function (cb) {
          index.documentIsProcessed(filename, function (err, processed) {
            if (err) {
              return cb(err);
            }
            if (!processed) {
              // Forward this filename on for further processing
              return cb(null, filename);
            }
            // Processed files yield undefined and are filtered out below.
            cb();
          });
        };
      });
      async.parallel(tasks, cb);
    },
    // Analyze any remaining unprocessed files
    function (imageFilesToProcess, cb) {
      imageFilesToProcess = imageFilesToProcess.filter(function (filename) {
        return filename;
      });
      if (imageFilesToProcess.length) {
        return getTextFromFiles(index, imageFilesToProcess, cb);
      }
      console.log('All files processed!');
      // Yield an empty result rather than undefined so callers can always
      // treat the result as an object (e.g. Object.keys(result)).
      cb(null, {});
    }
  ], function (err, result) {
    // Close the Redis connections whether or not the run succeeded.
    index.quit();
    callback(err, result);
  });
}
// [END get_text]

// [START run_application]
// Command-line entry point; only runs when this file is executed directly.
//   node textDetection analyze <dir>
//   node textDetection lookup <word> ...
if (module === require.main) {
  var generalError = 'Usage: node textDetection <command> <arg> ...\n\n' +
    '\tCommands: analyze, lookup';
  var args = process.argv.slice(2);
  var command = args.shift();

  switch (command) {
    case 'analyze':
      if (args.length === 0) {
        console.log('Usage: node textDetection analyze <dir>');
        process.exit(1);
      }
      main(args[0], console.log);
      break;
    case 'lookup':
      if (args.length === 0) {
        console.log('Usage: node textDetection lookup <word> ...');
        process.exit(1);
      }
      lookup(args, console.log);
      break;
    default:
      // Reached for unknown commands and when no command was given at all
      // (command is then undefined).
      console.log(generalError);
      process.exit(1);
  }
}
// [END run_application]
// [END app]

// Public API of this module: the Redis-backed index constructor, the word
// lookup helper, the Vision API batch call, and the directory-analysis entry
// point.
exports.Index = Index;
exports.lookup = lookup;
exports.getTextFromFiles = getTextFromFiles;
exports.main = main;

0 comments on commit f5c0f0e

Please sign in to comment.