Merge pull request #124 from touzbi/master
'skipDuplicates' bug + test case
paulvalla committed Nov 12, 2014
2 parents 3b63a0a + e0f228f commit 3cc7b0b
Showing 2 changed files with 31 additions and 20 deletions.
23 changes: 10 additions & 13 deletions lib/crawler.js
@@ -262,22 +262,19 @@ Crawler.prototype._makeCrawlerRequest = function _makeCrawlerRequest (options) {
 
 Crawler.prototype._executeCrawlerRequest = function _executeCrawlerRequest (options) {
     var self = this;
+    var cacheData = self.cache[options.uri];
 
-    if (useCache(options)) {
-
-        var cacheData = self.cache[options.uri];
-
-        //If a query has already been made to self URL, don't callback again
-        if (cacheData) {
+    //If a query has already been made to self URL, don't callback again
+    if (useCache(options) && cacheData) {
 
-            // Make sure we actually have cached data, and not just a note
-            // that the page was already crawled
-            if (_.isArray(cacheData)) {
-                self._onContent(null, options, cacheData[0], true);
-            } else {
-                self.emit('pool:release', options);
-            }
+        // Make sure we actually have cached data, and not just a note
+        // that the page was already crawled
+        if (_.isArray(cacheData)) {
+            self._onContent(null, options, cacheData[0], true);
+        } else {
+            self.emit('pool:release', options);
+        }
 
     } else {
         self._buildHttpRequest(options);
     }
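
For context, here is a minimal sketch of the control-flow change. The stub bodies (useCache, serveFromCache, buildRequest) are hypothetical stand-ins for the crawler internals, not the project's code; only the branching mirrors the diff. Before the fix, a URI that was not yet cached fell through both branches whenever caching or skipDuplicates was enabled, so its request was never issued:

// Hypothetical stand-ins for the real crawler internals.
function useCache(options) { return !!(options.cache || options.skipDuplicates); }
function serveFromCache(data) { console.log('served from cache:', data); }
function buildRequest(options) { console.log('requesting', options.uri); }

// Old shape: buildRequest was only reachable when useCache(options)
// was false, so an uncached URI with skipDuplicates on went nowhere.
function executeBefore(options, cache) {
    if (useCache(options)) {
        var cacheData = cache[options.uri];
        if (cacheData) {
            serveFromCache(cacheData);
        }
        // uncached URI: silently dropped -- the 'skipDuplicates' bug
    } else {
        buildRequest(options);
    }
}

// New shape: the combined condition sends uncached URIs to the
// request branch even when caching is enabled.
function executeAfter(options, cache) {
    var cacheData = cache[options.uri];
    if (useCache(options) && cacheData) {
        serveFromCache(cacheData);
    } else {
        buildRequest(options);
    }
}

executeBefore({ uri: 'http://example.com', skipDuplicates: true }, {}); // prints nothing
executeAfter({ uri: 'http://example.com', skipDuplicates: true }, {});  // requesting http://example.com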
28 changes: 21 additions & 7 deletions tests/cacheOption.test.js
@@ -31,12 +31,26 @@ describe('Cache features tests', function() {
         });
     });
 
-    //describe('Skip Duplicate', function() {
-    //    afterEach(function () {
-    //        c = {};
-    //    });
-    //    it('should skip previous crawled urls', function (done) {});
-    //    it('should not skip one single url', function (done) {});
-    //});
+    describe('Skip Duplicate active', function() {
+        afterEach(function () {
+            c = {};
+        });
+
+        it('should not skip one single url', function (done) {
+            c = new Crawler({
+                jquery: false,
+                skipDuplicates: true,
+                callback: function (error, result) {
+                    expect(error).to.be.null;
+                    expect(result.statusCode).to.equal(200);
+                    done();
+                },
+            });
+
+            c.queue('http://' + httpbinHost + '/status/200');
+        });
+
+        //it('should skip previous crawled urls', function (done) {});
+    });
 });
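
The second case remains a TODO in this commit. A sketch of what it might look like, reusing the suite's Crawler, httpbinHost, and expect helpers; the double-queue, the counter, and the setTimeout delay are illustrative assumptions, not part of the commit:

it('should skip previous crawled urls', function (done) {
    var callbackCount = 0;
    c = new Crawler({
        jquery: false,
        skipDuplicates: true,
        callback: function (error, result) {
            expect(error).to.be.null;
            callbackCount++; // count every callback invocation
        },
    });

    // Queue the same URL twice; with skipDuplicates enabled the
    // callback should fire only once.
    c.queue('http://' + httpbinHost + '/status/200');
    c.queue('http://' + httpbinHost + '/status/200');

    // Allow time for a (wrongly) duplicated request to complete,
    // then assert that only one callback ran.
    setTimeout(function () {
        expect(callbackCount).to.equal(1);
        done();
    }, 1000);
});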
