-
Notifications
You must be signed in to change notification settings - Fork 2.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
async.forEach { async.waterfall } #513
Comments
Hmmm.... I don't see how I can upload my example script and data here... Let me know where I should send them to you |
@TheRoSS You can see it in this https://help.github.com/articles/github-flavored-markdown#fenced-code-blocks |
Thanks. Here is the code. But how to upload example data? // 1. node script - launches version without inner waterfall
// 2. node script any_arg - launches version with inner waterfall
var fs = require("fs");
var util = require("util");
var async = require("async");
var moment = require("moment");
// read data from file
var start = moment();
var json = fs.readFileSync("./sendResult.json");
console.log("readFile:", moment().diff(start, "seconds", true));
// JSON parse
start = moment();
var data = JSON.parse(json);
console.log("JSON.parse:", moment().diff(start, "seconds", true));
// count number of fields
start = moment();
var counter = 1;
count(data);
console.log("COUNT:", counter, moment().diff(start, "seconds", true));
// MAIN CODE
start = moment();
if (process.argv.length == 2) {
toModels(data, function (err, data) {
console.log("toModels:", moment().diff(start, "seconds", true));
});
} else {
toModelsOld(data, function (err, data) {
console.log("toModels:", moment().diff(start, "seconds", true));
});
}
// toModels, no inner waterfall
function toModels(data, callback) {
if (data instanceof Array) {
async.map(data, function (dataSet, next) {
toModels(dataSet, next);
}, callback);
} else if (data instanceof Object) {
var convertedData = {};
async.each(Object.keys(data), function (key, next) {
toModels(data[key], function (err, result) {
convertedData[key] = result;
next(err);
});
}, function (err) {
callback(err, convertedData);
});
} else {
callback(null, data);
}
}
// toModels, with inner waterfall
function toModelsOld(data, callback) {
if (data instanceof Array) {
async.map(data, function (dataSet, cb) {
toModelsOld(dataSet, cb);
}, callback);
} else if (data instanceof Object) {
var convertedData = {};
async.forEach(Object.keys(data), function (prop, cb) {
async.waterfall([
function (next) {
toModelsOld(data[prop], next);
},
function (dataSet, next) {
convertedData[prop] = dataSet;
next(null, convertedData);
}
], cb);
}, function (err) {
if (err) {
callback(err);
} else {
callback(null, convertedData);
}
});
} else {
callback(null, data);
}
}
// count fields
function count(o) {
if (Array.isArray(o)) {
o.forEach(function (item) {
counter++;
count(item);
});
} else if (o instanceof Object) {
Object.keys(o).forEach(function (key) {
counter++;
count(o[key]);
});
}
} |
The example data file is just json data file of ~3 mb in size. You can use any data. Here is my results:
async 0.7.0 (Windows 8, Intel Core i5 3210M, 2.5 GHz)
async 0.2.9 (Ubuntu 13.10, Intel Core i5 460M, 2.53 GHz)
async 0.7.0 (Ubuntu 13.10, Intel Core i5 460M, 2.53 GHz)
|
This does appear to be a major performance regression, but unless I'm mistaken, your solution is just a very convoluted way to implement Lodash.cloneDeep() |
As I understand Lodash.cloneDeep() is synchronous but my solution should be asynchronous to load some data from database. Since the database code is not relevant to async performance problem I removed it from this example. I think async inside async might lead to some performance regression. But two orders! |
Can we get this down to a more simple benchmark? There are many variables at play here. |
Performance regression test below. Async 0.7 handles this example. I was unable to hang up it with this simplified test. Results: Performance regression is 35 times. Is it ok? var async = require("async");
var moment = require("moment");
// start timer
var start = moment();
// prepare data
var data = [];
for (var i = 0; i < 200000; i++) {
data.push(i);
}
// call function
f1(data, function (err, data) {
console.log("Finished in", moment().diff(start, "seconds", true));
});
function f1(data, callback) {
async.each(data, function (item, next) {
next();
}, callback);
}
function f2(data, callback) {
async.each(data, function (item, next) {
async.waterfall([
function (cb) {
cb();
}
], next);
}, callback);
} |
@TheRoSS thanks for this, I'll take a look soon |
I have a way to vastly simplify Between 0.2.9 and 0.7, a |
Waterfall should be faster in 2.0, and without a deferral between the steps. (#815) This should solve this specific performance regression. |
I have too versions of function. They do the same things, load and parse data, but first do it with only async.each, but second do the same handling with async.each and inner async.waterflow
I supposed that there should be difference between them in timings, but not 100 times!
Even more, I tested the script on async 0.2.9
When I tried to launch it under asyn 0.7, the version with waterfall didn't work at all
And the very interesting thing that if I set timings on function execution, the function execution is not very different. The main lags appear between function calls. So I concluded that async perform some very heavy parallel stuff between toModels function calls.
The text was updated successfully, but these errors were encountered: