forked from lessfish/funny-node
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
66 lines (51 loc) · 1.35 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
// 多页面抓取
// async 模块控制并发量
var cheerio = require('cheerio')
, superagent = require('superagent')
, eventproxy = require('eventproxy')
, express = require('express')
, async = require('async');
/**
 * Build the list of URLs to crawl.
 *
 * @returns {string[]} 100 URLs: the statistic.php address with `pid`
 *   running from 1000 up to and including 1099, in ascending order.
 */
function getUrls() {
  var prefix = 'http://acm.hdu.edu.cn/statistic.php?pid=';
  var list = [];
  var pid = 1000;

  while (pid < 1100) {
    list.push(prefix + pid);
    pid += 1;
  }

  return list;
}
/**
 * Parse a fetched page and extract the piece of content we care about.
 *
 * @param {string} page - HTML source of the page.
 * @returns {*} Whatever `.html()` yields for the element at zero-based
 *   index 6 of the `.table_text td` selection (the value the crawler
 *   collects as "postTime").
 */
function analyze(page) {
  var cells = cheerio.load(page)('.table_text td');
  return cells.eq(6).html();
}
/**
 * Fetch one page and hand its extracted content to a Node-style callback.
 *
 * @param {string} url - Address to request with superagent.
 * @param {function(?Error, *=)} callback - Called exactly once: with the
 *   error as first argument when the request or the parsing fails,
 *   otherwise with `null` and the value returned by `analyze`.
 */
function fetchUrl(url, callback) {
  superagent.get(url)
    .end(function (err, res) {
      // A failed request may come without a response object; reading
      // `res.text` in that case would throw inside this callback and the
      // caller would never be notified.
      if (err) {
        return callback(err);
      }

      var postTime;
      try {
        // Extract the data we need from the page.
        postTime = analyze(res.text);
      } catch (parseErr) {
        return callback(parseErr);
      }

      // Kept outside the try block so an exception thrown by the callback
      // itself is not mistaken for a parse failure (which would invoke the
      // callback a second time).
      callback(null, postTime);
    });
}
// Application entry point: one route that runs the crawl, then the listener.
var app = express();

// GET / — crawl every URL from getUrls() and respond with the collected
// results.
app.get('/', function (req, res, next) {
  var urls = getUrls();

  // async.mapLimit(collection, limit, iteratee, done):
  //   - runs the iteratee for every URL with the limit set to 5,
  //   - calls `done` once all of them have finished, or with `err` set
  //     when an iteratee reported an error.
  async.mapLimit(urls, 5, function (url, callback) {
    fetchUrl(url, callback);
  }, function (err, result) {
    if (err) {
      // Hand the failure to Express's error handling rather than answering
      // 200 with an incomplete result.
      return next(err);
    }
    res.send(result);
  });
});

// Start the HTTP server on port 3000.
app.listen(3000, function () {
  console.log('app is listening at port 3000');
});