-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcrawler-jianshu.js
119 lines (94 loc) · 3.42 KB
/
crawler-jianshu.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*******
Promise
https://www.promisejs.org/
The "promise" package is a superset of ES6 Promises designed to have
readable, performant code and to provide just the extensions that are
absolutely necessary for using promises today.
********/
const http = require("http");
const Promise = require("promise"); // "promise" package; this binding shadows the ES6 built-in Promise
const cheerio = require("cheerio");
// Prefix shared by every article URL; an article id is appended to it.
const baseUrl = "http://www.jianshu.com/p/";
// Ids of the articles to crawl. Each id appears exactly once so that no
// article is downloaded and reported twice.
const articleIds = [
  'd05e902af678',
  '89f1d4245b20',
  'f2f5aca71fec',
  '5b4c2f4c7a52',
  '23454b4c899d',
  '2f3bc2598dc5',
  '3d4e8e2592a8',
  '6958f99db769',
  'a7d6077187d9',
  '8e28be0e7ab1',
  '95901615f322',
  '3aa7de527e33',
  'd36fb31f9cff'
];
const nodemailer = require('nodemailer');
// One pending download per article id, in the same order as articleIds.
// getPageAsync is a hoisted function declaration, so it can be called here.
const articlePromiseArray = articleIds.map(function (id) {
  return getPageAsync(baseUrl + id);
});
/**
 * Download the body of `url` with http.get and resolve with it as a string.
 *
 * @param {string} url - URL passed straight to http.get.
 * @returns {Promise<string>} Resolves with the whole response body once the
 *   response emits "end"; rejects with the emitted error if either the
 *   request or the response stream emits "error".
 */
function getPageAsync (url) {
  return new Promise(function (resolve, reject) {
    http.get(url, function (res) {
      let html = "";
      // Decode at the stream level: without this every chunk is a Buffer that
      // is stringified on its own, so a multi-byte UTF-8 character split
      // across two chunks would be turned into replacement characters.
      res.setEncoding("utf8");
      res.on("data", function (chunk) {
        html += chunk;
      });
      res.on("end", function () {
        resolve(html);
      });
      // An "error" event with no listener is thrown as an uncaught exception,
      // so route response-stream failures into the promise as well.
      res.on("error", reject);
    }).on("error", function (e) {
      console.log("获取信息出错!");
      reject(e);
    });
  });
}
// Wait for every article download, print a summary of each page, then send a
// notification e-mail. The trailing catch handles both a failed download and
// an exception thrown while summarising or preparing the mail.
Promise.all(articlePromiseArray).then(function onFulfilled (pages) {
  let mailContent = '';
  pages.forEach(function (html) {
    printInfo(filterArticles(html));
  });

  // host, user and pass are blank placeholders that have to be filled in
  // before any mail can actually be delivered.
  const transporter = nodemailer.createTransport({
    host: '',
    // Use SSL (secure mode, keeps the traffic from being intercepted).
    // NOTE(review): the option name depends on the installed nodemailer
    // version; newer releases document `secure` instead - confirm.
    secureConnection: true,
    auth: {
      user: '',
      pass: ''
    }
  });

  // mailContent has to be composed by the reader; the assignment to
  // mailContent has been removed here, so the body is empty as written.
  const mailOptions = {
    from: '', // sender address
    to: '', // list of receivers
    subject: 'Crawler-jianshu ✔', // Subject line
    text: mailContent, // plaintext body
    html: '<b>' + mailContent + '</b>' // html body
  };

  transporter.sendMail(mailOptions, function (error, info) {
    if (error) {
      console.log(error);
    } else {
      console.log('Message sent: ' + info.response);
    }
  });
}).catch(function onRejected (e) {
  console.log(e);
});
/**
 * Extract the summary fields of one article page.
 *
 * @param {string} html - Markup of the article page, handed to cheerio.load.
 * @returns {{title: string, publishTime: string, textNum: (string|undefined)}}
 *   title: text of ".article .title"; publishTime: text of ".publish-time";
 *   textNum: second space-separated token of the ".wordage" text, which is
 *   undefined when that text contains no space.
 */
function filterArticles (html) {
  const $ = cheerio.load(html);

  // View, comment and like counts (.views-count, .comments-count,
  // .likes-count) are intentionally not collected at the moment.
  return {
    title: $(".article .title").text(),
    publishTime: $('.publish-time').text(),
    textNum: $('.wordage').text().split(' ')[1]
  };
}
/**
 * Print a summary of one article to the console.
 *
 * @param {{title: string, publishTime: string, textNum: (string|undefined)}} info
 *   Article fields; title and textNum are printed with all whitespace
 *   removed, publishTime is printed as given.
 * @returns {undefined}
 */
function printInfo (info) {
  // Remove every whitespace run. A missing value becomes "" so that a page
  // without a word count is still reported instead of raising a TypeError.
  function compact (value) {
    return value == null ? "" : String(value).replace(/\s+/g, "");
  }

  const title = compact(info.title);
  const publishTime = info.publishTime;
  const textNum = compact(info.textNum);

  console.log("=========printInfo BEGIN=========" + "\n");
  console.log("-- 【文章题目】" + title + "\n");
  console.log(" 【" + title + "】 发布时间:" + publishTime + "\n");
  console.log(" 【" + title + "】 字数总计:" + textNum + "\n");
  console.log("=========printInfo DONE=========");
  console.log("\n");
}