-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
167 lines (118 loc) · 4.02 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
const fs = require('fs'),
async = require('async'),
request = require('request'),
cheerio = require('cheerio'),
Client = require('ssh2').Client,
config = require('./json/config'),
login = require('./json/ftp')
// Upload mode is enabled when exactly one extra command-line argument is
// given (argv = [node, script, arg]); writeJson() checks this flag.
const upload = process.argv.length === 3

console.log('\n* Start Process *\n')

// Number of API listing pages requested so far; bumped by requestJson().
let nthPage = 0
/**
 * Fetch one listing page of the configured section (`config.dossier`) from
 * the liberation.fr API and pass its results on to parseJson().
 *
 * Every call increments the module-level `nthPage` counter first. The
 * counter is used for logging and, when `nextPage` is truthy, as the `page`
 * query parameter.
 *
 * @param {boolean} nextPage - falsy: request the listing without a `page`
 *   parameter; truthy: request page number `nthPage`.
 * @throws {Error} from inside the request callback when the request fails
 *   or the response body has no `results` array.
 */
function requestJson (nextPage) {
  // A page with at least this many results makes parseJson() consider
  // fetching a further page (presumably the API's page size - confirm).
  const FULL_PAGE_SIZE = 20

  nthPage++

  const baseUrl = `https://www.liberation.fr/mapi/sections/${config.dossier}/contents/`
  const urlJson = nextPage
    ? `${baseUrl}?page=${nthPage}&format=json`
    : `${baseUrl}?format=json`

  request.get({ url: urlJson, json: true }, (err, res, json) => {
    if (err) throw err

    // Fail with an explicit message instead of a TypeError on `json.results`
    // when the server answers with an error payload or a non-JSON body.
    if (!json || !Array.isArray(json.results)) {
      const status = res ? res.statusCode : 'unknown'
      throw new Error(`Unexpected API response for ${urlJson} (HTTP status ${status}): no "results" array`)
    }

    const titles = json.results.map(v => v.title)
    const nth = titles.length

    console.log(`=> Get json from folder ${config.dossier} via API page ${nthPage}\n`)
    console.log(`=> Result from ${nth} articles :\n`)
    console.log(' * ' + titles.join('\n * '))

    parseJson(json.results, nth >= FULL_PAGE_SIZE)
  })
}
// Accumulates the scraped articles that have a photo; parseJson() pushes
// into it and writeJson() serializes it.
const datas = []

// Start the pipeline with the first listing page (no explicit page number).
requestJson(false)
/**
 * Scrape the ARTICLE entries of one API listing page, one after another,
 * and append those that have a photo to the module-level `datas` array.
 *
 * Once the whole page has been processed, either the next listing page is
 * requested (requestJson(true)) or the output is written (writeJson()).
 *
 * @param {Object[]} results - entries of the `results` array of the API response.
 * @param {boolean} isNext - when truthy, another page is requested as long as
 *   fewer than 40 articles have been collected.
 * @throws {Error} from the final series callback when an article request failed.
 */
function parseJson (results, isNext) {
  // Pagination stops once this many articles have been collected.
  const MAX_COLLECTED = 40

  console.log('\n=> Parse json\n')

  async.eachSeries(results, (v, callback) => {
    if (v.type !== 'ARTICLE') return callback()

    const titre = v.title
    const callPhoto = v.call_photo

    if (!callPhoto) {
      // Articles without a photo are never kept, so the page fetch is
      // skipped; both log lines are still emitted so the console output is
      // the same as when the page was fetched and then discarded.
      console.log(' * Scrap data from article ' + titre)
      console.log(' * Nop This article don\'t have photo')
      return callback()
    }

    // Drop the original query string and request a fixed 3:2 rendition.
    const photo = callPhoto.url.split('?')[0] + '?width=975&ratio_y=2&ratio_x=3'

    request(v.url, (err, res, html) => {
      // Hand the error to async so the series stops and the final callback
      // below is the single place where failures surface.
      if (err) return callback(err)

      console.log(' * Scrap data from article ' + titre)

      const $ = cheerio.load(html)
      const auteur = $('.author').find('a').html()
      const date = $('.date').html()

      // Remove these elements before the article body markup is read.
      $('.essential, .others, .note, .authors').remove()
      const texte = $('.article-body').html()

      // Key order is kept stable because it determines the serialized JSON.
      datas.push({
        id: v.id,
        publication_date: v.publication_date,
        photo,
        legende: callPhoto.caption,
        credit: callPhoto.credits,
        titre,
        chapo: v.subtitle,
        auteur,
        date,
        texte
      })
      console.log(' * Ok Push data to json')
      callback()
    })
  }, err => {
    if (err) throw err

    console.log('\n=> Datas of page ' + nthPage + ' have been scraped successfuly\n')

    if (isNext && datas.length < MAX_COLLECTED) requestJson(true)
    else writeJson()
  })
}
/**
 * Serialize the collected articles together with the dossier metadata taken
 * from `config`, write the result to ./json/datas.json, copy it to
 * ./dist/assets/datas.json and, when `upload` is set, start uploadJson().
 *
 * The copy is awaited before success is logged and before the upload starts,
 * and a failing copy is reported through its callback.
 *
 * @throws {Error} from the fs callbacks when writing or copying fails.
 */
function writeJson () {
  const sourcePath = './json/datas.json'
  const distPath = './dist/assets/datas.json'

  console.log('\n=> All ' + datas.length + ' datas have been scraped successfuly\n')

  const json = {
    slug: config.slug,
    titre: config.titre,
    chapo: config.chapo,
    auteur: config.auteur,
    max_articles: config.max_articles,
    order: config.order,
    articles: datas
  }

  fs.writeFile(sourcePath, JSON.stringify(json), err => {
    if (err) throw err
    console.log('=> Write file in ./json/datas.json')

    // fs.copyFile reports completion and errors through its callback, unlike
    // an unattended stream pipe whose 'error' events nobody listens to.
    fs.copyFile(sourcePath, distPath, copyErr => {
      if (copyErr) throw copyErr
      console.log('=> Copy file in ./dist/assets/datas.json\n')
      console.log('* End Process *\n')
      if (upload) uploadJson()
    })
  })
}
/**
 * Upload ./json/datas.json over SFTP to
 * ./<config.date>/<config.slug>/assets/datas.json on the server described by
 * `login`, then terminate the process with exit code 0 once the remote file
 * has been closed.
 *
 * @throws {Error} from the connection, SFTP or stream callbacks when the
 *   connection, the SFTP session or the transfer fails; the SSH connection is
 *   asked to close before the error is rethrown.
 */
function uploadJson () {
  console.log('=> Upload json to server\n')

  const conn = new Client()

  // Surface connection failures explicitly rather than through an
  // unhandled 'error' event.
  conn.on('error', err => {
    throw err
  })

  conn.on('ready', () => {
    console.log(' * Start Client')

    conn.sftp((err, sftp) => {
      if (err) {
        // No SFTP session exists in this branch, so only the connection is
        // closed - and it has to happen before the throw to run at all.
        conn.end()
        throw err
      }

      // Shared failure path for both ends of the transfer.
      const abortTransfer = streamErr => {
        sftp.end()
        conn.end()
        throw streamErr
      }

      const readStreamAll = fs.createReadStream('./json/datas.json')
      const writeStreamAll = sftp.createWriteStream('./' + config.date + '/' + config.slug + '/assets/datas.json')

      readStreamAll.on('error', abortTransfer)
      writeStreamAll.on('error', abortTransfer)

      writeStreamAll.on('close', () => {
        console.log(' * File ' + config.slug + '.json transfered successfully!')
        sftp.end()
        conn.end()
        console.log(' * Stop Client')
        console.log('\n* End Process *\n')
        process.exit(0)
      })

      readStreamAll.pipe(writeStreamAll)
    })
  }).connect(login)
}