-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgetGenosetCriteria.js
35 lines (27 loc) · 971 Bytes
/
getGenosetCriteria.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
const request = require('request')
const cheerio = require('cheerio')
const config = require('./config')
module.exports = (partialUrl) => {
const url = `${config.baseUrl}${partialUrl}/criteria`
const genosetId = partialUrl.split('/')[2]
return new Promise((resolve, reject) => {
request(url, function (error, response, html) {
if (error) {
reject(`${genosetId} - ${url}:: ${error}`)
return
}
var $ = cheerio.load(html)
var json = {genoset: genosetId, criteria: ''}
json.criteria = $('#mw-content-text').text()
// remove all comments
json.criteria = json.criteria.split('\n').map((line) => line.split('#')[0]).join('')
// remove all whitespace
json.criteria = json.criteria.replace(/\s+/g, '')
// reject any page that has no text
if (json.criteria.includes('Thereiscurrentlynotextinthispage')) {
reject('No text on page')
}
resolve(json)
})
})
}