This repository has been archived by the owner on Mar 12, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 119
钢材价格爬虫 #48
Comments
添加定时任务
const cheerio = require('cheerio')
const tableParser = require('cheerio-tableparser')
const rp = require('request-promise')
const schedule = require('node-schedule')
// Handle of the scheduled job; assigned below.
let scheduleWork = null
// Date keys ("M-DD") for which prices have already been fetched in this process.
const complete = new Set()
/**
 * Scheduled job registered with the rule {hour: 12, minute: 30}.
 * If today's date key is not yet in `complete`, it asks checkNew() for
 * today's price page and, when one exists, fetches the prices with getPrice(),
 * logs them and marks the day as complete.
 */
scheduleWork = schedule.scheduleJob({hour: 12, minute: 30}, async () => {
  // Read the clock once so month and day come from the same instant.
  const now = new Date()
  const today = `${now.getMonth() + 1}-${zeroPadding(now.getDate())}`
  console.log(complete, complete.has(today))
  if (complete.has(today)) {
    // The original logged an undefined `date` variable here, which threw a ReferenceError.
    console.log(`today(${today}) is complete`)
    return
  }
  try {
    const link = await checkNew()
    if (link) {
      const res = await getPrice(link)
      // Only mark the day done after the prices were actually retrieved.
      complete.add(today)
      console.log(res)
    }
  } catch (err) {
    // A failed request or parse must not surface as an unhandled rejection;
    // the day stays out of `complete`, so a later run can try again.
    console.error(err)
  }
})
/**
 * Check whether the listing page already has an entry dated today.
 *
 * Downloads the listing page, takes the text of the first item in the list,
 * extracts the first "digits-digits" token from it and compares that token
 * with today's date formatted as `${month}-${zero-padded day}`.
 *
 * @returns {Promise<string|false>} absolute URL of today's price page, or
 *   false when the first item has no date, is not dated today, or has no link
 */
async function checkNew () {
  const listUrl = 'http://hq.zgw.com/hefei/jiancai.html'
  const listSelector = 'body > div.wrap > div.cslm_tit > div.hq_con > div.fl.lm_left > div.lm_list > ul:nth-child(2)'
  const body = await rp({ uri: listUrl })
  const $ = cheerio.load(body)
  const now = new Date()
  const today = `${now.getMonth() + 1}-${zeroPadding(now.getDate())}`
  const firstItemText = $(listSelector).children('li:first-child').text().trim()
  // match() returns null when the text contains no date (e.g. the page layout
  // changed or the list is empty); treat that as "no data for today".
  const dateMatch = firstItemText.match(/(\d+-\d+)/)
  if (!dateMatch || dateMatch[0] !== today) {
    return false
  }
  const link = $(`${listSelector} li:first-child a`).attr('href')
  // Without an href there is nothing to fetch; avoid returning "...comundefined".
  if (!link) {
    return false
  }
  return `http://hq.zgw.com${link}`
}
/**
 * Fetch a price page and turn its table into a list of records.
 *
 * @param {string} url link to the price page
 * @returns {Promise<Array<{name: *, size: *, price: *}>>} one record per table
 *   row after the first, built from columns 0 (name), 1 (size) and 4 (price)
 */
async function getPrice (url) {
  const tableSelector = 'body > div.wrap > div.cslm_tit > div.hq_con > div.fl.lm_left > div > div.lm_m > div.lm_mt > div.article > table'
  const html = await rp({ uri: url })
  const $ = cheerio.load(html)

  // Column 4 is parsed with markup kept (noHtml = false) because the price is
  // read from each cell's `type` data value rather than from its text.
  const rawPrices = spiderTable(tableSelector, $, false)[4]
  rawPrices.shift() // drop the first row (presumably the header - confirm)
  const prices = rawPrices.map(cell => $(cell).data().type)

  // Name and size are taken from a second parse with HTML stripped.
  const [names, sizes] = spiderTable(tableSelector, $, true)
  names.shift()
  sizes.shift()

  // Build records by index so every row position yields an object.
  return Array.from({ length: names.length }, (_, row) => ({
    name: names[row],
    size: sizes[row],
    price: prices[row]
  }))
}
/**
 * Left-pad a single-character number with one zero.
 * @param {number} num value to format
 * @returns {string} `num` as a string, prefixed with "0" when its string
 *   form is exactly one character long; otherwise the string form unchanged
 */
function zeroPadding (num) {
  const text = num.toString()
  return text.length === 1 ? `0${text}` : text
}
/**
 * Parse the table matched by a selector using cheerio-tableparser.
 * @param {string} tableSelector selector of the table to parse
 * @param {function} $ cheerio instance loaded with the page
 * @param {boolean} noHtml whether HTML tags should be stripped from cells
 *   (forwarded as the third argument of parsetable)
 * @returns {Array} result of `parsetable(false, false, noHtml)`; callers in
 *   this file index it by column number
 */
function spiderTable (tableSelector, $, noHtml) {
  // Install the parsetable() method on this cheerio instance before using it.
  tableParser($)
  return $(tableSelector).parsetable(false, false, noHtml)
}
Sign up for free
to subscribe to this conversation on GitHub.
Already have an account?
Sign in.
后期整合进定时服务
The text was updated successfully, but these errors were encountered: