
Commit

refactor code
maryan-mandzyuk committed Jul 30, 2019
1 parent bd7a3cb commit 7be5fd1
Showing 2 changed files with 41 additions and 37 deletions.
7 changes: 3 additions & 4 deletions app.js
@@ -1,6 +1,6 @@
 const express = require('express');
 const bodyParser = require('body-parser');
-const scraping = require('./src/scraping');
+const scraping = require('./src/scraper');
 const dbConnection = require('./src/db/connection');
 const api = require('./src/api/index');
 require('dotenv').config();
@@ -18,6 +18,5 @@ app.listen(process.env.PORT, () => console.log(`Server started on port ${process.env.PORT}`));
 api.start(app);

 setInterval(async () => {
-  const sites = await scraping.getSites();
-  scraping.scrapingPage(sites);
-}, 1800000); // 1800000 ms = 30 min || 300000 ms = 5 min 10000
+  scraping.scrapingPage();
+}, 10000); // 1800000 ms = 30 min || 300000 ms = 5 min 10000
71 changes: 38 additions & 33 deletions src/scraping.js → src/scraper.js
@@ -1,10 +1,11 @@
+/* eslint-disable no-restricted-syntax */
 /* eslint-disable func-names */
 const axios = require('axios');
 const cheerio = require('cheerio');
 const Article = require('./classes/Article');
-const articelModel = require('../src/db/models/article.model');
-const lastSavedArticleModel = require('../src/db/models/lastSavedArticle.model');
-const siteModel = require('../src/db/models/site.model');
+const articelModel = require('./db/models/article.model');
+const lastSavedArticleModel = require('./db/models/lastSavedArticle.model');
+const siteModel = require('./db/models/site.model');

 const getBody = async (url) => {
   let output;
@@ -97,38 +98,42 @@ const getArticles = async (url, selector, siteName) => {
   return articles;
 };

-module.exports = {
-  scrapingPage: async (sitesToScrap) => {
-    sitesToScrap.forEach(async (site) => {
-      const lastSavedArticle = await lastSavedArticleModel.readLastSavedArticleBySite(site.name);
-      let articels;
-      if (site.url) {
-        articels = await getArticles(site.url, site.selector, site.name);
-      }
-      let i = 0;
-      // eslint-disable-next-line no-restricted-syntax
-      for (const article of articels) {
-        if (article.url === lastSavedArticle.article.url) {
-          break;
-        }
-        i += 1;
-      }
-      articels = articels.slice(0, i);
+const newArticles = async (site, lastSavedArticle) => {
+  let articels = await getArticles(site.url, site.selector, site.name);
+  let i = 0;
+  for (const article of articels) {
+    if (article.url === lastSavedArticle.article.url) {
+      break;
+    }
+    i += 1;
+  }
+  articels = articels.slice(0, i);
+  return articels;
+};

+const saveArticles = (articels, site) => {
+  if (articels && articels.length) {
+    articelModel.createAllArticle(articels).then(
+      console.log(`Saved ${articels.length} new articles | Site: ${site.name} | ${new Date()}`)
+    );
+    lastSavedArticleModel.updateLastSavedArticle(site.name, articels[0]);
+  } else {
+    console.log(`Do not Save new articles | Site: ${site.name} | ${new Date()}`);
+  }
+};

+const getSites = async () => {
+  const sitesToScrap = await siteModel.readSites();
+  return sitesToScrap;
+};

-      if (articels && articels.length) {
-        articelModel.createAllArticle(articels).then(
-          console.log(`Saved ${articels.length} new articles | Site: ${site.name} | ${new Date()}`)
-        );
-        lastSavedArticleModel.updateLastSavedArticle(site.name, articels[0]);
-      } else {
-        console.log(`Do not Save new articles | Site: ${site.name} | ${new Date()}`);
-      }
+module.exports = {
+  scrapingPage: async () => {
+    const sites = await getSites();
+    sites.forEach(async (site) => {
+      const lastSavedArticle = await lastSavedArticleModel.readLastSavedArticleBySite(site.name);
+      const articels = await newArticles(site, lastSavedArticle);
+      saveArticles(articels, site);
     });
   },

-  getSites: async () => {
-    const sitesToScrap = await siteModel.readSites();
-    return sitesToScrap;
-  }
 };
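Read together, the two files above give the following scheduling flow after the refactor. This is only a sketch reassembled from the diffs (the './src/scraper' path and the 10000 ms interval are taken from the app.js hunk above), not a verbatim excerpt of the repository:

// Sketch reassembled from the diffs above, not a verbatim copy of the repository.
const scraping = require('./src/scraper');

setInterval(async () => {
  // scrapingPage() now reads the site list itself via getSites(), then for each
  // site it loads the last saved article, collects anything newer with
  // newArticles(), and persists the result with saveArticles().
  scraping.scrapingPage();
}, 10000); // 10000 ms = 10 s, per the interval set in app.js in this commit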
