-
Notifications
You must be signed in to change notification settings - Fork 15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Setup and scrape tasks which define a set of scrapers and relevant scraping options #32
Changes from 9 commits
2c42762
4f54991
f3120ea
68c70d5
1012c3d
5d1c385
36bf0a0
ba79c91
c76f0da
d86ff79
9fc2c4b
9416efa
ab2bebf
90f9be4
757531e
f7a56fa
94e7fc2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,8 @@ | |
"arrow-body-style": 0, | ||
"no-shadow": 0, | ||
"no-console": 0, | ||
"no-underscore-dangle": ["error", { "allow": ["_private"] }], | ||
"class-methods-use-this": 0, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esakal yeah, I agree about this one, I think it's irrelevant. |
||
"no-await-in-loop": 0 | ||
}, | ||
"globals": { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,3 +62,6 @@ lib/ | |
|
||
# OS related | ||
.DS_Store | ||
|
||
# IDEs | ||
.idea |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
const PASSWORD_FIELD = 'password'; | ||
export default PASSWORD_FIELD; | ||
const DATE_AND_TIME_MOMENT_FORMAT = 'DD-MM-YYYY_HH-mm-ss'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esakal mind changing this to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
export { PASSWORD_FIELD, DATE_AND_TIME_MOMENT_FORMAT }; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,19 +6,42 @@ import jsonfile from 'jsonfile'; | |
const writeFileAsync = util.promisify(fs.writeFile); | ||
const existsAsync = util.promisify(fs.exists); | ||
const makeDirAsync = util.promisify(fs.mkdir); | ||
const readdirAsync = util.promisify(fs.readdir); | ||
const deleteFileAsync = util.promisify(fs.unlink); | ||
const readJsonFileAsync = util.promisify(jsonfile.readFile); | ||
const writeJsonFileAsync = util.promisify(jsonfile.writeFile); | ||
|
||
async function verifyFolder(filePath) { | ||
const folder = path.dirname(filePath); | ||
const exists = await existsAsync(folder); | ||
if (!exists) { | ||
await makeDirAsync(folder); | ||
async function verifyFolder(folderPath) { | ||
const pathTokens = folderPath.split(path.sep); | ||
let currentPath = ''; | ||
for (let i = 0; i < pathTokens.length; i += 1) { | ||
const folder = pathTokens[i]; | ||
currentPath += folder + path.sep; | ||
if (!await existsAsync(currentPath)) { | ||
await makeDirAsync(currentPath); | ||
} | ||
} | ||
} | ||
|
||
export async function getFolderFiles(folderPath, suffix) { | ||
await verifyFolder(folderPath); | ||
const files = await readdirAsync(folderPath); | ||
if (suffix) { | ||
return files.filter(filePath => (path.extname(filePath) || '').toLowerCase() === suffix.toLowerCase()); | ||
} | ||
return files; | ||
} | ||
|
||
export async function deleteFile(filePath) { | ||
if (await existsAsync(filePath)) { | ||
return deleteFileAsync(filePath); | ||
} | ||
return null; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esakal this function should probably throw an exception if the file doesn't exist There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is a good question, whether not having a file should throw an exception even if the purpose of this function is to remove that file. I think that a developer wants to delete a file, it doesn't matter for him if the file was not there to begin with as long as this file is not there once this method complete its execution. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esakal I disagree. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, i removed the btw, I don't have a machine with microsoft OS to check this out, I assume node handles both OS when dealing with the file system. |
||
} | ||
|
||
export async function writeFile(filePath, data, options) { | ||
await verifyFolder(filePath); | ||
const folderPath = path.dirname(filePath); | ||
await verifyFolder(folderPath); | ||
return writeFileAsync(filePath, data, options); | ||
} | ||
|
||
|
@@ -31,6 +54,7 @@ export async function readJsonFile(filePath, options) { | |
} | ||
|
||
export async function writeJsonFile(filePath, obj, options) { | ||
await verifyFolder(filePath); | ||
const folderPath = path.dirname(filePath); | ||
await verifyFolder(folderPath); | ||
await writeJsonFileAsync(filePath, obj, options); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
import path from 'path'; | ||
import colors from 'colors/safe'; | ||
import moment from 'moment'; | ||
import { writeJsonFile, readJsonFile, getFolderFiles, deleteFile } from '../helpers/files'; | ||
import { TASKS_FOLDER } from '../definitions'; | ||
import { SCRAPERS } from '../helpers/scrapers'; | ||
|
||
function writeSummaryLine(key, value) { | ||
console.log(`- ${colors.bold(key)}: ${value}`); | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esakal redundant line break There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
class TaskManager { | ||
async getTasksList() { | ||
const files = await getFolderFiles(TASKS_FOLDER, '.json'); | ||
const result = files.map(file => path.basename(file, '.json')); | ||
return result; | ||
} | ||
|
||
async hasTask(taskName) { | ||
const tasksList = await this.getTasksList(); | ||
return taskName && !!tasksList.find(taskListName => | ||
taskListName.toLowerCase() === taskName.toLowerCase()); | ||
} | ||
|
||
isValidTaskName(taskName) { | ||
return taskName && taskName.match(/^[a-zA-Z0-9_-]+$/); | ||
} | ||
|
||
async loadTask(taskName) { | ||
if (taskName && this.hasTask(taskName)) { | ||
return readJsonFile(`${TASKS_FOLDER}/${taskName}.json`); | ||
} | ||
|
||
throw new Error(`failed to find a task named ${taskName}`); | ||
} | ||
|
||
async saveTask(taskName, taskData) { | ||
if (taskName && this.isValidTaskName(taskName)) { | ||
return writeJsonFile(`${TASKS_FOLDER}/${taskName}.json`, taskData); | ||
} | ||
|
||
throw new Error(`invalid task name provided ${taskName}`); | ||
} | ||
|
||
async deleteTask(taskName) { | ||
if (taskName && this.hasTask(taskName)) { | ||
await deleteFile(`${TASKS_FOLDER}/${taskName}.json`); | ||
} else { | ||
throw new Error(`invalid task name provided ${taskName}`); | ||
} | ||
} | ||
|
||
async printTaskSummary(taskName) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esakal I would imagine seeing this function somewhere else, as it shouldn't be this class's responsibility. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. because there might be a feature request for printing a summary of all tasks or specific task as part of the setup process. At the moment if someone wants to see a task scrapers or options he need to edit them, it is not natural action and also can scare people. Actually I think we should consider adding it as one of the options to do on a task in the setup process, what do you think? If we still want to move it to another place, we should consider having a task class that will hold this function as well as its properties There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
@esakal sounds like a good idea, but let's do that on a separate PR 😄
I would still put it on a different location - either a helper procedural function. Not even sure it belongs to a |
||
const taskData = await this.loadTask(taskName); | ||
|
||
if (taskData) { | ||
const scrapers = taskData.scrapers || []; | ||
const { | ||
dateDiffByMonth, | ||
combineInstallments, | ||
} = taskData.options; | ||
const { | ||
combineReport, | ||
saveLocation, | ||
} = taskData.output; | ||
const substractValue = dateDiffByMonth - 1; | ||
const startMoment = moment().subtract(substractValue, 'month').startOf('month'); | ||
|
||
console.log(colors.underline.bold(`Task ${taskName} Summary`)); | ||
writeSummaryLine('Scrapers', scrapers.map(scraper => SCRAPERS[scraper.id].name).join(', ')); | ||
writeSummaryLine('Start scraping from', startMoment.format('ll')); | ||
writeSummaryLine('Combine installments', combineInstallments ? 'yes' : 'no'); | ||
writeSummaryLine('Save to location', saveLocation); | ||
writeSummaryLine('Create single report', combineReport ? 'yes' : 'no'); | ||
} | ||
} | ||
} | ||
|
||
const taskManager = new TaskManager(); | ||
|
||
export default taskManager; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esakal we should export the class, not the instance There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure, holding this class as singleton and using the same instance can help us in the future if we will need to share things among classes. since we don't have a proper DI we can either do what I did to mimic singleton or to export the class which means that every consumer will hold its own instance. BTW I would go with ` export { taskManager, TaskManager } which will allow the user to choose. I don't mind changing that if you still want me to, let me know There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esakal I usually try to keep stuff simple and to avoid unnecessary "mizgun" of code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure what 'mizgun' means but sure, I will change it as requested There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esakal 😄 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @eshaham It's funny I use this term a lot so I should have figure it out, I usually say הכנה למזגן so it is very close. :) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import json2csv from 'json2csv'; | ||
import colors from 'colors/safe'; | ||
import moment from 'moment'; | ||
import { DATE_AND_TIME_MOMENT_FORMAT } from '../constants'; | ||
import { writeFile } from '../helpers/files'; | ||
|
||
async function exportAccountData(account, saveLocation) { | ||
const fields = ['Date', 'Payee', 'Inflow', 'Installment', 'Total']; | ||
const csv = json2csv({ data: account.txns, fields, withBOM: true }); | ||
await writeFile(`${saveLocation}/${account.scraperName} (${account.accountNumber}).csv`, csv); | ||
} | ||
|
||
export async function generateSeparatedReports(scrapedAccounts, saveLocation) { | ||
let numFiles = 0; | ||
for (let i = 0; i < scrapedAccounts.length; i += 1) { | ||
const account = scrapedAccounts[i]; | ||
if (account.txns.length) { | ||
console.log(colors.notify(`exporting ${account.txns.length} transactions for account # ${account.accountNumber}`)); | ||
await exportAccountData(account, saveLocation); | ||
numFiles += 1; | ||
} else { | ||
console.log(`no transactions for account # ${account.accountNumber}`); | ||
} | ||
} | ||
|
||
console.log(colors.notify(`${numFiles} csv files saved under ${saveLocation}`)); | ||
} | ||
|
||
export async function generateSingleReport(scrapedAccounts, saveLocation) { | ||
const fileTransactions = scrapedAccounts.reduce((acc, account) => { | ||
acc.push(...account.txns); | ||
return acc; | ||
}, []); | ||
const filePath = `${saveLocation}/${moment().format(DATE_AND_TIME_MOMENT_FORMAT)}.csv`; | ||
const fileFields = ['Institute', 'Account', 'Date', 'Payee', 'Inflow', 'Installment', 'Total']; | ||
const fileContent = json2csv({ data: fileTransactions, fileFields, withBOM: true }); | ||
await writeFile(filePath, fileContent); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esakal would be nicer to re-use the same code in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried to do minimal changes in the file generation part to reduce a bit the changes in this PR, this is why I copied the functions you were using for the separated reports. If you want I can optimize the code in this class as part of the PR or in future PR, let me know There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @esakal I meant something extracting code to a different function in this class. But now I see it may be more complicated and the benefit is too small, so let's drop it. |
||
console.log(colors.notify(`created file ${filePath}`)); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import moment from 'moment'; | ||
import { SCRAPERS, createScraper } from '../helpers/scrapers'; | ||
|
||
async function prepareResults(scraperId, scraperName, scraperResult, combineInstallments) { | ||
return scraperResult.accounts.map((account) => { | ||
console.log(`${scraperName}: scraped ${account.txns.length} transactions from account ${account.accountNumber}`); | ||
|
||
const txns = account.txns.map((txn) => { | ||
return { | ||
Company: scraperName, | ||
AccountNumber: account.accountNumber, | ||
Date: moment(txn.date).format('DD/MM/YYYY'), | ||
Payee: txn.description, | ||
Inflow: txn.type !== 'installments' || !combineInstallments ? txn.chargedAmount : txn.originalAmount, | ||
Installment: txn.installments ? txn.installments.number : null, | ||
Total: txn.installments ? txn.installments.total : null, | ||
}; | ||
}); | ||
|
||
return { | ||
scraperId, | ||
scraperName, | ||
accountNumber: account.accountNumber, | ||
txns, | ||
}; | ||
}); | ||
} | ||
|
||
export default async function (scraperId, credentials, options) { | ||
const { | ||
combineInstallments, | ||
startDate, | ||
showBrowser, | ||
} = options; | ||
|
||
const scraperOptions = { | ||
companyId: scraperId, | ||
startDate, | ||
combineInstallments, | ||
showBrowser, | ||
verbose: false, | ||
}; | ||
const scraperName = SCRAPERS[scraperId] ? | ||
SCRAPERS[scraperId].name : null; | ||
|
||
if (!scraperName) { | ||
throw new Error(`unknown scraper with id ${scraperId}`); | ||
} | ||
console.log(`scraping ${scraperName}`); | ||
|
||
let scraperResult; | ||
try { | ||
const scraper = createScraper(scraperOptions); | ||
scraper.onProgress((companyId, payload) => { | ||
console.log(`${scraperName}: ${payload.type}`); | ||
}); | ||
scraperResult = await scraper.scrape(credentials); | ||
} catch (e) { | ||
console.error(e.message); | ||
throw e; | ||
} | ||
|
||
console.log(`success: ${scraperResult.success}`); | ||
if (!scraperResult.success) { | ||
console.log(`error type: ${scraperResult.errorType}`); | ||
console.log('error:', scraperResult.errorMessage); | ||
throw new Error(scraperResult.errorMessage); | ||
} | ||
|
||
return prepareResults(scraperId, scraperName, scraperResult, combineInstallments); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I found a compromise imo.
@private
flag of jsdoc_private
that as you can see is private by design.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@esakal ok, better I guess.
I still need to digest this idea, but I guess it's the least bad option
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure, let me know :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@esakal my way of saying sababa