Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Setup and scrape tasks which define a set of scrapers and relevant scraping options #32

Merged
merged 17 commits into from
Apr 15, 2018
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .eslintrc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
"arrow-body-style": 0,
"no-shadow": 0,
"no-console": 0,
"no-underscore-dangle": ["error", { "allow": ["_private"] }],
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I found a compromise imo.

  • for private methods - I moved them outside of the class if they didn't use the state of the instance , for others I removed the prefix and set @private flag of jsdoc
  • for private properties - I used the following strategy to provide true support for private properties. I remember that you prefer not disabling lint by row so I added the exception of _private that as you can see is private by design.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal ok, better I guess.
I still need to digest this idea, but I guess it's the least bad option

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, let me know :)

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal my way of saying sababa

"class-methods-use-this": 0,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal yeah, I agree about this one, I think it's irrelevant.

"no-await-in-loop": 0
},
"globals": {
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,6 @@ lib/

# OS related
.DS_Store

# IDEs
.idea
13 changes: 10 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
},
"dependencies": {
"babel-polyfill": "^6.26.0",
"colors": "^1.2.1",
"inquirer": "^4.0.1",
"israeli-bank-scrapers": "^0.5.5",
"json2csv": "^3.11.5",
Expand Down
3 changes: 2 additions & 1 deletion src/constants.js
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
const PASSWORD_FIELD = 'password';
export default PASSWORD_FIELD;
const DATE_AND_TIME_MOMENT_FORMAT = 'DD-MM-YYYY_HH-mm-ss';
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal mind changing this to DATE_TIME_FORMAT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

export { PASSWORD_FIELD, DATE_AND_TIME_MOMENT_FORMAT };
1 change: 1 addition & 0 deletions src/definitions.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ import os from 'os';

export const CONFIG_FOLDER = `${os.homedir()}/.ynab-updater`;
export const SETTINGS_FILE = `${CONFIG_FOLDER}/settings.json`;
export const TASKS_FOLDER = `${CONFIG_FOLDER}/tasks`;
export const DOWNLOAD_FOLDER = `${os.homedir()}/Downloads/Transactions`;
38 changes: 31 additions & 7 deletions src/helpers/files.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,42 @@ import jsonfile from 'jsonfile';
const writeFileAsync = util.promisify(fs.writeFile);
const existsAsync = util.promisify(fs.exists);
const makeDirAsync = util.promisify(fs.mkdir);
const readdirAsync = util.promisify(fs.readdir);
const deleteFileAsync = util.promisify(fs.unlink);
const readJsonFileAsync = util.promisify(jsonfile.readFile);
const writeJsonFileAsync = util.promisify(jsonfile.writeFile);

async function verifyFolder(filePath) {
const folder = path.dirname(filePath);
const exists = await existsAsync(folder);
if (!exists) {
await makeDirAsync(folder);
async function verifyFolder(folderPath) {
const pathTokens = folderPath.split(path.sep);
let currentPath = '';
for (let i = 0; i < pathTokens.length; i += 1) {
const folder = pathTokens[i];
currentPath += folder + path.sep;
if (!await existsAsync(currentPath)) {
await makeDirAsync(currentPath);
}
}
}

export async function getFolderFiles(folderPath, suffix) {
await verifyFolder(folderPath);
const files = await readdirAsync(folderPath);
if (suffix) {
return files.filter(filePath => (path.extname(filePath) || '').toLowerCase() === suffix.toLowerCase());
}
return files;
}

export async function deleteFile(filePath) {
if (await existsAsync(filePath)) {
return deleteFileAsync(filePath);
}
return null;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal this function should probably throw an exception if the file doesn't exist

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a good question, whether not having a file should throw an exception even if the purpose of this function is to remove that file. I think that a developer wants to delete a file, it doesn't matter for him if the file was not there to begin with as long as this file is not there once this method complete its execution.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal I disagree.
Most file APIs I know, as well as OS commands, will return an error if the file you're trying to delete doesn't exist.

Copy link
Contributor Author

@esakal esakal Apr 10, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, i removed the existsAsync check condition and following your comment I assume that node fs.unlink function will result with error if the file is missing.

btw, I don't have a machine with microsoft OS to check this out, I assume node handles both OS when dealing with the file system.

}

export async function writeFile(filePath, data, options) {
await verifyFolder(filePath);
const folderPath = path.dirname(filePath);
await verifyFolder(folderPath);
return writeFileAsync(filePath, data, options);
}

Expand All @@ -31,6 +54,7 @@ export async function readJsonFile(filePath, options) {
}

export async function writeJsonFile(filePath, obj, options) {
await verifyFolder(filePath);
const folderPath = path.dirname(filePath);
await verifyFolder(folderPath);
await writeJsonFileAsync(filePath, obj, options);
}
81 changes: 81 additions & 0 deletions src/helpers/tasks-manager.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import path from 'path';
import colors from 'colors/safe';
import moment from 'moment';
import { writeJsonFile, readJsonFile, getFolderFiles, deleteFile } from '../helpers/files';
import { TASKS_FOLDER } from '../definitions';
import { SCRAPERS } from '../helpers/scrapers';

function writeSummaryLine(key, value) {
console.log(`- ${colors.bold(key)}: ${value}`);
}

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal redundant line break

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

class TaskManager {
async getTasksList() {
const files = await getFolderFiles(TASKS_FOLDER, '.json');
const result = files.map(file => path.basename(file, '.json'));
return result;
}

async hasTask(taskName) {
const tasksList = await this.getTasksList();
return taskName && !!tasksList.find(taskListName =>
taskListName.toLowerCase() === taskName.toLowerCase());
}

isValidTaskName(taskName) {
return taskName && taskName.match(/^[a-zA-Z0-9_-]+$/);
}

async loadTask(taskName) {
if (taskName && this.hasTask(taskName)) {
return readJsonFile(`${TASKS_FOLDER}/${taskName}.json`);
}

throw new Error(`failed to find a task named ${taskName}`);
}

async saveTask(taskName, taskData) {
if (taskName && this.isValidTaskName(taskName)) {
return writeJsonFile(`${TASKS_FOLDER}/${taskName}.json`, taskData);
}

throw new Error(`invalid task name provided ${taskName}`);
}

async deleteTask(taskName) {
if (taskName && this.hasTask(taskName)) {
await deleteFile(`${TASKS_FOLDER}/${taskName}.json`);
} else {
throw new Error(`invalid task name provided ${taskName}`);
}
}

async printTaskSummary(taskName) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal I would imagine seeing this function somewhere else, as it shouldn't be this class's responsibility.
why not just put it as a function in scrape-task.js?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because there might be a feature request for printing a summary of all tasks or specific task as part of the setup process. At the moment if someone wants to see a task scrapers or options he need to edit them, it is not natural action and also can scare people. Actually I think we should consider adding it as one of the options to do on a task in the setup process, what do you think?

If we still want to move it to another place, we should consider having a task class that will hold this function as well as its properties

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually I think we should consider adding it as one of the options to do on a task in the setup process, what do you think?

@esakal sounds like a good idea, but let's do that on a separate PR 😄

because there might be a feature request for printing a summary of all tasks or specific task as part of the setup process

I would still put it on a different location - either a helper procedural function. Not even sure it belongs to a Task class, as it it purely presentational.

const taskData = await this.loadTask(taskName);

if (taskData) {
const scrapers = taskData.scrapers || [];
const {
dateDiffByMonth,
combineInstallments,
} = taskData.options;
const {
combineReport,
saveLocation,
} = taskData.output;
const substractValue = dateDiffByMonth - 1;
const startMoment = moment().subtract(substractValue, 'month').startOf('month');

console.log(colors.underline.bold(`Task ${taskName} Summary`));
writeSummaryLine('Scrapers', scrapers.map(scraper => SCRAPERS[scraper.id].name).join(', '));
writeSummaryLine('Start scraping from', startMoment.format('ll'));
writeSummaryLine('Combine installments', combineInstallments ? 'yes' : 'no');
writeSummaryLine('Save to location', saveLocation);
writeSummaryLine('Create single report', combineReport ? 'yes' : 'no');
}
}
}

const taskManager = new TaskManager();

export default taskManager;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal we should export the class, not the instance

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, holding this class as singleton and using the same instance can help us in the future if we will need to share things among classes. since we don't have a proper DI we can either do what I did to mimic singleton or to export the class which means that every consumer will hold its own instance.

BTW I would go with ` export { taskManager, TaskManager } which will allow the user to choose.

I don't mind changing that if you still want me to, let me know

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal I usually try to keep stuff simple and to avoid unnecessary "mizgun" of code.
I avoid Singletons if not needed. I wouldn't hesitate using them if I really need them, but that's not the case here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what 'mizgun' means but sure, I will change it as requested

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal 😄
"mizgun" comes out of the Hebrew saying, "Preparing for Air condition".
The meaning for it here is, setting up stuff in preparation for a future addition, which may or may not ever come.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eshaham It's funny I use this term a lot so I should have figure it out, I usually say הכנה למזגן so it is very close. :)

11 changes: 9 additions & 2 deletions src/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import yargs from 'yargs';
import scrape from './scrape/scrape-individual';
import colors from 'colors/safe';
import setupMainMenu from './setup/setup-main-menu';
import scrapingMainMenu from './scrape/scraping-main-menu';

// set theme
colors.setTheme({
title: 'bgCyan',
notify: 'magenta',
});

const args = yargs.options({
mode: {
Expand All @@ -16,7 +23,7 @@ const args = yargs.options({
}).help().argv;

if (!args.mode || args.mode === 'scrape') {
scrape(args.show);
scrapingMainMenu(args.show);
} else if (args.mode === 'setup') {
setupMainMenu();
}
39 changes: 39 additions & 0 deletions src/scrape/generate-reports.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import json2csv from 'json2csv';
import colors from 'colors/safe';
import moment from 'moment';
import { DATE_AND_TIME_MOMENT_FORMAT } from '../constants';
import { writeFile } from '../helpers/files';

async function exportAccountData(account, saveLocation) {
const fields = ['Date', 'Payee', 'Inflow', 'Installment', 'Total'];
const csv = json2csv({ data: account.txns, fields, withBOM: true });
await writeFile(`${saveLocation}/${account.scraperName} (${account.accountNumber}).csv`, csv);
}

export async function generateSeparatedReports(scrapedAccounts, saveLocation) {
let numFiles = 0;
for (let i = 0; i < scrapedAccounts.length; i += 1) {
const account = scrapedAccounts[i];
if (account.txns.length) {
console.log(colors.notify(`exporting ${account.txns.length} transactions for account # ${account.accountNumber}`));
await exportAccountData(account, saveLocation);
numFiles += 1;
} else {
console.log(`no transactions for account # ${account.accountNumber}`);
}
}

console.log(colors.notify(`${numFiles} csv files saved under ${saveLocation}`));
}

export async function generateSingleReport(scrapedAccounts, saveLocation) {
const fileTransactions = scrapedAccounts.reduce((acc, account) => {
acc.push(...account.txns);
return acc;
}, []);
const filePath = `${saveLocation}/${moment().format(DATE_AND_TIME_MOMENT_FORMAT)}.csv`;
const fileFields = ['Institute', 'Account', 'Date', 'Payee', 'Inflow', 'Installment', 'Total'];
const fileContent = json2csv({ data: fileTransactions, fileFields, withBOM: true });
await writeFile(filePath, fileContent);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal would be nicer to re-use the same code in exportAccountData() function. Perhaps the list of fields could be sent as a parameter.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to do minimal changes in the file generation part to reduce a bit the changes in this PR, this is why I copied the functions you were using for the separated reports.

If you want I can optimize the code in this class as part of the PR or in future PR, let me know

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@esakal I meant something extracting code to a different function in this class. But now I see it may be more complicated and the benefit is too small, so let's drop it.

console.log(colors.notify(`created file ${filePath}`));
}
71 changes: 71 additions & 0 deletions src/scrape/scrape-base.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import moment from 'moment';
import { SCRAPERS, createScraper } from '../helpers/scrapers';

async function prepareResults(scraperId, scraperName, scraperResult, combineInstallments) {
return scraperResult.accounts.map((account) => {
console.log(`${scraperName}: scraped ${account.txns.length} transactions from account ${account.accountNumber}`);

const txns = account.txns.map((txn) => {
return {
Company: scraperName,
AccountNumber: account.accountNumber,
Date: moment(txn.date).format('DD/MM/YYYY'),
Payee: txn.description,
Inflow: txn.type !== 'installments' || !combineInstallments ? txn.chargedAmount : txn.originalAmount,
Installment: txn.installments ? txn.installments.number : null,
Total: txn.installments ? txn.installments.total : null,
};
});

return {
scraperId,
scraperName,
accountNumber: account.accountNumber,
txns,
};
});
}

export default async function (scraperId, credentials, options) {
const {
combineInstallments,
startDate,
showBrowser,
} = options;

const scraperOptions = {
companyId: scraperId,
startDate,
combineInstallments,
showBrowser,
verbose: false,
};
const scraperName = SCRAPERS[scraperId] ?
SCRAPERS[scraperId].name : null;

if (!scraperName) {
throw new Error(`unknown scraper with id ${scraperId}`);
}
console.log(`scraping ${scraperName}`);

let scraperResult;
try {
const scraper = createScraper(scraperOptions);
scraper.onProgress((companyId, payload) => {
console.log(`${scraperName}: ${payload.type}`);
});
scraperResult = await scraper.scrape(credentials);
} catch (e) {
console.error(e.message);
throw e;
}

console.log(`success: ${scraperResult.success}`);
if (!scraperResult.success) {
console.log(`error type: ${scraperResult.errorType}`);
console.log('error:', scraperResult.errorMessage);
throw new Error(scraperResult.errorMessage);
}

return prepareResults(scraperId, scraperName, scraperResult, combineInstallments);
}
Loading