From dd5299d44f2538cb1c00d11aa5fa7046bc150446 Mon Sep 17 00:00:00 2001 From: Siddharth VP Date: Sat, 21 Aug 2021 00:40:17 +0530 Subject: [PATCH] improve/expand documentation --- README.md | 4 +- package-lock.json | 32 ++++----- src/bot.ts | 6 +- src/wikitext.ts | 31 ++++++--- website/docs/10-bulk-processing.md | 66 +++++++++++++++++++ .../docs/11-integration-with-other-apis.md | 53 +++++++++++++++ website/docs/12-logging.md | 14 ++++ website/docs/13-dates.md | 45 +++++++++++++ website/docs/6-emergency-shutoff.md | 24 ++++--- website/docs/9-working-with-wikitext.md | 40 +++++++++-- website/src/css/custom.css | 8 +++ website/src/pages/index.js | 2 +- 12 files changed, 285 insertions(+), 40 deletions(-) create mode 100644 website/docs/10-bulk-processing.md create mode 100644 website/docs/11-integration-with-other-apis.md create mode 100644 website/docs/12-logging.md create mode 100644 website/docs/13-dates.md diff --git a/README.md b/README.md index b375158..7c46dd6 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![Coverage Status](https://coveralls.io/repos/github/siddharthvp/mwn/badge.svg?branch=master)](https://coveralls.io/github/siddharthvp/mwn?branch=master) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat-square)](http://makeapullrequest.com) -**Quick links: [Getting Started](#user-content-getting-started) — [Docs](https://mwn.toolforge.org/) — [GitHub](https://github.com/siddharthvp/mwn) — [NPM](https://www.npmjs.com/package/mwn) — [API Documentation](https://mwn.toolforge.org/docs/api/classes/mwn.html)** +**Quick links: [Getting Started](https://mwn.toolforge.org/docs/setting-up-mwn) — [GitHub](https://github.com/siddharthvp/mwn) — [NPM](https://www.npmjs.com/package/mwn) — [User Documentation](https://mwn.toolforge.org/) — [API Documentation](https://mwn.toolforge.org/docs/api/classes/mwn.html)** **Mwn** is a modern and comprehensive MediaWiki bot framework for Node.js, originally adapted from 
[mwbot](https://github.com/Fannon/mwbot). @@ -155,6 +155,8 @@ Exclusion compliance is _not_ enabled by default. ### Getting started +**Note: More complete [documentation is on Toolforge](https://mwn.toolforge.org/).** + Importing mwn: In JavaScript: diff --git a/package-lock.json b/package-lock.json index 7818608..18c27df 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4925,8 +4925,10 @@ "integrity": "sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==", "dev": true, "dependencies": { - "graceful-fs": "^4.1.6", "universalify": "^2.0.0" + }, + "optionalDependencies": { + "graceful-fs": "^4.1.6" } }, "node_modules/jsonfile/node_modules/universalify": { @@ -5534,9 +5536,9 @@ } }, "node_modules/marked": { - "version": "2.0.6", - "resolved": "https://registry.npmjs.org/marked/-/marked-2.0.6.tgz", - "integrity": "sha512-S2mYj0FzTQa0dLddssqwRVW4EOJOVJ355Xm2Vcbm+LU7GQRGWvwbO5K87OaPSOux2AwTSgtPPaXmc8sDPrhn2A==", + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/marked/-/marked-2.0.7.tgz", + "integrity": "sha512-BJXxkuIfJchcXOJWTT2DOL+yFWifFv2yGYOUzvXg8Qz610QKw+sHCvTMYwA+qWGhlA2uivBezChZ/pBy1tWdkQ==", "dev": true, "bin": { "marked": "bin/marked" @@ -8690,9 +8692,9 @@ } }, "node_modules/typedoc": { - "version": "0.20.36", - "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.20.36.tgz", - "integrity": "sha512-qFU+DWMV/hifQ9ZAlTjdFO9wbUIHuUBpNXzv68ZyURAP9pInjZiO4+jCPeAzHVcaBCHER9WL/+YzzTt6ZlN/Nw==", + "version": "0.20.37", + "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.20.37.tgz", + "integrity": "sha512-9+qDhdc4X00qTNOtii6QX2z7ndAeWVOso7w3MPSoSJdXlVhpwPfm1yEp4ooKuWA9fiQILR8FKkyjmeqa13hBbw==", "dev": true, "dependencies": { "colors": "^1.4.0", @@ -8700,7 +8702,7 @@ "handlebars": "^4.7.7", "lodash": "^4.17.21", "lunr": "^2.3.9", - "marked": "^2.0.3", + "marked": "~2.0.3", "minimatch": "^3.0.0", "progress": "^2.0.3", "shelljs": "^0.8.4", @@ -13567,9 +13569,9 @@ } }, "marked": { - 
"version": "2.0.6", - "resolved": "https://registry.npmjs.org/marked/-/marked-2.0.6.tgz", - "integrity": "sha512-S2mYj0FzTQa0dLddssqwRVW4EOJOVJ355Xm2Vcbm+LU7GQRGWvwbO5K87OaPSOux2AwTSgtPPaXmc8sDPrhn2A==", + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/marked/-/marked-2.0.7.tgz", + "integrity": "sha512-BJXxkuIfJchcXOJWTT2DOL+yFWifFv2yGYOUzvXg8Qz610QKw+sHCvTMYwA+qWGhlA2uivBezChZ/pBy1tWdkQ==", "dev": true }, "merge-stream": { @@ -16042,9 +16044,9 @@ } }, "typedoc": { - "version": "0.20.36", - "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.20.36.tgz", - "integrity": "sha512-qFU+DWMV/hifQ9ZAlTjdFO9wbUIHuUBpNXzv68ZyURAP9pInjZiO4+jCPeAzHVcaBCHER9WL/+YzzTt6ZlN/Nw==", + "version": "0.20.37", + "resolved": "https://registry.npmjs.org/typedoc/-/typedoc-0.20.37.tgz", + "integrity": "sha512-9+qDhdc4X00qTNOtii6QX2z7ndAeWVOso7w3MPSoSJdXlVhpwPfm1yEp4ooKuWA9fiQILR8FKkyjmeqa13hBbw==", "dev": true, "requires": { "colors": "^1.4.0", @@ -16052,7 +16054,7 @@ "handlebars": "^4.7.7", "lodash": "^4.17.21", "lunr": "^2.3.9", - "marked": "^2.0.3", + "marked": "~2.0.3", "minimatch": "^3.0.0", "progress": "^2.0.3", "shelljs": "^0.8.4", diff --git a/src/bot.ts b/src/bot.ts index 7d3f3de..f442643 100644 --- a/src/bot.ts +++ b/src/bot.ts @@ -30,7 +30,7 @@ * */ -// Node internal module +// Node internal modules import * as fs from 'fs'; import * as path from 'path'; import * as crypto from 'crypto'; @@ -87,6 +87,10 @@ import { } from './api_response_types'; export { MwnDate, MwnTitle, MwnPage, MwnFile, MwnCategory, MwnWikitext, MwnUser, MwnStream, ApiPage, ApiRevision }; +// Export, if only for the sake of getting generated documentation +export * from './api_response_types'; +export type { PageViewData, PageViewOptions, AuthorshipData } from './page'; +export type { TemplateConfig, Template, MwnWikitextStatic } from './wikitext'; export interface MwnOptions { silent?: boolean; diff --git a/src/wikitext.ts b/src/wikitext.ts index cfadc34..a927205 100644 --- 
a/src/wikitext.ts +++ b/src/wikitext.ts @@ -74,19 +74,20 @@ export interface TemplateConfig { // by Evad37 (cc-by-sa-3.0/GFDL) // TODO: expand from evad37/xfdcloser /** - * Represents the wikitext of template transclusion. Used by #parseTemplates. - * @prop {string} name Name of the template - * @prop {string} wikitext Full wikitext of the transclusion - * @prop {Object[]} parameters Parameters used in the translcusion, in order, of form: - { - name: {string|number} parameter name, or position for unnamed parameters, - value: {string} Wikitext passed to the parameter (whitespace trimmed), - wikitext: {string} Full wikitext (including leading pipe, parameter name/equals sign (if applicable), value, and any whitespace) - } + * Represents the wikitext of a template transclusion. Used by {@link parseTemplates}. */ export class Template { + /** + * Full wikitext of the transclusion + */ wikitext: string; + /** + * Parameters used in the transclusion + */ parameters: Array; + /** + * Name of the template + */ name: string | number; /** @@ -115,9 +116,21 @@ export class Template { } } +/** + * Represents a template parameter + */ export class Parameter { + /** + * parameter name, or position for unnamed parameters + */ name: string | number; + /** + * Wikitext passed to the parameter (whitespace trimmed) + */ value: string; + /** + * Full wikitext (including leading pipe, parameter name/equals sign (if applicable), value, and any whitespace) + */ wikitext: string; constructor(name: string | number, val: string, wikitext: string) { diff --git a/website/docs/10-bulk-processing.md b/website/docs/10-bulk-processing.md new file mode 100644 index 0000000..d03c134 --- /dev/null +++ b/website/docs/10-bulk-processing.md @@ -0,0 +1,66 @@ +# Bulk processing + +### continuedQuery / continuedQueryGen +See [Handling query continuation](/docs/handling-query-continuation) for more details. + +continuedQuery returns a promise resolved with the array of all individual API responses.
+ +Use of `continuedQueryGen` is recommended since continuedQuery will fetch the results of all the API calls before it begins to do anything with the results. `continuedQueryGen` gets the result of each API call and processes them one at a time. + +### massQuery / massQueryGen +MediaWiki sets a limit of 500 (50 for non-bots) on the number of pages that can be queried in a single API call. To query more than that, `massQuery` or `massQueryGen` can be used. This splits the page list into batches of 500 and sends individual queries and returns a promise resolved with the array of all individual API call responses. + +Example: get the protection status of a large number of pages: + +```js +bot.massQuery({ + action: 'query', + format: 'json', + prop: 'info', + titles: ['Page1', 'Page2', 'Page1300'], // array of page names + inprop: 'protection' +}) // 2nd parameter is taken as 'titles' by default + .then((jsons) => { + // jsons is the array of individual JSON responses. + }); +``` + +Any errors in the individual API calls will not cause the entire massQuery to fail, but the data at the array index corresponding to that API call will be an error object. + +massQueryGen is the generator equivalent that yields each API response as and when they're received. + +### Batch operations + +Perform asynchronous tasks (involving API usage) over a number of pages (or other arbitrary items). `batchOperation` uses a default concurrency of 5. Customise this according to how expensive the API operation is. Higher concurrency limits could lead to more frequent API errors. + +- `batchOperation(pageList, workerFunction, concurrency, maxRetries)`: The `workerFunction` must return a promise.
+ +```js +bot.batchOperation( + pageList, + (page, idx) => { + // do something with each page + // the index of the page in pageList is available as the 2nd argument + // return a promise in the end + }, + /* concurrency */ 5, + /* retries */ 2 +); +``` + +- `bot.seriesBatchOperation(pageList, workerFunction, sleepDuration, retries)` can be used for serial operations, with a sleep duration between each task (default 5 seconds). + +```js +bot.seriesBatchOperation( + pageList, + (page, idx) => { + // do something with each page + // the index of the page in pageList is available as the 2nd argument + // return a promise in the end + }, + 5000, + 2 +); // set the sleep duration in milliseconds as the third parameter, max number of retries for each action is set as the 4th parameter +``` + +Note that `seriesBatchOperation` with delay=0 is the same as `batchOperation` with concurrency=1. diff --git a/website/docs/11-integration-with-other-apis.md b/website/docs/11-integration-with-other-apis.md new file mode 100644 index 0000000..763daf2 --- /dev/null +++ b/website/docs/11-integration-with-other-apis.md @@ -0,0 +1,53 @@ +# Integration with other APIs + +Apart from the [MediaWiki API](https://www.mediawiki.org/wiki/API:Main_page), Mwn integrates with a few other APIs: + +### ORES +See https://ores.wikimedia.org/ for details.
+Get ORES scores for revisions: +```js +await bot.oresQueryRevisions( + 'https://ores.wikimedia.org/', // ORES endpoint URL + ['articlequality', 'drafttopic'], // ORES modes + ['76923582', '2387429'] // Revision IDs +); +``` + + +### EventStreams +See https://wikitech.wikimedia.org/wiki/Event_Platform/EventStreams +```js +const stream = bot.stream(['recentchange']); +stream.addListener( + // Consider event only if this function returns true + function eventFilter(data) { + return data.wiki === 'enwiki'; + }, + + // Run this function for every filtered event + async function worker(data) { + // do something with data + } +); +``` + +### PageViews +See https://wikitech.wikimedia.org/wiki/Analytics/AQS/Pageviews +```js +const page = new bot.page('Deaths in 2020'); +const pageViewData = await page.pageViews({ + // See https://mwn.toolforge.org/docs/api/interfaces/pageviewoptions.html for available options +}); +``` +The [PageViewOptions](https://mwn.toolforge.org/docs/api/interfaces/pageviewoptions.html) argument is optional. Return type is `Promise<PageViewData[]>`. + +### WikiWho +See https://wikiwho.wmflabs.org/ + +Fetch the list of top contributors to an article. Available for a limited number of Wikipedias. + +```js +const page = new bot.page('Lorem ipsum'); +const contributorData = await page.queryAuthors(); +``` +Return type is `Promise<AuthorshipData>`. diff --git a/website/docs/12-logging.md b/website/docs/12-logging.md new file mode 100644 index 0000000..2c64784 --- /dev/null +++ b/website/docs/12-logging.md @@ -0,0 +1,14 @@ +# Logging + +Mwn provides a convenient coloured logging utility, based on [semlog](https://npmjs.com/package/semlog). + +```js +const log = mwn.log; + +log('[I] Informational message'); +log('[S] Success message'); +log('[W] Warning message'); +log('[E] Error message'); +``` + +Based on the character within `[]`, colouration happens automatically.
diff --git a/website/docs/13-dates.md b/website/docs/13-dates.md new file mode 100644 index 0000000..25ba699 --- /dev/null +++ b/website/docs/13-dates.md @@ -0,0 +1,45 @@ +# Dates + +Mwn provides a rich wrapper around the native [Date](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date) interface. + +The constructor supports the common MediaWiki datetime formats: +```js +const date1 = new bot.date('13:45, 2 April 2020 (UTC)'); // This won't parse with JS native Date! +const date2 = new bot.date('20210304134567'); // MW database timestamp format. +``` + +in addition to everything that native JS Date supports: +```js +const date1 = new bot.date(); +const date2 = new bot.date('3 December 2020'); +``` + +Note that it inherits the quirks of JS Date - even "NY 12345" gets parsed as a valid date, so per [MDN recommendation](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/Date#timestamp_string) you should not parse dates as strings of unknown format. + +All methods on native date are inherited, for instance: +```js +date.getUTCDate(); +date.toISOString(); +``` + + +But in addition, you can get names of the months and days of the week (in English): +```js +date.getMonthName(); +date.getUTCMonthName(); +date.getDayName(); +date.getUTCDayName(); +``` + +Add and subtract dates. This mutates the original date as well as returns the mutated object to allow chaining. The supported units are seconds, minutes, hours, days, weeks, months and years. +```js +date.add(1, 'hour'); +date.add(4, 'hours'); +date.subtract(5, 'hours').subtract(30, 'minutes'); +``` + +Check if a date is before or after another date (which can be either an Mwn date or a normal Date object).
+```js +date.isBefore(new Date()); // boolean +date.isAfter(new bot.date()); +``` diff --git a/website/docs/6-emergency-shutoff.md b/website/docs/6-emergency-shutoff.md index fd00f72..205687f 100644 --- a/website/docs/6-emergency-shutoff.md +++ b/website/docs/6-emergency-shutoff.md @@ -4,19 +4,27 @@ Mwn exploits Node's asynchronous event loop to efficiently implement emergency s ```js bot.enableEmergencyShutoff({ - page: 'User:ExampleBot/shutoff', // The name of the page to check - intervalDuration: 5000, // check shutoff page every 5 seconds + // The name of the page to check + page: 'User:ExampleBot/shutoff', + + // check shutoff page every 5 seconds + intervalDuration: 5000, + + // function to determine whether the bot should continue to run or not condition: function (pagetext) { - // function to determine whether the bot should continue to run or not + // Example implementation: if someone changes the text to something + // other than "running", let's decide to stop! if (pagetext !== 'running') { - // Example implementation: if some one changes the text to something - return false; // other than "running", let's decide to stop! + return false; } else return true; }, + + // function to trigger when shutoff is activated onShutoff: function (pagetext) { - // function to trigger when shutoff is activated - process.exit(); // let's just exit, though we could also terminate - } // any open connections, close files, etc. + // let's just exit, though we could also terminate + // any open connections, close files, etc. + process.exit(); + } }); ``` diff --git a/website/docs/9-working-with-wikitext.md b/website/docs/9-working-with-wikitext.md index bf6a3b0..66eeb6b 100644 --- a/website/docs/9-working-with-wikitext.md +++ b/website/docs/9-working-with-wikitext.md @@ -1,14 +1,44 @@ # Working with wikitext -Mwn can be used for parsing wikitext: +Mwn can be used for common wikitext parsing needs, though there is no AST-based parsing.
+Create an object for further operations: ```js let wkt = new bot.wikitext('This is some wikitext with [[links]] and {{templates|with=params}}.'); - -wkt.parseTemplates(); // -> [Template {wikitext: '{{templates|with=params}}', parameters: [ Parameter {name: 'with', value: 'params', wikitext: '|with=params'}] ], name: 'Templates' }] - +``` +Parse links: +```js // This requires the bot object to have the namespace data of the wiki available. // Either the bot should be logged in, or run bot.getSiteInfo() wkt.parseLinks(); // populates wkt.links, wkt.files, wkt.categories -wkt.links; // -> [{ wikitext: '[[links]]', target: Title { namespace: 0, title: 'links', fragment: null }, displaytext: 'links'}] + +wkt.links // -> [{ wikitext: '[[links]]', target: Title { namespace: 0, title: 'links', fragment: null }, displaytext: 'links'}] +wkt.categories // -> [] +wkt.files // -> [] +``` + +Parse templates: +```js +wkt.parseTemplates() // -> [Template {wikitext: '{{templates|with=params}}', parameters: [ Parameter {name: 'with', value: 'params', wikitext: '|with=params'} ], name: 'Templates' }] +``` +`parseTemplates` can optionally take a [TemplateConfig](https://mwn.toolforge.org/docs/api/interfaces/templateconfig.html) object as argument. + +It can also be used without constructing a bot.wikitext object, as: +```js +bot.wikitext.parseTemplates() +``` + +Parse simple tables: +```js +const parsedTable = bot.wikitext.parseTable(` +{| class="wikitable sortable" +|- +! Header1 !! Header2 !! Header3 +|- +| A || B || C +|- +| D || E || F +|} +`); ``` +The result is an array of plain JS objects, each having the table headers as keys.
diff --git a/website/src/css/custom.css b/website/src/css/custom.css index 74ba0f2..082ecd7 100644 --- a/website/src/css/custom.css +++ b/website/src/css/custom.css @@ -23,3 +23,11 @@ margin: 0 calc(-1 * var(--ifm-pre-padding)); padding: 0 var(--ifm-pre-padding); } + +code { + white-space: pre-wrap !important; +} + +code .token.comment { + color: rgb(173 174 183) !important; +} diff --git a/website/src/pages/index.js b/website/src/pages/index.js index b687030..41caebf 100644 --- a/website/src/pages/index.js +++ b/website/src/pages/index.js @@ -29,7 +29,7 @@ export default function Home() { const {siteConfig} = useDocusaurusContext(); return (