diff --git a/index.js b/index.js index 902c1b8..be7dc35 100644 --- a/index.js +++ b/index.js @@ -1,82 +1,172 @@ +/** + * @typedef {import('mdast').Nodes} Nodes + * @typedef {import('mdast').Resource} Resource + * @typedef {import('mdast').Root} Root + * @typedef {import('vfile').VFile} VFile + */ + +/** + * @typedef Options + * Configuration. + * @property {string | null | undefined} [from] + * Check relative values relative to this URL (optional, example: + * `'https://example.com/from'`). + * @property {boolean | null | undefined} [skipLocalhost=false] + * Whether to ignore `localhost` links such as `http://localhost/*`, + * `http://127.0.0.1/*` (default: `false`). + * @property {boolean | null | undefined} [skipOffline=false] + * Whether to let offline runs pass quietly (default: `false`). + * @property {Array<RegExp | string> | null | undefined} [skipUrlPatterns] + * List of patterns for URLs that should be skipped (optional); + * each URL will be tested against each pattern and will be ignored if + * `new RegExp(pattern).test(url) === true`. + * + * @typedef {Extract<Nodes, Resource>} Resources + * Resource nodes. + */ + +import {ok as assert} from 'devlop' +import {deadOrAlive} from 'dead-or-alive' +import isOnline from 'is-online' import {lintRule} from 'unified-lint-rule' import {visit} from 'unist-util-visit' -import checkLinks from 'check-links' -import isOnline from 'is-online' + +const remarkLintNoDeadUrls = lintRule('remark-lint:no-dead-urls', rule) + +const defaultSkipUrlPatterns = [/^(?!https?)/i] + +export default remarkLintNoDeadUrls /** - * @typedef {import('mdast').Root} Root - * @typedef {import('mdast').Link} Link - * @typedef {import('mdast').Image} Image - * @typedef {import('mdast').Definition} Definition + * Check URLs. * - * @typedef {Object} Options - * @property {import('got').OptionsOfTextResponseBody} [gotOptions] - * @property {boolean} [skipLocalhost] - * @property {boolean} [skipOffline] - * @property {Array} [skipUrlPatterns] + * @param {Root} tree + * Tree. 
+ * @param {VFile} file + * File. + * @param {Readonly<Options> | null | undefined} [options] + * Configuration (optional). + * @returns {Promise<undefined>} + * Nothing. */ +async function rule(tree, file, options) { + /** @type {Map<string, Array<Resources>>} */ + const nodesByUrl = new Map() + const online = await isOnline() + const settings = options || {} + const skipUrlPatterns = settings.skipUrlPatterns + ? settings.skipUrlPatterns.map((d) => + typeof d === 'string' ? new RegExp(d) : d + ) + : [...defaultSkipUrlPatterns] -/** @type {import('unified-lint-rule').Rule} */ -function noDeadUrls(ast, file, options) { - /** @type {{[url: string]: Array}} */ - const urlToNodes = {} - - visit(ast, ['link', 'image', 'definition'], (node) => { - const url = /** @type {Link | Image | Definition} */ (node).url - if ( - options.skipLocalhost && - /^(https?:\/\/)(localhost|127\.0\.0\.1)(:\d+)?/.test(url) - ) { - return - } + if (settings.skipLocalhost) { + // Extend this run's list only; do not mutate the defaults or stop early. + skipUrlPatterns.push(/^(https?:\/\/)(localhost|127\.0\.0\.1)(:\d+)?/) + } - if ( - options.skipUrlPatterns && - options.skipUrlPatterns.some((skipPattern) => - new RegExp(skipPattern).test(url) - ) - ) { - return + /* c8 ignore next 8 -- difficult to test */ + if (!online) { + if (!settings.skipOffline) { + // To do: clean message. + file.message('You are not online and have not set skipOffline: true.') } - if (!urlToNodes[url]) { - urlToNodes[url] = [] - } + return + } - urlToNodes[url].push(/** @type {Link | Image | Definition} */ (node)) - }) + const meta = /** @type {Record<string, unknown> | undefined} */ ( + file.data.meta + ) - return checkLinks(Object.keys(urlToNodes), options.gotOptions).then( - (results) => { - for (const url of Object.keys(results)) { - const result = results[url] - if (result.status !== 'dead') continue + const from = + settings.from || + (meta && + typeof meta.origin === 'string' && + typeof meta.pathname === 'string' + ? 
new URL(meta.pathname, meta.origin).href + : undefined) - const nodes = urlToNodes[url] + visit(tree, function (node) { + if ('url' in node && typeof node.url === 'string') { + const value = node.url + const colon = value.indexOf(':') + const questionMark = value.indexOf('?') + const numberSign = value.indexOf('#') + const slash = value.indexOf('/') + let relativeToSomething = false - for (const node of nodes) { - file.message(`Link to ${url} is dead`, node) - } + if ( + // If there is no protocol, it’s relative. + colon < 0 || + // If the first colon is after a `?`, `#`, or `/`, it’s not a protocol. + (slash > -1 && colon > slash) || + (questionMark > -1 && colon > questionMark) || + (numberSign > -1 && colon > numberSign) + ) { + relativeToSomething = true } - } - ) -} -/** @type {import('unified-lint-rule').Rule} */ -function wrapper(ast, file, options = {}) { - return isOnline().then((online) => { - if (!online) { - if (!options.skipOffline) { - file.message('You are not online and have not set skipOffline: true.') + // We can only check URLs relative to something if `from` is passed. 
+ if (relativeToSomething && !from) { + return } - return - } + const url = new URL(value, from).href + + if (skipUrlPatterns.some((skipPattern) => skipPattern.test(url))) { + return + } + + let list = nodesByUrl.get(url) + + if (!list) { + list = [] + nodesByUrl.set(url, list) + } - return noDeadUrls(ast, file, options) + list.push(node) + } }) -} -const remarkLintNoDeadLinks = lintRule('remark-lint:no-dead-urls', wrapper) + const urls = [...nodesByUrl.keys()] + + await Promise.all( + urls.map(async function (url) { + const nodes = nodesByUrl.get(url) + assert(nodes) + const result = await deadOrAlive(url, { + findUrls: false + // To do: + // * `anchorAllowlist` + // * `checkAnchor` + // * `followMetaHttpEquiv` + // * `maxRedirects` + // * `maxRetries` + // * `resolveClobberPrefix` + // * `sleep` + // * `timeout` + // * `userAgent` + }) + + for (const node of nodes) { + for (const message of result.messages) { + // To do: enclose url in backticks. + const copy = file.message('Link to ' + url + ' is dead', { + cause: message, + place: node.position + }) + + copy.fatal = message.fatal + } -export default remarkLintNoDeadLinks + if (result.status === 'alive' && new URL(url).href !== result.url) { + // To do: clean message. 
+ file.message('Link to ' + url + ' redirects to ' + result.url, { + place: node.position + }) + } + } + }) + ) +} diff --git a/package.json b/package.json index 2c16030..55111cc 100644 --- a/package.json +++ b/package.json @@ -31,22 +31,25 @@ ], "dependencies": { "@types/mdast": "^4.0.0", - "check-links": "^2.0.0", + "dead-or-alive": "^1.0.0", + "devlop": "^1.0.0", "is-online": "^10.0.0", - "unified-lint-rule": "^2.0.0", - "unist-util-visit": "^5.0.0" + "unified-lint-rule": "^3.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0", + "vfile-message": "^4.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "c8": "^8.0.0", - "esmock": "^2.0.0", "prettier": "^3.0.0", "remark": "^15.0.0", "remark-cli": "^12.0.0", - "remark-gfm": "^4.0.0", - "remark-preset-wooorm": "^9.0.0", + "remark-preset-wooorm": "^10.0.0", "type-coverage": "^2.0.0", "typescript": "^5.0.0", + "undici": "^6.0.0", + "vfile-sort": "^4.0.0", "xo": "^0.56.0" }, "scripts": { @@ -54,7 +57,7 @@ "format": "remark . --frail --output --quiet && prettier . 
--log-level warn --write && xo --fix", "prepack": "npm run build && npm run format", "test": "npm run build && npm run format && npm run test-coverage", - "test-api": "node --conditions development --loader=esmock test.js", + "test-api": "node --conditions development test.js", "test-coverage": "c8 --100 --reporter lcov npm run test-api" }, "prettier": { diff --git a/test.js b/test.js index ee3b83c..a04339f 100644 --- a/test.js +++ b/test.js @@ -1,196 +1,328 @@ -import test from 'node:test' import assert from 'node:assert/strict' +import test from 'node:test' import {remark} from 'remark' -import esmock from 'esmock' - -/** - * Wrapper for calling remark with the linter plugin - * @param {string} markdown - * @param {import("esmock").MockMap} [globalMockDefinitions] - * @param {import("./index.js").Options} [linterOptions] - * @returns {Promise} - */ -async function processMarkdown(markdown, globalMockDefinitions, linterOptions) { - /** @type {import('unified').Plugin<[import('./index.js').Options?], import('mdast').Root, import('mdast').Root>} */ - const remarkLintNoDeadLinks = await esmock( - './index.js', - {}, - globalMockDefinitions +import {MockAgent, getGlobalDispatcher, setGlobalDispatcher} from 'undici' +import {compareMessage} from 'vfile-sort' +import remarkLintNoDeadUrls from './index.js' + +test('works', async () => { + const globalDispatcher = getGlobalDispatcher() + const mockAgent = new MockAgent() + mockAgent.enableNetConnect(/(?=a)b/) + setGlobalDispatcher(mockAgent) + const a = mockAgent.get('https://exists.com') + a.intercept({path: '/'}).reply(200, 'ok') + a.intercept({path: '/does/not/'}).reply(404, 'nok') + + mockAgent + .get('https://does-not-exists.com') + .intercept({path: '/'}) + .reply(404, 'nok') + + const file = await remark().use(remarkLintNoDeadUrls).process(` +# Title + +Here is a [good link](https://exists.com). + +Here is a [bad link](https://exists.com/does/not/). + +Here is another [bad link](https://does-not-exists.com). 
+ `) + + await mockAgent.close() + await setGlobalDispatcher(globalDispatcher) + + file.messages.sort(compareMessage) + + assert.deepEqual( + file.messages.map((d) => d.reason), + [ + 'Link to https://exists.com/does/not/ is dead', + 'Link to https://does-not-exists.com/ is dead' + ] ) - // @ts-expect-error: to do: fix types. - return remark().use(remarkLintNoDeadLinks, linterOptions).process(markdown) -} +}) -test('works with no URLs', async () => { - const vfile = await processMarkdown(` +test('works w/o URLs', async () => { + const file = await remark().use(remarkLintNoDeadUrls).process(` # Title No URLs in here. `) - assert.equal(vfile.messages.length, 0) + + assert.equal(file.messages.length, 0) }) -test('works with mix of valid and invalid links', async () => { - const vfile = await processMarkdown( - ` -# Title +test('ignores URLs relative to the current URL normally', async () => { + const file = await remark().use(remarkLintNoDeadUrls).process(` +[](a.md) +[](/b.md) +[](./c.md) +[](../d.md) +[](#e) +[](?f) +[](//g.com) +[](/h:i) +[](?j:k) +[](#l:m) +`) -Here is a [good link](https://www.github.com). - -Here is a [bad link](https://github.com/unified/oops). - -Here is a [local link](http://localhost:3000). 
- `, - { - 'check-links': () => - Promise.resolve({ - 'https://www.github.com': {status: 'alive', statusCode: 200}, - 'https://github.com/unified/oops': { - status: 'dead', - statusCode: 404 - }, - 'http://localhost:3000': {status: 'dead', statusCode: 404} - }), - 'is-online': () => Promise.resolve(true) - } - ) + assert.equal(file.messages.length, 0) +}) - assert.equal(vfile.messages.length, 2) - assert.equal( - vfile.messages[0].reason, - 'Link to https://github.com/unified/oops is dead' - ) - assert.equal( - vfile.messages[1].reason, - 'Link to http://localhost:3000 is dead' +test('checks full URLs normally', async () => { + const globalDispatcher = getGlobalDispatcher() + const mockAgent = new MockAgent() + mockAgent.enableNetConnect(/(?=a)b/) + setGlobalDispatcher(mockAgent) + + const file = await remark().use(remarkLintNoDeadUrls, { + // Note: `[]` to overwrite the default only-http check in `skipUrlPatterns`. + skipUrlPatterns: [] + }).process(` +[](http://a.com) +[](https://b.com) +[](C:\\Documents\\c.md) +[](file:///Users/tilde/d.js) +`) + + await mockAgent.close() + await setGlobalDispatcher(globalDispatcher) + + file.messages.sort(compareMessage) + + assert.deepEqual( + file.messages.map((d) => d.reason), + [ + 'Link to http://a.com/ is dead', + 'Link to https://b.com/ is dead', + 'Link to c:\\Documents\\c.md is dead', + 'Link to file:///Users/tilde/d.js is dead' + ] ) }) -test('works with definitions and images', async () => { - const vfile = await processMarkdown( - ` -# Title +test('checks relative URLs w/ `from`', async () => { + const globalDispatcher = getGlobalDispatcher() + const mockAgent = new MockAgent() + mockAgent.enableNetConnect(/(?=a)b/) + setGlobalDispatcher(mockAgent) -Here is a good pig: ![picture of pig](/pig-photos/384). 
+ const file = await remark().use(remarkLintNoDeadUrls, { + from: 'https://example.com/from/folder' + }).process(` +[](a.md) +[](/b.md) +[](./c.md) +[](../d.md) +[](#e) +[](?f) +[](//g.com) +`) -Download the pig picture [here](/pig-photos/384). + await mockAgent.close() + await setGlobalDispatcher(globalDispatcher) -Here is a [bad link]. Here is that [bad link] again. + file.messages.sort(compareMessage) -[bad link]: /oops/broken - `, - { - 'check-links': () => - Promise.resolve({ - '/pig-photos/384': {status: 'alive', statusCode: 200}, - '/oops/broken': {status: 'dead', statusCode: 404} - }), - 'is-online': () => Promise.resolve(true) - } + assert.deepEqual( + file.messages.map((d) => d.reason), + [ + 'Link to https://example.com/from/a.md is dead', + 'Link to https://example.com/b.md is dead', + 'Link to https://example.com/from/c.md is dead', + 'Link to https://example.com/d.md is dead', + 'Link to https://example.com/from/folder#e is dead', + 'Link to https://example.com/from/folder?f is dead', + 'Link to https://g.com/ is dead' + ] ) +}) - assert.equal(vfile.messages.length, 1) - assert.equal(vfile.messages[0].reason, 'Link to /oops/broken is dead') +test('checks relative URLs w/ `meta.origin`, `meta.pathname`', async () => { + const globalDispatcher = getGlobalDispatcher() + const mockAgent = new MockAgent() + mockAgent.enableNetConnect(/(?=a)b/) + setGlobalDispatcher(mockAgent) + + const file = await remark() + .use(remarkLintNoDeadUrls) + .process({ + data: {meta: {origin: 'https://example.com', pathname: '/from/folder'}}, + value: '[](a.md)' + }) + + await mockAgent.close() + await setGlobalDispatcher(globalDispatcher) + + file.messages.sort(compareMessage) + + assert.deepEqual( + file.messages.map((d) => d.reason), + ['Link to https://example.com/from/a.md is dead'] + ) }) -test('skips URLs with unsupported protocols', async () => { - const vfile = await processMarkdown(` -[Send me an email.](mailto:me@me.com) -[Look at this file.](ftp://path/to/file.txt) 
-[Special schema.](flopper://a/b/c) +test('works with definitions and images', async () => { + const globalDispatcher = getGlobalDispatcher() + const mockAgent = new MockAgent() + mockAgent.enableNetConnect(/(?=a)b/) + setGlobalDispatcher(mockAgent) + + const file = await remark().use(remarkLintNoDeadUrls).process(` +![image](https://example.com/a) + +[link](https://example.com/b) + +[definition]: https://example.com/c `) - assert.equal(vfile.messages.length, 0) -}) + await mockAgent.close() + await setGlobalDispatcher(globalDispatcher) -test('warns if you are not online', async () => { - const vfile = await processMarkdown( - ` -Here is a [bad link](https://github.com/davidtheclark/oops). - `, - { - 'is-online': () => Promise.resolve(false) - } - ) + file.messages.sort(compareMessage) - assert.equal(vfile.messages.length, 1) - assert.equal( - vfile.messages[0].reason, - 'You are not online and have not set skipOffline: true.' + assert.deepEqual( + file.messages.map((d) => d.reason), + [ + 'Link to https://example.com/a is dead', + 'Link to https://example.com/b is dead', + 'Link to https://example.com/c is dead' + ] ) }) -test('works offline with skipOffline enabled', async () => { - const vfile = await processMarkdown( - ` -Here is a [bad link](https://github.com/davidtheclark/oops). 
- `, - { - 'is-online': () => Promise.resolve(false) - }, - { - skipOffline: true - } - ) +test('skips URLs with unsupported protocols', async () => { + const globalDispatcher = getGlobalDispatcher() + const mockAgent = new MockAgent() + mockAgent.enableNetConnect(/(?=a)b/) + setGlobalDispatcher(mockAgent) + + const file = await remark().use(remarkLintNoDeadUrls).process(` +[a](mailto:me@me.com) + +[b](ftp://path/to/file.txt) + +[c](flopper://a/b/c) +`) + + await mockAgent.close() + await setGlobalDispatcher(globalDispatcher) - assert.equal(vfile.messages.length, 0) + file.messages.sort(compareMessage) + + assert.deepEqual( + file.messages.map((d) => d.reason), + [] + ) }) test('ignores localhost when skipLocalhost enabled', async () => { - const vfile = await processMarkdown( - ` -- [http://localhost](http://localhost) -- [http://localhost/alex/test](http://localhost/alex/test) -- [http://localhost:3000](http://localhost:3000) -- [http://localhost:3000/alex/test](http://localhost:3000/alex/test) -- [https://localhost](http://localhost) -- [https://localhost/alex/test](http://localhost/alex/test) -- [https://localhost:3000](http://localhost:3000) -- [https://localhost:3000/alex/test](http://localhost:3000/alex/test) - `, - {}, - { - skipLocalhost: true - } - ) + const globalDispatcher = getGlobalDispatcher() + const mockAgent = new MockAgent() + mockAgent.enableNetConnect(/(?=a)b/) + setGlobalDispatcher(mockAgent) - assert.equal(vfile.messages.length, 0) -}) + const file = await remark().use(remarkLintNoDeadUrls, {skipLocalhost: true}) + .process(` +* [a](http://localhost) +* [b](http://localhost/alex/test) +* [c](http://localhost:3000) +* [d](http://localhost:3000/alex/test) +* [e](http://127.0.0.1) +* [f](http://127.0.0.1:3000) +`) -test('ignore loop back IP (127.0.0.1) when skipLocalhost is enabled', async () => { - const vfile = await processMarkdown( - ` -- [http://127.0.0.1](http://127.0.0.1) -- [http://127.0.0.1:3000](http://127.0.0.1:3000) -- 
[http://127.0.0.1/alex/test](http://127.0.0.1) -- [http://127.0.0.1:3000/alex/test](http://127.0.0.1:3000) -- [https://127.0.0.1](http://127.0.0.1) -- [https://127.0.0.1:3000](http://127.0.0.1:3000) -- [https://127.0.0.1/alex/test](http://127.0.0.1) -- [https://127.0.0.1:3000/alex/test](http://127.0.0.1:3000) - `, - {}, - { - skipLocalhost: true - } + await mockAgent.close() + await setGlobalDispatcher(globalDispatcher) + + file.messages.sort(compareMessage) + + assert.deepEqual( + file.messages.map((d) => d.reason), + [] ) +}) + +test('skipUrlPatterns for content', async () => { + const globalDispatcher = getGlobalDispatcher() + const mockAgent = new MockAgent() + mockAgent.enableNetConnect(/(?=a)b/) + setGlobalDispatcher(mockAgent) - assert.equal(vfile.messages.length, 0) + const file = await remark().use(remarkLintNoDeadUrls, { + skipUrlPatterns: [/^http:\/\/aaa\.com/, '^http://bbb\\.com'] + }).process(` +[a](http://aaa.com) +[b](http://aaa.com/somePath) +[c](http://aaa.com/somePath?withQuery=wow) +[d](http://bbb.com/somePath/maybe) +`) + + await mockAgent.close() + await setGlobalDispatcher(globalDispatcher) + + file.messages.sort(compareMessage) + + assert.deepEqual( + file.messages.map((d) => d.reason), + [] + ) }) -test('skipUrlPatterns for content:', async () => { - const vfile = await processMarkdown( - ` -[Ignore this](http://www.url-to-ignore.com) -[Ignore this](http://www.url-to-ignore.com/somePath) -[Ignore this](http://www.url-to-ignore.com/somePath?withQuery=wow) -[its complicated](http://url-to-ignore.com/somePath/maybe) - `, - {}, - { - skipUrlPatterns: [/^http:\/\/(.*)url-to-ignore\.com/] - } +test('should support anchors', async () => { + const globalDispatcher = getGlobalDispatcher() + const mockAgent = new MockAgent() + mockAgent.enableNetConnect(/(?=a)b/) + setGlobalDispatcher(mockAgent) + const site = mockAgent.get('https://example.com') + + site.intercept({path: '/'}).reply(200, '

hi

', { + headers: {'Content-Type': 'text/html'} + }) + + const file = await remark().use(remarkLintNoDeadUrls).process(` +[a](https://example.com#exists) +[b](https://example.com#does-not-exist) + `) + + await mockAgent.close() + await setGlobalDispatcher(globalDispatcher) + + file.messages.sort(compareMessage) + + assert.deepEqual( + file.messages.map((d) => d.reason), + ['Link to https://example.com/#does-not-exist is dead'] ) +}) + +test('should support redirects', async () => { + const globalDispatcher = getGlobalDispatcher() + const mockAgent = new MockAgent() + mockAgent.enableNetConnect(/(?=a)b/) + setGlobalDispatcher(mockAgent) + const site = mockAgent.get('https://example.com') + + site.intercept({path: '/from'}).reply(301, '', { + headers: {Location: '/to'} + }) + + site.intercept({path: '/to'}).reply(200, 'ok', { + headers: {'Content-Type': 'text/html'} + }) - assert.equal(vfile.messages.length, 0) + const file = await remark().use(remarkLintNoDeadUrls).process(` +[a](https://example.com/from) + `) + + await mockAgent.close() + await setGlobalDispatcher(globalDispatcher) + + file.messages.sort(compareMessage) + + assert.deepEqual( + file.messages.map((d) => d.reason), + ['Link to https://example.com/from redirects to https://example.com/to'] + ) })