From 34a545a50b05dcdd76f34693f174f0573ed8c985 Mon Sep 17 00:00:00 2001 From: Simon Schrottner Date: Wed, 11 Jan 2023 12:29:23 +0100 Subject: [PATCH] Reintroduce anchor detection as configurable step Anchor detection was removed during refactoring. Although a link with a broken anchor still points to a reachable URL, it is still a wrong/broken link. This matters especially when markdown is used to generate pages that are checked with tools like HTMLtest, which verify anchors. Therefore I reintroduce anchor detection with this PR, but as a configurable option. This keeps the outcome of the refactoring intact while ensuring backwards compatibility for users who rely on proper anchor handling. Relates: #24 Signed-off-by: Simon Schrottner --- README.md | 25 ++++++++++++++++--- index.js | 24 +++++++++++++++++-- test/markdown-link-extractor.test.js | 36 +++++++++++++--------------- 3 files changed, 61 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index a5af4db..c853fa3 100644 --- a/README.md +++ b/README.md @@ -8,15 +8,18 @@ $ npm install --save markdown-link-extractor ``` ## API -### markdownLinkExtractor(markdown) +### markdownLinkExtractor(markdown, checkAnchors = false) Parameters: * `markdown` text in markdown format. +* `checkAnchors` whether heading anchors should also be extracted (default `false`). Returns: -* an array containing the URLs from the links found. +* an object with the following properties: + * `.anchors`: an array of anchor tag strings (e.g. `[ "#foo", "#bar" ]`) - only filled if `checkAnchors` is set to `true`. + * `.links`: an array containing the URLs from the links found. 
## Examples @@ -26,10 +29,26 @@ const markdownLinkExtractor = require('markdown-link-extractor'); const markdown = readFileSync('README.md', {encoding: 'utf8'}); -const links = markdownLinkExtractor(markdown); +const { links } = markdownLinkExtractor(markdown); links.forEach(link => console.log(link)); ``` +## Upgrading to v5.0.0 + +- anchor link extraction reintroduced - be careful if you upgrade from version <`3.x` as the `extended` parameter got removed but now there is the `checkAnchors` parameter in place. + +Code that looked like this: + +``` +const links = markdownLinkExtractor(str); +``` + +Should change to this: + +``` +const { links } = markdownLinkExtractor(str); +``` + ## Upgrading to v4.0.0 - anchor link extraction no longer supported diff --git a/index.js b/index.js index 604975a..c68e501 100644 --- a/index.js +++ b/index.js @@ -3,13 +3,33 @@ const { marked } = require('marked'); const htmlLinkExtractor = require('html-link-extractor'); -module.exports = function markdownLinkExtractor(markdown, extended = false) { +module.exports = function markdownLinkExtractor(markdown, checkAnchors = false) { + const anchors = []; + if(checkAnchors) { + const renderer = { + heading(text, level, raw, slugger) { + if (this.options.headerIds) { + var id = this.options.headerPrefix + slugger.slug(raw); + + anchors.push(`#${id}`); + + return "" + text + "\n"; + } // ignore IDs + + + return "" + text + "\n"; + } + }; + + marked.use({ renderer }); + } marked.setOptions({ mangle: false, // don't escape autolinked email address with HTML character references. 
}); + const html = marked(markdown); const links = htmlLinkExtractor(html); - return links; + return { links, anchors }; }; diff --git a/test/markdown-link-extractor.test.js b/test/markdown-link-extractor.test.js index 3b6b4e3..4bf2f71 100644 --- a/test/markdown-link-extractor.test.js +++ b/test/markdown-link-extractor.test.js @@ -6,69 +6,62 @@ var markdownLinkExtractor = require('../'); describe('markdown-link-extractor', function () { it('should return an empty array when no links are present', function () { - var links = markdownLinkExtractor('No links here'); + var { links } = markdownLinkExtractor('No links here'); expect(links).to.be.an('array'); expect(links).to.have.length(0); }); it('should extract links with emojis', function () { - var links = markdownLinkExtractor('**[📣 Foo!](https://www.example.com)**'); + var { links } = markdownLinkExtractor('**[📣 Foo!](https://www.example.com)**'); expect(links).to.be.an('array'); expect(links).to.have.length(1); expect(links[0]).to.be('https://www.example.com'); }); it('should extract a link in a [tag](http://example.com)', function () { - var links = markdownLinkExtractor('[example](http://www.example.com)'); + var { links } = markdownLinkExtractor('[example](http://www.example.com)'); expect(links).to.be.an('array'); expect(links).to.have.length(1); expect(links[0]).to.be('http://www.example.com'); }); - it('should extract a hash link in [foobar](#foobar)', function () { - var links = markdownLinkExtractor('[foobar](#foobar)'); - expect(links).to.be.an('array'); - expect(links).to.have.length(1); - expect(links[0]).to.be('#foobar'); - }); - it('should extract a link from inline html foo', function () { - var links = markdownLinkExtractor('foo'); + var { links } = markdownLinkExtractor('foo'); expect(links).to.be.an('array'); expect(links).to.have.length(1); expect(links[0]).to.be('http://foo.bar.test'); }); it('should extract mailto: link from ', function () { - var links = markdownLinkExtractor(')'); + var { 
links } = markdownLinkExtractor(')'); expect(links).to.be.an('array'); expect(links).to.have.length(1); expect(links[0]).to.be('mailto:test@example.com'); }); it('should extract a link in a with escaped braces [tag](http://example.com\(1\))', function () { - var links = markdownLinkExtractor('[XMLHttpRequest](http://msdn.microsoft.com/library/ie/ms535874\\(v=vs.85\\).aspx)'); + var { links } = markdownLinkExtractor('[XMLHttpRequest](http://msdn.microsoft.com/library/ie/ms535874\\(v=vs.85\\).aspx)'); expect(links).to.be.an('array'); expect(links).to.have.length(1); expect(links[0]).to.be('http://msdn.microsoft.com/library/ie/ms535874(v=vs.85).aspx'); }); it('should extract an image link in a ![tag](http://example.com/image.jpg)', function () { - var links = markdownLinkExtractor('![example](http://www.example.com/image.jpg)'); + var { links } = markdownLinkExtractor('![example](http://www.example.com/image.jpg)'); expect(links).to.be.an('array'); expect(links).to.have.length(1); expect(links[0]).to.be('http://www.example.com/image.jpg'); }); it('should extract an image link in a ![tag](foo/image.jpg)', function () { - var links = markdownLinkExtractor('![example](foo/image.jpg)'); + var { links } = markdownLinkExtractor('![example](foo/image.jpg)'); expect(links).to.be.an('array'); expect(links).to.have.length(1); expect(links[0]).to.be('foo/image.jpg'); }); it('should extract two image links', function () { - var links = markdownLinkExtractor('![img](http://www.example.test/hello.jpg) ![img](hello.jpg)'); + var { links } = markdownLinkExtractor('![img](http://www.example.test/hello.jpg) ![img](hello.jpg)'); expect(links).to.be.an('array'); expect(links).to.have.length(2); expect(links[0]).to.be('http://www.example.test/hello.jpg'); @@ -76,18 +69,23 @@ describe('markdown-link-extractor', function () { }); it('should extract a bare link http://example.com', function () { - var links = markdownLinkExtractor('This is a link: http://www.example.com'); + var { links } = 
markdownLinkExtractor('This is a link: http://www.example.com'); expect(links).to.be.an('array'); expect(links).to.have.length(1); expect(links[0]).to.be('http://www.example.com'); }); it('should extract multiple links', function () { - var links = markdownLinkExtractor('This is an [example](http://www.example.com). Hope it [works](http://www.example.com/works)'); + var { links } = markdownLinkExtractor('This is an [example](http://www.example.com). Hope it [works](http://www.example.com/works)'); expect(links).to.be.an('array'); expect(links).to.have.length(2); expect(links[0]).to.be('http://www.example.com'); expect(links[1]).to.be('http://www.example.com/works'); }); -}); + it('should collect anchor tags', function () { + var { anchors } = markdownLinkExtractor('# foo\n# foo', true); + expect(anchors).to.eql(['#foo','#foo-1']); + }); + +}); \ No newline at end of file