From 34a545a50b05dcdd76f34693f174f0573ed8c985 Mon Sep 17 00:00:00 2001
From: Simon Schrottner <simon.schrottner@dynatrace.com>
Date: Wed, 11 Jan 2023 12:29:23 +0100
Subject: [PATCH] Reintroduce anchor detection as configurable step

Anchor detection was removed during refactoring. Although a link
with a broken anchor still points to a reachable URL, it is still
a wrong/broken link. This matters especially when markdown is used
to generate pages that are checked with tools like HTMLtest,
which also verify that anchors are valid.

Therefore I reintroduce anchor detection with this PR, but as a
configurable option. This should preserve the intended
outcome of the refactoring while ensuring backwards compatibility
for other users who rely on proper anchor handling.

Relates: #24

Signed-off-by: Simon Schrottner <simon.schrottner@dynatrace.com>
---
 README.md                            | 25 ++++++++++++++++---
 index.js                             | 24 +++++++++++++++++--
 test/markdown-link-extractor.test.js | 36 +++++++++++++---------------
 3 files changed, 61 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index a5af4db..c853fa3 100644
--- a/README.md
+++ b/README.md
@@ -8,15 +8,18 @@ $ npm install --save markdown-link-extractor
 ```
 ## API
 
-### markdownLinkExtractor(markdown)
+### markdownLinkExtractor(markdown, checkAnchors = false)
 
 Parameters:
 
 * `markdown` text in markdown format.
+* `checkAnchors` if `true`, heading anchors are also extracted (default: `false`).
 
 Returns:
 
-* an array containing the URLs from the links found.
+* an object with the following properties:
+  * `.anchors`: an array of anchor tag strings (e.g. `[ "#foo", "#bar" ]`) - only filled if `checkAnchors` is set to `true`.
+  * `.links`: an array containing the URLs from the links found.
 
 ## Examples
 
@@ -26,10 +29,26 @@ const markdownLinkExtractor = require('markdown-link-extractor');
 
 const markdown = readFileSync('README.md', {encoding: 'utf8'});
 
-const links = markdownLinkExtractor(markdown);
+const { links } = markdownLinkExtractor(markdown);
 links.forEach(link => console.log(link));
 ```
 
+## Upgrading to v5.0.0
+
+- anchor link extraction reintroduced - be careful if you upgrade from a version older than `3.x`: the `extended` parameter was removed, and the new `checkAnchors` parameter now occupies the same position.
+
+Code that looked like this:
+
+```
+const links = markdownLinkExtractor(str);
+```
+
+Should change to this:
+
+```
+const { links } = markdownLinkExtractor(str);
+```
+
 ## Upgrading to v4.0.0
 
 - anchor link extraction no longer supported
diff --git a/index.js b/index.js
index 604975a..c68e501 100644
--- a/index.js
+++ b/index.js
@@ -3,13 +3,33 @@
 const { marked } = require('marked');
 const htmlLinkExtractor = require('html-link-extractor');
 
-module.exports = function markdownLinkExtractor(markdown, extended = false) {
+module.exports = function markdownLinkExtractor(markdown, checkAnchors = false) {
+    const anchors = [];
+    if(checkAnchors) {
+        const renderer = {
+            heading(text, level, raw, slugger) {
+                if (this.options.headerIds) {
+                    var id = this.options.headerPrefix + slugger.slug(raw);
+                    
+                        anchors.push(`#${id}`);
+                    
+                    return "<h" + level + " id=\"" + id + "\">" + text + "</h" + level + ">\n";
+                } // ignore IDs
+
+
+                return "<h" + level + ">" + text + "</h" + level + ">\n";
+            }
+        };
+
+        marked.use({ renderer });
+    }
 
     marked.setOptions({
         mangle: false, // don't escape autolinked email address with HTML character references.
     });
 
+
     const html = marked(markdown);
     const links = htmlLinkExtractor(html);
-    return links;
+    return { links, anchors };
 };
diff --git a/test/markdown-link-extractor.test.js b/test/markdown-link-extractor.test.js
index 3b6b4e3..4bf2f71 100644
--- a/test/markdown-link-extractor.test.js
+++ b/test/markdown-link-extractor.test.js
@@ -6,69 +6,62 @@ var markdownLinkExtractor = require('../');
 describe('markdown-link-extractor', function () {
 
     it('should return an empty array when no links are present', function () {
-        var links = markdownLinkExtractor('No links here');
+        var { links } = markdownLinkExtractor('No links here');
         expect(links).to.be.an('array');
         expect(links).to.have.length(0);
     });
 
     it('should extract links with emojis', function () {
-        var links = markdownLinkExtractor('**[📣 Foo!](https://www.example.com)**');
+        var { links } = markdownLinkExtractor('**[📣 Foo!](https://www.example.com)**');
         expect(links).to.be.an('array');
         expect(links).to.have.length(1);
         expect(links[0]).to.be('https://www.example.com');
     });
 
     it('should extract a link in a [tag](http://example.com)', function () {
-        var links = markdownLinkExtractor('[example](http://www.example.com)');
+        var { links } = markdownLinkExtractor('[example](http://www.example.com)');
         expect(links).to.be.an('array');
         expect(links).to.have.length(1);
         expect(links[0]).to.be('http://www.example.com');
     });
 
-    it('should extract a hash link in [foobar](#foobar)', function () {
-        var links = markdownLinkExtractor('[foobar](#foobar)');
-        expect(links).to.be.an('array');
-        expect(links).to.have.length(1);
-        expect(links[0]).to.be('#foobar');
-    });
-
     it('should extract a link from inline html <a href="http://foo.bar.test">foo</a>', function () {
-        var links = markdownLinkExtractor('<a href="http://foo.bar.test">foo</a>');
+        var { links } = markdownLinkExtractor('<a href="http://foo.bar.test">foo</a>');
         expect(links).to.be.an('array');
         expect(links).to.have.length(1);
         expect(links[0]).to.be('http://foo.bar.test');
     });
 
     it('should extract mailto: link from <test@example.com>', function () {
-        var links = markdownLinkExtractor('<test@example.com>)');
+        var { links } = markdownLinkExtractor('<test@example.com>)');
         expect(links).to.be.an('array');
         expect(links).to.have.length(1);
         expect(links[0]).to.be('mailto:test@example.com');
     });
 
     it('should extract a link in a with escaped braces [tag](http://example.com\(1\))', function () {
-        var links = markdownLinkExtractor('[XMLHttpRequest](http://msdn.microsoft.com/library/ie/ms535874\\(v=vs.85\\).aspx)');
+        var { links } = markdownLinkExtractor('[XMLHttpRequest](http://msdn.microsoft.com/library/ie/ms535874\\(v=vs.85\\).aspx)');
         expect(links).to.be.an('array');
         expect(links).to.have.length(1);
         expect(links[0]).to.be('http://msdn.microsoft.com/library/ie/ms535874(v=vs.85).aspx');
     });
 
     it('should extract an image link in a ![tag](http://example.com/image.jpg)', function () {
-        var links = markdownLinkExtractor('![example](http://www.example.com/image.jpg)');
+        var { links } = markdownLinkExtractor('![example](http://www.example.com/image.jpg)');
         expect(links).to.be.an('array');
         expect(links).to.have.length(1);
         expect(links[0]).to.be('http://www.example.com/image.jpg');
     });
 
     it('should extract an image link in a ![tag](foo/image.jpg)', function () {
-        var links = markdownLinkExtractor('![example](foo/image.jpg)');
+        var { links } = markdownLinkExtractor('![example](foo/image.jpg)');
         expect(links).to.be.an('array');
         expect(links).to.have.length(1);
         expect(links[0]).to.be('foo/image.jpg');
     });
 
     it('should extract two image links', function () {
-        var links = markdownLinkExtractor('![img](http://www.example.test/hello.jpg) ![img](hello.jpg)');
+        var { links } = markdownLinkExtractor('![img](http://www.example.test/hello.jpg) ![img](hello.jpg)');
         expect(links).to.be.an('array');
         expect(links).to.have.length(2);
         expect(links[0]).to.be('http://www.example.test/hello.jpg');
@@ -76,18 +69,23 @@ describe('markdown-link-extractor', function () {
     });
 
     it('should extract a bare link http://example.com', function () {
-        var links = markdownLinkExtractor('This is a link: http://www.example.com');
+        var { links } = markdownLinkExtractor('This is a link: http://www.example.com');
         expect(links).to.be.an('array');
         expect(links).to.have.length(1);
         expect(links[0]).to.be('http://www.example.com');
     });
 
     it('should extract multiple links', function () {
-        var links = markdownLinkExtractor('This is an [example](http://www.example.com). Hope it [works](http://www.example.com/works)');
+        var { links } = markdownLinkExtractor('This is an [example](http://www.example.com). Hope it [works](http://www.example.com/works)');
         expect(links).to.be.an('array');
         expect(links).to.have.length(2);
         expect(links[0]).to.be('http://www.example.com');
         expect(links[1]).to.be('http://www.example.com/works');
     });
 
-});
+    it('should collect anchor tags', function () {
+        var { anchors } = markdownLinkExtractor('# foo\n# foo', true);
+        expect(anchors).to.eql(['#foo','#foo-1']);
+    });
+
+});
\ No newline at end of file