From 3605fbf14c5c6e00627a2e57003b4d0b01498be5 Mon Sep 17 00:00:00 2001 From: Tom Christensen Date: Tue, 6 Sep 2022 21:35:07 -0600 Subject: [PATCH 1/7] Adding a space in link text and any nwo#number text string --- .vscode/launch.json | 3 ++- .../link_and_mention_sanitizer.rb | 13 ++++++++++++- .../link_and_mention_sanitizer_spec.rb | 17 ++++++++++++++--- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 2167eca61a..46518252be 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -20,7 +20,7 @@ "name": "Debug Tests", "type": "Ruby", "request": "launch", - "program": "${workspaceRoot}/${input:ecosystem}/.bundle/bin/rspec", + "program": "${workspaceRoot}/omnibus/.bundle/bin/rspec", "cwd": "${workspaceRoot}/${input:ecosystem}", "useBundler": true, "args": ["${input:test_path}"], @@ -106,6 +106,7 @@ "options": [ "bundler", "cargo", + "common", "composer", "docker", "elm", diff --git a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb index 76803e47ad..47df647f34 100644 --- a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb +++ b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb @@ -14,6 +14,7 @@ class LinkAndMentionSanitizer github\.com/(?#{GITHUB_USERNAME}/[^/\s]+)/ (?:issue|pull)s?/(?\d+) }x.freeze + GITHUB_NWO_REGEX = %r{(?#{GITHUB_USERNAME}/[^/\s#]+)#(?\d+)}.freeze MENTION_REGEX = %r{(?#{GITHUB_USERNAME})/(?#{GITHUB_USERNAME})/?}.freeze @@ -98,13 +99,19 @@ def sanitize_links(doc) last_match = subnode.string_content.match(GITHUB_REF_REGEX) number = last_match.named_captures.fetch("number") repo = last_match.named_captures.fetch("repo") - subnode.string_content = "#{repo}##{number}" + subnode.string_content = insert_space_in_link_text("#{repo}##{number}") end node.url = 
replace_github_host(node.url) elsif node.type == :text && node.string_content.match?(GITHUB_REF_REGEX) node.string_content = replace_github_host(node.string_content) + elsif node.type == :text && + node.string_content.match?(GITHUB_NWO_REGEX) + match = node.string_content.match(GITHUB_NWO_REGEX) + repo = match.named_captures.fetch("repo") + number = match.named_captures.fetch("number") + node.string_content = insert_space_in_link_text("#{repo}##{number}") end end end @@ -188,6 +195,10 @@ def insert_zero_width_space_in_mention(mention) mention.sub("@", "@\u200B").encode("utf-8") end + def insert_space_in_link_text(text) + text.sub("#", " #").encode("utf-8") + end + def parent_node_link?(node) node.type == :link || (node.parent && parent_node_link?(node.parent)) end diff --git a/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb b/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb index 8f7624e169..3b5c254c30 100644 --- a/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb +++ b/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb @@ -252,7 +252,7 @@ it do is_expected.to eq( "

Check out my/repo#5

\n" + "issues/5\">my/repo #5

\n" ) end end @@ -263,7 +263,7 @@ it do is_expected.to eq( "

Check out my/repo#5

\n" + "issues/5\">my/repo #5

\n" ) end end @@ -276,7 +276,18 @@ it do is_expected.to eq( "

rust-num/num-traits#144

\n" + "pull/144\">rust-num/num-traits #144

\n" + ) + end + end + + context "with a GitHub NWO and PR number" do + let(:text) do + "dsp-testing/dependabot-ts-definitely-typed#25" + end + it do + is_expected.to eq( + "

dsp-testing/dependabot-ts-definitely-typed #25

\n" ) end end From b07430e8c89c4e639b84dbb22cbba2523532db19 Mon Sep 17 00:00:00 2001 From: Tom Christensen Date: Wed, 7 Sep 2022 06:52:06 -0600 Subject: [PATCH 2/7] Use `<code>` tags instead of spaces --- .../link_and_mention_sanitizer.rb | 16 ++++++++++------ .../link_and_mention_sanitizer_spec.rb | 8 ++++---- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb index 47df647f34..e448cac3f6 100644 --- a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb +++ b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb @@ -99,7 +99,7 @@ def sanitize_links(doc) last_match = subnode.string_content.match(GITHUB_REF_REGEX) number = last_match.named_captures.fetch("number") repo = last_match.named_captures.fetch("repo") - subnode.string_content = insert_space_in_link_text("#{repo}##{number}") + subnode.string_content = "#{repo}##{number}" end node.url = replace_github_host(node.url) @@ -111,7 +111,9 @@ def sanitize_links(doc) match = node.string_content.match(GITHUB_NWO_REGEX) repo = match.named_captures.fetch("repo") number = match.named_captures.fetch("number") - node.string_content = insert_space_in_link_text("#{repo}##{number}") + new_node = build_nwo_text_node("#{repo}##{number}") + node.insert_before(new_node) + node.delete end end end @@ -177,6 +179,12 @@ def build_mention_link_text_nodes(text) code_node.string_content = insert_zero_width_space_in_mention(text) [code_node] end + + def build_nwo_text_node(text) + code_node = CommonMarker::Node.new(:code) + code_node.string_content = text + code_node + end def create_link_node(url, text) link_node = CommonMarker::Node.new(:link) @@ -195,10 +203,6 @@ def insert_zero_width_space_in_mention(mention) mention.sub("@", "@\u200B").encode("utf-8") end - def 
insert_space_in_link_text(text) - text.sub("#", " #").encode("utf-8") - end - def parent_node_link?(node) node.type == :link || (node.parent && parent_node_link?(node.parent)) end diff --git a/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb b/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb index 3b5c254c30..c5a63c4050 100644 --- a/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb +++ b/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb @@ -252,7 +252,7 @@ it do is_expected.to eq( "

Check out my/repo #5

\n" + "issues/5\">my/repo#5

\n" ) end end @@ -263,7 +263,7 @@ it do is_expected.to eq( "

Check out my/repo #5

\n" + "issues/5\">my/repo#5

\n" ) end end @@ -276,7 +276,7 @@ it do is_expected.to eq( "

rust-num/num-traits #144

\n" + "pull/144\">rust-num/num-traits#144

\n" ) end end @@ -287,7 +287,7 @@ end it do is_expected.to eq( - "

dsp-testing/dependabot-ts-definitely-typed #25

\n" + "

dsp-testing/dependabot-ts-definitely-typed#25

\n" ) end end From 0f6b6c777ab40b15bfc27ffebcf7e6be267f15a0 Mon Sep 17 00:00:00 2001 From: Tom Christensen Date: Wed, 7 Sep 2022 08:28:11 -0600 Subject: [PATCH 3/7] check if the parent is a link before wrapping in code block --- .../message_builder/link_and_mention_sanitizer.rb | 3 ++- .../message_builder/link_and_mention_sanitizer_spec.rb | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb index e448cac3f6..83e5f85a0e 100644 --- a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb +++ b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb @@ -107,7 +107,8 @@ def sanitize_links(doc) node.string_content.match?(GITHUB_REF_REGEX) node.string_content = replace_github_host(node.string_content) elsif node.type == :text && - node.string_content.match?(GITHUB_NWO_REGEX) + node.string_content.match?(GITHUB_NWO_REGEX) && + !parent_node_link?(node) match = node.string_content.match(GITHUB_NWO_REGEX) repo = match.named_captures.fetch("repo") number = match.named_captures.fetch("number") diff --git a/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb b/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb index c5a63c4050..6108d824e1 100644 --- a/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb +++ b/common/spec/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer_spec.rb @@ -252,7 +252,7 @@ it do is_expected.to eq( "

Check out my/repo#5

\n" + "issues/5\">my/repo#5

\n" ) end end @@ -263,7 +263,7 @@ it do is_expected.to eq( "

Check out my/repo#5

\n" + "issues/5\">my/repo#5

\n" ) end end @@ -276,7 +276,7 @@ it do is_expected.to eq( "

rust-num/num-traits#144

\n" + "pull/144\">rust-num/num-traits#144

\n" ) end end From 7c99a8a08a34ef3dc894c165284f8226c38b0bdd Mon Sep 17 00:00:00 2001 From: Tom Christensen Date: Wed, 7 Sep 2022 09:41:03 -0600 Subject: [PATCH 4/7] Cleanup the complexity a bit. --- .../link_and_mention_sanitizer.rb | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb index 83e5f85a0e..5ba0cf4931 100644 --- a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb +++ b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb @@ -41,7 +41,8 @@ def sanitize_links_and_mentions(text:, unsafe: false) sanitize_team_mentions(doc) sanitize_mentions(doc) sanitize_links(doc) - + sanitize_nwo_text(doc) + mode = unsafe ? :UNSAFE : :DEFAULT doc.to_html(([mode] + COMMONMARKER_OPTIONS), COMMONMARKER_EXTENSIONS) end @@ -106,19 +107,29 @@ def sanitize_links(doc) elsif node.type == :text && node.string_content.match?(GITHUB_REF_REGEX) node.string_content = replace_github_host(node.string_content) - elsif node.type == :text && - node.string_content.match?(GITHUB_NWO_REGEX) && - !parent_node_link?(node) - match = node.string_content.match(GITHUB_NWO_REGEX) - repo = match.named_captures.fetch("repo") - number = match.named_captures.fetch("number") - new_node = build_nwo_text_node("#{repo}##{number}") - node.insert_before(new_node) - node.delete end end end + def sanitize_nwo_text(doc) + doc.walk do |node| + if node.type == :text && + node.string_content.match?(GITHUB_NWO_REGEX) && + !parent_node_link?(node) + replace_nwo_node(node) + end + end + end + + def replace_nwo_node(node) + match = node.string_content.match(GITHUB_NWO_REGEX) + repo = match.named_captures.fetch("repo") + number = match.named_captures.fetch("number") + new_node = build_nwo_text_node("#{repo}##{number}") + 
node.insert_before(new_node) + node.delete + end + def replace_github_host(text) text.gsub( /(www\.)?github.com/, github_redirection_service || "github.com" From 3f8fe0c001b53e2ef88eb974a6b3741638ca2956 Mon Sep 17 00:00:00 2001 From: Tom Christensen Date: Wed, 7 Sep 2022 09:41:40 -0600 Subject: [PATCH 5/7] lint cleanup --- .../message_builder/link_and_mention_sanitizer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb index 5ba0cf4931..ccad536aa1 100644 --- a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb +++ b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb @@ -42,7 +42,7 @@ def sanitize_links_and_mentions(text:, unsafe: false) sanitize_mentions(doc) sanitize_links(doc) sanitize_nwo_text(doc) - + mode = unsafe ? 
:UNSAFE : :DEFAULT doc.to_html(([mode] + COMMONMARKER_OPTIONS), COMMONMARKER_EXTENSIONS) end From 730c4c1cd35330f8c2dad1fdff9da6abebba3583 Mon Sep 17 00:00:00 2001 From: Ankit Honey Date: Wed, 7 Sep 2022 08:52:38 -0700 Subject: [PATCH 6/7] Removing trailing whitespaces --- .../message_builder/link_and_mention_sanitizer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb index ccad536aa1..6edb3358db 100644 --- a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb +++ b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb @@ -191,7 +191,7 @@ def build_mention_link_text_nodes(text) code_node.string_content = insert_zero_width_space_in_mention(text) [code_node] end - + def build_nwo_text_node(text) code_node = CommonMarker::Node.new(:code) code_node.string_content = text From 640186eb094a53b78366c3168b96353b94db2fd5 Mon Sep 17 00:00:00 2001 From: Tom Christensen Date: Wed, 7 Sep 2022 11:56:04 -0600 Subject: [PATCH 7/7] adding a comment about the regex --- .../message_builder/link_and_mention_sanitizer.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb index 6edb3358db..8035b25756 100644 --- a/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb +++ b/common/lib/dependabot/pull_request_creator/message_builder/link_and_mention_sanitizer.rb @@ -14,6 +14,7 @@ class LinkAndMentionSanitizer github\.com/(?#{GITHUB_USERNAME}/[^/\s]+)/ (?:issue|pull)s?/(?\d+) }x.freeze + # [^/\s#]+ means one or more characters not matching (^) the class /, whitespace (\s), or # GITHUB_NWO_REGEX = 
%r{(?#{GITHUB_USERNAME}/[^/\s#]+)#(?\d+)}.freeze MENTION_REGEX = %r{(?