Skip to content

Commit

Permalink
Merge pull request #34 from alphagov/link-token-mapper-inline-links
Browse files (browse the repository at this point in the history)
Extend LinkTokenMapper to handle anchor tags
  • Loading branch information
jackbot authored Feb 5, 2025
2 parents: 9e13772 + cd0a5d7; commit: 7ad9311
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 19 deletions.
20 changes: 10 additions & 10 deletions lib/answer_composition/link_token_mapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,15 @@ def initialize
@mapping = {}
end

def map_links_to_tokens(html_content)
def map_links_to_tokens(html_content, exact_path)
doc = Nokogiri::HTML::DocumentFragment.parse(html_content)

doc.css("a").each do |link|
href = link["href"]
href = begin
URI.join(ensure_absolute_govuk_url(exact_path), link["href"]).to_s
rescue URI::InvalidURIError
link["href"]
end

if mapping.key?(href)
link["href"] = mapping[href]
Expand Down Expand Up @@ -61,9 +65,7 @@ def rewrite_link(link_element)

if (url = link_for_token(token))
link_element.tap do |el|
# We frequently host GOV.UK chat in environments off www.gov.uk
# and need links not to be relative so that they will work.
el.attr["href"] = ensure_absolute_govuk_url(url)
el.attr["href"] = url

# If we have a link where the text is e.g. "link_1" then we should replace
# it with "source". Showing "link_1" to the user makes it seem like something
Expand All @@ -82,11 +84,9 @@ def rewrite_link(link_element)
end

def ensure_absolute_govuk_url(url)
begin
relative_uri = URI(url)
rescue URI::InvalidURIError
return url
end
# We frequently host GOV.UK chat in environments off www.gov.uk
# and need links not to be relative so that they will work.
relative_uri = URI(url)

return url if relative_uri.absolute?

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def system_prompt_context
page_title: result.title,
page_description: result.description,
context_headings: result.heading_hierarchy,
context_content: link_token_mapper.map_links_to_tokens(result.html_content),
context_content: link_token_mapper.map_links_to_tokens(result.html_content, result.exact_path),
}
end
end
Expand Down
25 changes: 17 additions & 8 deletions spec/lib/answer_composition/link_token_mapper_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,18 @@
<li>prove you're self-employed, for example to claim Tax-Free Childcare</li>
<li>make voluntary <a id="foo" href="https://www.gov.uk/national-insurance/what-national-insurance-is">National Insurance</a> payments</li>
<li>fill in a <a href="/tax-returns">Tax return</a> each tax year.</li>
<li>this is <a href="#some-heading">an anchor tag</a>.</li>
</ul>
HTML
end

describe "#map_links_to_tokens" do
it "replaces href attributes with tokens" do
amended_html = described_class.new.map_links_to_tokens(html)
amended_html = described_class.new.map_links_to_tokens(html, "/exact-path")
parsed_html = Nokogiri::HTML::DocumentFragment.parse(amended_html)
links = parsed_html.css("a")

expect(links.length).to eq(4)
expect(links.length).to eq(5)

expect(links[0]["href"]).to eq("link_1")
expect(links[0].text).to eq("Tax return")
Expand All @@ -33,9 +34,12 @@
expect(links[2]["href"]).to eq("link_3")
expect(links[2].text).to eq("National Insurance")

#  Duplicate link, so gets the same token as the first link
# Duplicate link, so gets the same token as the first link
expect(links[3]["href"]).to eq("link_1")
expect(links[3].text).to eq("Tax return")

expect(links[4]["href"]).to eq("link_4")
expect(links[4].text).to eq("an anchor tag")
end
end

Expand All @@ -60,7 +64,7 @@
describe "#replace_tokens_with_links" do
it "replaces token-based links with stored links that are absolute URIs" do
mapper = described_class.new
mapper.map_links_to_tokens(html)
mapper.map_links_to_tokens(html, "/exact-path")

source = <<~MARKDOWN
# Tax
Expand All @@ -73,6 +77,7 @@
* prove you're self-employed, for example to claim Tax-Free Childcare
* make voluntary [National Insurance](link_3) payments
* do something with an [anchor tag](link_4)
[1]: link_2
MARKDOWN
Expand All @@ -93,11 +98,15 @@
expect(output)
.to include("[National Insurance][3]")
.and include("[3]: https://www.gov.uk/national-insurance/what-national-insurance-is")

expect(output)
.to include("[anchor tag][4]")
.and include("[4]: https://www.test.gov.uk/exact-path#some-heading")
end

it "replaces link text that has not been substituted" do
mapper = described_class.new
mapper.map_links_to_tokens(html)
mapper.map_links_to_tokens(html, "/exact-path")

markdown = <<~MARKDOWN
Send a tax return ([link_1][1])
Expand All @@ -112,7 +121,7 @@
it "handles invalid URIs" do
html = '<p>Send a tax return to <a href="mailto:<[email protected]>">us</a></p>'
mapper = described_class.new
mapper.map_links_to_tokens(html)
mapper.map_links_to_tokens(html, "/exact-path")

markdown = <<~MARKDOWN
You should send a tax return to [us](link_1)
Expand Down Expand Up @@ -140,9 +149,9 @@
describe "#link_for_token" do
it "returns the link for a given token" do
mapper = described_class.new
mapper.map_links_to_tokens(html)
mapper.map_links_to_tokens(html, "/exact-path")

expect(mapper.link_for_token("link_1")).to eq("/tax-returns")
expect(mapper.link_for_token("link_1")).to eq("https://www.test.gov.uk/tax-returns")
end

it "returns nil if the token is not in the mapping" do
Expand Down

0 comments on commit 7ad9311

Please sign in to comment.