Skip to content

Commit

Permalink
add HTML.unescape [Closes #3107]
Browse files Browse the repository at this point in the history
  • Loading branch information
Duke committed Oct 3, 2016
1 parent bdffb8b commit 3877c73
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 0 deletions.
38 changes: 38 additions & 0 deletions spec/std/html_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,42 @@ describe "HTML" do
str.should eq("nbsp space ")
end
end

describe ".unescape" do
  # A string without any '&' must come back unchanged.
  it "does not change a safe string" do
    HTML.unescape("safe_string").should eq("safe_string")
  end

  # The input must contain real entities; feeding already-decoded text
  # would pass without exercising any decoding.
  it "unescapes dangerous characters from a string" do
    HTML.unescape("&lt; &amp; &gt;").should eq("< & >")
  end

  # Mixes named entities with decimal numeric references.
  it "unescapes javascript example from a string" do
    escaped = "&lt;script&gt;alert&#40;&#39;You are being hacked&#39;&#41;&lt;/script&gt;"

    HTML.unescape(escaped).should eq("<script>alert('You are being hacked')</script>")
  end

  it "unescapes decimal encoded chars" do
    HTML.unescape("&lt;&#104;&#101;llo world&gt;").should eq("<hello world>")
  end

  # Entities missing the trailing ';' and unknown names are left as written.
  it "unescapes with invalid entities" do
    HTML.unescape("&&lt;&amp&gt;&quot&abcdefghijklmn").should eq("&<&amp>&quot&abcdefghijklmn")
  end

  # Leading zeros in a hexadecimal reference are accepted.
  it "unescapes hex encoded chars" do
    HTML.unescape("3 &#x0002B; 2 &#x0003D; 5").should eq("3 + 2 = 5")
  end
end
end
32 changes: 32 additions & 0 deletions src/html.cr
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,36 @@ module HTML
io << SUBSTITUTIONS.fetch(char, char)
end
end

# Returns *string* with HTML character references decoded.
#
# Recognises the named entities `&apos;`, `&amp;`, `&quot;`, `&gt;` and
# `&lt;`, plus decimal (`&#43;`) and hexadecimal (`&#x2B;`) numeric
# references. Everything else is returned exactly as written: references
# without a trailing `;`, unknown names, and numeric references that do not
# denote a Unicode scalar value (above U+10FFFF, inside the surrogate range
# U+D800..U+DFFF, or with so many digits that they overflow an integer).
#
# ```
# HTML.unescape("Crystal &amp; You") # => "Crystal & You"
# HTML.unescape("3 &#x2B; 2")        # => "3 + 2"
# ```
def self.unescape(string : String)
  # Fast path: without '&' there is nothing to decode.
  return string unless string.includes? '&'

  string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do |entity, match|
    name = match[1]
    case name
    when "apos" then "'"
    when "amp"  then "&"
    when "quot" then "\""
    when "gt"   then ">"
    when "lt"   then "<"
    else
      # The pattern guarantees *name* is "#" + digits or "#x"/"#X" + hex digits.
      hex = name[1] == 'x' || name[1] == 'X'
      digits = hex ? name[2..-1] : name[1..-1]

      # `to_i?` yields nil instead of raising when the digits overflow, so
      # hostile input such as "&#99999999999999;" cannot crash the decoder.
      codepoint = digits.to_i?(hex ? 16 : 10)

      # Only Unicode scalar values are decoded: 0..0xD7FF and 0xE000..0x10FFFF.
      if codepoint && (codepoint <= 0xD7FF || (0xE000 <= codepoint && codepoint <= 0x10FFFF))
        codepoint.unsafe_chr
      else
        # Leave an undecodable reference untouched rather than rewriting it.
        entity
      end
    end
  end
end
end

0 comments on commit 3877c73

Please sign in to comment.