Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop XSS escaping because it should be aside HTML escaping. #4555

Closed
wants to merge 9 commits into from
38 changes: 29 additions & 9 deletions spec/std/html_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,42 @@ describe "HTML" do
str.should eq("safe_string")
end

it "escapes dangerous characters from a string" do
str = HTML.escape("< & >")
it "escapes special characters from an HTML string" do
str = HTML.escape("< & > \"")

str.should eq("&lt; &amp; &gt;")
str.should eq("&lt; &amp; &gt; &quot;")
end

it "escapes javascript example from a string" do
str = HTML.escape("<script>alert('You are being hacked')</script>")
it "escapes as documented in default mode" do
str = HTML.escape("Crystal & You")

str.should eq("&lt;script&gt;alert&#40;&#39;You are being hacked&#39;&#41;&lt;/script&gt;")
str.should eq("Crystal &amp; You")
end

it "escapes nonbreakable space but not normal space" do
str = HTML.escape("nbsp space ")
it "escapes characters according no escape_quotes mode" do
str = HTML.escape("< & ' \" \\", escape_quotes: false)

str.should eq("nbsp&nbsp;space ")
str.should eq("&lt; &amp; ' \" \\")
end
end

describe ".escape_javascript" do
it "does not change a safe string" do
str = HTML.escape_javascript("safe_string")

str.should eq("safe_string")
end

it "escapes special characters from a JavaScript string" do
str = HTML.escape_javascript("</tag> \r\n \r \n \u2028 \u2029")

str.should eq("<\\/tag> \\n \\n \\n &#x2028; &#x2029;")
end

it "escapes special characters from a JavaScript IO" do
io = IO::Memory.new
HTML.escape_javascript("</tag> \r\n \r \n \u2028 \u2029", io).should be_nil
io.to_s.should eq("<\\/tag> \\n \\n \\n &#x2028; &#x2029;")
end
end

Expand Down
93 changes: 69 additions & 24 deletions src/html.cr
Original file line number Diff line number Diff line change
@@ -1,25 +1,34 @@
# Handles encoding and decoding of HTML entities.
module HTML
SUBSTITUTIONS = {
'!' => "&#33;",
'"' => "&quot;",
'$' => "&#36;",
'%' => "&#37;",
'&' => "&amp;",
'\'' => "&#39;",
'(' => "&#40;",
')' => "&#41;",
'=' => "&#61;",
'>' => "&gt;",
'<' => "&lt;",
'+' => "&#43;",
'@' => "&#64;",
'[' => "&#91;",
']' => "&#93;",
'`' => "&#96;",
'{' => "&#123;",
'}' => "&#125;",
'\u{a0}' => "&nbsp;",
# `HTML.escape` escaping mode.
ESCAPE_SUBST = {
# Escapes '&', '<' and '>' chars.
#
# Like PHP htmlspecialchars (with ENT_NOQUOTES), Python cgi.escape, W3C recommendation.
false => {
'&' => "&amp;",
'<' => "&lt;",
'>' => "&gt;",
},
# Escapes '&', '<' and '>', '"' and '\'' chars.
#
# Like Ruby CGI.escape, PHP htmlspecialchars (with ENT_QUOTES), Rack::Utils.escape_html.
true => {
'&' => "&amp;",
'"' => "&quot;",
'<' => "&lt;",
'>' => "&gt;",
'\'' => "&#27;",
},
}
ESCAPE_JAVASCRIPT_SUBST = {
'\'' => "\\'",
'"' => "\\\"",
'\\' => "\\\\",
'\u2028' => "&#x2028;",
'\u2029' => "&#x2029;",
'\n' => "\\n",
'\r' => "\\n",
}

# Encodes a string with HTML entity substitutions.
Expand All @@ -29,8 +38,8 @@ module HTML
#
# HTML.escape("Crystal & You") # => "Crystal &amp; You"
# ```
Copy link
Member

@straight-shoota straight-shoota Sep 6, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to list all character substitutions explicitly, so you don't need to look at the source code to figure out what exactly gets escaped and why.

Suggestion:

# Escapes `&`, `<`,  `>`, `"` and `'` chars as `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&#27` according to 
# [OWASP Rule #1](https://www.owasp.org/index.php/XSS_(Cross_Site_Scripting)_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content).
# If *escape_quotes* is `false`, `"` and `'` will not be escaped ([W3C recommendation](https://www.w3.org/International/questions/qa-escapes#use)).

The other methods should have similar explanations.

def self.escape(string : String) : String
string.gsub(SUBSTITUTIONS)
def self.escape(string : String, escape_quotes : Bool = true) : String
string.gsub(ESCAPE_SUBST[escape_quotes])
end

# Encodes a string to HTML, but writes to the `IO` instance provided.
Expand All @@ -40,9 +49,45 @@ module HTML
# HTML.escape("Crystal & You", io) # => nil
# io.to_s # => "Crystal &amp; You"
# ```
def self.escape(string : String, io : IO)
def self.escape(string : String, io : IO, escape_quotes : Bool = true) : Nil
subst = ESCAPE_SUBST[escape_quotes]
string.each_char do |char|
io << subst.fetch(char, char)
end
end

# Encodes a string with JavaScript escaping substitutions.
#
# ```
# require "html"
#
# HTML.escape_javascript("</crystal> \u2028") # => "<\\/crystal> &#x2028;"
# ```
def self.escape_javascript(string : String) : String
string.gsub("\r\n", "\n").gsub(ESCAPE_JAVASCRIPT_SUBST).gsub("</", "<\\/")
end

# Encodes a string with JavaScript escaping, but writes to the `IO` instance provided.
#
# ```
# io = IO::Memory.new
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'd be better to use a string builder as example.

# HTML.escape_javascript("</crystal> \u2028", io) # => nil
# io.to_s # => "<\\/crystal> &#x2028;"
# ```
def self.escape_javascript(string : String, io : IO) : Nil
previous_char = '\0'
string.each_char do |char|
io << SUBSTITUTIONS.fetch(char, char)
if previous_char == '\r' && char == '\n'
Copy link
Member

@straight-shoota straight-shoota Sep 6, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this would be better as a case statement to express flow more clearly and be shorter overall.

case
when previous_char == '\r' && char == '\n'
when previous_char == '<' && char == '/'
  io << '\\' << '/'
else
  io << ESCAPE_JAVASCRIPT_SUBST.fetch(char, char)
end
previous_char = char

previous_char = '\n'
next
end
if previous_char == '<' && char == '/'
previous_char = '/'
io << '\\' << '/'
next
end
io << ESCAPE_JAVASCRIPT_SUBST.fetch(char, char)
previous_char = char
end
end

Expand Down