diff --git a/spec/std/html_spec.cr b/spec/std/html_spec.cr
index 374cd454af72..44810466804a 100644
--- a/spec/std/html_spec.cr
+++ b/spec/std/html_spec.cr
@@ -9,22 +9,42 @@ describe "HTML" do
str.should eq("safe_string")
end
- it "escapes dangerous characters from a string" do
- str = HTML.escape("< & >")
+ it "escapes special characters from an HTML string" do
+ str = HTML.escape("< & > \"") # default mode: the double quote is escaped too
- str.should eq("< & >")
+ str.should eq("&lt; &amp; &gt; &quot;")
end
- it "escapes javascript example from a string" do
- str = HTML.escape("")
+ it "escapes as documented in default mode" do
+ str = HTML.escape("Crystal & You") # same input as the example in HTML.escape's doc comment
- str.should eq("<script>alert('You are being hacked')</script>")
+ str.should eq("Crystal &amp; You")
end
- it "escapes nonbreakable space but not normal space" do
- str = HTML.escape("nbspĀ space ")
+ it "escapes characters according no escape_quotes mode" do
+ str = HTML.escape("< & ' \" \\", escape_quotes: false)
- str.should eq("nbsp space ")
+ str.should eq("&lt; &amp; ' \" \\") # quotes and the backslash pass through unchanged
+ end
+ end
+
+ describe ".escape_javascript" do
+ it "does not change a safe string" do
+ str = HTML.escape_javascript("safe_string")
+
+ str.should eq("safe_string")
+ end
+
+ it "escapes special characters from a JavaScript string" do
+ str = HTML.escape_javascript("</tag> \r\n \r \n \u2028 \u2029") # CRLF, CR and LF each yield one "\\n"
+
+ str.should eq("<\\/tag> \\n \\n \\n &#x2028; &#x2029;")
+ end
+
+ it "escapes special characters from a JavaScript IO" do
+ io = IO::Memory.new
+ HTML.escape_javascript("</tag> \r\n \r \n \u2028 \u2029", io).should be_nil
+ io.to_s.should eq("<\\/tag> \\n \\n \\n &#x2028; &#x2029;") # same output as the String overload
end
end
diff --git a/src/html.cr b/src/html.cr
index 708e40b1684c..517bb6716dd5 100644
--- a/src/html.cr
+++ b/src/html.cr
@@ -1,25 +1,34 @@
# Handles encoding and decoding of HTML entities.
module HTML
- SUBSTITUTIONS = {
- '!' => "!",
- '"' => """,
- '$' => "$",
- '%' => "%",
- '&' => "&",
- '\'' => "'",
- '(' => "(",
- ')' => ")",
- '=' => "=",
- '>' => ">",
- '<' => "<",
- '+' => "+",
- '@' => "@",
- '[' => "[",
- ']' => "]",
- '`' => "`",
- '{' => "{",
- '}' => "}",
- '\u{a0}' => " ",
+ # Substitution tables for `HTML.escape`, keyed by its `escape_quotes` flag.
+ ESCAPE_SUBST = {
+ # Escapes the '&', '<' and '>' chars only.
+ #
+ # Like PHP htmlspecialchars (with ENT_NOQUOTES), Python cgi.escape, W3C recommendation.
+ false => {
+ '&' => "&amp;",
+ '<' => "&lt;",
+ '>' => "&gt;",
+ },
+ # Escapes the '&', '<', '>', '"' and '\'' chars.
+ #
+ # Like Ruby CGI.escapeHTML, PHP htmlspecialchars (with ENT_QUOTES), Rack::Utils.escape_html.
+ true => {
+ '&' => "&amp;",
+ '"' => "&quot;",
+ '<' => "&lt;",
+ '>' => "&gt;",
+ '\'' => "&#39;", # numeric reference: decimal 39 is U+0027, and `&apos;` is not defined in HTML 4
+ },
+ }
+ ESCAPE_JAVASCRIPT_SUBST = { # per-character substitutions applied by HTML.escape_javascript
+ '\'' => "\\'",
+ '"' => "\\\"",
+ '\\' => "\\\\",
+ '\u2028' => "&#x2028;", # line separator: must never reach the output as a raw line break
+ '\u2029' => "&#x2029;", # paragraph separator: same reason
+ '\n' => "\\n",
+ '\r' => "\\n", # a carriage return is written as an escaped line feed
}
# Encodes a string with HTML entity substitutions.
@@ -29,8 +38,8 @@ module HTML
#
# HTML.escape("Crystal & You") # => "Crystal &amp; You"
# ```
- def self.escape(string : String) : String
- string.gsub(SUBSTITUTIONS)
+ def self.escape(string : String, escape_quotes : Bool = true) : String
+ string.gsub(ESCAPE_SUBST[escape_quotes]) # the flag picks which substitution table gsub applies
end
# Encodes a string to HTML, but writes to the `IO` instance provided.
@@ -40,9 +49,45 @@ module HTML
# HTML.escape("Crystal & You", io) # => nil
# io.to_s # => "Crystal &amp; You"
# ```
- def self.escape(string : String, io : IO)
+ def self.escape(string : String, io : IO, escape_quotes : Bool = true) : Nil
+ table = ESCAPE_SUBST[escape_quotes] # substitution table for the requested mode
+ string.each_char do |chr|
+ io << (table[chr]? || chr) # replacement when one is mapped, otherwise the char itself
+ end
+ end
+
+ # Escapes a string for use inside JavaScript: quotes, backslashes and line breaks are escaped and `</` becomes `<\/`.
+ #
+ # ```
+ # require "html"
+ #
+ # HTML.escape_javascript("</crystal> \u2028") # => "<\\/crystal> &#x2028;"
+ # ```
+ def self.escape_javascript(string : String) : String
+ string.gsub("\r\n", "\n").gsub(ESCAPE_JAVASCRIPT_SUBST).gsub("</", "<\\/") # CRLF is collapsed first so it yields one "\\n"
+ end
+
+ # Encodes a string with JavaScript escaping, but writes to the `IO` instance provided.
+ #
+ # ```
+ # io = IO::Memory.new
+ # HTML.escape_javascript("</crystal> \u2028", io) # => nil
+ # io.to_s # => "<\\/crystal> &#x2028;"
+ # ```
+ def self.escape_javascript(string : String, io : IO) : Nil
+ previous_char = '\0' # sentinel: no character has been processed yet
string.each_char do |char|
- io << SUBSTITUTIONS.fetch(char, char)
+ if previous_char == '\r' && char == '\n' # LF of a CRLF pair: the CR already produced its output
+ previous_char = '\n'
+ next
+ end
+ if previous_char == '<' && char == '/' # the '<' was already written; emit the slash as "\/"
+ previous_char = '/'
+ io << '\\' << '/'
+ next
+ end
+ io << ESCAPE_JAVASCRIPT_SUBST.fetch(char, char) # mapped replacement, or the char itself when unmapped
+ previous_char = char
end
end