diff --git a/spec/std/html_spec.cr b/spec/std/html_spec.cr index 374cd454af72..44810466804a 100644 --- a/spec/std/html_spec.cr +++ b/spec/std/html_spec.cr @@ -9,22 +9,42 @@ describe "HTML" do str.should eq("safe_string") end - it "escapes dangerous characters from a string" do - str = HTML.escape("< & >") + it "escapes special characters from an HTML string" do + str = HTML.escape("< & > \"") - str.should eq("< & >") + str.should eq("< & > "") end - it "escapes javascript example from a string" do - str = HTML.escape("") + it "escapes as documented in default mode" do + str = HTML.escape("Crystal & You") - str.should eq("<script>alert('You are being hacked')</script>") + str.should eq("Crystal & You") end - it "escapes nonbreakable space but not normal space" do - str = HTML.escape("nbspĀ space ") + it "escapes characters according no escape_quotes mode" do + str = HTML.escape("< & ' \" \\", escape_quotes: false) - str.should eq("nbsp space ") + str.should eq("< & ' \" \\") + end + end + + describe ".escape_javascript" do + it "does not change a safe string" do + str = HTML.escape_javascript("safe_string") + + str.should eq("safe_string") + end + + it "escapes special characters from a JavaScript string" do + str = HTML.escape_javascript(" \r\n \r \n \u2028 \u2029") + + str.should eq("<\\/tag> \\n \\n \\n 
 
") + end + + it "escapes special characters from a JavaScript IO" do + io = IO::Memory.new + HTML.escape_javascript(" \r\n \r \n \u2028 \u2029", io).should be_nil + io.to_s.should eq("<\\/tag> \\n \\n \\n 
 
") end end diff --git a/src/html.cr b/src/html.cr index 708e40b1684c..517bb6716dd5 100644 --- a/src/html.cr +++ b/src/html.cr @@ -1,25 +1,34 @@ # Handles encoding and decoding of HTML entities. module HTML - SUBSTITUTIONS = { - '!' => "!", - '"' => """, - '$' => "$", - '%' => "%", - '&' => "&", - '\'' => "'", - '(' => "(", - ')' => ")", - '=' => "=", - '>' => ">", - '<' => "<", - '+' => "+", - '@' => "@", - '[' => "[", - ']' => "]", - '`' => "`", - '{' => "{", - '}' => "}", - '\u{a0}' => " ", + # `HTML.escape` escaping mode. + ESCAPE_SUBST = { + # Escapes '&', '<' and '>' chars. + # + # Like PHP htmlspecialchars (with ENT_NOQUOTES), Python cgi.escape, W3C recommendation. + false => { + '&' => "&", + '<' => "<", + '>' => ">", + }, + # Escapes '&', '<' and '>', '"' and '\'' chars. + # + # Like Ruby CGI.escape, PHP htmlspecialchars (with ENT_QUOTES), Rack::Utils.escape_html. + true => { + '&' => "&", + '"' => """, + '<' => "<", + '>' => ">", + '\'' => "", + }, + } + ESCAPE_JAVASCRIPT_SUBST = { + '\'' => "\\'", + '"' => "\\\"", + '\\' => "\\\\", + '\u2028' => "
", + '\u2029' => "
", + '\n' => "\\n", + '\r' => "\\n", } # Encodes a string with HTML entity substitutions. @@ -29,8 +38,8 @@ module HTML # # HTML.escape("Crystal & You") # => "Crystal & You" # ``` - def self.escape(string : String) : String - string.gsub(SUBSTITUTIONS) + def self.escape(string : String, escape_quotes : Bool = true) : String + string.gsub(ESCAPE_SUBST[escape_quotes]) end # Encodes a string to HTML, but writes to the `IO` instance provided. @@ -40,9 +49,45 @@ module HTML # HTML.escape("Crystal & You", io) # => nil # io.to_s # => "Crystal & You" # ``` - def self.escape(string : String, io : IO) + def self.escape(string : String, io : IO, escape_quotes : Bool = true) : Nil + subst = ESCAPE_SUBST[escape_quotes] + string.each_char do |char| + io << subst.fetch(char, char) + end + end + + # Encodes a string with JavaScript escaping substitutions. + # + # ``` + # require "html" + # + # HTML.escape_javascript(" \u2028") # => "<\\/crystal> 
" + # ``` + def self.escape_javascript(string : String) : String + string.gsub("\r\n", "\n").gsub(ESCAPE_JAVASCRIPT_SUBST).gsub(" \u2028", io) # => nil + # io.to_s # => "<\\/crystal> 
" + # ``` + def self.escape_javascript(string : String, io : IO) : Nil + previous_char = '\0' string.each_char do |char| - io << SUBSTITUTIONS.fetch(char, char) + if previous_char == '\r' && char == '\n' + previous_char = '\n' + next + end + if previous_char == '<' && char == '/' + previous_char = '/' + io << '\\' << '/' + next + end + io << ESCAPE_JAVASCRIPT_SUBST.fetch(char, char) + previous_char = char end end