Skip to content

Commit

Permalink
Add support for many HTML entities
Browse files Browse the repository at this point in the history
  • Loading branch information
Duke committed Oct 11, 2016
1 parent 9eab0f3 commit 00b8c93
Show file tree
Hide file tree
Showing 2 changed files with 513 additions and 8 deletions.
262 changes: 261 additions & 1 deletion spec/std/html_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ describe "HTML" do
# The named reference `&nbsp;` must decode to U+00A0 (NO-BREAK SPACE).
# The input is written with the entity reference itself rather than a
# pre-decoded character, so the example actually exercises HTML.unescape.
it "unescapes &nbsp;" do
  str = HTML.unescape("nbsp&nbsp;space ")

  str.should eq("nbsp\u{A0}space ")
end

it "unescapes Char::MAX_CODEPOINT" do
Expand All @@ -78,5 +78,265 @@ describe "HTML" do
str = HTML.unescape("limit ")
str.should eq("limit 􏿿")
end

# Table of named character references and the code point each one is expected
# to decode to. Each row is `{name: <entity reference>, char: <expected Char>}`.
#
# `name` is always spelled as the `&...;` reference (never the decoded
# character), otherwise the generated examples would compare a character with
# itself and prove nothing.
#
# Rows are grouped in three runs: Latin-1 (U+00A0..U+00FF), symbols and Greek
# letters, then markup-significant and internationalization characters.
#
# NOTE(review): `&lang;` / `&rang;` are expected to yield U+2329 / U+232A here;
# some entity tables map them to U+27E8 / U+27E9 instead - confirm against the
# HTML.unescape implementation.
entities = [
  {name: "&nbsp;", char: '\u{A0}'},
  {name: "&iexcl;", char: '\u{A1}'},
  {name: "&cent;", char: '\u{A2}'},
  {name: "&pound;", char: '\u{A3}'},
  {name: "&curren;", char: '\u{A4}'},
  {name: "&yen;", char: '\u{A5}'},
  {name: "&brvbar;", char: '\u{A6}'},
  {name: "&sect;", char: '\u{A7}'},
  {name: "&uml;", char: '\u{A8}'},
  {name: "&copy;", char: '\u{A9}'},
  {name: "&ordf;", char: '\u{AA}'},
  {name: "&laquo;", char: '\u{AB}'},
  {name: "&not;", char: '\u{AC}'},
  {name: "&shy;", char: '\u{AD}'},
  {name: "&reg;", char: '\u{AE}'},
  {name: "&macr;", char: '\u{AF}'},
  {name: "&deg;", char: '\u{B0}'},
  {name: "&plusmn;", char: '\u{B1}'},
  {name: "&sup2;", char: '\u{B2}'},
  {name: "&sup3;", char: '\u{B3}'},
  {name: "&acute;", char: '\u{B4}'},
  {name: "&micro;", char: '\u{B5}'},
  {name: "&para;", char: '\u{B6}'},
  {name: "&middot;", char: '\u{B7}'},
  {name: "&cedil;", char: '\u{B8}'},
  {name: "&sup1;", char: '\u{B9}'},
  {name: "&ordm;", char: '\u{BA}'},
  {name: "&raquo;", char: '\u{BB}'},
  {name: "&frac14;", char: '\u{BC}'},
  {name: "&frac12;", char: '\u{BD}'},
  {name: "&frac34;", char: '\u{BE}'},
  {name: "&iquest;", char: '\u{BF}'},
  {name: "&Agrave;", char: '\u{C0}'},
  {name: "&Aacute;", char: '\u{C1}'},
  {name: "&Acirc;", char: '\u{C2}'},
  {name: "&Atilde;", char: '\u{C3}'},
  {name: "&Auml;", char: '\u{C4}'},
  {name: "&Aring;", char: '\u{C5}'},
  {name: "&AElig;", char: '\u{C6}'},
  {name: "&Ccedil;", char: '\u{C7}'},
  {name: "&Egrave;", char: '\u{C8}'},
  {name: "&Eacute;", char: '\u{C9}'},
  {name: "&Ecirc;", char: '\u{CA}'},
  {name: "&Euml;", char: '\u{CB}'},
  {name: "&Igrave;", char: '\u{CC}'},
  {name: "&Iacute;", char: '\u{CD}'},
  {name: "&Icirc;", char: '\u{CE}'},
  {name: "&Iuml;", char: '\u{CF}'},
  {name: "&ETH;", char: '\u{D0}'},
  {name: "&Ntilde;", char: '\u{D1}'},
  {name: "&Ograve;", char: '\u{D2}'},
  {name: "&Oacute;", char: '\u{D3}'},
  {name: "&Ocirc;", char: '\u{D4}'},
  {name: "&Otilde;", char: '\u{D5}'},
  {name: "&Ouml;", char: '\u{D6}'},
  {name: "&times;", char: '\u{D7}'},
  {name: "&Oslash;", char: '\u{D8}'},
  {name: "&Ugrave;", char: '\u{D9}'},
  {name: "&Uacute;", char: '\u{DA}'},
  {name: "&Ucirc;", char: '\u{DB}'},
  {name: "&Uuml;", char: '\u{DC}'},
  {name: "&Yacute;", char: '\u{DD}'},
  {name: "&THORN;", char: '\u{DE}'},
  {name: "&szlig;", char: '\u{DF}'},
  {name: "&agrave;", char: '\u{E0}'},
  {name: "&aacute;", char: '\u{E1}'},
  {name: "&acirc;", char: '\u{E2}'},
  {name: "&atilde;", char: '\u{E3}'},
  {name: "&auml;", char: '\u{E4}'},
  {name: "&aring;", char: '\u{E5}'},
  {name: "&aelig;", char: '\u{E6}'},
  {name: "&ccedil;", char: '\u{E7}'},
  {name: "&egrave;", char: '\u{E8}'},
  {name: "&eacute;", char: '\u{E9}'},
  {name: "&ecirc;", char: '\u{EA}'},
  {name: "&euml;", char: '\u{EB}'},
  {name: "&igrave;", char: '\u{EC}'},
  {name: "&iacute;", char: '\u{ED}'},
  {name: "&icirc;", char: '\u{EE}'},
  {name: "&iuml;", char: '\u{EF}'},
  {name: "&eth;", char: '\u{F0}'},
  {name: "&ntilde;", char: '\u{F1}'},
  {name: "&ograve;", char: '\u{F2}'},
  {name: "&oacute;", char: '\u{F3}'},
  {name: "&ocirc;", char: '\u{F4}'},
  {name: "&otilde;", char: '\u{F5}'},
  {name: "&ouml;", char: '\u{F6}'},
  {name: "&divide;", char: '\u{F7}'},
  {name: "&oslash;", char: '\u{F8}'},
  {name: "&ugrave;", char: '\u{F9}'},
  {name: "&uacute;", char: '\u{FA}'},
  {name: "&ucirc;", char: '\u{FB}'},
  {name: "&uuml;", char: '\u{FC}'},
  {name: "&yacute;", char: '\u{FD}'},
  {name: "&thorn;", char: '\u{FE}'},
  {name: "&yuml;", char: '\u{FF}'},

  {name: "&fnof;", char: '\u{0192}'},
  {name: "&Alpha;", char: '\u{0391}'},
  {name: "&Beta;", char: '\u{0392}'},
  {name: "&Gamma;", char: '\u{0393}'},
  {name: "&Delta;", char: '\u{0394}'},
  {name: "&Epsilon;", char: '\u{0395}'},
  {name: "&Zeta;", char: '\u{0396}'},
  {name: "&Eta;", char: '\u{0397}'},
  {name: "&Theta;", char: '\u{0398}'},
  {name: "&Iota;", char: '\u{0399}'},
  {name: "&Kappa;", char: '\u{039A}'},
  {name: "&Lambda;", char: '\u{039B}'},
  {name: "&Mu;", char: '\u{039C}'},
  {name: "&Nu;", char: '\u{039D}'},
  {name: "&Xi;", char: '\u{039E}'},
  {name: "&Omicron;", char: '\u{039F}'},
  {name: "&Pi;", char: '\u{03A0}'},
  {name: "&Rho;", char: '\u{03A1}'},
  {name: "&Sigma;", char: '\u{03A3}'},
  {name: "&Tau;", char: '\u{03A4}'},
  {name: "&Upsilon;", char: '\u{03A5}'},
  {name: "&Phi;", char: '\u{03A6}'},
  {name: "&Chi;", char: '\u{03A7}'},
  {name: "&Psi;", char: '\u{03A8}'},
  {name: "&Omega;", char: '\u{03A9}'},
  {name: "&alpha;", char: '\u{03B1}'},
  {name: "&beta;", char: '\u{03B2}'},
  {name: "&gamma;", char: '\u{03B3}'},
  {name: "&delta;", char: '\u{03B4}'},
  {name: "&epsilon;", char: '\u{03B5}'},
  {name: "&zeta;", char: '\u{03B6}'},
  {name: "&eta;", char: '\u{03B7}'},
  {name: "&theta;", char: '\u{03B8}'},
  {name: "&iota;", char: '\u{03B9}'},
  {name: "&kappa;", char: '\u{03BA}'},
  {name: "&lambda;", char: '\u{03BB}'},
  {name: "&mu;", char: '\u{03BC}'},
  {name: "&nu;", char: '\u{03BD}'},
  {name: "&xi;", char: '\u{03BE}'},
  {name: "&omicron;", char: '\u{03BF}'},
  {name: "&pi;", char: '\u{03C0}'},
  {name: "&rho;", char: '\u{03C1}'},
  {name: "&sigmaf;", char: '\u{03C2}'},
  {name: "&sigma;", char: '\u{03C3}'},
  {name: "&tau;", char: '\u{03C4}'},
  {name: "&upsilon;", char: '\u{03C5}'},
  {name: "&phi;", char: '\u{03C6}'},
  {name: "&chi;", char: '\u{03C7}'},
  {name: "&psi;", char: '\u{03C8}'},
  {name: "&omega;", char: '\u{03C9}'},
  {name: "&thetasym;", char: '\u{03D1}'},
  {name: "&upsih;", char: '\u{03D2}'},
  {name: "&piv;", char: '\u{03D6}'},
  {name: "&bull;", char: '\u{2022}'},
  {name: "&hellip;", char: '\u{2026}'},
  {name: "&prime;", char: '\u{2032}'},
  {name: "&Prime;", char: '\u{2033}'},
  {name: "&oline;", char: '\u{203E}'},
  {name: "&frasl;", char: '\u{2044}'},
  {name: "&weierp;", char: '\u{2118}'},
  {name: "&image;", char: '\u{2111}'},
  {name: "&real;", char: '\u{211C}'},
  {name: "&trade;", char: '\u{2122}'},
  {name: "&alefsym;", char: '\u{2135}'},
  {name: "&larr;", char: '\u{2190}'},
  {name: "&uarr;", char: '\u{2191}'},
  {name: "&rarr;", char: '\u{2192}'},
  {name: "&darr;", char: '\u{2193}'},
  {name: "&harr;", char: '\u{2194}'},
  {name: "&crarr;", char: '\u{21B5}'},
  {name: "&lArr;", char: '\u{21D0}'},
  {name: "&uArr;", char: '\u{21D1}'},
  {name: "&rArr;", char: '\u{21D2}'},
  {name: "&dArr;", char: '\u{21D3}'},
  {name: "&hArr;", char: '\u{21D4}'},
  {name: "&forall;", char: '\u{2200}'},
  {name: "&part;", char: '\u{2202}'},
  {name: "&exist;", char: '\u{2203}'},
  {name: "&empty;", char: '\u{2205}'},
  {name: "&nabla;", char: '\u{2207}'},
  {name: "&isin;", char: '\u{2208}'},
  {name: "&notin;", char: '\u{2209}'},
  {name: "&ni;", char: '\u{220B}'},
  {name: "&prod;", char: '\u{220F}'},
  {name: "&sum;", char: '\u{2211}'},
  {name: "&minus;", char: '\u{2212}'},
  {name: "&lowast;", char: '\u{2217}'},
  {name: "&radic;", char: '\u{221A}'},
  {name: "&prop;", char: '\u{221D}'},
  {name: "&infin;", char: '\u{221E}'},
  {name: "&ang;", char: '\u{2220}'},
  {name: "&and;", char: '\u{2227}'},
  {name: "&or;", char: '\u{2228}'},
  {name: "&cap;", char: '\u{2229}'},
  {name: "&cup;", char: '\u{222A}'},
  {name: "&int;", char: '\u{222B}'},
  {name: "&there4;", char: '\u{2234}'},
  {name: "&sim;", char: '\u{223C}'},
  {name: "&cong;", char: '\u{2245}'},
  {name: "&asymp;", char: '\u{2248}'},
  {name: "&ne;", char: '\u{2260}'},
  {name: "&equiv;", char: '\u{2261}'},
  {name: "&le;", char: '\u{2264}'},
  {name: "&ge;", char: '\u{2265}'},
  {name: "&sub;", char: '\u{2282}'},
  {name: "&sup;", char: '\u{2283}'},
  {name: "&nsub;", char: '\u{2284}'},
  {name: "&sube;", char: '\u{2286}'},
  {name: "&supe;", char: '\u{2287}'},
  {name: "&oplus;", char: '\u{2295}'},
  {name: "&otimes;", char: '\u{2297}'},
  {name: "&perp;", char: '\u{22A5}'},
  {name: "&sdot;", char: '\u{22C5}'},
  {name: "&lceil;", char: '\u{2308}'},
  {name: "&rceil;", char: '\u{2309}'},
  {name: "&lfloor;", char: '\u{230A}'},
  {name: "&rfloor;", char: '\u{230B}'},
  {name: "&lang;", char: '\u{2329}'},
  {name: "&rang;", char: '\u{232A}'},
  {name: "&loz;", char: '\u{25CA}'},
  {name: "&spades;", char: '\u{2660}'},
  {name: "&clubs;", char: '\u{2663}'},
  {name: "&hearts;", char: '\u{2665}'},
  {name: "&diams;", char: '\u{2666}'},

  {name: "&quot;", char: '\u{0022}'},
  {name: "&amp;", char: '\u{0026}'},
  {name: "&lt;", char: '\u{003C}'},
  {name: "&gt;", char: '\u{003E}'},
  {name: "&OElig;", char: '\u{0152}'},
  {name: "&oelig;", char: '\u{0153}'},
  {name: "&Scaron;", char: '\u{0160}'},
  {name: "&scaron;", char: '\u{0161}'},
  {name: "&Yuml;", char: '\u{0178}'},
  {name: "&circ;", char: '\u{02C6}'},
  {name: "&tilde;", char: '\u{02DC}'},
  {name: "&ensp;", char: '\u{2002}'},
  {name: "&emsp;", char: '\u{2003}'},
  {name: "&thinsp;", char: '\u{2009}'},
  {name: "&zwnj;", char: '\u{200C}'},
  {name: "&zwj;", char: '\u{200D}'},
  {name: "&lrm;", char: '\u{200E}'},
  {name: "&rlm;", char: '\u{200F}'},
  {name: "&ndash;", char: '\u{2013}'},
  {name: "&mdash;", char: '\u{2014}'},
  {name: "&lsquo;", char: '\u{2018}'},
  {name: "&rsquo;", char: '\u{2019}'},
  {name: "&sbquo;", char: '\u{201A}'},
  {name: "&ldquo;", char: '\u{201C}'},
  {name: "&rdquo;", char: '\u{201D}'},
  {name: "&bdquo;", char: '\u{201E}'},
  {name: "&dagger;", char: '\u{2020}'},
  {name: "&Dagger;", char: '\u{2021}'},
  {name: "&permil;", char: '\u{2030}'},
  {name: "&lsaquo;", char: '\u{2039}'},
  {name: "&rsaquo;", char: '\u{203A}'},
  {name: "&euro;", char: '\u{20AC}'},
]
# Emit one example per row of `entities`: HTML.unescape applied to the row's
# `name` must equal the row's `char` rendered as a one-character string.
entities.each do |entry|
  name = entry[:name]
  char = entry[:char]

  it "unescapes #{name} to #{char}" do
    HTML.unescape(name).should eq(char.to_s)
  end
end
end
end
Loading

0 comments on commit 00b8c93

Please sign in to comment.