From 7eb960d6e995aae010662710633ab7b80c04a0be Mon Sep 17 00:00:00 2001 From: Boshen <1430279+Boshen@users.noreply.github.com> Date: Sat, 13 Jul 2024 02:38:06 +0000 Subject: [PATCH] feat(transformer): decode xml character entity `&#xhhhh` and `&#nnnn;` (#4235) --- crates/oxc_transformer/src/react/jsx.rs | 39 ++++++++++++++++++------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/crates/oxc_transformer/src/react/jsx.rs b/crates/oxc_transformer/src/react/jsx.rs index 561240ac4c9b9..850d901db2faf 100644 --- a/crates/oxc_transformer/src/react/jsx.rs +++ b/crates/oxc_transformer/src/react/jsx.rs @@ -947,7 +947,7 @@ impl<'a> ReactJsx<'a> { /// * See /// Code adapted from fn decode_entities(s: &str) -> String { - let mut buffer = vec![]; + let mut buffer = String::new(); let mut chars = s.char_indices(); let mut prev = 0; while let Some((i, c)) = chars.next() { @@ -961,21 +961,38 @@ impl<'a> ReactJsx<'a> { } } if let Some(end) = end { - let word = &s[start + 1..end]; - buffer.extend_from_slice(s[prev..start].as_bytes()); + buffer.push_str(&s[prev..start]); prev = end + 1; - if let Some(c) = XML_ENTITIES.get(word) { - buffer.extend_from_slice(c.to_string().as_bytes()); + let word = &s[start + 1..end]; + if let Some(decimal) = word.strip_prefix('#') { + if let Some(hex) = decimal.strip_prefix('x') { + if let Some(c) = + u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) + { + // &#x0123; + buffer.push(c); + continue; + } + } else if let Some(c) = decimal.parse::<u32>().ok().and_then(char::from_u32) + { + // &#123; + buffer.push(c); + continue; + } + } else if let Some(c) = XML_ENTITIES.get(word) { + // &quot; + buffer.push(*c); + continue; } + // fallback + buffer.push('&'); + buffer.push_str(word); + buffer.push(';'); } } } - buffer.extend_from_slice(s[prev..].as_bytes()); - #[allow(unsafe_code)] - // SAFETY: The buffer is constructed from valid utf chars. - unsafe { - String::from_utf8_unchecked(buffer) - } + buffer.push_str(&s[prev..]); + buffer } }