From 1190237f74c254d0e9850c412a3f0ac3f8b784ff Mon Sep 17 00:00:00 2001 From: LongYinan Date: Fri, 26 Jul 2024 23:42:39 +0800 Subject: [PATCH] perf(sourcemap): avoid allocate in encode if possible --- Cargo.lock | 1 + crates/oxc_sourcemap/Cargo.toml | 1 + crates/oxc_sourcemap/src/encode.rs | 123 +++++++++++++++++++++++------ 3 files changed, 99 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 73cfa926597934..a72e71d82021c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1741,6 +1741,7 @@ version = "0.22.0" dependencies = [ "base64-simd", "cfg-if", + "memchr", "rayon", "rustc-hash", "serde", diff --git a/crates/oxc_sourcemap/Cargo.toml b/crates/oxc_sourcemap/Cargo.toml index 229873235270a1..6f8ce8f4c1c60d 100644 --- a/crates/oxc_sourcemap/Cargo.toml +++ b/crates/oxc_sourcemap/Cargo.toml @@ -23,6 +23,7 @@ rustc-hash = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } base64-simd = { workspace = true } +memchr = { workspace = true } cfg-if = { workspace = true } rayon = { workspace = true, optional = true } diff --git a/crates/oxc_sourcemap/src/encode.rs b/crates/oxc_sourcemap/src/encode.rs index 29316a0435b518..c714be6fbf699f 100644 --- a/crates/oxc_sourcemap/src/encode.rs +++ b/crates/oxc_sourcemap/src/encode.rs @@ -1,9 +1,12 @@ use std::borrow::Cow; +#[cfg(feature = "concurrent")] +use std::sync::atomic::{AtomicUsize, Ordering}; +use memchr::memchr_iter; #[cfg(feature = "concurrent")] use rayon::prelude::*; -use crate::error::{Error, Result}; +use crate::error::Result; use crate::JSONSourceMap; /// Port from https://github.com/getsentry/rust-sourcemap/blob/master/src/encoder.rs /// It is a helper for encode `SourceMap` to vlq sourcemap string, but here some different. @@ -29,9 +32,10 @@ pub fn encode(sourcemap: &SourceMap) -> JSONSourceMap { // It will escape the string to avoid invalid JSON string. 
pub fn encode_to_string(sourcemap: &SourceMap) -> Result { let mut contents = PreAllocatedString::new( - 10 + sourcemap.names.len() * 2 + 9 + sourcemap.names.len() * 2 + sourcemap.sources.len() * 2 - + if let Some(x) = &sourcemap.x_google_ignore_list { x.len() * 2 } else { 0 }, + + if let Some(x) = &sourcemap.x_google_ignore_list { x.len() * 2 } else { 0 } + + if let Some(s) = &sourcemap.source_contents { s.len() * 3 } else { 0 }, ); contents.push("{\"version\":3,".into()); if let Some(file) = sourcemap.get_file() { @@ -45,43 +49,77 @@ pub fn encode_to_string(sourcemap: &SourceMap) -> Result { contents.push("\",".into()); } contents.push("\"names\":[".into()); - for n in &sourcemap.names { - contents.push(serde_json::to_string(n.as_ref())?.into()); - contents.push(",".into()); - } if !sourcemap.names.is_empty() { + for n in &sourcemap.names { + if needs_escaping(n) { + contents.push(serde_json::to_string(n.as_ref())?.into()); + } else { + contents.push("\"".into()); + contents.push((&**n).into()); + contents.push("\"".into()); + } + contents.push(",".into()); + } // Remove the last `,`. contents.pop(); } contents.push(Cow::Borrowed("],\"sources\":[")); - for s in &sourcemap.sources { - contents.push(serde_json::to_string(s.as_ref())?.into()); - contents.push(",".into()); - } if !sourcemap.sources.is_empty() { + for s in &sourcemap.sources { + if needs_escaping(s) { + contents.push(serde_escape_content(s)?.into()); + } else { + contents.push("\"".into()); + contents.push((&**s).into()); + contents.push("\"".into()); + } + contents.push(",".into()); + } // Remove the last `,`. contents.pop(); } // Quote `source_content` at parallel. if let Some(source_contents) = &sourcemap.source_contents { contents.push("],\"sourcesContent\":[".into()); - cfg_if::cfg_if! 
{ - if #[cfg(feature = "concurrent")] { - let quote_source_contents = source_contents + if !source_contents.is_empty() { + #[cfg(feature = "concurrent")] + { + let content_len = AtomicUsize::new(0); + let mut quote_source_contents = source_contents .par_iter() - .map(|x| serde_json::to_string(x.as_ref())) - .collect::, serde_json::Error>>() - .map_err(Error::from)?; - } else { - let quote_source_contents = source_contents - .iter() - .map(|x| serde_json::to_string(x.as_ref())) - .collect::, serde_json::Error>>() - .map_err(Error::from)?; + .filter_map(|s| { + if needs_escaping(s) { + let escaped = serde_escape_content(s).ok()?; + content_len.fetch_add(escaped.len(), Ordering::Relaxed); + Some(["".into(), escaped.into(), "".into(), ",".into()]) + } else { + content_len.fetch_add(s.len() + 3, Ordering::Relaxed); + Some(["\"".into(), Cow::Borrowed(&**s), "\"".into(), ",".into()]) + } + }) + .flatten() + .collect::>(); + // Remove the last `,`. + quote_source_contents.pop(); + contents.extend(quote_source_contents, content_len.load(Ordering::Relaxed)); } - }; - - contents.push(quote_source_contents.join(",").into()); + #[cfg(not(feature = "concurrent"))] + { + for s in source_contents { + if needs_escaping(&**s) { + let escaped = serde_escape_content(s)?; + contents.push(escaped.into()); + } else { + contents.push("\"".into()); + contents.push((&**s).into()); + contents.push("\"".into()); + } + contents.push(",".into()); + } + // Remove the last `,`. 
+                contents.pop();
+            }
+        }
     }
     if let Some(x_google_ignore_list) = &sourcemap.x_google_ignore_list {
         contents.push("],\"x_google_ignoreList\":[".into());
@@ -230,6 +268,16 @@ impl<'a> PreAllocatedString<'a> {
         }
     }
 
+    #[cfg(feature = "concurrent")]
+    #[inline]
+    fn extend<I>(&mut self, iter: I, extended: usize)
+    where
+        I: IntoIterator<Item = Cow<'a, str>>,
+    {
+        self.buf.extend(iter);
+        self.len += extended;
+    }
+
     #[inline]
     fn consume(self) -> String {
         let mut buf = String::with_capacity(self.len);
@@ -238,6 +286,21 @@ impl<'a> PreAllocatedString<'a> {
     }
 }
 
+#[inline]
+fn needs_escaping(s: &str) -> bool {
+    let bytes = s.as_bytes();
+
+    // JSON (RFC 8259) forbids every raw control byte 0x00-0x1F, not only NUL.
+    if bytes.iter().any(|&b| b < 0x20) {
+        return true;
+    }
+
+    // `"` and `\` must be escaped too. `/` is legal unescaped and serde_json
+    // never escapes it, so it does not force the slow path.
+    memchr_iter(b'"', bytes).next().is_some()
+        || memchr_iter(b'\\', bytes).next().is_some()
+}
+
 #[test]
 fn test_encode() {
     let input = r#"{
@@ -255,6 +318,14 @@ fn test_encode() {
     }
 }
 
+#[inline]
+fn serde_escape_content<S: AsRef<str>>(s: S) -> Result<String> {
+    let mut escaped_buf = Vec::with_capacity(s.as_ref().len() + 2);
+    serde::Serialize::serialize(s.as_ref(), &mut serde_json::Serializer::new(&mut escaped_buf))?;
+    // SAFETY: serde_json only writes valid UTF-8 when serializing a `str`.
+    Ok(unsafe { String::from_utf8_unchecked(escaped_buf) })
+}
+
 #[test]
 fn test_encode_escape_string() {
     // '\0' should be escaped.