Skip to content

Commit

Permalink
Auto merge of rust-lang#137285 - yotamofek:pr/rustdoc/pulldown-escapi…
Browse files Browse the repository at this point in the history
…ng, r=GuillaumeGomez

librustdoc: Use `pulldown-cmark-escape` for HTML escaping

Implementation of `@notriddle`'s [suggestion](rust-lang#137274 (comment)).
Somewhat related to rust-lang#137274, but the two PRs should be complementary.

Local perf results look like a nice improvement! (so would love a perf run on the CI)
  • Loading branch information
bors committed Feb 24, 2025
2 parents e0be1a0 + 489f5c1 commit f43e549
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 48 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4571,6 +4571,7 @@ dependencies = [
"itertools",
"minifier",
"pulldown-cmark 0.9.6",
"pulldown-cmark-escape",
"regex",
"rinja",
"rustdoc-json-types",
Expand Down
1 change: 1 addition & 0 deletions src/librustdoc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ itertools = "0.12"
indexmap = "2"
minifier = { version = "0.3.5", default-features = false }
pulldown-cmark-old = { version = "0.9.6", package = "pulldown-cmark", default-features = false }
pulldown-cmark-escape = { version = "0.11.0", features = ["simd"] }
regex = "1"
rustdoc-json-types = { path = "../rustdoc-json-types" }
serde_json = "1.0"
Expand Down
51 changes: 3 additions & 48 deletions src/librustdoc/html/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use std::fmt;

use pulldown_cmark_escape::FmtWriter;
use unicode_segmentation::UnicodeSegmentation;

/// Wrapper struct which will emit the HTML-escaped version of the contained
Expand All @@ -13,31 +14,7 @@ pub(crate) struct Escape<'a>(pub &'a str);

impl fmt::Display for Escape<'_> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
// Because the internet is always right, turns out there's not that many
// characters to escape: http://stackoverflow.com/questions/7381974
let Escape(s) = *self;
let pile_o_bits = s;
let mut last = 0;
for (i, ch) in s.char_indices() {
let s = match ch {
'>' => "&gt;",
'<' => "&lt;",
'&' => "&amp;",
'\'' => "&#39;",
'"' => "&quot;",
_ => continue,
};
fmt.write_str(&pile_o_bits[last..i])?;
fmt.write_str(s)?;
// NOTE: we only expect single byte characters here - which is fine as long as we
// only match single byte characters
last = i + 1;
}

if last < s.len() {
fmt.write_str(&pile_o_bits[last..])?;
}
Ok(())
pulldown_cmark_escape::escape_html(FmtWriter(fmt), self.0)
}
}

Expand All @@ -51,29 +28,7 @@ pub(crate) struct EscapeBodyText<'a>(pub &'a str);

impl fmt::Display for EscapeBodyText<'_> {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
// Because the internet is always right, turns out there's not that many
// characters to escape: http://stackoverflow.com/questions/7381974
let EscapeBodyText(s) = *self;
let pile_o_bits = s;
let mut last = 0;
for (i, ch) in s.char_indices() {
let s = match ch {
'>' => "&gt;",
'<' => "&lt;",
'&' => "&amp;",
_ => continue,
};
fmt.write_str(&pile_o_bits[last..i])?;
fmt.write_str(s)?;
// NOTE: we only expect single byte characters here - which is fine as long as we
// only match single byte characters
last = i + 1;
}

if last < s.len() {
fmt.write_str(&pile_o_bits[last..])?;
}
Ok(())
pulldown_cmark_escape::escape_html_body_text(FmtWriter(fmt), self.0)
}
}

Expand Down

0 comments on commit f43e549

Please sign in to comment.