From 1baf2efce9b58397f8f15a1e893a8ec52c0cd025 Mon Sep 17 00:00:00 2001 From: Hollow Man Date: Wed, 2 Feb 2022 21:19:40 +0800 Subject: [PATCH] Make print page (print.html) links link to anchors on the print page Give every anchor id on the print page a path id prefix so that it can be located, e.g. bar/foo.md#abc -> #bar-foo-abc. Also insert a dummy div at the start of each original page to make sure that original page links without an anchor can also be located. Signed-off-by: Hollow Man --- src/renderer/html_handlebars/hbs_renderer.rs | 58 +++++++++++--- src/utils/mod.rs | 81 ++++++++++++++++---- 2 files changed, 114 insertions(+), 25 deletions(-) diff --git a/src/renderer/html_handlebars/hbs_renderer.rs b/src/renderer/html_handlebars/hbs_renderer.rs index 69dc31248c..9678e26716 100644 --- a/src/renderer/html_handlebars/hbs_renderer.rs +++ b/src/renderer/html_handlebars/hbs_renderer.rs @@ -63,7 +63,20 @@ impl HtmlHandlebars { print_content .push_str(r#"
"#); } - print_content.push_str(&fixed_content); + let path_id = { + let mut base = path.display().to_string(); + if base.ends_with(".md") { + base.replace_range(base.len() - 3.., ""); + } + &base.replace("/", "-").replace("\\", "-") + }; + + // We have to build header links in advance so that we can know the ranges + // for the headers in one page. + // Insert a dummy div to make sure that we can locate the specific page. + print_content + .push_str(&(format!(r#"
"#, &path_id))); + print_content.push_str(&build_header_links(&fixed_content, Some(path_id))); // Update the context with data for this file let ctx_path = path @@ -181,19 +194,31 @@ impl HtmlHandlebars { } #[cfg_attr(feature = "cargo-clippy", allow(clippy::let_and_return))] - fn post_process( + fn post_process_print( &self, rendered: String, playground_config: &Playground, edition: Option, ) -> String { - let rendered = build_header_links(&rendered); let rendered = fix_code_blocks(&rendered); let rendered = add_playground_pre(&rendered, playground_config, edition); rendered } + #[cfg_attr(feature = "cargo-clippy", allow(clippy::let_and_return))] + fn post_process( + &self, + rendered: String, + playground_config: &Playground, + edition: Option, + ) -> String { + let rendered = build_header_links(&rendered, None); + let rendered = self.post_process_print(rendered, &playground_config, edition); + + rendered + } + fn copy_static_files( &self, destination: &Path, @@ -547,7 +572,7 @@ impl Renderer for HtmlHandlebars { let rendered = handlebars.render("index", &data)?; let rendered = - self.post_process(rendered, &html_config.playground, ctx.config.rust.edition); + self.post_process_print(rendered, &html_config.playground, ctx.config.rust.edition); utils::fs::write_file(destination, "print.html", rendered.as_bytes())?; debug!("Creating print.html ✓"); @@ -746,7 +771,7 @@ fn make_data( /// Goes through the rendered HTML, making sure all header tags have /// an anchor respectively so people can link to sections directly. 
-fn build_header_links(html: &str) -> String { +fn build_header_links(html: &str, path_id: Option<&str>) -> String { let regex = Regex::new(r"(.*?)").unwrap(); let mut id_counter = HashMap::new(); @@ -756,25 +781,40 @@ fn build_header_links(html: &str) -> String { .parse() .expect("Regex should ensure we only ever get numbers here"); - insert_link_into_header(level, &caps[2], &mut id_counter) + insert_link_into_header(level, &caps[2], &mut id_counter, path_id) }) .into_owned() } /// Insert a sinle link into a header, making sure each link gets its own /// unique ID by appending an auto-incremented number (if necessary). +/// +/// For `print.html`, we will add a path id prefix. fn insert_link_into_header( level: usize, content: &str, id_counter: &mut HashMap, + path_id: Option<&str>, ) -> String { let raw_id = utils::id_from_content(content); let id_count = id_counter.entry(raw_id.clone()).or_insert(0); let id = match *id_count { - 0 => raw_id, - other => format!("{}-{}", raw_id, other), + 0 => { + if let Some(path_id) = path_id { + format!("{}-{}", path_id, raw_id) + } else { + raw_id + } + }, + other => { + if let Some(path_id) = path_id { + format!("{}-{}-{}", path_id, raw_id, other) + } else { + format!("{}-{}", raw_id, other) + } + }, }; *id_count += 1; @@ -980,7 +1020,7 @@ mod tests { ]; for (src, should_be) in inputs { - let got = build_header_links(src); + let got = build_header_links(src, None); assert_eq!(got, should_be); } } diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 44494a8b7b..731469088a 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -10,7 +10,7 @@ use pulldown_cmark::{html, CodeBlockKind, CowStr, Event, Options, Parser, Tag}; use std::borrow::Cow; use std::fmt::Write; -use std::path::Path; +use std::path::{Path, PathBuf, Component}; pub use self::string::{ take_anchored_lines, take_lines, take_rustdoc_include_anchored_lines, @@ -63,19 +63,52 @@ pub fn id_from_content(content: &str) -> String { normalize_id(trimmed) } +/// 
https://stackoverflow.com/a/68233480 +/// Normalize the path by resolving `..` components where possible. Return the path id +/// by replacing the directory separator with a hyphen. +/// +/// This assumes that `a/b/../c` is `a/c` which might be different from +/// what the OS would have chosen when b is a link. This is OK +/// for building print page anchor ids but can't be generally used elsewhere. +/// +/// This function ensures a given path ending with '/' will +/// end with '-' after normalization. +pub fn normalize_path_id>(path: P) -> String { + let ends_with_slash = path.as_ref() + .to_str() + .map_or(false, |s| s.ends_with('/')); + let mut normalized = PathBuf::new(); + for component in path.as_ref().components() { + match &component { + Component::ParentDir => { + if !normalized.pop() { + normalized.push(component); + } + } + _ => { + normalized.push(component); + } + } + } + if ends_with_slash { + normalized.push(""); + } + normalized.to_str().unwrap().replace("\\", "-").replace("/", "-") +} + /// Fix links to the correct location. /// /// This adjusts links, such as turning `.md` extensions to `.html`. /// /// `path` is the path to the page being rendered relative to the root of the /// book. This is used for the `print.html` page so that links on the print -/// page go to the original location. Normal page rendering sets `path` to -/// None. Ideally, print page links would link to anchors on the print page, -/// but that is very difficult. +/// page go to the anchors that have a path id prefix. Normal page rendering +/// sets `path` to None. fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { lazy_static!
{ static ref SCHEME_LINK: Regex = Regex::new(r"^[a-z][a-z0-9+.-]*:").unwrap(); static ref MD_LINK: Regex = Regex::new(r"(?P.*)\.md(?P#.*)?").unwrap(); + static ref HTML_MD_LINK: Regex = Regex::new(r"(?P.*)\.(html|md)(?P#.*)?").unwrap(); } fn fix<'a>(dest: CowStr<'a>, path: Option<&Path>) -> CowStr<'a> { @@ -84,9 +117,9 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { if let Some(path) = path { let mut base = path.display().to_string(); if base.ends_with(".md") { - base.replace_range(base.len() - 3.., ".html"); + base.replace_range(base.len() - 3.., ""); } - return format!("{}{}", base, dest).into(); + return format!("#{}{}", normalize_path_id(base), dest.replace("#", "-")).into(); } else { return dest; } @@ -104,18 +137,34 @@ fn adjust_links<'a>(event: Event<'a>, path: Option<&Path>) -> Event<'a> { if !base.is_empty() { write!(fixed_link, "{}/", base).unwrap(); } - } - if let Some(caps) = MD_LINK.captures(&dest) { - fixed_link.push_str(&caps["link"]); - fixed_link.push_str(".html"); - if let Some(anchor) = caps.name("anchor") { - fixed_link.push_str(anchor.as_str()); - } + // In `print.html`, print page links would all link to anchors on the print page. + if let Some(caps) = HTML_MD_LINK.captures(&dest) { + fixed_link.push_str(&caps["link"]); + if let Some(anchor) = caps.name("anchor") { + fixed_link.push_str(anchor.as_str()); + } + } else { + fixed_link.push_str(&dest); + }; + + let mut fixed_anchor_for_print = String::new(); + fixed_anchor_for_print.push_str("#"); + fixed_anchor_for_print.push_str(&normalize_path_id(&fixed_link).replace("#", "-")); + return CowStr::from(fixed_anchor_for_print); } else { - fixed_link.push_str(&dest); - }; - return CowStr::from(fixed_link); + // In normal page rendering, links to anchors on another page. 
+ if let Some(caps) = MD_LINK.captures(&dest) { + fixed_link.push_str(&caps["link"]); + fixed_link.push_str(".html"); + if let Some(anchor) = caps.name("anchor") { + fixed_link.push_str(anchor.as_str()); + } + } else { + fixed_link.push_str(&dest); + }; + return CowStr::from(fixed_link); + } } dest }