Skip to content

Commit

Permalink
Auto merge of rust-lang#119912 - notriddle:notriddle/reexport-dedup, …
Browse files Browse the repository at this point in the history
…r=GuillaumeGomez

rustdoc-search: single result for items with multiple paths

Part of rust-lang#15723

Preview: https://notriddle.com/rustdoc-html-demo-9/reexport-dup/std/index.html?search=hashmap

This change uses the same "exact" paths as trait implementors and type alias inlining to track items with multiple reachable paths. This way, if you search for `vec`, you get only the `std` exports of it, and not the one from `alloc`.

It still includes all the items in the search index so that you can search for them by all available paths. For example, try `core::option` and `std::option`, and notice that the results page doesn't show duplicates, but still shows all the items in their respective crates.
  • Loading branch information
bors committed Apr 18, 2024
2 parents 3412f01 + 226c77d commit d5745df
Show file tree
Hide file tree
Showing 14 changed files with 330 additions and 31 deletions.
12 changes: 12 additions & 0 deletions src/librustdoc/formats/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,16 +346,28 @@ impl<'a, 'tcx> DocFolder for CacheBuilder<'a, 'tcx> {
{
let desc =
short_markdown_summary(&item.doc_value(), &item.link_names(self.cache));
// For searching purposes, a re-export is a duplicate if:
//
// - It's either an inline, or a true re-export
// - It's got the same name
// - Both of them have the same exact path
let defid = (match &*item.kind {
&clean::ItemKind::ImportItem(ref import) => import.source.did,
_ => None,
})
.or_else(|| item.item_id.as_def_id());
// In case this is a field from a tuple struct, we don't add it into
// the search index because its name is something like "0", which is
// not useful for rustdoc search.
self.cache.search_index.push(IndexItem {
ty,
defid,
name: s,
path: join_with_double_colon(path),
desc,
parent,
parent_idx: None,
exact_path: None,
impl_id: if let Some(ParentStackItem::Impl { item_id, .. }) =
self.cache.parent_stack.last()
{
Expand Down
2 changes: 2 additions & 0 deletions src/librustdoc/html/render/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,13 @@ pub(crate) enum RenderMode {
#[derive(Debug)]
pub(crate) struct IndexItem {
pub(crate) ty: ItemType,
pub(crate) defid: Option<DefId>,
pub(crate) name: Symbol,
pub(crate) path: String,
pub(crate) desc: String,
pub(crate) parent: Option<DefId>,
pub(crate) parent_idx: Option<isize>,
pub(crate) exact_path: Option<String>,
pub(crate) impl_id: Option<DefId>,
pub(crate) search_type: Option<IndexItemFunctionType>,
pub(crate) aliases: Box<[Symbol]>,
Expand Down
154 changes: 138 additions & 16 deletions src/librustdoc/html/render/search_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,13 @@ pub(crate) fn build_index<'tcx>(
cache: &mut Cache,
tcx: TyCtxt<'tcx>,
) -> SerializedSearchIndex {
// Maps from ID to position in the `crate_paths` array.
let mut itemid_to_pathid = FxHashMap::default();
let mut primitives = FxHashMap::default();
let mut associated_types = FxHashMap::default();
let mut crate_paths = vec![];

// item type, display path, re-exported internal path
let mut crate_paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)> = vec![];

// Attach all orphan items to the type's definition if the type
// has since been learned.
Expand All @@ -72,11 +75,13 @@ pub(crate) fn build_index<'tcx>(
let desc = short_markdown_summary(&item.doc_value(), &item.link_names(cache));
cache.search_index.push(IndexItem {
ty: item.type_(),
defid: item.item_id.as_def_id(),
name: item.name.unwrap(),
path: join_with_double_colon(&fqp[..fqp.len() - 1]),
desc,
parent: Some(parent),
parent_idx: None,
exact_path: None,
impl_id,
search_type: get_function_type_for_search(
item,
Expand Down Expand Up @@ -126,17 +131,22 @@ pub(crate) fn build_index<'tcx>(
map: &mut FxHashMap<F, isize>,
itemid: F,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
item_type: ItemType,
path: &[Symbol],
exact_path: Option<&[Symbol]>,
) -> RenderTypeId {
match map.entry(itemid) {
Entry::Occupied(entry) => RenderTypeId::Index(*entry.get()),
Entry::Vacant(entry) => {
let pathid = *lastpathid;
entry.insert(pathid);
*lastpathid += 1;
crate_paths.push((item_type, path.to_vec()));
crate_paths.push((
item_type,
path.to_vec(),
exact_path.map(|path| path.to_vec()),
));
RenderTypeId::Index(pathid)
}
}
Expand All @@ -149,21 +159,32 @@ pub(crate) fn build_index<'tcx>(
primitives: &mut FxHashMap<Symbol, isize>,
associated_types: &mut FxHashMap<Symbol, isize>,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
) -> Option<RenderTypeId> {
let Cache { ref paths, ref external_paths, .. } = *cache;
let Cache { ref paths, ref external_paths, ref exact_paths, .. } = *cache;
match id {
RenderTypeId::DefId(defid) => {
if let Some(&(ref fqp, item_type)) =
paths.get(&defid).or_else(|| external_paths.get(&defid))
{
let exact_fqp = exact_paths
.get(&defid)
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
// Re-exports only count if the name is exactly the same.
// This is a size optimization, since it means we only need
// to store the name once (and the path is re-used for everything
// exported from this same module). It's also likely to Do
// What I Mean, since if a re-export changes the name, it might
// also be a change in semantic meaning.
.filter(|fqp| fqp.last() == fqp.last());
Some(insert_into_map(
itemid_to_pathid,
ItemId::DefId(defid),
lastpathid,
crate_paths,
item_type,
fqp,
exact_fqp.map(|x| &x[..]).filter(|exact_fqp| exact_fqp != fqp),
))
} else {
None
Expand All @@ -178,6 +199,7 @@ pub(crate) fn build_index<'tcx>(
crate_paths,
ItemType::Primitive,
&[sym],
None,
))
}
RenderTypeId::Index(_) => Some(id),
Expand All @@ -188,6 +210,7 @@ pub(crate) fn build_index<'tcx>(
crate_paths,
ItemType::AssocType,
&[sym],
None,
)),
}
}
Expand All @@ -199,7 +222,7 @@ pub(crate) fn build_index<'tcx>(
primitives: &mut FxHashMap<Symbol, isize>,
associated_types: &mut FxHashMap<Symbol, isize>,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
) {
if let Some(generics) = &mut ty.generics {
for item in generics {
Expand Down Expand Up @@ -296,7 +319,7 @@ pub(crate) fn build_index<'tcx>(
}
}

let Cache { ref paths, .. } = *cache;
let Cache { ref paths, ref exact_paths, ref external_paths, .. } = *cache;

// Then, on parent modules
let crate_items: Vec<&IndexItem> = search_index
Expand All @@ -311,14 +334,56 @@ pub(crate) fn build_index<'tcx>(
lastpathid += 1;

if let Some(&(ref fqp, short)) = paths.get(&defid) {
crate_paths.push((short, fqp.clone()));
let exact_fqp = exact_paths
.get(&defid)
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
.filter(|exact_fqp| {
exact_fqp.last() == Some(&item.name) && *exact_fqp != fqp
});
crate_paths.push((short, fqp.clone(), exact_fqp.cloned()));
Some(pathid)
} else {
None
}
}
});

if let Some(defid) = item.defid
&& item.parent_idx.is_none()
{
// If this is a re-export, retain the original path.
// Associated items don't use this.
// Their parent carries the exact fqp instead.
let exact_fqp = exact_paths
.get(&defid)
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp));
item.exact_path = exact_fqp.and_then(|fqp| {
// Re-exports only count if the name is exactly the same.
// This is a size optimization, since it means we only need
// to store the name once (and the path is re-used for everything
// exported from this same module). It's also likely to Do
// What I Mean, since if a re-export changes the name, it might
// also be a change in semantic meaning.
if fqp.last() != Some(&item.name) {
return None;
}
let path =
if item.ty == ItemType::Macro && tcx.has_attr(defid, sym::macro_export) {
// `#[macro_export]` always exports to the crate root.
tcx.crate_name(defid.krate).to_string()
} else {
if fqp.len() < 2 {
return None;
}
join_with_double_colon(&fqp[..fqp.len() - 1])
};
if path == item.path {
return None;
}
Some(path)
});
}

// Omit the parent path if it is same to that of the prior item.
if lastpath == &item.path {
item.path.clear();
Expand Down Expand Up @@ -356,7 +421,7 @@ pub(crate) fn build_index<'tcx>(

struct CrateData<'a> {
items: Vec<&'a IndexItem>,
paths: Vec<(ItemType, Vec<Symbol>)>,
paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
// The String is alias name and the vec is the list of the elements with this alias.
//
// To be noted: the `usize` elements are indexes to `items`.
Expand All @@ -374,6 +439,7 @@ pub(crate) fn build_index<'tcx>(
ty: ItemType,
name: Symbol,
path: Option<usize>,
exact_path: Option<usize>,
}

impl Serialize for Paths {
Expand All @@ -387,6 +453,10 @@ pub(crate) fn build_index<'tcx>(
if let Some(ref path) = self.path {
seq.serialize_element(path)?;
}
if let Some(ref path) = self.exact_path {
assert!(self.path.is_some());
seq.serialize_element(path)?;
}
seq.end()
}
}
Expand All @@ -409,43 +479,94 @@ pub(crate) fn build_index<'tcx>(
mod_paths.insert(&item.path, index);
}
let mut paths = Vec::with_capacity(self.paths.len());
for (ty, path) in &self.paths {
for (ty, path, exact) in &self.paths {
if path.len() < 2 {
paths.push(Paths { ty: *ty, name: path[0], path: None });
paths.push(Paths { ty: *ty, name: path[0], path: None, exact_path: None });
continue;
}
let full_path = join_with_double_colon(&path[..path.len() - 1]);
let full_exact_path = exact
.as_ref()
.filter(|exact| exact.last() == path.last() && exact.len() >= 2)
.map(|exact| join_with_double_colon(&exact[..exact.len() - 1]));
let exact_path = extra_paths.len() + self.items.len();
let exact_path = full_exact_path.as_ref().map(|full_exact_path| match extra_paths
.entry(full_exact_path.clone())
{
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
if let Some(index) = mod_paths.get(&full_exact_path) {
return *index;
}
entry.insert(exact_path);
if !revert_extra_paths.contains_key(&exact_path) {
revert_extra_paths.insert(exact_path, full_exact_path.clone());
}
exact_path
}
});
if let Some(index) = mod_paths.get(&full_path) {
paths.push(Paths { ty: *ty, name: *path.last().unwrap(), path: Some(*index) });
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(*index),
exact_path,
});
continue;
}
// It means it comes from an external crate so the item and its path will be
// stored into another array.
//
// `index` is put after the last `mod_paths`
let index = extra_paths.len() + self.items.len();
if !revert_extra_paths.contains_key(&index) {
revert_extra_paths.insert(index, full_path.clone());
}
match extra_paths.entry(full_path) {
match extra_paths.entry(full_path.clone()) {
Entry::Occupied(entry) => {
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(*entry.get()),
exact_path,
});
}
Entry::Vacant(entry) => {
entry.insert(index);
if !revert_extra_paths.contains_key(&index) {
revert_extra_paths.insert(index, full_path);
}
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(index),
exact_path,
});
}
}
}

// Direct exports use adjacent arrays for the current crate's items,
// but re-exported exact paths don't.
let mut re_exports = Vec::new();
for (item_index, item) in self.items.iter().enumerate() {
if let Some(exact_path) = item.exact_path.as_ref() {
if let Some(path_index) = mod_paths.get(&exact_path) {
re_exports.push((item_index, *path_index));
} else {
let path_index = extra_paths.len() + self.items.len();
let path_index = match extra_paths.entry(exact_path.clone()) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
entry.insert(path_index);
if !revert_extra_paths.contains_key(&path_index) {
revert_extra_paths.insert(path_index, exact_path.clone());
}
path_index
}
};
re_exports.push((item_index, path_index));
}
}
}

let mut names = Vec::with_capacity(self.items.len());
let mut types = String::with_capacity(self.items.len());
let mut full_paths = Vec::with_capacity(self.items.len());
Expand Down Expand Up @@ -501,6 +622,7 @@ pub(crate) fn build_index<'tcx>(
crate_data.serialize_field("f", &functions)?;
crate_data.serialize_field("D", &self.desc_index)?;
crate_data.serialize_field("p", &paths)?;
crate_data.serialize_field("r", &re_exports)?;
crate_data.serialize_field("b", &self.associated_item_disambiguators)?;
crate_data.serialize_field("c", &bitmap_to_string(&deprecated))?;
crate_data.serialize_field("e", &bitmap_to_string(&self.empty_desc))?;
Expand Down
19 changes: 13 additions & 6 deletions src/librustdoc/html/static/js/externs.js
Original file line number Diff line number Diff line change
Expand Up @@ -239,20 +239,27 @@ let FunctionType;
* `doc` contains the description of the crate.
*
* `p` is a list of path/type pairs. It is used for parents and function parameters.
* The first item is the type, the second is the name, the third is the visible path (if any) and
* the fourth is the canonical path used for deduplication (if any).
*
* `r` is the canonical path used for deduplication of re-exported items.
* It is not used for associated items like methods (that's the fourth element
* of `p`) but is used for modules items like free functions.
*
* `c` is an array of item indices that are deprecated.
* @typedef {{
* doc: string,
* a: Object,
* n: Array<string>,
* t: String,
* t: string,
* d: Array<string>,
* q: Array<[Number, string]>,
* i: Array<Number>,
* q: Array<[number, string]>,
* i: Array<number>,
* f: string,
* p: Array<Object>,
* b: Array<[Number, String]>,
* c: Array<Number>
* p: Array<[number, string] | [number, string, number] | [number, string, number, number]>,
* b: Array<[number, String]>,
* c: Array<number>,
* r: Array<[number, number]>,
* }}
*/
let RawSearchIndexCrate;
Loading

0 comments on commit d5745df

Please sign in to comment.