Skip to content

Commit

Permalink
rustdoc-search: single result for items with multiple paths
Browse files Browse the repository at this point in the history
This change uses the same "exact" paths as trait implementors
and type alias inlining to track items with multiple
reachable paths. This way, if you search for `vec`, you get
only the `std` exports of it, and not the one from `alloc`.

It still includes all the items in the search index so that
you can search for them by all available paths. For example,
try `core::option` and `std::option`, and notice that the
results page doesn't show duplicates, but still shows all
the items in their respective crates.
  • Loading branch information
notriddle committed Jan 13, 2024
1 parent 174e73a commit f6f69e8
Show file tree
Hide file tree
Showing 13 changed files with 315 additions and 26 deletions.
12 changes: 12 additions & 0 deletions src/librustdoc/formats/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,16 +348,28 @@ impl<'a, 'tcx> DocFolder for CacheBuilder<'a, 'tcx> {
{
let desc =
short_markdown_summary(&item.doc_value(), &item.link_names(self.cache));
// For searching purposes, a re-export is a duplicate if:
//
// - It's either an inline, or a true re-export
// - It's got the same name
// - Both of them have the same exact path
let defid = (match &*item.kind {
&clean::ItemKind::ImportItem(ref import) => import.source.did,
_ => None,
})
.or_else(|| item.item_id.as_def_id());
// In case this is a field from a tuple struct, we don't add it into
// the search index because its name is something like "0", which is
// not useful for rustdoc search.
self.cache.search_index.push(IndexItem {
ty,
defid,
name: s,
path: join_with_double_colon(path),
desc,
parent,
parent_idx: None,
exact_path: None,
impl_id: if let Some(ParentStackItem::Impl { item_id, .. }) =
self.cache.parent_stack.last()
{
Expand Down
2 changes: 2 additions & 0 deletions src/librustdoc/html/render/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,13 @@ pub(crate) enum RenderMode {
#[derive(Debug)]
pub(crate) struct IndexItem {
pub(crate) ty: ItemType,
pub(crate) defid: Option<DefId>,
pub(crate) name: Symbol,
pub(crate) path: String,
pub(crate) desc: String,
pub(crate) parent: Option<DefId>,
pub(crate) parent_idx: Option<isize>,
pub(crate) exact_path: Option<String>,
pub(crate) impl_id: Option<DefId>,
pub(crate) search_type: Option<IndexItemFunctionType>,
pub(crate) aliases: Box<[Symbol]>,
Expand Down
151 changes: 135 additions & 16 deletions src/librustdoc/html/render/search_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::collections::{BTreeMap, VecDeque};
use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
use rustc_middle::ty::TyCtxt;
use rustc_span::def_id::DefId;
use rustc_span::sym;
use rustc_span::symbol::Symbol;
use serde::ser::{Serialize, SerializeSeq, SerializeStruct, Serializer};
use thin_vec::ThinVec;
Expand All @@ -22,10 +23,13 @@ pub(crate) fn build_index<'tcx>(
cache: &mut Cache,
tcx: TyCtxt<'tcx>,
) -> String {
// Maps from ID to position in the `crate_paths` array.
let mut itemid_to_pathid = FxHashMap::default();
let mut primitives = FxHashMap::default();
let mut associated_types = FxHashMap::default();
let mut crate_paths = vec![];

// item type, display path, re-exported internal path
let mut crate_paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)> = vec![];

// Attach all orphan items to the type's definition if the type
// has since been learned.
Expand All @@ -35,11 +39,13 @@ pub(crate) fn build_index<'tcx>(
let desc = short_markdown_summary(&item.doc_value(), &item.link_names(cache));
cache.search_index.push(IndexItem {
ty: item.type_(),
defid: item.item_id.as_def_id(),
name: item.name.unwrap(),
path: join_with_double_colon(&fqp[..fqp.len() - 1]),
desc,
parent: Some(parent),
parent_idx: None,
exact_path: None,
impl_id,
search_type: get_function_type_for_search(
item,
Expand Down Expand Up @@ -88,17 +94,22 @@ pub(crate) fn build_index<'tcx>(
map: &mut FxHashMap<F, isize>,
itemid: F,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
item_type: ItemType,
path: &[Symbol],
exact_path: Option<&[Symbol]>,
) -> RenderTypeId {
match map.entry(itemid) {
Entry::Occupied(entry) => RenderTypeId::Index(*entry.get()),
Entry::Vacant(entry) => {
let pathid = *lastpathid;
entry.insert(pathid);
*lastpathid += 1;
crate_paths.push((item_type, path.to_vec()));
crate_paths.push((
item_type,
path.to_vec(),
exact_path.map(|path| path.to_vec()),
));
RenderTypeId::Index(pathid)
}
}
Expand All @@ -111,21 +122,30 @@ pub(crate) fn build_index<'tcx>(
primitives: &mut FxHashMap<Symbol, isize>,
associated_types: &mut FxHashMap<Symbol, isize>,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
) -> Option<RenderTypeId> {
let Cache { ref paths, ref external_paths, .. } = *cache;
let Cache { ref paths, ref external_paths, ref exact_paths, .. } = *cache;
match id {
RenderTypeId::DefId(defid) => {
if let Some(&(ref fqp, item_type)) =
paths.get(&defid).or_else(|| external_paths.get(&defid))
{
let exact_fqp = exact_paths
    .get(&defid)
    .or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
    // Re-exports only count if the last segment of the exact path is
    // exactly the same as the last segment of the display path (`fqp`).
    // This is a size optimization, as well as a DWIM attempt: if the
    // names are not the same, the intent probably isn't the same, either.
    //
    // The closure parameter must not be named `fqp`: that would shadow
    // the outer display path and compare the exact path with itself,
    // making this filter a no-op.
    .filter(|exact_fqp| exact_fqp.last() == fqp.last());
Some(insert_into_map(
itemid_to_pathid,
ItemId::DefId(defid),
lastpathid,
crate_paths,
item_type,
fqp,
exact_fqp.map(|x| &x[..]).filter(|exact_fqp| exact_fqp != fqp),
))
} else {
None
Expand All @@ -140,6 +160,7 @@ pub(crate) fn build_index<'tcx>(
crate_paths,
ItemType::Primitive,
&[sym],
None,
))
}
RenderTypeId::Index(_) => Some(id),
Expand All @@ -150,6 +171,7 @@ pub(crate) fn build_index<'tcx>(
crate_paths,
ItemType::AssocType,
&[sym],
None,
)),
}
}
Expand All @@ -161,7 +183,7 @@ pub(crate) fn build_index<'tcx>(
primitives: &mut FxHashMap<Symbol, isize>,
associated_types: &mut FxHashMap<Symbol, isize>,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
) {
if let Some(generics) = &mut ty.generics {
for item in generics {
Expand Down Expand Up @@ -258,7 +280,7 @@ pub(crate) fn build_index<'tcx>(
}
}

let Cache { ref paths, .. } = *cache;
let Cache { ref paths, ref exact_paths, ref external_paths, .. } = *cache;

// Then, on parent modules
let crate_items: Vec<&IndexItem> = search_index
Expand All @@ -273,14 +295,54 @@ pub(crate) fn build_index<'tcx>(
lastpathid += 1;

if let Some(&(ref fqp, short)) = paths.get(&defid) {
crate_paths.push((short, fqp.clone()));
let exact_fqp = exact_paths
.get(&defid)
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
.filter(|exact_fqp| {
exact_fqp.last() == Some(&item.name) && *exact_fqp != fqp
});
crate_paths.push((short, fqp.clone(), exact_fqp.cloned()));
Some(pathid)
} else {
None
}
}
});

if let Some(defid) = item.defid
&& item.parent_idx.is_none()
{
// If this is a re-export, retain the original path.
// Associated items don't use this.
// Their parent carries the exact fqp instead.
let exact_fqp = exact_paths
.get(&defid)
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp));
item.exact_path = exact_fqp.and_then(|fqp| {
// Re-exports only count if the name is exactly the same.
// This is a size optimization, as well as a DWIM ("do what I mean")
// attempt: if the names are not the same, the intent probably
// isn't the same, either.
if fqp.last() != Some(&item.name) {
return None;
}
let path =
if item.ty == ItemType::Macro && tcx.has_attr(defid, sym::macro_export) {
// `#[macro_export]` always exports to the crate root.
tcx.crate_name(defid.krate).to_string()
} else {
if fqp.len() < 2 {
return None;
}
join_with_double_colon(&fqp[..fqp.len() - 1])
};
if path == item.path {
return None;
}
Some(path)
});
}

// Omit the parent path if it is the same as that of the prior item.
if lastpath == &item.path {
item.path.clear();
Expand Down Expand Up @@ -319,7 +381,7 @@ pub(crate) fn build_index<'tcx>(
struct CrateData<'a> {
doc: String,
items: Vec<&'a IndexItem>,
paths: Vec<(ItemType, Vec<Symbol>)>,
paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
// The String is the alias name and the Vec is the list of elements with this alias.
//
// Note: the `usize` elements are indexes into `items`.
Expand All @@ -332,6 +394,7 @@ pub(crate) fn build_index<'tcx>(
ty: ItemType,
name: Symbol,
path: Option<usize>,
exact_path: Option<usize>,
}

impl Serialize for Paths {
Expand All @@ -345,6 +408,10 @@ pub(crate) fn build_index<'tcx>(
if let Some(ref path) = self.path {
seq.serialize_element(path)?;
}
if let Some(ref path) = self.exact_path {
assert!(self.path.is_some());
seq.serialize_element(path)?;
}
seq.end()
}
}
Expand All @@ -367,43 +434,94 @@ pub(crate) fn build_index<'tcx>(
mod_paths.insert(&item.path, index);
}
let mut paths = Vec::with_capacity(self.paths.len());
for (ty, path) in &self.paths {
for (ty, path, exact) in &self.paths {
if path.len() < 2 {
paths.push(Paths { ty: *ty, name: path[0], path: None });
paths.push(Paths { ty: *ty, name: path[0], path: None, exact_path: None });
continue;
}
let full_path = join_with_double_colon(&path[..path.len() - 1]);
let full_exact_path = exact
.as_ref()
.filter(|exact| exact.last() == path.last() && exact.len() >= 2)
.map(|exact| join_with_double_colon(&exact[..exact.len() - 1]));
let exact_path = extra_paths.len() + self.items.len();
let exact_path = full_exact_path.as_ref().map(|full_exact_path| match extra_paths
.entry(full_exact_path.clone())
{
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
if let Some(index) = mod_paths.get(&full_exact_path) {
return *index;
}
entry.insert(exact_path);
if !revert_extra_paths.contains_key(&exact_path) {
revert_extra_paths.insert(exact_path, full_exact_path.clone());
}
exact_path
}
});
if let Some(index) = mod_paths.get(&full_path) {
paths.push(Paths { ty: *ty, name: *path.last().unwrap(), path: Some(*index) });
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(*index),
exact_path,
});
continue;
}
// It means it comes from an external crate so the item and its path will be
// stored into another array.
//
// `index` is put after the last `mod_paths`
let index = extra_paths.len() + self.items.len();
if !revert_extra_paths.contains_key(&index) {
revert_extra_paths.insert(index, full_path.clone());
}
match extra_paths.entry(full_path) {
match extra_paths.entry(full_path.clone()) {
Entry::Occupied(entry) => {
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(*entry.get()),
exact_path,
});
}
Entry::Vacant(entry) => {
entry.insert(index);
if !revert_extra_paths.contains_key(&index) {
revert_extra_paths.insert(index, full_path);
}
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(index),
exact_path,
});
}
}
}

// Direct exports use adjacent arrays for the current crate's items,
// but re-exported exact paths don't.
let mut re_exports = Vec::new();
for (item_index, item) in self.items.iter().enumerate() {
if let Some(exact_path) = item.exact_path.as_ref() {
if let Some(path_index) = mod_paths.get(&exact_path) {
re_exports.push((item_index, *path_index));
} else {
let path_index = extra_paths.len() + self.items.len();
let path_index = match extra_paths.entry(exact_path.clone()) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
entry.insert(path_index);
if !revert_extra_paths.contains_key(&path_index) {
revert_extra_paths.insert(path_index, exact_path.clone());
}
path_index
}
};
re_exports.push((item_index, path_index));
}
}
}

let mut names = Vec::with_capacity(self.items.len());
let mut types = String::with_capacity(self.items.len());
let mut full_paths = Vec::with_capacity(self.items.len());
Expand Down Expand Up @@ -463,6 +581,7 @@ pub(crate) fn build_index<'tcx>(
crate_data.serialize_field("f", &functions)?;
crate_data.serialize_field("c", &deprecated)?;
crate_data.serialize_field("p", &paths)?;
crate_data.serialize_field("r", &re_exports)?;
crate_data.serialize_field("b", &self.associated_item_disambiguators)?;
if has_aliases {
crate_data.serialize_field("a", &self.aliases)?;
Expand Down
Loading

0 comments on commit f6f69e8

Please sign in to comment.