Skip to content

Commit

Permalink
Auto merge of #123372 - GuillaumeGomez:rollup-nwxdzev, r=GuillaumeGomez
Browse files Browse the repository at this point in the history
Rollup of 4 pull requests

Successful merges:

 - #122614 (rustdoc-search: shard the search result descriptions)
 - #123338 (Update to new browser-ui-test version)
 - #123366 (Minor by_move_body impl cleanups)
 - #123371 (Remove dangling `.mir.stderr` and `.thir.stderr` test files)

r? `@ghost`
`@rustbot` modify labels: rollup
  • Loading branch information
bors committed Apr 2, 2024
2 parents 36b6f9b + 4468068 commit 029cb1b
Show file tree
Hide file tree
Showing 80 changed files with 1,104 additions and 808 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4783,6 +4783,8 @@ version = "0.0.0"
dependencies = [
"arrayvec",
"askama",
"base64",
"byteorder",
"expect-test",
"indexmap",
"itertools 0.12.1",
Expand Down
12 changes: 6 additions & 6 deletions compiler/rustc_mir_transform/src/coroutine/by_move_body.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//! be a coroutine body that takes all of its upvars by-move, and which we stash
//! into the `CoroutineInfo` for all coroutines returned by coroutine-closures.

use rustc_data_structures::fx::FxIndexSet;
use rustc_data_structures::unord::UnordSet;
use rustc_hir as hir;
use rustc_middle::mir::visit::MutVisitor;
use rustc_middle::mir::{self, dump_mir, MirPass};
Expand Down Expand Up @@ -33,7 +33,7 @@ impl<'tcx> MirPass<'tcx> for ByMoveBody {
return;
}

let mut by_ref_fields = FxIndexSet::default();
let mut by_ref_fields = UnordSet::default();
let by_move_upvars = Ty::new_tup_from_iter(
tcx,
tcx.closure_captures(coroutine_def_id).iter().enumerate().map(|(idx, capture)| {
Expand Down Expand Up @@ -73,7 +73,7 @@ impl<'tcx> MirPass<'tcx> for ByMoveBody {

struct MakeByMoveBody<'tcx> {
tcx: TyCtxt<'tcx>,
by_ref_fields: FxIndexSet<FieldIdx>,
by_ref_fields: UnordSet<FieldIdx>,
by_move_coroutine_ty: Ty<'tcx>,
}

Expand All @@ -89,11 +89,11 @@ impl<'tcx> MutVisitor<'tcx> for MakeByMoveBody<'tcx> {
location: mir::Location,
) {
if place.local == ty::CAPTURE_STRUCT_LOCAL
&& !place.projection.is_empty()
&& let mir::ProjectionElem::Field(idx, ty) = place.projection[0]
&& let Some((&mir::ProjectionElem::Field(idx, ty), projection)) =
place.projection.split_first()
&& self.by_ref_fields.contains(&idx)
{
let (begin, end) = place.projection[1..].split_first().unwrap();
let (begin, end) = projection.split_first().unwrap();
// FIXME(async_closures): I'm actually a bit surprised to see that we always
// initially deref the by-ref upvars. If this is not actually true, then we
// will at least get an ICE that explains why this isn't true :^)
Expand Down
2 changes: 1 addition & 1 deletion src/ci/docker/host-x86_64/mingw-check/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ ENV SCRIPT python3 ../x.py --stage 2 test src/tools/expand-yaml-anchors && \
/scripts/validate-error-codes.sh && \
reuse --include-submodules lint && \
# Runs checks to ensure that there are no ES5 issues in our JS code.
es-check es6 ../src/librustdoc/html/static/js/*.js && \
es-check es8 ../src/librustdoc/html/static/js/*.js && \
eslint -c ../src/librustdoc/html/static/.eslintrc.js ../src/librustdoc/html/static/js/*.js && \
eslint -c ../src/tools/rustdoc-js/.eslintrc.js ../src/tools/rustdoc-js/tester.js && \
eslint -c ../src/tools/rustdoc-gui/.eslintrc.js ../src/tools/rustdoc-gui/tester.js
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.16.11
0.17.0
2 changes: 2 additions & 0 deletions src/librustdoc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ path = "lib.rs"
[dependencies]
arrayvec = { version = "0.7", default-features = false }
askama = { version = "0.12", default-features = false, features = ["config"] }
base64 = "0.21.7"
byteorder = "1.5"
itertools = "0.12"
indexmap = "2"
minifier = "0.3.0"
Expand Down
33 changes: 4 additions & 29 deletions src/librustdoc/html/render/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,40 +184,15 @@ pub(crate) enum RenderTypeId {

impl RenderTypeId {
pub fn write_to_string(&self, string: &mut String) {
// (sign, value)
let (sign, id): (bool, u32) = match &self {
let id: i32 = match &self {
// 0 is a sentinel, everything else is one-indexed
// concrete type
RenderTypeId::Index(idx) if *idx >= 0 => (false, (idx + 1isize).try_into().unwrap()),
RenderTypeId::Index(idx) if *idx >= 0 => (idx + 1isize).try_into().unwrap(),
// generic type parameter
RenderTypeId::Index(idx) => (true, (-*idx).try_into().unwrap()),
RenderTypeId::Index(idx) => (*idx).try_into().unwrap(),
_ => panic!("must convert render types to indexes before serializing"),
};
// zig-zag encoding
let value: u32 = (id << 1) | (if sign { 1 } else { 0 });
// Self-terminating hex use capital letters for everything but the
// least significant digit, which is lowercase. For example, decimal 17
// would be `` Aa `` if zig-zag encoding weren't used.
//
// Zig-zag encoding, however, stores the sign bit as the last bit.
// This means, in the last hexit, 1 is actually `c`, -1 is `b`
// (`a` is the imaginary -0), and, because all the bits are shifted
// by one, `` A` `` is actually 8 and `` Aa `` is -8.
//
// https://rust-lang.github.io/rustc-dev-guide/rustdoc-internals/search.html
// describes the encoding in more detail.
let mut shift: u32 = 28;
let mut mask: u32 = 0xF0_00_00_00;
while shift < 32 {
let hexit = (value & mask) >> shift;
if hexit != 0 || shift == 0 {
let hex =
char::try_from(if shift == 0 { '`' } else { '@' } as u32 + hexit).unwrap();
string.push(hex);
}
shift = shift.wrapping_sub(4);
mask = mask >> 4;
}
search_index::encode::write_vlqhex_to_string(id, string);
}
}

Expand Down
107 changes: 94 additions & 13 deletions src/librustdoc/html/render/search_index.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pub(crate) mod encode;

use std::collections::hash_map::Entry;
use std::collections::{BTreeMap, VecDeque};

Expand All @@ -17,12 +19,46 @@ use crate::html::format::join_with_double_colon;
use crate::html::markdown::short_markdown_summary;
use crate::html::render::{self, IndexItem, IndexItemFunctionType, RenderType, RenderTypeId};

use encode::{bitmap_to_string, write_vlqhex_to_string};

/// The serialized search index, with the item descriptions split out into shards.
///
/// The `index` is a JSON-encoded list of names and other information.
///
/// The `desc` field holds the descriptions, joined with newlines and split up
/// by size into shards of about 128KiB each. Every shard is paired with the
/// number of descriptions it contains.
/// For example, `(4, "foo\nbar\nbaz\nquux")`.
///
/// There is no single, optimal size for these shards, because it depends on
/// configuration values that we can't predict or control, such as the version
/// of HTTP used (HTTP/1.1 would work better with larger files, while HTTP/2
/// and 3 are more agnostic), transport compression (gzip, zstd, etc), whether
/// the search query is going to produce a large number of results or a small
/// number, the bandwidth delay product of the network...
///
/// Gzipping some standard library descriptions to guess what transport
/// compression will do, the compressed file sizes can be as small as 4.9KiB
/// or as large as 18KiB (ignoring the final 1.9KiB shard of leftovers).
/// A "reasonable" range for files is for them to be bigger than 1KiB,
/// since that's about the amount of data that can be transferred in a
/// single TCP packet (see [1]), and smaller than 64KiB, the maximum amount
/// of data that TCP can transfer in a single round trip without extensions
/// (see [2] and [3]).
///
/// [1]: https://en.wikipedia.org/wiki/Maximum_transmission_unit#MTUs_for_common_media
/// [2]: https://en.wikipedia.org/wiki/Sliding_window_protocol#Basic_concept
/// [3]: https://learn.microsoft.com/en-us/troubleshoot/windows-server/networking/description-tcp-features
pub(crate) struct SerializedSearchIndex {
/// JSON-encoded index data.
pub(crate) index: String,
/// Description shards, each stored as `(number of descriptions, newline-joined text)`.
pub(crate) desc: Vec<(usize, String)>,
}

/// Size threshold, in bytes (128KiB), for a shard of search descriptions.
///
/// Once the shard being built has reached this length, it is closed and the
/// next description starts a new shard, so a shard may end up slightly larger
/// than this value.
const DESC_INDEX_SHARD_LEN: usize = 128 * 1024;

/// Builds the search index from the collected metadata
pub(crate) fn build_index<'tcx>(
krate: &clean::Crate,
cache: &mut Cache,
tcx: TyCtxt<'tcx>,
) -> String {
) -> SerializedSearchIndex {
let mut itemid_to_pathid = FxHashMap::default();
let mut primitives = FxHashMap::default();
let mut associated_types = FxHashMap::default();
Expand Down Expand Up @@ -319,7 +355,6 @@ pub(crate) fn build_index<'tcx>(
.collect::<Vec<_>>();

struct CrateData<'a> {
doc: String,
items: Vec<&'a IndexItem>,
paths: Vec<(ItemType, Vec<Symbol>)>,
// The String is alias name and the vec is the list of the elements with this alias.
Expand All @@ -328,6 +363,11 @@ pub(crate) fn build_index<'tcx>(
aliases: &'a BTreeMap<String, Vec<usize>>,
// Used when a type has more than one impl with an associated item with the same name.
associated_item_disambiguators: &'a Vec<(usize, String)>,
// A list of shard lengths encoded as vlqhex. See the comment in write_vlqhex_to_string
// for information on the format.
desc_index: String,
// A list of items with no description. This is eventually turned into a bitmap.
empty_desc: Vec<u32>,
}

struct Paths {
Expand Down Expand Up @@ -409,7 +449,6 @@ pub(crate) fn build_index<'tcx>(
let mut names = Vec::with_capacity(self.items.len());
let mut types = String::with_capacity(self.items.len());
let mut full_paths = Vec::with_capacity(self.items.len());
let mut descriptions = Vec::with_capacity(self.items.len());
let mut parents = Vec::with_capacity(self.items.len());
let mut functions = String::with_capacity(self.items.len());
let mut deprecated = Vec::with_capacity(self.items.len());
Expand All @@ -432,7 +471,6 @@ pub(crate) fn build_index<'tcx>(
parents.push(item.parent_idx.map(|x| x + 1).unwrap_or(0));

names.push(item.name.as_str());
descriptions.push(&item.desc);

if !item.path.is_empty() {
full_paths.push((index, &item.path));
Expand All @@ -444,7 +482,8 @@ pub(crate) fn build_index<'tcx>(
}

if item.deprecation.is_some() {
deprecated.push(index);
// bitmasks always use 1-indexing for items, with 0 as the crate itself
deprecated.push(u32::try_from(index + 1).unwrap());
}
}

Expand All @@ -455,42 +494,84 @@ pub(crate) fn build_index<'tcx>(
let has_aliases = !self.aliases.is_empty();
let mut crate_data =
serializer.serialize_struct("CrateData", if has_aliases { 9 } else { 8 })?;
crate_data.serialize_field("doc", &self.doc)?;
crate_data.serialize_field("t", &types)?;
crate_data.serialize_field("n", &names)?;
// Serialize as an array of item indices and full paths
crate_data.serialize_field("q", &full_paths)?;
crate_data.serialize_field("d", &descriptions)?;
crate_data.serialize_field("i", &parents)?;
crate_data.serialize_field("f", &functions)?;
crate_data.serialize_field("c", &deprecated)?;
crate_data.serialize_field("D", &self.desc_index)?;
crate_data.serialize_field("p", &paths)?;
crate_data.serialize_field("b", &self.associated_item_disambiguators)?;
crate_data.serialize_field("c", &bitmap_to_string(&deprecated))?;
crate_data.serialize_field("e", &bitmap_to_string(&self.empty_desc))?;
if has_aliases {
crate_data.serialize_field("a", &self.aliases)?;
}
crate_data.end()
}
}

// Collect the index into a string
format!(
let (empty_desc, desc) = {
let mut empty_desc = Vec::new();
let mut result = Vec::new();
let mut set = String::new();
let mut len: usize = 0;
let mut item_index: u32 = 0;
for desc in std::iter::once(&crate_doc).chain(crate_items.iter().map(|item| &item.desc)) {
if desc == "" {
empty_desc.push(item_index);
item_index += 1;
continue;
}
if set.len() >= DESC_INDEX_SHARD_LEN {
result.push((len, std::mem::replace(&mut set, String::new())));
len = 0;
} else if len != 0 {
set.push('\n');
}
set.push_str(&desc);
len += 1;
item_index += 1;
}
result.push((len, std::mem::replace(&mut set, String::new())));
(empty_desc, result)
};

let desc_index = {
let mut desc_index = String::with_capacity(desc.len() * 4);
for &(len, _) in desc.iter() {
write_vlqhex_to_string(len.try_into().unwrap(), &mut desc_index);
}
desc_index
};

assert_eq!(
crate_items.len() + 1,
desc.iter().map(|(len, _)| *len).sum::<usize>() + empty_desc.len()
);

// The index, which is what the search actually runs against, is JSON.
// It is loaded with `JSON.parse(..)`, since JSON parses faster than
// the full JavaScript syntax.
let index = format!(
r#"["{}",{}]"#,
krate.name(tcx),
serde_json::to_string(&CrateData {
doc: crate_doc,
items: crate_items,
paths: crate_paths,
aliases: &aliases,
associated_item_disambiguators: &associated_item_disambiguators,
desc_index,
empty_desc,
})
.expect("failed serde conversion")
// All these `replace` calls are because we have to go through JS string for JSON content.
.replace('\\', r"\\")
.replace('\'', r"\'")
// We need to escape double quotes for the JSON.
.replace("\\\"", "\\\\\"")
)
);
SerializedSearchIndex { index, desc }
}

pub(crate) fn get_function_type_for_search<'tcx>(
Expand Down
Loading

0 comments on commit 029cb1b

Please sign in to comment.