Auto merge of rust-lang#93839 - Mark-Simulacrum:delete-json-rust-deserialization, r=nnethercote

Simplify rustc_serialize by dropping support for decoding into JSON

This PR currently bundles two (somewhat separate) tasks.

First, it removes the JSON Decoder trait impl, which permitted going from JSON to Rust structs. For now, we keep supporting JSON deserialization, but only into `Json` (an equivalent of serde_json::Value). The main user that is hard to remove is custom targets -- which need some form of JSON deserialization -- but they already have a custom ad-hoc pass for moving from `Json` to a Rust struct.
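
For illustration, a minimal sketch of what value-based deserialization looks like under this scheme (the helper and field name are hypothetical; the `Json` accessors are the same ones the updated tests below use):

```rust
use rustc_serialize::json::{self, Json};

// Hypothetical helper: parse a spec to `Json`, then pull typed fields
// out by hand instead of deriving `Decodable`.
fn pointer_width(spec: &str) -> Option<u64> {
    let value: Json = json::from_str(spec).ok()?;
    // Index into the object by key, then convert to a primitive.
    value["target-pointer-width"].as_u64()
}
```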

A [comment](https://github.com/rust-lang/rust/blob/e7aca895980f25f6d2d3c48e10fd04656764d1e4/compiler/rustc_target/src/spec/mod.rs#L1653) there suggests that it would be impractical to move them to a Decodable-based impl, at least without backwards compatibility concerns. I suspect that if we were willing to widely break compatibility there, it would make sense to switch to serde_json at that point, which would produce better error messages; the types in rustc_target are relatively isolated, so we would not particularly suffer from using serde_derive.

The second part of the PR (all but the first commit) simplifies the Decoder API by removing the non-primitive `read_*` functions. These primarily add indirection (through a closure), which doesn't directly cause a performance problem (the unique closure types essentially guarantee monomorphization), but does increase the amount of work rustc and LLVM need to do. This could be split out into a separate PR, but is included here in part to help motivate the first part.
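
To make the shape of that change concrete, here is a self-contained toy model (deliberately simplified; these are not the actual `rustc_serialize` traits) contrasting the closure-based style with the direct one:

```rust
// Toy decoder over a pre-parsed buffer, standing in for rustc's opaque decoder.
trait Decoder {
    fn read_usize(&mut self) -> usize;
    // Old-style helper: every element read is routed through a closure.
    fn read_seq<T, F: FnMut(&mut Self) -> T>(&mut self, f: F) -> Vec<T>;
}

struct BufDecoder {
    data: Vec<usize>,
    pos: usize,
}

impl Decoder for BufDecoder {
    fn read_usize(&mut self) -> usize {
        let v = self.data[self.pos];
        self.pos += 1;
        v
    }
    fn read_seq<T, F: FnMut(&mut Self) -> T>(&mut self, mut f: F) -> Vec<T> {
        let len = self.read_usize();
        let mut out = Vec::with_capacity(len);
        for _ in 0..len {
            out.push(f(self));
        }
        out
    }
}

fn main() {
    // A length-prefixed sequence: 3 elements follow.
    let mut d = BufDecoder { data: vec![3, 10, 20, 30], pos: 0 };
    let old: Vec<usize> = d.read_seq(|d| d.read_usize()); // closure indirection

    // New style: read the length once, then call primitives directly.
    let mut d = BufDecoder { data: vec![3, 10, 20, 30], pos: 0 };
    let len = d.read_usize();
    let new: Vec<usize> = (0..len).map(|_| d.read_usize()).collect();

    assert_eq!(old, new);
}
```

Both versions monomorphize to essentially the same machine code; the difference is the extra closure types rustc and LLVM must instantiate and then optimize away.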

Future work might consist of:

* Specializing enum discriminant encoding to avoid leb128 for small enums (since we know the variant count, we can directly use read/write `u8` in almost all cases; see the sketch after this list)
* Adding new methods to support faster deserialization (e.g., access to the underlying byte stream)
   * Currently this is supported in a somewhat ad-hoc way by specializations for e.g. `Vec<u8>`, but other types that could benefit don't get it today.
* Removing the Decoder trait entirely in favor of a concrete type -- today, we only really have one impl of it modulo wrappers used for specialization-based dispatch.
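
A hedged sketch of the first bullet (hypothetical code, not part of this PR): since the derive macro knows the variant count at compile time, enums with at most 256 variants could write their tag as one plain byte.

```rust
// Hypothetical: pick the cheap encoding when the enum is small enough.
fn write_discriminant(buf: &mut Vec<u8>, variant_idx: usize, variant_count: usize) {
    debug_assert!(variant_idx < variant_count);
    if variant_count <= 256 {
        buf.push(variant_idx as u8); // single byte, no leb128
    } else {
        write_usize_leb128(buf, variant_idx); // rare fallback for huge enums
    }
}

// Standard unsigned LEB128, shown for contrast: 7 payload bits per byte
// plus a continuation bit, so even tiny values pay for the encoding loop.
fn write_usize_leb128(buf: &mut Vec<u8>, mut value: usize) {
    loop {
        let mut byte = (value & 0x7f) as u8;
        value >>= 7;
        if value != 0 {
            byte |= 0x80;
        }
        buf.push(byte);
        if value == 0 {
            break;
        }
    }
}
```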

Highly recommend review with whitespace changes off, as the removal of closures frequently causes things to be de-indented.
bors committed Feb 22, 2022
2 parents b8967b0 + c6ad61a commit 58a721a
Showing 15 changed files with 208 additions and 1,059 deletions.
27 changes: 15 additions & 12 deletions compiler/rustc_errors/src/json/tests.rs
@@ -5,17 +5,12 @@ use rustc_span::source_map::{FilePathMapping, SourceMap};
 
 use crate::emitter::{ColorConfig, HumanReadableErrorType};
 use crate::Handler;
-use rustc_serialize::json::decode;
+use rustc_serialize::json;
 use rustc_span::{BytePos, Span};
 
 use std::str;
 
-#[derive(Decodable, Debug, PartialEq, Eq)]
-struct TestData {
-    spans: Vec<SpanTestData>,
-}
-
-#[derive(Decodable, Debug, PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq)]
 struct SpanTestData {
     pub byte_start: u32,
     pub byte_end: u32,
@@ -41,8 +36,6 @@ impl<T: Write> Write for Shared<T> {
 
 /// Test the span yields correct positions in JSON.
 fn test_positions(code: &str, span: (u32, u32), expected_output: SpanTestData) {
-    let expected_output = TestData { spans: vec![expected_output] };
-
     rustc_span::create_default_session_globals_then(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         sm.new_source_file(Path::new("test.rs").to_owned().into(), code.to_owned());
@@ -64,9 +57,19 @@ fn test_positions(code: &str, span: (u32, u32), expected_output: SpanTestData) {
 
         let bytes = output.lock().unwrap();
         let actual_output = str::from_utf8(&bytes).unwrap();
-        let actual_output: TestData = decode(actual_output);
-
-        assert_eq!(expected_output, actual_output)
+        let actual_output = json::from_str(&actual_output).unwrap();
+        let spans = actual_output["spans"].as_array().unwrap();
+        assert_eq!(spans.len(), 1);
+        let obj = &spans[0];
+        let actual_output = SpanTestData {
+            byte_start: obj["byte_start"].as_u64().unwrap() as u32,
+            byte_end: obj["byte_end"].as_u64().unwrap() as u32,
+            line_start: obj["line_start"].as_u64().unwrap() as u32,
+            line_end: obj["line_end"].as_u64().unwrap() as u32,
+            column_start: obj["column_start"].as_u64().unwrap() as u32,
+            column_end: obj["column_end"].as_u64().unwrap() as u32,
+        };
+        assert_eq!(expected_output, actual_output);
     })
 }

53 changes: 8 additions & 45 deletions compiler/rustc_macros/src/serialize.rs
@@ -42,51 +42,26 @@ fn decodable_body(
     }
     let ty_name = s.ast().ident.to_string();
     let decode_body = match s.variants() {
-        [vi] => {
-            let construct = vi.construct(|field, index| decode_field(field, index, true));
-            quote! {
-                ::rustc_serialize::Decoder::read_struct(
-                    __decoder,
-                    |__decoder| { #construct },
-                )
-            }
-        }
+        [vi] => vi.construct(|field, _index| decode_field(field)),
         variants => {
             let match_inner: TokenStream = variants
                 .iter()
                 .enumerate()
                 .map(|(idx, vi)| {
-                    let construct = vi.construct(|field, index| decode_field(field, index, false));
+                    let construct = vi.construct(|field, _index| decode_field(field));
                     quote! { #idx => { #construct } }
                 })
                 .collect();
-            let names: TokenStream = variants
-                .iter()
-                .map(|vi| {
-                    let variant_name = vi.ast().ident.to_string();
-                    quote!(#variant_name,)
-                })
-                .collect();
             let message = format!(
                 "invalid enum variant tag while decoding `{}`, expected 0..{}",
                 ty_name,
                 variants.len()
             );
             quote! {
-                ::rustc_serialize::Decoder::read_enum(
-                    __decoder,
-                    |__decoder| {
-                        ::rustc_serialize::Decoder::read_enum_variant(
-                            __decoder,
-                            &[#names],
-                            |__decoder, __variant_idx| {
-                                match __variant_idx {
-                                    #match_inner
-                                    _ => panic!(#message),
-                                }
-                        })
-                    }
-                )
+                match ::rustc_serialize::Decoder::read_usize(__decoder) {
+                    #match_inner
+                    _ => panic!(#message),
+                }
             }
         }
     };
@@ -101,30 +76,18 @@ fn decodable_body(
     )
 }
 
-fn decode_field(field: &syn::Field, index: usize, is_struct: bool) -> proc_macro2::TokenStream {
+fn decode_field(field: &syn::Field) -> proc_macro2::TokenStream {
     let field_span = field.ident.as_ref().map_or(field.ty.span(), |ident| ident.span());
 
     let decode_inner_method = if let syn::Type::Reference(_) = field.ty {
         quote! { ::rustc_middle::ty::codec::RefDecodable::decode }
     } else {
         quote! { ::rustc_serialize::Decodable::decode }
     };
-    let (decode_method, opt_field_name) = if is_struct {
-        let field_name = field.ident.as_ref().map_or_else(|| index.to_string(), |i| i.to_string());
-        (proc_macro2::Ident::new("read_struct_field", field_span), quote! { #field_name, })
-    } else {
-        (proc_macro2::Ident::new("read_enum_variant_arg", field_span), quote! {})
-    };
 
     let __decoder = quote! { __decoder };
     // Use the span of the field for the method call, so
     // that backtraces will point to the field.
-    let decode_call = quote_spanned! {field_span=>
-        ::rustc_serialize::Decoder::#decode_method(
-            #__decoder, #opt_field_name #decode_inner_method)
-    };
-
-    quote! { #decode_call }
+    quote_spanned! {field_span=> #decode_inner_method(#__decoder) }
 }
 
 pub fn type_encodable_derive(mut s: synstructure::Structure<'_>) -> proc_macro2::TokenStream {
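
For orientation, a hedged sketch of roughly what `#[derive(Decodable)]` now expands to for a two-variant enum (`Shape` is hypothetical, the traits are the in-tree `rustc_serialize` ones, and the real macro output differs in detail):

```rust
use rustc_serialize::{Decodable, Decoder};

enum Shape {
    Circle(f64),
    Square(f64),
}

// A usize tag read followed by a plain `match`, with no
// read_enum/read_enum_variant closures in between.
impl<D: Decoder> Decodable<D> for Shape {
    fn decode(__decoder: &mut D) -> Shape {
        match ::rustc_serialize::Decoder::read_usize(__decoder) {
            0 => Shape::Circle(Decodable::decode(__decoder)),
            1 => Shape::Square(Decodable::decode(__decoder)),
            _ => panic!("invalid enum variant tag while decoding `Shape`, expected 0..2"),
        }
    }
}
```
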
41 changes: 18 additions & 23 deletions compiler/rustc_query_system/src/dep_graph/serialized.rs
@@ -122,29 +122,24 @@ impl<'a, K: DepKind + Decodable<opaque::Decoder<'a>>> Decodable<opaque::Decoder<
         let mut edge_list_data = Vec::with_capacity(edge_count);
 
         for _index in 0..node_count {
-            d.read_struct(|d| {
-                let dep_node: DepNode<K> = d.read_struct_field("node", Decodable::decode);
-                let _i: SerializedDepNodeIndex = nodes.push(dep_node);
-                debug_assert_eq!(_i.index(), _index);
-
-                let fingerprint: Fingerprint =
-                    d.read_struct_field("fingerprint", Decodable::decode);
-                let _i: SerializedDepNodeIndex = fingerprints.push(fingerprint);
-                debug_assert_eq!(_i.index(), _index);
-
-                d.read_struct_field("edges", |d| {
-                    d.read_seq(|d, len| {
-                        let start = edge_list_data.len().try_into().unwrap();
-                        for _ in 0..len {
-                            let edge = d.read_seq_elt(Decodable::decode);
-                            edge_list_data.push(edge);
-                        }
-                        let end = edge_list_data.len().try_into().unwrap();
-                        let _i: SerializedDepNodeIndex = edge_list_indices.push((start, end));
-                        debug_assert_eq!(_i.index(), _index);
-                    })
-                })
-            });
+            let dep_node: DepNode<K> = Decodable::decode(d);
+            let _i: SerializedDepNodeIndex = nodes.push(dep_node);
+            debug_assert_eq!(_i.index(), _index);
+
+            let fingerprint: Fingerprint = Decodable::decode(d);
+            let _i: SerializedDepNodeIndex = fingerprints.push(fingerprint);
+            debug_assert_eq!(_i.index(), _index);
+
+            // Deserialize edges -- sequence of DepNodeIndex
+            let len = d.read_usize();
+            let start = edge_list_data.len().try_into().unwrap();
+            for _ in 0..len {
+                let edge = Decodable::decode(d);
+                edge_list_data.push(edge);
+            }
+            let end = edge_list_data.len().try_into().unwrap();
+            let _i: SerializedDepNodeIndex = edge_list_indices.push((start, end));
+            debug_assert_eq!(_i.index(), _index);
         }
 
         let index: FxHashMap<_, _> =
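
As a side note, the (start, end) bookkeeping above implements a flat adjacency list; a self-contained sketch of the pattern with simplified types:

```rust
fn main() {
    // Edges for three nodes, as they conceptually arrive from the decoder.
    let per_node_edges: Vec<Vec<u32>> = vec![vec![1, 2], vec![], vec![0]];

    // All edges live in one flat vector; each node keeps only a
    // (start, end) range into it, avoiding a separate Vec per node.
    let mut edge_list_data: Vec<u32> = Vec::new();
    let mut edge_list_indices: Vec<(u32, u32)> = Vec::new();
    for edges in &per_node_edges {
        let start = edge_list_data.len() as u32;
        edge_list_data.extend(edges);
        let end = edge_list_data.len() as u32;
        edge_list_indices.push((start, end));
    }

    // A node's edges are recovered by slicing the flat vector.
    let (start, end) = edge_list_indices[0];
    assert_eq!(&edge_list_data[start as usize..end as usize], &[1, 2][..]);
}
```
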
107 changes: 52 additions & 55 deletions compiler/rustc_serialize/src/collection_impls.rs
@@ -18,7 +18,8 @@ impl<S: Encoder, A: Array<Item: Encodable<S>>> Encodable<S> for SmallVec<A> {
 
 impl<D: Decoder, A: Array<Item: Decodable<D>>> Decodable<D> for SmallVec<A> {
     fn decode(d: &mut D) -> SmallVec<A> {
-        d.read_seq(|d, len| (0..len).map(|_| d.read_seq_elt(|d| Decodable::decode(d))).collect())
+        let len = d.read_usize();
+        (0..len).map(|_| Decodable::decode(d)).collect()
     }
 }

@@ -35,7 +36,8 @@ impl<S: Encoder, T: Encodable<S>> Encodable<S> for LinkedList<T> {
 
 impl<D: Decoder, T: Decodable<D>> Decodable<D> for LinkedList<T> {
     fn decode(d: &mut D) -> LinkedList<T> {
-        d.read_seq(|d, len| (0..len).map(|_| d.read_seq_elt(|d| Decodable::decode(d))).collect())
+        let len = d.read_usize();
+        (0..len).map(|_| Decodable::decode(d)).collect()
     }
 }

@@ -52,7 +54,8 @@ impl<S: Encoder, T: Encodable<S>> Encodable<S> for VecDeque<T> {
 
 impl<D: Decoder, T: Decodable<D>> Decodable<D> for VecDeque<T> {
     fn decode(d: &mut D) -> VecDeque<T> {
-        d.read_seq(|d, len| (0..len).map(|_| d.read_seq_elt(|d| Decodable::decode(d))).collect())
+        let len = d.read_usize();
+        (0..len).map(|_| Decodable::decode(d)).collect()
     }
 }

@@ -78,15 +81,14 @@ where
     V: Decodable<D>,
 {
     fn decode(d: &mut D) -> BTreeMap<K, V> {
-        d.read_map(|d, len| {
-            let mut map = BTreeMap::new();
-            for _ in 0..len {
-                let key = d.read_map_elt_key(|d| Decodable::decode(d));
-                let val = d.read_map_elt_val(|d| Decodable::decode(d));
-                map.insert(key, val);
-            }
-            map
-        })
+        let len = d.read_usize();
+        let mut map = BTreeMap::new();
+        for _ in 0..len {
+            let key = Decodable::decode(d);
+            let val = Decodable::decode(d);
+            map.insert(key, val);
+        }
+        map
     }
 }

@@ -109,13 +111,12 @@ where
     T: Decodable<D> + PartialEq + Ord,
 {
     fn decode(d: &mut D) -> BTreeSet<T> {
-        d.read_seq(|d, len| {
-            let mut set = BTreeSet::new();
-            for _ in 0..len {
-                set.insert(d.read_seq_elt(|d| Decodable::decode(d)));
-            }
-            set
-        })
+        let len = d.read_usize();
+        let mut set = BTreeSet::new();
+        for _ in 0..len {
+            set.insert(Decodable::decode(d));
+        }
+        set
     }
 }

@@ -143,16 +144,15 @@ where
     S: BuildHasher + Default,
 {
     fn decode(d: &mut D) -> HashMap<K, V, S> {
-        d.read_map(|d, len| {
-            let state = Default::default();
-            let mut map = HashMap::with_capacity_and_hasher(len, state);
-            for _ in 0..len {
-                let key = d.read_map_elt_key(|d| Decodable::decode(d));
-                let val = d.read_map_elt_val(|d| Decodable::decode(d));
-                map.insert(key, val);
-            }
-            map
-        })
+        let len = d.read_usize();
+        let state = Default::default();
+        let mut map = HashMap::with_capacity_and_hasher(len, state);
+        for _ in 0..len {
+            let key = Decodable::decode(d);
+            let val = Decodable::decode(d);
+            map.insert(key, val);
+        }
+        map
     }
 }

@@ -187,14 +187,13 @@ where
     S: BuildHasher + Default,
 {
     fn decode(d: &mut D) -> HashSet<T, S> {
-        d.read_seq(|d, len| {
-            let state = Default::default();
-            let mut set = HashSet::with_capacity_and_hasher(len, state);
-            for _ in 0..len {
-                set.insert(d.read_seq_elt(|d| Decodable::decode(d)));
-            }
-            set
-        })
+        let len = d.read_usize();
+        let state = Default::default();
+        let mut set = HashSet::with_capacity_and_hasher(len, state);
+        for _ in 0..len {
+            set.insert(Decodable::decode(d));
+        }
+        set
     }
 }

@@ -222,16 +221,15 @@ where
     S: BuildHasher + Default,
 {
     fn decode(d: &mut D) -> indexmap::IndexMap<K, V, S> {
-        d.read_map(|d, len| {
-            let state = Default::default();
-            let mut map = indexmap::IndexMap::with_capacity_and_hasher(len, state);
-            for _ in 0..len {
-                let key = d.read_map_elt_key(|d| Decodable::decode(d));
-                let val = d.read_map_elt_val(|d| Decodable::decode(d));
-                map.insert(key, val);
-            }
-            map
-        })
+        let len = d.read_usize();
+        let state = Default::default();
+        let mut map = indexmap::IndexMap::with_capacity_and_hasher(len, state);
+        for _ in 0..len {
+            let key = Decodable::decode(d);
+            let val = Decodable::decode(d);
+            map.insert(key, val);
+        }
+        map
     }
 }

@@ -256,14 +254,13 @@ where
     S: BuildHasher + Default,
 {
     fn decode(d: &mut D) -> indexmap::IndexSet<T, S> {
-        d.read_seq(|d, len| {
-            let state = Default::default();
-            let mut set = indexmap::IndexSet::with_capacity_and_hasher(len, state);
-            for _ in 0..len {
-                set.insert(d.read_seq_elt(|d| Decodable::decode(d)));
-            }
-            set
-        })
+        let len = d.read_usize();
+        let state = Default::default();
+        let mut set = indexmap::IndexSet::with_capacity_and_hasher(len, state);
+        for _ in 0..len {
+            set.insert(Decodable::decode(d));
+        }
+        set
     }
 }
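
The same length-prefix pattern extends to any further collection; a hedged sketch for a hypothetical `MyStack` type (not part of this PR; the trait signatures match the impls above):

```rust
use rustc_serialize::{Decodable, Decoder};

// `MyStack<T>` is a made-up collection used only for illustration.
struct MyStack<T> {
    items: Vec<T>,
}

impl<D: Decoder, T: Decodable<D>> Decodable<D> for MyStack<T> {
    fn decode(d: &mut D) -> MyStack<T> {
        // Same shape as every impl above: length prefix, then elements.
        let len = d.read_usize();
        let mut items = Vec::with_capacity(len);
        for _ in 0..len {
            items.push(Decodable::decode(d));
        }
        MyStack { items }
    }
}
```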
