Skip to content

Commit

Permalink
Auto merge of rust-lang#118344 - saethlin:rmeta-header-pos, r=WaffleL…
Browse files Browse the repository at this point in the history
…apkin

 Use a u64 for the rmeta root position

Waffle noticed this in rust-lang#117301 (comment)

We've upgraded the other file offsets to u64, and this one only costs 4 bytes per file. Also the way the truncation was being done before was extremely easy to miss, I sure missed it! It's not clear to me if not having this change effectively made the other upgrades from u32 to u64 ineffective, but we can have it now.

r? `@WaffleLapkin`
  • Loading branch information
bors committed Dec 11, 2023
2 parents 8a37655 + 79bdd24 commit 5701093
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 44 deletions.
18 changes: 9 additions & 9 deletions compiler/rustc_codegen_ssa/src/back/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,12 +158,12 @@ pub(super) fn get_metadata_xcoff<'a>(path: &Path, data: &'a [u8]) -> Result<&'a
file.symbols().find(|sym| sym.name() == Ok(AIX_METADATA_SYMBOL_NAME))
{
let offset = metadata_symbol.address() as usize;
if offset < 4 {
if offset < 8 {
return Err(format!("Invalid metadata symbol offset: {offset}"));
}
// The offset specifies the location of rustc metadata in the comment section.
// The metadata is preceded by a 4-byte length field.
let len = u32::from_be_bytes(info_data[(offset - 4)..offset].try_into().unwrap()) as usize;
// The metadata is preceded by a 8-byte length field.
let len = u64::from_le_bytes(info_data[(offset - 8)..offset].try_into().unwrap()) as usize;
if offset + len > (info_data.len() as usize) {
return Err(format!(
"Metadata at offset {offset} with size {len} is beyond .info section"
Expand Down Expand Up @@ -479,8 +479,8 @@ pub fn create_wrapper_file(
file.section_mut(section).flags =
SectionFlags::Xcoff { s_flags: xcoff::STYP_INFO as u32 };

let len = data.len() as u32;
let offset = file.append_section_data(section, &len.to_be_bytes(), 1);
let len = data.len() as u64;
let offset = file.append_section_data(section, &len.to_le_bytes(), 1);
// Add a symbol referring to the data in .info section.
file.add_symbol(Symbol {
name: AIX_METADATA_SYMBOL_NAME.into(),
Expand Down Expand Up @@ -524,7 +524,7 @@ pub fn create_compressed_metadata_file(
symbol_name: &str,
) -> Vec<u8> {
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
packed_metadata.write_all(&(metadata.raw_data().len() as u32).to_be_bytes()).unwrap();
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
packed_metadata.extend(metadata.raw_data());

let Some(mut file) = create_object_file(sess) else {
Expand Down Expand Up @@ -599,12 +599,12 @@ pub fn create_compressed_metadata_file_for_xcoff(
section: SymbolSection::Section(data_section),
flags: SymbolFlags::None,
});
let len = data.len() as u32;
let offset = file.append_section_data(section, &len.to_be_bytes(), 1);
let len = data.len() as u64;
let offset = file.append_section_data(section, &len.to_le_bytes(), 1);
// Add a symbol referring to the rustc metadata.
file.add_symbol(Symbol {
name: AIX_METADATA_SYMBOL_NAME.into(),
value: offset + 4, // The metadata is preceded by a 4-byte length field.
value: offset + 8, // The metadata is preceded by a 8-byte length field.
size: 0,
kind: SymbolKind::Unknown,
scope: SymbolScope::Dynamic,
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_metadata/src/locator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -783,8 +783,8 @@ fn get_metadata_section<'p>(
loader.get_dylib_metadata(target, filename).map_err(MetadataError::LoadFailure)?;
// The header is uncompressed
let header_len = METADATA_HEADER.len();
// header + u32 length of data
let data_start = header_len + 4;
// header + u64 length of data
let data_start = header_len + 8;

debug!("checking {} bytes of metadata-version stamp", header_len);
let header = &buf[..cmp::min(header_len, buf.len())];
Expand All @@ -797,13 +797,13 @@ fn get_metadata_section<'p>(

// Length of the compressed stream - this allows linkers to pad the section if they want
let Ok(len_bytes) =
<[u8; 4]>::try_from(&buf[header_len..cmp::min(data_start, buf.len())])
<[u8; 8]>::try_from(&buf[header_len..cmp::min(data_start, buf.len())])
else {
return Err(MetadataError::LoadFailure(
"invalid metadata length found".to_string(),
));
};
let compressed_len = u32::from_be_bytes(len_bytes) as usize;
let compressed_len = u64::from_le_bytes(len_bytes) as usize;

// Header is okay -> inflate the actual metadata
let compressed_bytes = buf.slice(|buf| &buf[data_start..(data_start + compressed_len)]);
Expand Down
25 changes: 11 additions & 14 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -690,28 +690,25 @@ impl MetadataBlob {
}

pub(crate) fn get_rustc_version(&self) -> String {
LazyValue::<String>::from_position(NonZeroUsize::new(METADATA_HEADER.len() + 4).unwrap())
LazyValue::<String>::from_position(NonZeroUsize::new(METADATA_HEADER.len() + 8).unwrap())
.decode(self)
}

pub(crate) fn get_header(&self) -> CrateHeader {
let slice = &self.blob()[..];
fn root_pos(&self) -> NonZeroUsize {
let offset = METADATA_HEADER.len();
let pos_bytes = self.blob()[offset..][..8].try_into().unwrap();
let pos = u64::from_le_bytes(pos_bytes);
NonZeroUsize::new(pos as usize).unwrap()
}

let pos_bytes = slice[offset..][..4].try_into().unwrap();
let pos = u32::from_be_bytes(pos_bytes) as usize;

LazyValue::<CrateHeader>::from_position(NonZeroUsize::new(pos).unwrap()).decode(self)
pub(crate) fn get_header(&self) -> CrateHeader {
let pos = self.root_pos();
LazyValue::<CrateHeader>::from_position(pos).decode(self)
}

pub(crate) fn get_root(&self) -> CrateRoot {
let slice = &self.blob()[..];
let offset = METADATA_HEADER.len();

let pos_bytes = slice[offset..][..4].try_into().unwrap();
let pos = u32::from_be_bytes(pos_bytes) as usize;

LazyValue::<CrateRoot>::from_position(NonZeroUsize::new(pos).unwrap()).decode(self)
let pos = self.root_pos();
LazyValue::<CrateRoot>::from_position(pos).decode(self)
}

pub(crate) fn list_crate_metadata(
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2190,7 +2190,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
encoder.emit_raw_bytes(METADATA_HEADER);

// Will be filled with the root position after encoding everything.
encoder.emit_raw_bytes(&[0, 0, 0, 0]);
encoder.emit_raw_bytes(&0u64.to_le_bytes());

let source_map_files = tcx.sess.source_map().files();
let source_file_cache = (source_map_files[0].clone(), 0);
Expand Down Expand Up @@ -2246,7 +2246,7 @@ fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Erro
// Encode the root position.
let header = METADATA_HEADER.len();
file.seek(std::io::SeekFrom::Start(header as u64))?;
file.write_all(&[(pos >> 24) as u8, (pos >> 16) as u8, (pos >> 8) as u8, (pos >> 0) as u8])?;
file.write_all(&pos.to_le_bytes())?;

// Return to the position where we are before writing the root position.
file.seek(std::io::SeekFrom::Start(pos_before_seek))?;
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ pub(crate) fn rustc_version(cfg_version: &'static str) -> String {
/// Metadata encoding version.
/// N.B., increment this if you change the format of metadata such that
/// the rustc version can't be found to compare with `rustc_version()`.
const METADATA_VERSION: u8 = 8;
const METADATA_VERSION: u8 = 9;

/// Metadata header which includes `METADATA_VERSION`.
///
/// This header is followed by the length of the compressed data, then
/// the position of the `CrateRoot`, which is encoded as a 32-bit big-endian
/// the position of the `CrateRoot`, which is encoded as a 64-bit little-endian
/// unsigned integer, and further followed by the rustc version string.
pub const METADATA_HEADER: &[u8] = &[b'r', b'u', b's', b't', 0, 0, 0, METADATA_VERSION];

Expand Down
31 changes: 18 additions & 13 deletions src/tools/rust-analyzer/crates/proc-macro-api/src/version.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ fn read_section<'a>(dylib_binary: &'a [u8], section_name: &str) -> io::Result<&'
}

/// Check the version of rustc that was used to compile a proc macro crate's
///
/// binary file.
///
/// A proc macro crate binary's ".rustc" section has following byte layout:
/// * [b'r',b'u',b's',b't',0,0,0,5] is the first 8 bytes
/// * ff060000 734e6150 is followed, it's the snappy format magic bytes,
Expand All @@ -96,8 +96,8 @@ fn read_section<'a>(dylib_binary: &'a [u8], section_name: &str) -> io::Result<&'
/// The bytes you get after decompressing the snappy format portion has
/// following layout:
/// * [b'r',b'u',b's',b't',0,0,0,5] is the first 8 bytes(again)
/// * [crate root bytes] next 4 bytes is to store crate root position,
/// according to rustc's source code comment
/// * [crate root bytes] next 8 bytes (4 in old versions) is to store
/// crate root position, according to rustc's source code comment
/// * [length byte] next 1 byte tells us how many bytes we should read next
/// for the version string's utf8 bytes
/// * [version string bytes encoded in utf8] <- GET THIS BOI
Expand All @@ -119,14 +119,19 @@ pub fn read_version(dylib_path: &AbsPath) -> io::Result<String> {
}
let version = u32::from_be_bytes([dot_rustc[4], dot_rustc[5], dot_rustc[6], dot_rustc[7]]);
// Last supported version is:
// https://github.com/rust-lang/rust/commit/0696e79f2740ad89309269b460579e548a5cd632
let snappy_portion = match version {
5 | 6 => &dot_rustc[8..],
// https://github.com/rust-lang/rust/commit/b94cfefc860715fb2adf72a6955423d384c69318
let (snappy_portion, bytes_before_version) = match version {
5 | 6 => (&dot_rustc[8..], 13),
7 | 8 => {
let len_bytes = &dot_rustc[8..12];
let data_len = u32::from_be_bytes(len_bytes.try_into().unwrap()) as usize;
&dot_rustc[12..data_len + 12]
(&dot_rustc[12..data_len + 12], 13)
}
9 => {
let len_bytes = &dot_rustc[8..16];
let data_len = u64::from_le_bytes(len_bytes.try_into().unwrap()) as usize;
(&dot_rustc[16..data_len + 12], 17)
}
_ => {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
Expand All @@ -142,15 +147,15 @@ pub fn read_version(dylib_path: &AbsPath) -> io::Result<String> {
Box::new(SnapDecoder::new(snappy_portion))
};

// the bytes before version string bytes, so this basically is:
// We're going to skip over the bytes before the version string, so basically:
// 8 bytes for [b'r',b'u',b's',b't',0,0,0,5]
// 4 bytes for [crate root bytes]
// 4 or 8 bytes for [crate root bytes]
// 1 byte for length of version string
// so 13 bytes in total, and we should check the 13th byte
// so 13 or 17 bytes in total, and we should check the last of those bytes
// to know the length
let mut bytes_before_version = [0u8; 13];
uncompressed.read_exact(&mut bytes_before_version)?;
let length = bytes_before_version[12];
let mut bytes = [0u8; 17];
uncompressed.read_exact(&mut bytes[..bytes_before_version])?;
let length = bytes[bytes_before_version - 1];

let mut version_string_utf8 = vec![0u8; length as usize];
uncompressed.read_exact(&mut version_string_utf8)?;
Expand Down

0 comments on commit 5701093

Please sign in to comment.