From b655ec62ab1ded0f264abf3076b5593caeb84ae1 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Mon, 8 Nov 2021 10:18:41 -0500 Subject: [PATCH] no_std and start adding lookup override features --- Cargo.lock | 27 ++------------- Cargo.toml | 14 ++++---- src/elf.rs | 81 +++++++++++++++++-------------------------- src/lib.rs | 80 ++++++++++++++++++++++++++++++++++++------ src/section-inject.rs | 34 ++++++++++++++++++ src/windows.rs | 55 +++++++++++++---------------- 6 files changed, 170 insertions(+), 121 deletions(-) create mode 100644 src/section-inject.rs diff --git a/Cargo.lock b/Cargo.lock index d73f1dd..c4d8ee4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,9 +27,8 @@ name = "buildid" version = "0.1.0" dependencies = [ "env_logger", - "lazy_static", "libc", - "thiserror", + "log", "tracing", "winapi", ] @@ -76,9 +75,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.106" +version = "0.2.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a60553f9a9e039a333b4e9b20573b9e9b9c0bb3a11e201ccc48ef4283456d673" +checksum = "fbe5e23404da5b4f555ef85ebed98fb4083e55a00c317800bc2a50ede9f3d219" [[package]] name = "log" @@ -156,26 +155,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "thiserror" -version = "1.0.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "tracing" version = "0.1.29" diff --git a/Cargo.toml b/Cargo.toml index f7e5836..935dabb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,18 +6,20 @@ keywords = ["no_std"] license = 
"GPL-2.0-or-later OR CDDL-1.0" description = "Examine the currently running executable to get its build-id or LC_UUID" +[features] +build-id-symbol-start-end = [] +build-id-section-inject = [] +build-id-custom-inject = [] + [dependencies] -thiserror = "1" -tracing = "0.1" +log = { version = "0.4", no-default-features = true } [target.'cfg(all(unix, not(target_vendor = "apple")))'.dependencies] -# extra_traits: give me Debug or give me death -libc = { version = "0.2", features = ["extra_traits"] } +libc = { version = "0.2" } [target.'cfg(windows)'.dependencies] winapi = { version = "0.3", features = ["libloaderapi", "winnt", "dbghelp"] } -lazy_static = "1" [dev-dependencies] env_logger = "*" -tracing = { version = "*", features = ["log"] } +tracing = { version = "0.1", features = ["log"] } diff --git a/src/elf.rs b/src/elf.rs index d045d4b..b7c0d06 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -1,39 +1,8 @@ use crate::align::align_up; -use std::convert::TryInto; -use std::mem; -use std::mem::MaybeUninit; -use thiserror::Error; -use tracing::{event, Level}; - -// NOTE: unix doesn't necessarily promise we'll have this section. We can use some functions to -// dynamically look it up instead if we have issues. -// -// NOTE: this build id does not include any dynamically linked libraries. We can get those -// build ids seperately by performing some dynamic lookups. -// -// NOTE: this works by adding a zero sized symbol to the end of the build-id section, but it's -// not entirely clear why we're always at the end of the build-id section (instead of at the -// beginning). We don't have any way to measure the size of the section, so we just have to -// assume the hash size based on what is currently in common use (which is a 20 byte/160 bit -// hash as I'm writing this). -// -// NOTE: current gcc enables build-id by default, but current clang does not. To use clang, -// ensure one does `RUSTFLAGS='-C linker=clang -Clink-arg=-Wl,--build-id'` or similar. 
-// -// NOTE: If using a toolchain without build-id enabled, junk is returned (likely the content of -// other note sections). We could do a small bit of validation by checking the note header. -/* -// This method only works if a build-id of exactly the right size is linked in. Otherwise, the -// link fails or invalid data is accessed -#[link_section = ".note.gnu.build-id"] -static NOTE_GNU_BUILD_ID_END: [u8; 0] = []; - -const BUILD_ID_LEN: usize = 20; - -pub fn build_id() -> Option<&'static [u8]> { - Some(unsafe { core::slice::from_raw_parts(NOTE_GNU_BUILD_ID_END.as_ptr().sub(BUILD_ID_LEN), BUILD_ID_LEN) }) -} -*/ +use core::mem; +use core::mem::MaybeUninit; +use core::{convert::TryInto, fmt}; +use log::error; // FIXME: dl_phdr_info references are actually unsafe here because of how glibc defines // dl_phdr_info (to have some fields present depending on the size provided (() * 3; -#[derive(Debug, Error)] +#[derive(Debug)] enum NoteError { - #[error("have {size} bytes, but need at least {MIN_NOTE_SIZE}")] MissingHeader { size: usize }, - - #[error("have {have} bytes, but need at least {need}")] Truncated { have: usize, need: usize }, } +impl fmt::Display for NoteError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::MissingHeader { size } => write!( + f, + "have {} bytes, but need at least {}", + size, MIN_NOTE_SIZE + ), + Self::Truncated { have, need } => { + write!(f, "have {} bytes, but need at least {}", have, need) + } + } + } +} + impl Note { // NOTE: the _standards_ say to use 8 byte alignment in 64-bit land. But llvm and others note // that everyone actually uses 4 byte alignment. Perfect. Hopefully this always works. 
@@ -83,7 +64,7 @@ impl Note { } fn from_bytes(data: &[u8]) -> Result<(&Self, &[u8]), NoteError> { - let u = std::mem::size_of::(); + let u = core::mem::size_of::(); if data.len() < u * 3 { return Err(NoteError::MissingHeader { size: data.len() }); } @@ -92,33 +73,33 @@ impl Note { } fn name_len(&self) -> usize { - u32::from_ne_bytes(self.data[..std::mem::size_of::()].try_into().unwrap()) as usize + u32::from_ne_bytes(self.data[..core::mem::size_of::()].try_into().unwrap()) as usize } fn desc_len(&self) -> usize { - let u = std::mem::size_of::(); + let u = core::mem::size_of::(); u32::from_ne_bytes(self.data[u..(u + u)].try_into().unwrap()) as usize } fn type_(&self) -> u32 { - let u = std::mem::size_of::(); + let u = core::mem::size_of::(); u32::from_ne_bytes(self.data[(u + u)..(u + u + u)].try_into().unwrap()) } fn name(&self) -> &[u8] { - let u = std::mem::size_of::(); + let u = core::mem::size_of::(); let b = u * 3; &self.data[b..(b + self.name_len())] } fn desc(&self) -> &[u8] { - let u = std::mem::size_of::(); + let u = core::mem::size_of::(); let b = u * 3 + align_up(self.name_len(), Self::ALIGN); &self.data[b..(b + self.desc_len())] } fn split_trailing(&self) -> Result<(&Self, &[u8]), NoteError> { - let u = std::mem::size_of::(); + let u = core::mem::size_of::(); let end = u * 3 + align_up(self.name_len(), Self::ALIGN) + align_up(self.desc_len(), Self::ALIGN); if end > self.data.len() { @@ -139,8 +120,8 @@ unsafe extern "C" fn phdr_cb( size: libc::size_t, data: *mut libc::c_void, ) -> libc::c_int { - let closure: &mut &mut dyn FnMut(&libc::dl_phdr_info, usize) -> libc::c_int = - &mut *(data as *mut &mut dyn for<'r> std::ops::FnMut(&'r libc::dl_phdr_info, usize) -> i32); + let closure: &mut &mut dyn FnMut(&libc::dl_phdr_info, usize) -> libc::c_int = &mut *(data + as *mut &mut dyn for<'r> core::ops::FnMut(&'r libc::dl_phdr_info, usize) -> i32); let info = &*info; closure(info, size) @@ -200,7 +181,7 @@ impl<'a> NoteIter<'a> { let segment = unsafe { // FIXME: 
consider p_memsz vs p_filesz question here. // llvm appears to use filesz - std::slice::from_raw_parts(segment_base, phdr.p_filesz as usize) + core::slice::from_raw_parts(segment_base, phdr.p_filesz as usize) }; Some(NoteIter { segment }) } @@ -272,7 +253,7 @@ pub fn build_id() -> Option<&'static [u8]> { for note in ni { let note = match note { Err(e) => { - event!(Level::ERROR, "note program segment had invalid note {}", e); + error!("note program segment had invalid note {}", e); continue 'phdr; } Ok(v) => v, diff --git a/src/lib.rs b/src/lib.rs index b66cae2..b6d01e8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,11 +15,38 @@ //! build-id is also used by `mesa` as a key component in their caching of shaders (changes in the //! build-id cause the cache to be discarded). //! +//! By default, the `buildid` crate will pick the best build-id lookup function it can for your +//! platform. If one is not available, it may fail to compile. If you have a custom build-id lookup +//! mechanism you want to tell `buildid` about, enabling one of the features may help. +//! +//! # Features +//! +//! For all of the build-id lookup customization features, we recommend only setting them in +//! top-level crates that have a complete understanding of the final link step for the executable. +//! +//! - `build-id-section-inject`: if enabled, inject our own symbol into the section where build id +//! is expected to be located, and use the build-time environment variable `BUILD_ID_LEN` to +//! determine how many bytes to read. This method will only function on some platforms +//! (basically: GNU ones). Note that `BUILD_ID_LEN` must be set correctly, and differs for GNU +//! ld (bfd) and LLVM lld. This method takes precedence over the default lookup methods if +//! enabled. +//! - `build-id-symbol-start-end`: if enabled, assume the presence of 2 symbols named "__build_id_start" and +//! "__build_id_end", and use these to find the build-id. Normally, one can provide the symbols +//! 
by using a custom ldscript (linker script). This method takes precedence over automatically +//! enabled build-id lookup methods, and over `build-id-section-inject`. +//! - `build-id-custom-inject`: if enabled, assume that a function `int build_id__get(unsigned +//! char **build_id, size_t *len)` is provided (with C linkage) that can locate and return the +//! build-id. The `build_id__get` must return `1` if a build-id is located (and modify the +//! `build_id` and `len` arguments to point to the memory containing the build-id and to contain +//! the number of bytes in the build-id respectively), return `0` if no build-id exists, and +//! return a negative error code if an unexpected error occurred. This method takes precedence +//! over all other build-id lookup methods (if enabled). +//! //! # Platform Details //! -//! - On unix variants other than those with apple as the vendor, the `.note.gnu.build-id` is used -//! and returned as a slice. Note that GNU LD and LLD generate different sized build-ids using -//! different hash functions. +//! - On unix variants other than those with apple as the vendor, the `.note.gnu.build-id` is +//! used. Note that GNU LD and LLD generate different sized build-ids using different hash +//! functions. //! - On Apple unix variants (MacOS), the `LC_UUID` (loader command uuid) is returned directly as //! a slice. //! - On windows, the module is parsed for a CodeView descriptor containing a GUID (which is @@ -33,23 +60,57 @@ //! RUSTFLAGS="-Clink-args=-Wl,--build-id" in the environment before running cargo. The same //! argument works for any system using GNU LD or compatible. //! +//! - On most linux platforms, build-id is enabled by default by gcc. Sometimes clang on the same +//! platform does not have build-id enabled though. Set `RUSTFLAGS="-Clink-args=-Wl,--build-id"` +//! to ensure build id is enabled for clang or gcc +//! +//! - MacOS appears to enable build-id (LC_UUID) by default, with no change needed. +//! 
- Windows MSVC appears to enable build-id (CodeView GUID) by default, with no change needed. +#![no_std] + +#[cfg(feature = "build-id-section-inject")] +#[path = "section-inject.rs"] +mod target; -#[cfg(all(target_family = "unix", not(target_vendor = "apple")))] +#[cfg(all( + target_family = "unix", + not(target_vendor = "apple"), + not(feature = "build-id-section-inject"), + not(feature = "build-id-symbol-start-end") +))] #[path = "elf.rs"] mod target; -#[cfg(all(target_family = "unix", not(target_vendor = "apple")))] +#[cfg(all( + target_family = "unix", + not(target_vendor = "apple"), + not(feature = "build-id-section-inject"), + not(feature = "build-id-symbol-start-end") +))] mod align; -#[cfg(all(target_family = "unix", target_vendor = "apple"))] +#[cfg(all( + target_family = "unix", + target_vendor = "apple", + not(feature = "build-id-section-inject"), + not(feature = "build-id-symbol-start-end") +))] #[path = "mach.rs"] mod target; -#[cfg(target_family = "windows")] +#[cfg(all( + target_family = "windows", + not(feature = "build-id-section-inject"), + not(feature = "build-id-symbol-start-end") +))] #[path = "windows.rs"] mod target; -#[cfg(target_family = "wasm")] +#[cfg(all( + target_family = "wasm", + not(feature = "build-id-section-inject"), + not(feature = "build-id-symbol-start-end") +))] mod target { pub fn build_id() -> Option<&'static [u8]> { // not sure how to implement this right now. 
need to introspect the wasm object in some way @@ -57,9 +118,6 @@ mod target { } } -// TODO: provide a feature that allows using known symbols -// TODO: provide a feature that allows injecting a symbol into a particular section - /// If present, return the build-id or platform equivalent pub fn build_id() -> Option<&'static [u8]> { target::build_id() diff --git a/src/section-inject.rs b/src/section-inject.rs new file mode 100644 index 0000000..329d904 --- /dev/null +++ b/src/section-inject.rs @@ -0,0 +1,34 @@ +// NOTE: unix doesn't necessarily promise we'll have this section. We can use some functions to +// dynamically look it up instead if we have issues. +// +// NOTE: this build id does not include any dynamically linked libraries. We can get those +// build ids separately by performing some dynamic lookups. +// +// NOTE: this works by adding a zero sized symbol to the end of the build-id section, but it's +// not entirely clear why we're always at the end of the build-id section (instead of at the +// beginning). We don't have any way to measure the size of the section, so we just have to +// assume the hash size based on what is currently in common use (which is a 20 byte/160 bit +// hash as I'm writing this). +// +// NOTE: current gcc enables build-id by default, but current clang does not. To use clang, +// ensure one does `RUSTFLAGS='-C linker=clang -Clink-arg=-Wl,--build-id'` or similar. +// +// NOTE: If using a toolchain without build-id enabled, junk is returned (likely the content of +// other note sections). We could do a small bit of validation by checking the note header. +// +// This method only works if a build-id of exactly the right size is linked in. 
Otherwise, the +// link fails or invalid data is accessed +#[link_section = ".note.gnu.build-id"] +static NOTE_GNU_BUILD_ID_END: [u8; 0] = []; + +// 20 for GNU +const BUILD_ID_LEN: usize = env!("BUILD_ID_LEN").parse().unwrap(); + +pub fn build_id() -> Option<&'static [u8]> { + Some(unsafe { + core::slice::from_raw_parts( + NOTE_GNU_BUILD_ID_END.as_ptr().sub(BUILD_ID_LEN), + BUILD_ID_LEN, + ) + }) +} diff --git a/src/windows.rs b/src/windows.rs index c188501..c5fd23d 100644 --- a/src/windows.rs +++ b/src/windows.rs @@ -1,5 +1,4 @@ -use lazy_static::lazy_static; -use tracing::{event, Level}; +use log::error; use winapi::um::dbghelp::ImageNtHeader; use winapi::um::libloaderapi::GetModuleHandleA; use winapi::um::winnt::IMAGE_DEBUG_DIRECTORY; @@ -14,38 +13,34 @@ struct CV_INFO_PDB70 { // followed by pdb name } -lazy_static! { - // This primarily exists as a hack to allow us to return a `&'static [u8]` - static ref BUILD_ID_CACHE: Option<&'static [u8]> = { - let module = unsafe { GetModuleHandleA(core::ptr::null_mut()) }; - event!(Level::TRACE, "module {:#x}", module as usize); +pub fn build_id() -> Option<&'static [u8]> { + let module = unsafe { GetModuleHandleA(core::ptr::null_mut()) }; - let image_nt_header = unsafe { &*ImageNtHeader(module as _) }; + let image_nt_header = unsafe { &*ImageNtHeader(module as _) }; - let opt_header = &image_nt_header.OptionalHeader; - let dir = &opt_header.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG as usize]; - if dir.Size == 0 { - event!(Level::ERROR, "IMAGE_DIRECTORY_ENTRY_DEBUG is empty"); - return None; - } + let opt_header = &image_nt_header.OptionalHeader; + let dir = &opt_header.DataDirectory[IMAGE_DIRECTORY_ENTRY_DEBUG as usize]; + if dir.Size == 0 { + error!("IMAGE_DIRECTORY_ENTRY_DEBUG is empty"); + return None; + } - let dbg_dir = unsafe { &*((module as usize + dir.VirtualAddress as usize) as *const IMAGE_DEBUG_DIRECTORY) }; + let dbg_dir = unsafe { + &*((module as usize + dir.VirtualAddress as usize) as *const 
IMAGE_DEBUG_DIRECTORY) + }; - if dbg_dir.Type == IMAGE_DEBUG_TYPE_CODEVIEW { - let pdb_info = unsafe { &*((module as usize + dbg_dir.AddressOfRawData as usize) as *const CV_INFO_PDB70) }; - if pdb_info.cv_signature != 0x53445352 { - event!(Level::ERROR, "mismatch sig: {:#x}", pdb_info.cv_signature); - None - } else { - Some(&pdb_info.signature[..]) - } - } else { - event!(Level::ERROR, "wrong image type {:#x}", dbg_dir.Type); + if dbg_dir.Type == IMAGE_DEBUG_TYPE_CODEVIEW { + let pdb_info = unsafe { + &*((module as usize + dbg_dir.AddressOfRawData as usize) as *const CV_INFO_PDB70) + }; + if pdb_info.cv_signature != 0x53445352 { + error!("mismatch sig: {:#x}", pdb_info.cv_signature); None + } else { + Some(&pdb_info.signature[..]) } - }; -} - -pub fn build_id() -> Option<&'static [u8]> { - *BUILD_ID_CACHE + } else { + error!("wrong image type {:#x}", dbg_dir.Type); + None + } }