Skip to content

Commit

Permalink
no_std and start adding lookup override features
Browse files Browse the repository at this point in the history
  • Loading branch information
codyps committed Nov 8, 2021
1 parent 874044c commit b655ec6
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 121 deletions.
27 changes: 3 additions & 24 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 8 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,20 @@ keywords = ["no_std"]
license = "GPL-2.0-or-later OR CDDL-1.0"
description = "Examine the currently running executable to get its build-id or LC_UUID"

[features]
build-id-symbol-start-end = []
build-id-section-inject = []
build-id-custom-inject = []

[dependencies]
thiserror = "1"
tracing = "0.1"
# Cargo's key for opting out of a dependency's default features is `default-features = false`;
# `no-default-features` is not a manifest key and is ignored with an "unused manifest key" warning.
log = { version = "0.4", default-features = false }

[target.'cfg(all(unix, not(target_vendor = "apple")))'.dependencies]
# extra_traits: give me Debug or give me death
libc = { version = "0.2", features = ["extra_traits"] }
libc = { version = "0.2" }

[target.'cfg(windows)'.dependencies]
winapi = { version = "0.3", features = ["libloaderapi", "winnt", "dbghelp"] }
lazy_static = "1"

[dev-dependencies]
env_logger = "*"
tracing = { version = "*", features = ["log"] }
tracing = { version = "0.1", features = ["log"] }
81 changes: 31 additions & 50 deletions src/elf.rs
Original file line number Diff line number Diff line change
@@ -1,39 +1,8 @@
use crate::align::align_up;
use std::convert::TryInto;
use std::mem;
use std::mem::MaybeUninit;
use thiserror::Error;
use tracing::{event, Level};

// NOTE: unix doesn't necessarily promise we'll have this section. We can use some functions to
// dynamically look it up instead if we have issues.
//
// NOTE: this build id does not include any dynamically linked libraries. We can get those
// build ids separately by performing some dynamic lookups.
//
// NOTE: this works by adding a zero sized symbol to the end of the build-id section, but it's
// not entirely clear why we're always at the end of the build-id section (instead of at the
// beginning). We don't have any way to measure the size of the section, so we just have to
// assume the hash size based on what is currently in common use (which is a 20 byte/160 bit
// hash as I'm writing this).
//
// NOTE: current gcc enables build-id by default, but current clang does not. To use clang,
// ensure one does `RUSTFLAGS='-C linker=clang -Clink-arg=-Wl,--build-id'` or similar.
//
// NOTE: If using a toolchain without build-id enabled, junk is returned (likely the content of
// other note sections). We could do a small bit of validation by checking the note header.
/*
// This method only works if a build-id of exactly the right size is linked in. Otherwise, the
// link fails or invalid data is accessed
#[link_section = ".note.gnu.build-id"]
static NOTE_GNU_BUILD_ID_END: [u8; 0] = [];
const BUILD_ID_LEN: usize = 20;
pub fn build_id() -> Option<&'static [u8]> {
Some(unsafe { core::slice::from_raw_parts(NOTE_GNU_BUILD_ID_END.as_ptr().sub(BUILD_ID_LEN), BUILD_ID_LEN) })
}
*/
use core::mem;
use core::mem::MaybeUninit;
use core::{convert::TryInto, fmt};
use log::error;

// FIXME: dl_phdr_info references are actually unsafe here because of how glibc defines
// dl_phdr_info (to have some fields present depending on the size provided (<glibc-2.4 omits
Expand Down Expand Up @@ -63,15 +32,27 @@ struct Note {

// Smallest number of bytes that can hold a note header. The header is three 32-bit words (the
// name length, the descriptor length and the type, as read by the `Note` accessors), so the size
// is independent of the pointer width and matches the length check in `Note::from_bytes`.
const MIN_NOTE_SIZE: usize = mem::size_of::<u32>() * 3;

#[derive(Debug, Error)]
#[derive(Debug)]
enum NoteError {
#[error("have {size} bytes, but need at least {MIN_NOTE_SIZE}")]
MissingHeader { size: usize },

#[error("have {have} bytes, but need at least {need}")]
Truncated { have: usize, need: usize },
}

impl fmt::Display for NoteError {
    /// Render the error as "have N bytes, but need at least M".
    ///
    /// Both variants share the same wording; they only differ in where the required size comes
    /// from (`MIN_NOTE_SIZE` for a missing header, the recorded `need` for a truncated note).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (available, required) = match *self {
            Self::MissingHeader { size } => (size, MIN_NOTE_SIZE),
            Self::Truncated { have, need } => (have, need),
        };
        write!(
            f,
            "have {} bytes, but need at least {}",
            available, required
        )
    }
}

impl Note {
// NOTE: the _standards_ say to use 8 byte alignment in 64-bit land. But llvm and others note
// that everyone actually uses 4 byte alignment. Perfect. Hopefully this always works.
Expand All @@ -83,7 +64,7 @@ impl Note {
}

fn from_bytes(data: &[u8]) -> Result<(&Self, &[u8]), NoteError> {
let u = std::mem::size_of::<u32>();
let u = core::mem::size_of::<u32>();
if data.len() < u * 3 {
return Err(NoteError::MissingHeader { size: data.len() });
}
Expand All @@ -92,33 +73,33 @@ impl Note {
}

fn name_len(&self) -> usize {
u32::from_ne_bytes(self.data[..std::mem::size_of::<u32>()].try_into().unwrap()) as usize
u32::from_ne_bytes(self.data[..core::mem::size_of::<u32>()].try_into().unwrap()) as usize
}

fn desc_len(&self) -> usize {
let u = std::mem::size_of::<u32>();
let u = core::mem::size_of::<u32>();
u32::from_ne_bytes(self.data[u..(u + u)].try_into().unwrap()) as usize
}

fn type_(&self) -> u32 {
let u = std::mem::size_of::<u32>();
let u = core::mem::size_of::<u32>();
u32::from_ne_bytes(self.data[(u + u)..(u + u + u)].try_into().unwrap())
}

fn name(&self) -> &[u8] {
let u = std::mem::size_of::<u32>();
let u = core::mem::size_of::<u32>();
let b = u * 3;
&self.data[b..(b + self.name_len())]
}

fn desc(&self) -> &[u8] {
let u = std::mem::size_of::<u32>();
let u = core::mem::size_of::<u32>();
let b = u * 3 + align_up(self.name_len(), Self::ALIGN);
&self.data[b..(b + self.desc_len())]
}

fn split_trailing(&self) -> Result<(&Self, &[u8]), NoteError> {
let u = std::mem::size_of::<u32>();
let u = core::mem::size_of::<u32>();
let end =
u * 3 + align_up(self.name_len(), Self::ALIGN) + align_up(self.desc_len(), Self::ALIGN);
if end > self.data.len() {
Expand All @@ -139,8 +120,8 @@ unsafe extern "C" fn phdr_cb(
size: libc::size_t,
data: *mut libc::c_void,
) -> libc::c_int {
let closure: &mut &mut dyn FnMut(&libc::dl_phdr_info, usize) -> libc::c_int =
&mut *(data as *mut &mut dyn for<'r> std::ops::FnMut(&'r libc::dl_phdr_info, usize) -> i32);
let closure: &mut &mut dyn FnMut(&libc::dl_phdr_info, usize) -> libc::c_int = &mut *(data
as *mut &mut dyn for<'r> core::ops::FnMut(&'r libc::dl_phdr_info, usize) -> i32);
let info = &*info;

closure(info, size)
Expand Down Expand Up @@ -200,7 +181,7 @@ impl<'a> NoteIter<'a> {
let segment = unsafe {
// FIXME: consider p_memsz vs p_filesz question here.
// llvm appears to use filesz
std::slice::from_raw_parts(segment_base, phdr.p_filesz as usize)
core::slice::from_raw_parts(segment_base, phdr.p_filesz as usize)
};
Some(NoteIter { segment })
}
Expand Down Expand Up @@ -272,7 +253,7 @@ pub fn build_id() -> Option<&'static [u8]> {
for note in ni {
let note = match note {
Err(e) => {
event!(Level::ERROR, "note program segment had invalid note {}", e);
error!("note program segment had invalid note {}", e);
continue 'phdr;
}
Ok(v) => v,
Expand Down
80 changes: 69 additions & 11 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,38 @@
//! build-id is also used by `mesa` as a key component in their caching of shaders (changes in the
//! build-id cause the cache to be discarded).
//!
//! By default, the `buildid` crate will pick the best build-id lookup function it can for your
//! platform. If one is not available, it may fail to compile. If you have a custom build-id lookup
//! mechanism you want to tell `buildid` about, enabling one of the features may help.
//!
//! # Features
//!
//! For all of the build-id lookup customization features, we recommend only setting them in
//! top-level crates that have a complete understanding of the final link step for the executable.
//!
//! - `build-id-section-inject`: if enabled, inject our own symbol into the section where build id
//! is expected to be located, and use the build-time environment variable `BUILD_ID_LEN` to
//! determine how many bytes to read. This method will only function on some platforms
//! (basically: GNU ones). Note that `BUILD_ID_LEN` must be set correctly, and differs for GNU
//! ld (bfd) and LLVM lld. This method takes precedence over the default lookup methods if
//! enabled.
//! - `build-id-symbol-start-end`: if enabled, assume the presence of 2 symbols named "__build_id_start" and
//! "__build_id_end", and use these to find the build-id. Normally, one can provide the symbols
//! by using a custom ldscript (linker script). This method takes precedence over automatically
//! enabled build-id lookup methods, and over `build-id-section-inject`.
//! - `build-id-custom-inject`: if enabled, assume that a function `int build_id__get(unsigned
//! char **build_id, size_t *len)` is provided (with C linkage) that can locate and return the
//! build-id. The `build_id__get` must return `1` if a build-id is located (and modify the
//! `build_id` and `len` arguments to point to the memory containing the build-id and to contain
//! the number of bytes in the build-id respectively), return `0` if no build-id exists, and
//! return a negative error code if an unexpected error occurred. This method takes precedence
//! over all other build-id lookup methods (if enabled).
//!
//! # Platform Details
//!
//! - On unix variants other than those with apple as the vendor, the `.note.gnu.build-id` is used
//! and returned as a slice. Note that GNU LD and LLD generate different sized build-ids using
//! different hash functions.
//! - On unix variants other than those with apple as the vendor, the `.note.gnu.build-id` is
//! used. Note that GNU LD and LLD generate different sized build-ids using different hash
//! functions.
//! - On Apple unix variants (MacOS), the `LC_UUID` (loader command uuid) is returned directly as
//! a slice.
//! - On windows, the module is parsed for a CodeView descriptor containing a GUID (which is
Expand All @@ -33,33 +60,64 @@
//! RUSTFLAGS="-Clink-args=-Wl,--build-id" in the environment before running cargo. The same
//! argument works for any system using GNU LD or compatible.
//!
//! - On most linux platforms, build-id is enabled by default by gcc. Sometimes clang on the same
//! platform does not have build-id enabled though. Set `RUSTFLAGS="-Clink-args=-Wl,--build-id"`
//! to ensure build id is enabled for clang or gcc
//!
//! - MacOS appears to enable build-id (LC_UUID) by default, with no change needed.
//! - Windows MSVC appears to enable build-id (CodeView GUID) by default, with no change needed.
#![no_std]

#[cfg(feature = "build-id-section-inject")]
#[path = "section-inject.rs"]
mod target;

#[cfg(all(target_family = "unix", not(target_vendor = "apple")))]
#[cfg(all(
target_family = "unix",
not(target_vendor = "apple"),
not(feature = "build-id-section-inject"),
not(feature = "build-id-symbol-start-end")
))]
#[path = "elf.rs"]
mod target;

#[cfg(all(target_family = "unix", not(target_vendor = "apple")))]
#[cfg(all(
target_family = "unix",
not(target_vendor = "apple"),
not(feature = "build-id-section-inject"),
not(feature = "build-id-symbol-start-end")
))]
mod align;

#[cfg(all(target_family = "unix", target_vendor = "apple"))]
#[cfg(all(
target_family = "unix",
target_vendor = "apple",
not(feature = "build-id-section-inject"),
not(feature = "build-id-symbol-start-end")
))]
#[path = "mach.rs"]
mod target;

#[cfg(target_family = "windows")]
#[cfg(all(
target_family = "windows",
not(feature = "build-id-section-inject"),
not(feature = "build-id-symbol-start-end")
))]
#[path = "windows.rs"]
mod target;

#[cfg(target_family = "wasm")]
#[cfg(all(
target_family = "wasm",
not(feature = "build-id-section-inject"),
not(feature = "build-id-symbol-start-end")
))]
mod target {
    /// Build-id lookup stub for wasm targets.
    ///
    /// Always returns `None`: finding a build-id would require introspecting the wasm object in
    /// some way, and that is not implemented right now.
    pub fn build_id() -> Option<&'static [u8]> {
        None
    }
}

// TODO: provide a feature that allows using known symbols
// TODO: provide a feature that allows injecting a symbol into a particular section

/// If present, return the build-id or platform equivalent
pub fn build_id() -> Option<&'static [u8]> {
target::build_id()
Expand Down
34 changes: 34 additions & 0 deletions src/section-inject.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// NOTE: unix doesn't necessarily promise we'll have this section. We can use some functions to
// dynamically look it up instead if we have issues.
//
// NOTE: this build id does not include any dynamically linked libraries. We can get those
// build ids separately by performing some dynamic lookups.
//
// NOTE: this works by adding a zero sized symbol to the end of the build-id section, but it's
// not entirely clear why we're always at the end of the build-id section (instead of at the
// beginning). We don't have any way to measure the size of the section, so we just have to
// assume the hash size based on what is currently in common use (which is a 20 byte/160 bit
// hash as I'm writing this).
//
// NOTE: current gcc enables build-id by default, but current clang does not. To use clang,
// ensure one does `RUSTFLAGS='-C linker=clang -Clink-arg=-Wl,--build-id'` or similar.
//
// NOTE: If using a toolchain without build-id enabled, junk is returned (likely the content of
// other note sections). We could do a small bit of validation by checking the note header.
//
// This method only works if a build-id of exactly the right size is linked in. Otherwise, the
// link fails or invalid data is accessed
// Zero-sized marker emitted into the build-id note section. `build_id()` reads the
// `BUILD_ID_LEN` bytes located immediately before this symbol's address.
#[link_section = ".note.gnu.build-id"]
static NOTE_GNU_BUILD_ID_END: [u8; 0] = [];

/// Parse a base-10 `usize` during constant evaluation.
///
/// `str::parse` is not a `const fn`, so it cannot be used to initialize a `const`. This helper
/// accepts a non-empty string of ASCII digits only (no sign, no whitespace) and aborts the build
/// with a descriptive message on anything else, or if the value overflows `usize`.
const fn parse_build_id_len(text: &str) -> usize {
    let digits = text.as_bytes();
    if digits.is_empty() {
        panic!("BUILD_ID_LEN must not be empty");
    }

    let mut value: usize = 0;
    let mut index = 0;
    // `for` loops and iterators are not usable in `const fn`, hence the manual index loop.
    while index < digits.len() {
        let byte = digits[index];
        if !byte.is_ascii_digit() {
            panic!("BUILD_ID_LEN must contain only decimal digits");
        }
        let digit = (byte - b'0') as usize;
        value = match value.checked_mul(10) {
            Some(scaled) => match scaled.checked_add(digit) {
                Some(sum) => sum,
                None => panic!("BUILD_ID_LEN does not fit in usize"),
            },
            None => panic!("BUILD_ID_LEN does not fit in usize"),
        };
        index += 1;
    }
    value
}

// Number of build-id bytes to read, taken from the `BUILD_ID_LEN` environment variable at build
// time (the build fails if it is unset or not a decimal number).
// 20 for GNU
const BUILD_ID_LEN: usize = parse_build_id_len(env!("BUILD_ID_LEN"));

/// Return the `BUILD_ID_LEN` bytes that precede `NOTE_GNU_BUILD_ID_END` as the build-id.
///
/// This always returns `Some`; no validation of the bytes is performed.
pub fn build_id() -> Option<&'static [u8]> {
    // SAFETY: not checked here. This relies on the linker having placed a build-id of exactly
    // `BUILD_ID_LEN` bytes directly before `NOTE_GNU_BUILD_ID_END` in the same section; if the
    // toolchain emits no build-id or one of a different size, the slice covers unrelated data.
    Some(unsafe {
        core::slice::from_raw_parts(
            NOTE_GNU_BUILD_ID_END.as_ptr().sub(BUILD_ID_LEN),
            BUILD_ID_LEN,
        )
    })
}
Loading

0 comments on commit b655ec6

Please sign in to comment.