From 44b09db02c896d12812ee0df104ee7985e84d095 Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Mon, 17 May 2021 23:34:11 -0400 Subject: [PATCH 1/2] Add MachOFile::parse_at_offset. This allows parsing Mach-O images inside dyld shared cache files (#268): The dyld shared cache contains multiple images at different offsets; all these images share the same address space for absolute offsets such as symoff. Due to these absolute offsets, one cannot just parse the images by subsetting the input slice and parsing at header offset zero. This patch is a breaking change because it adds a header_offset argument to the MachHeader methods load_commands and uuid, and MachHeader is part of the public API. --- examples/readobj.rs | 2 +- src/read/macho/file.rs | 46 ++++++++++++++++++++++++++++++--------- tests/round_trip/macho.rs | 2 +- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/examples/readobj.rs b/examples/readobj.rs index 6de868d0..8a790756 100644 --- a/examples/readobj.rs +++ b/examples/readobj.rs @@ -3248,7 +3248,7 @@ mod macho { if let Ok(endian) = header.endian() { let mut state = MachState::default(); print_mach_header(p, endian, header); - if let Ok(mut commands) = header.load_commands(endian, data) { + if let Ok(mut commands) = header.load_commands(endian, data, 0) { while let Ok(Some(command)) = commands.next() { print_load_command(p, endian, data, header, command, &mut state); } diff --git a/src/read/macho/file.rs b/src/read/macho/file.rs index 655c58fd..a5c9b84d 100644 --- a/src/read/macho/file.rs +++ b/src/read/macho/file.rs @@ -33,6 +33,7 @@ where { pub(super) endian: Mach::Endian, pub(super) data: R, + pub(super) header_offset: u64, pub(super) header: &'data Mach, pub(super) sections: Vec>, pub(super) symbols: SymbolTable<'data, Mach>, @@ -45,13 +46,21 @@ where { /// Parse the raw Mach-O file data. 
pub fn parse(data: R) -> Result { - let header = Mach::parse(data)?; + Self::parse_at_offset(data, 0) + } + + /// Parse the raw Mach-O file data at an arbitrary offset inside the input data. + /// This can be used for parsing Mach-O images inside the dyld shared cache, + /// where multiple images, located at different offsets, share the same address + /// space. + pub fn parse_at_offset(data: R, header_offset: u64) -> Result { + let header = Mach::parse_at_offset(data, header_offset)?; let endian = header.endian()?; let mut symbols = SymbolTable::default(); // Build a list of sections to make some operations more efficient. let mut sections = Vec::new(); - if let Ok(mut commands) = header.load_commands(endian, data) { + if let Ok(mut commands) = header.load_commands(endian, data, header_offset) { while let Ok(Some(command)) = commands.next() { if let Some((segment, section_data)) = Mach::Segment::from_command(command)? { for section in segment.sections(endian, section_data)? { @@ -67,6 +76,7 @@ where Ok(MachOFile { endian, header, + header_offset, sections, symbols, data, @@ -137,7 +147,7 @@ where file: self, commands: self .header - .load_commands(self.endian, self.data) + .load_commands(self.endian, self.data, self.header_offset) .ok() .unwrap_or_else(Default::default), } @@ -240,7 +250,9 @@ where if twolevel { libraries.push(&[][..]); } - let mut commands = self.header.load_commands(self.endian, self.data)?; + let mut commands = self + .header + .load_commands(self.endian, self.data, self.header_offset)?; while let Some(command) = commands.next()? { if let Some(command) = command.dysymtab()? { dysymtab = Some(command); @@ -278,7 +290,9 @@ where fn exports(&self) -> Result>> { let mut dysymtab = None; - let mut commands = self.header.load_commands(self.endian, self.data)?; + let mut commands = self + .header + .load_commands(self.endian, self.data, self.header_offset)?; while let Some(command) = commands.next()? { if let Some(command) = command.dysymtab()? 
{ dysymtab = Some(command); @@ -313,11 +327,14 @@ where } fn mach_uuid(&self) -> Result> { - self.header.uuid(self.endian, self.data) + self.header.uuid(self.endian, self.data, self.header_offset) } fn entry(&self) -> u64 { - if let Ok(mut commands) = self.header.load_commands(self.endian, self.data) { + if let Ok(mut commands) = + self.header + .load_commands(self.endian, self.data, self.header_offset) + { while let Ok(Some(command)) = commands.next() { if let Ok(Some(command)) = command.entry_point() { return command.entryoff.get(self.endian); @@ -481,8 +498,15 @@ pub trait MachHeader: Debug + Pod { /// /// Also checks that the magic field in the file header is a supported format. fn parse<'data, R: ReadRef<'data>>(data: R) -> read::Result<&'data Self> { + Self::parse_at_offset(data, 0) + } + + fn parse_at_offset<'data, R: ReadRef<'data>>( + data: R, + offset: u64, + ) -> read::Result<&'data Self> { let header = data - .read_at::(0) + .read_at::(offset) .read_error("Invalid Mach-O header size or alignment")?; if !header.is_supported() { return Err(Error("Unsupported Mach-O header")); @@ -502,10 +526,11 @@ pub trait MachHeader: Debug + Pod { &self, endian: Self::Endian, data: R, + header_offset: u64, ) -> Result> { let data = data .read_bytes_at( - mem::size_of::() as u64, + header_offset + mem::size_of::() as u64, self.sizeofcmds(endian).into(), ) .read_error("Invalid Mach-O load command table size")?; @@ -517,8 +542,9 @@ pub trait MachHeader: Debug + Pod { &self, endian: Self::Endian, data: R, + header_offset: u64, ) -> Result> { - let mut commands = self.load_commands(endian, data)?; + let mut commands = self.load_commands(endian, data, header_offset)?; while let Some(command) = commands.next()? 
{ if let Ok(Some(uuid)) = command.uuid() { return Ok(Some(uuid.uuid)); diff --git a/tests/round_trip/macho.rs b/tests/round_trip/macho.rs index cc1ac8e5..df1f60e4 100644 --- a/tests/round_trip/macho.rs +++ b/tests/round_trip/macho.rs @@ -16,7 +16,7 @@ fn issue_286_segment_file_size() { let bytes = &*object.write().unwrap(); let header = macho::MachHeader64::parse(bytes).unwrap(); let endian: Endianness = header.endian().unwrap(); - let mut commands = header.load_commands(endian, bytes).unwrap(); + let mut commands = header.load_commands(endian, bytes, 0).unwrap(); let command = commands.next().unwrap().unwrap(); let (segment, _) = command.segment_64().unwrap().unwrap(); assert_eq!(segment.vmsize.get(endian), 30); From 7e12d28e7885130746d1ca5330d13e50045754fc Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Sun, 16 May 2021 22:03:27 -0400 Subject: [PATCH 2/2] Add DyldCache. This implements just enough to get the path and header offset of each contained image. It also adds a function to get an "any" File object for the image, so that the caller doesn't need to write code twice for 32 and 64 bit images and can instead benefit from the enum-based dynamic dispatch. This commit also adds two "examples", for printing the list of images in the cache and for dumping an object from inside the cache. 
--- examples/dyldcachedump.rs | 51 +++++++++ examples/dyldcacheobjdump.rs | 162 ++++++++++++++++++++++++++ src/macho.rs | 59 ++++++++++ src/read/any.rs | 16 +++ src/read/macho/dyld_cache.rs | 214 +++++++++++++++++++++++++++++++++++ src/read/macho/mod.rs | 3 + 6 files changed, 505 insertions(+) create mode 100644 examples/dyldcachedump.rs create mode 100644 examples/dyldcacheobjdump.rs create mode 100644 src/read/macho/dyld_cache.rs diff --git a/examples/dyldcachedump.rs b/examples/dyldcachedump.rs new file mode 100644 index 00000000..a26026bf --- /dev/null +++ b/examples/dyldcachedump.rs @@ -0,0 +1,51 @@ +use object::read::macho::DyldCache; +use object::Endianness; +use std::{env, fs, process}; + +fn main() { + let arg_len = env::args().len(); + if arg_len <= 1 { + eprintln!("Usage: {} ...", env::args().next().unwrap()); + process::exit(1); + } + + for file_path in env::args().skip(1) { + if arg_len > 2 { + println!(); + println!("{}:", file_path); + } + + let file = match fs::File::open(&file_path) { + Ok(file) => file, + Err(err) => { + println!("Failed to open file '{}': {}", file_path, err,); + continue; + } + }; + let file = match unsafe { memmap2::Mmap::map(&file) } { + Ok(mmap) => mmap, + Err(err) => { + println!("Failed to map file '{}': {}", file_path, err,); + continue; + } + }; + let cache = match DyldCache::::parse(&*file) { + Ok(cache) => cache, + Err(err) => { + println!( + "Failed to parse Dyld shared cache file '{}': {}", + file_path, err, + ); + continue; + } + }; + + // Print the list of image paths in this file. 
+ let mut images = cache.iter_images(); + while let Ok(Some(image)) = images.next() { + if let Ok(path) = image.path() { + println!("{}", path); + } + } + } +} diff --git a/examples/dyldcacheobjdump.rs b/examples/dyldcacheobjdump.rs new file mode 100644 index 00000000..41c11e48 --- /dev/null +++ b/examples/dyldcacheobjdump.rs @@ -0,0 +1,162 @@ +use object::read::macho::{DyldCache, DyldCacheImage}; +use object::{Endianness, File, Object, ObjectComdat, ObjectSection, ObjectSymbol, ReadRef}; +use std::{env, fs, process}; + +fn main() { + let arg_len = env::args().len(); + if arg_len < 3 { + // E.g. dyldcacheobjdump /System/Library/dyld/dyld_shared_cache_x86_64 /System/Library/Frameworks/AppKit.framework/Versions/C/AppKit + eprintln!( + "Usage: {} ...", + env::args().next().unwrap() + ); + process::exit(1); + } + + let mut path_iter = env::args().skip(1); + let cache_path = path_iter.next().unwrap(); + + let file = match fs::File::open(&cache_path) { + Ok(file) => file, + Err(err) => { + println!("Failed to open file '{}': {}", cache_path, err,); + process::exit(1); + } + }; + let file = match unsafe { memmap2::Mmap::map(&file) } { + Ok(mmap) => mmap, + Err(err) => { + println!("Failed to map file '{}': {}", cache_path, err,); + process::exit(1); + } + }; + let cache = match DyldCache::::parse(&*file) { + Ok(cache) => cache, + Err(err) => { + println!( + "Failed to parse Dyld shared cache file '{}': {}", + cache_path, err, + ); + process::exit(1); + } + }; + + for dylib_path in path_iter { + if arg_len > 3 { + println!(); + println!("{}:", dylib_path); + } + + let image = match find_image(&cache, &dylib_path) { + Some(image) => image, + None => { + println!( + "Could not find dylib path in shared cache file '{}': {}", + cache_path, dylib_path, + ); + continue; + } + }; + + let file = match image.parse_object() { + Ok(file) => file, + Err(err) => { + println!("Failed to parse Mach-O image '{}': {}", dylib_path, err); + process::exit(1); + } + }; + dump_object(&file); + 
} +} + +fn find_image<'data>(cache: &DyldCache<'data>, path: &str) -> Option> { + let mut images = cache.iter_images(); + while let Ok(Some(image)) = images.next() { + if image.path() == Ok(path) { + return Some(image); + } + } + None +} + +fn dump_object<'data, R>(file: &File<'data, R>) +where + R: ReadRef<'data>, +{ + println!( + "Format: Mach-O {:?}-endian {}-bit", + file.endianness(), + if file.is_64() { "64" } else { "32" } + ); + println!("Architecture: {:?}", file.architecture()); + println!("Flags: {:x?}", file.flags()); + + match file.mach_uuid() { + Ok(Some(uuid)) => println!("Mach UUID: {:x?}", uuid), + Ok(None) => {} + Err(e) => println!("Failed to parse Mach UUID: {}", e), + } + for segment in file.segments() { + println!("{:x?}", segment); + } + + for section in file.sections() { + println!("{}: {:x?}", section.index().0, section); + } + + for comdat in file.comdats() { + print!("{:?} Sections:", comdat); + for section in comdat.sections() { + print!(" {}", section.0); + } + println!(); + } + + println!(); + println!("Symbols"); + for symbol in file.symbols() { + println!("{}: {:x?}", symbol.index().0, symbol); + } + + for section in file.sections() { + if section.relocations().next().is_some() { + println!( + "\n{} relocations", + section.name().unwrap_or("") + ); + for relocation in section.relocations() { + println!("{:x?}", relocation); + } + } + } + + println!(); + println!("Dynamic symbols"); + for symbol in file.dynamic_symbols() { + println!("{}: {:x?}", symbol.index().0, symbol); + } + + if let Some(relocations) = file.dynamic_relocations() { + println!(); + println!("Dynamic relocations"); + for relocation in relocations { + println!("{:x?}", relocation); + } + } + + let imports = file.imports().unwrap(); + if !imports.is_empty() { + println!(); + for import in imports { + println!("{:?}", import); + } + } + + let exports = file.exports().unwrap(); + if !exports.is_empty() { + println!(); + for export in exports { + println!("{:x?}", 
export); + } + } +} diff --git a/src/macho.rs b/src/macho.rs index 4139688f..9d81f7d6 100644 --- a/src/macho.rs +++ b/src/macho.rs @@ -282,6 +282,62 @@ pub const VM_PROT_WRITE: u32 = 0x02; /// execute permission pub const VM_PROT_EXECUTE: u32 = 0x04; +// Definitions from https://opensource.apple.com/source/dyld/dyld-210.2.3/launch-cache/dyld_cache_format.h.auto.html + +/// The dyld cache header, containing only the fields which are present +/// in all versions of dyld caches (dyld-95.3 and up). +/// Many more fields exist in later dyld versions, but we currently do +/// not need to parse those. +/// Corresponds to struct dyld_cache_header from dyld_cache_format.h. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct DyldCacheHeader { + /// e.g. "dyld_v0 i386" + pub magic: [u8; 16], + /// file offset to first dyld_cache_mapping_info + pub mapping_offset: U32, + /// number of dyld_cache_mapping_info entries + pub mapping_count: U32, + /// file offset to first dyld_cache_image_info + pub images_offset: U32, + /// number of dyld_cache_image_info entries + pub images_count: U32, + /// base address of dyld when cache was built + pub dyld_base_address: U64, +} + +/// Corresponds to struct dyld_cache_mapping_info from dyld_cache_format.h. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct DyldCacheMappingInfo { + /// + pub address: U64, + /// + pub size: U64, + /// + pub file_offset: U64, + /// + pub max_prot: U32, + /// + pub init_prot: U32, +} + +/// Corresponds to struct dyld_cache_image_info from dyld_cache_format.h. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct DyldCacheImageInfo { + /// + pub address: U64, + /// + pub mod_time: U64, + /// + pub inode: U64, + /// + pub path_file_offset: U32, + /// + pub pad: U32, +} + // Definitions from "/usr/include/mach-o/loader.h". 
/* @@ -3140,6 +3196,9 @@ pub const X86_64_RELOC_TLV: u8 = 9; unsafe_impl_pod!(FatHeader, FatArch32, FatArch64,); unsafe_impl_endian_pod!( + DyldCacheHeader, + DyldCacheMappingInfo, + DyldCacheImageInfo, MachHeader32, MachHeader64, LoadCommand, diff --git a/src/read/any.rs b/src/read/any.rs index d19649b8..b42ca236 100644 --- a/src/read/any.rs +++ b/src/read/any.rs @@ -235,6 +235,22 @@ impl<'data, R: ReadRef<'data>> File<'data, R> { FileInternal::Wasm(_) => BinaryFormat::Wasm, } } + + /// Initialize with an existing Mach-O 32 file. + #[cfg(feature = "macho")] + pub(crate) fn from_macho_32(inner: macho::MachOFile32<'data, Endianness, R>) -> Self { + File { + inner: FileInternal::MachO32(inner), + } + } + + /// Initialize with an existing Mach-O 64 file. + #[cfg(feature = "macho")] + pub(crate) fn from_macho_64(inner: macho::MachOFile64<'data, Endianness, R>) -> Self { + File { + inner: FileInternal::MachO64(inner), + } + } } impl<'data, R: ReadRef<'data>> read::private::Sealed for File<'data, R> {} diff --git a/src/read/macho/dyld_cache.rs b/src/read/macho/dyld_cache.rs new file mode 100644 index 00000000..5fd33441 --- /dev/null +++ b/src/read/macho/dyld_cache.rs @@ -0,0 +1,214 @@ +use crate::read::{Error, ReadError, ReadRef, Result}; +use crate::{macho, Architecture, Bytes, Endian, Endianness}; + +use super::{MachOFile32, MachOFile64}; + +/// A parsed representation of the dyld shared cache. +#[derive(Debug)] +pub struct DyldCache<'data, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + endian: E, + data: R, + first_mapping_address: u64, + header: &'data macho::DyldCacheHeader, + arch: Architecture, + is_64: bool, +} + +impl<'data, E, R> DyldCache<'data, E, R> +where + E: Endian, + R: ReadRef<'data>, +{ + /// Parse the raw dyld shared cache data. 
+ pub fn parse(data: R) -> Result { + let mut offset = 0; + let header = data + .read::>(&mut offset) + .read_error("Invalid dyld cache header size or alignment")?; + + let (arch, is_64, endianness) = match Self::parse_magic(&header.magic) { + Some(props) => props, + None => return Err(Error("Unrecognized magic value")), + }; + + let is_big_endian = endianness.is_big_endian(); + let endian = E::from_big_endian(is_big_endian).read_error("Unsupported Mach-O endian")?; + let mapping_count = header.mapping_count.get(endian); + if mapping_count == 0 { + return Err(Error("No mappings in dyld cache")); + } + + let mapping_offset = header.mapping_offset.get(endian) as u64; + let first_mapping = data + .read_at::>(mapping_offset) + .read_error("Couldn't read macho::DyldCacheMappingInfo")?; + if first_mapping.file_offset.get(endian) != 0 { + // dsc_extractor.cpp bails out in this case, in forEachDylibInCache + return Err(Error( + "Unexpected non-zero first mapping file offset in dyld cache", + )); + } + + let first_mapping_address = first_mapping.address.get(endian); + + Ok(DyldCache { + endian, + header, + first_mapping_address, + data, + arch, + is_64, + }) + } + + /// Returns (arch, is_64, endianness) based on the magic string. 
+ fn parse_magic(magic: &[u8; 16]) -> Option<(Architecture, bool, Endianness)> { + Some(match magic { + b"dyld_v1 i386\0" => (Architecture::I386, false, Endianness::Little), + b"dyld_v1 x86_64\0" => (Architecture::X86_64, true, Endianness::Little), + b"dyld_v1 x86_64h\0" => (Architecture::X86_64, true, Endianness::Little), + b"dyld_v1 ppc\0" => (Architecture::Unknown, false, Endianness::Big), + b"dyld_v1 armv6\0" => (Architecture::Arm, false, Endianness::Little), + b"dyld_v1 armv7\0" => (Architecture::Arm, false, Endianness::Little), + b"dyld_v1 armv7f\0" => (Architecture::Arm, false, Endianness::Little), + b"dyld_v1 armv7s\0" => (Architecture::Arm, false, Endianness::Little), + b"dyld_v1 armv7k\0" => (Architecture::Arm, false, Endianness::Little), + b"dyld_v1 arm64\0" => (Architecture::Aarch64, true, Endianness::Little), + b"dyld_v1 arm64e\0" => (Architecture::Aarch64, true, Endianness::Little), + _ => return None, + }) + } + + /// Get the architecture type of the file. + pub fn architecture(&self) -> Architecture { + self.arch + } + + /// Get the endianness of the file. + #[inline] + pub fn endianness(&self) -> Endianness { + if self.is_little_endian() { + Endianness::Little + } else { + Endianness::Big + } + } + + /// Return true if the file is little endian, false if it is big endian. + pub fn is_little_endian(&self) -> bool { + self.endian.is_little_endian() + } + + /// Return true if the file can contain 64-bit addresses. + pub fn is_64(&self) -> bool { + self.is_64 + } + + /// Iterate over the images in this cache. + pub fn iter_images<'cache>(&'cache self) -> DyldCacheImageIterator<'data, 'cache, E, R> { + let images_offset = self.header.images_offset.get(self.endian) as u64; + let images_count = self.header.images_count.get(self.endian); + DyldCacheImageIterator { + cache: self, + images_count, + next_image_index: 0, + next_image_offset: images_offset, + } + } +} + +/// An iterator over all the images (dylibs) in the dyld shared cache. 
+#[derive(Debug)] +pub struct DyldCacheImageIterator<'data, 'cache, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + cache: &'cache DyldCache<'data, E, R>, + images_count: u32, + next_image_index: u32, + next_image_offset: u64, +} + +impl<'data, 'cache, E, R> DyldCacheImageIterator<'data, 'cache, E, R> +where + E: Endian, + R: ReadRef<'data>, +{ + /// Advance the iterator and return the current image. + pub fn next(&mut self) -> Result>> { + if self.next_image_index >= self.images_count { + return Ok(None); + } + self.next_image_index += 1; + let data = self.cache.data; + let image_info = data + .read::>(&mut self.next_image_offset) + .read_error("Couldn't read macho::DyldCacheImageInfo")?; + Ok(Some(DyldCacheImage { + endian: self.cache.endian, + is_64: self.cache.is_64, + data, + first_mapping_address: self.cache.first_mapping_address, + image_info, + })) + } +} + +/// One image (dylib) from inside the dyld shared cache. +#[derive(Debug)] +pub struct DyldCacheImage<'data, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + endian: E, + is_64: bool, + data: R, + first_mapping_address: u64, + image_info: &'data macho::DyldCacheImageInfo, +} + +impl<'data, E, R> DyldCacheImage<'data, E, R> +where + E: Endian, + R: ReadRef<'data>, +{ + /// The file system path of this image. + pub fn path(&self) -> Result<&'data str> { + // The longest path I've seen is 164 bytes long. In theory paths could be longer than 256. + const MAX_PATH_LEN: u64 = 256; + + let path_offset = self.image_info.path_file_offset.get(self.endian) as u64; + let slice_containing_path = self + .data + .read_bytes_at(path_offset, MAX_PATH_LEN) + .read_error("Couldn't read path")?; + let path = Bytes(slice_containing_path).read_string().read_error( + "Couldn't read path string (didn't find nul byte within first 256 bytes)", + )?; + // The path should always be ASCII, so from_utf8 should always succeed. 
+ let path = core::str::from_utf8(path).map_err(|_| Error("Path string not valid utf-8"))?; + Ok(path) + } + + /// The offset in the dyld cache file where this image starts. + pub fn offset(&self) -> u64 { + self.image_info.address.get(self.endian) - self.first_mapping_address + } + + /// Parse this image into an Object. + pub fn parse_object(&self) -> Result> { + if !self.is_64 { + let file = MachOFile32::::parse_at_offset(self.data, self.offset())?; + Ok(crate::File::from_macho_32(file)) + } else { + let file = MachOFile64::::parse_at_offset(self.data, self.offset())?; + Ok(crate::File::from_macho_64(file)) + } + } +} diff --git a/src/read/macho/mod.rs b/src/read/macho/mod.rs index aeca8da1..f07ed581 100644 --- a/src/read/macho/mod.rs +++ b/src/read/macho/mod.rs @@ -5,6 +5,9 @@ //! //! Also provides `MachOFile` and related types which implement the `Object` trait. +mod dyld_cache; +pub use dyld_cache::*; + mod fat; pub use fat::*;