Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dyld shared cache parsing #308

Merged
merged 8 commits into from
May 27, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 18 additions & 16 deletions src/read/any.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,24 @@ impl<'data, R: ReadRef<'data>> File<'data, R> {
Ok(File { inner })
}

/// Parse the raw file data at an arbitrary offset inside the input data.
///
/// Currently, this is only supported for Mach-O images.
/// This can be used for parsing Mach-O images inside the dyld shared cache,
/// where multiple images, located at different offsets, share the same address
/// space.
pub fn parse_at(data: R, offset: u64) -> Result<Self> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excellent!

let inner = match FileKind::parse_at(data, offset)? {
#[cfg(feature = "macho")]
FileKind::MachO32 => FileInternal::MachO32(macho::MachOFile32::parse_at(data, offset)?),
#[cfg(feature = "macho")]
FileKind::MachO64 => FileInternal::MachO64(macho::MachOFile64::parse_at(data, offset)?),
#[allow(unreachable_patterns)]
_ => return Err(Error("Unsupported file format")),
};
Ok(File { inner })
}

/// Return the file format.
pub fn format(&self) -> BinaryFormat {
match self.inner {
Expand All @@ -235,22 +253,6 @@ impl<'data, R: ReadRef<'data>> File<'data, R> {
FileInternal::Wasm(_) => BinaryFormat::Wasm,
}
}

/// Build a `File` around an already-parsed Mach-O 32 file.
///
/// The parsed file is stored unchanged in the `MachO32` variant of the
/// internal representation.
#[cfg(feature = "macho")]
pub(crate) fn from_macho_32(inner: macho::MachOFile32<'data, Endianness, R>) -> Self {
    let inner = FileInternal::MachO32(inner);
    File { inner }
}

/// Build a `File` around an already-parsed Mach-O 64 file.
///
/// The parsed file is stored unchanged in the `MachO64` variant of the
/// internal representation.
#[cfg(feature = "macho")]
pub(crate) fn from_macho_64(inner: macho::MachOFile64<'data, Endianness, R>) -> Self {
    let inner = FileInternal::MachO64(inner);
    File { inner }
}
}

impl<'data, R: ReadRef<'data>> read::private::Sealed for File<'data, R> {}
Expand Down
51 changes: 17 additions & 34 deletions src/read/macho/dyld_cache.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use crate::read::{Error, ReadError, ReadRef, Result};
use crate::read::{Error, File, ReadError, ReadRef, Result};
use crate::{macho, Architecture, Bytes, Endian, Endianness};

use super::{MachOFile32, MachOFile64};

/// A parsed representation of the dyld shared cache.
#[derive(Debug)]
pub struct DyldCache<'data, E = Endianness, R = &'data [u8]>
Expand All @@ -15,7 +13,6 @@ where
first_mapping_address: u64,
header: &'data macho::DyldCacheHeader<E>,
arch: Architecture,
is_64: bool,
}

impl<'data, E, R> DyldCache<'data, E, R>
Expand All @@ -30,7 +27,7 @@ where
.read::<macho::DyldCacheHeader<E>>(&mut offset)
.read_error("Invalid dyld cache header size or alignment")?;

let (arch, is_64, endianness) = match Self::parse_magic(&header.magic) {
let (arch, endianness) = match Self::parse_magic(&header.magic) {
Some(props) => props,
None => return Err(Error("Unrecognized magic value")),
};
Expand Down Expand Up @@ -61,24 +58,23 @@ where
first_mapping_address,
data,
arch,
is_64,
})
}

/// Returns (arch, is_64, endianness) based on the magic string.
fn parse_magic(magic: &[u8; 16]) -> Option<(Architecture, bool, Endianness)> {
/// Returns (arch, endianness) based on the magic string.
fn parse_magic(magic: &[u8; 16]) -> Option<(Architecture, Endianness)> {
Some(match magic {
b"dyld_v1 i386\0" => (Architecture::I386, false, Endianness::Little),
b"dyld_v1 x86_64\0" => (Architecture::X86_64, true, Endianness::Little),
b"dyld_v1 x86_64h\0" => (Architecture::X86_64, true, Endianness::Little),
b"dyld_v1 ppc\0" => (Architecture::PowerPc, false, Endianness::Big),
b"dyld_v1 armv6\0" => (Architecture::Arm, false, Endianness::Little),
b"dyld_v1 armv7\0" => (Architecture::Arm, false, Endianness::Little),
b"dyld_v1 armv7f\0" => (Architecture::Arm, false, Endianness::Little),
b"dyld_v1 armv7s\0" => (Architecture::Arm, false, Endianness::Little),
b"dyld_v1 armv7k\0" => (Architecture::Arm, false, Endianness::Little),
b"dyld_v1 arm64\0" => (Architecture::Aarch64, true, Endianness::Little),
b"dyld_v1 arm64e\0" => (Architecture::Aarch64, true, Endianness::Little),
b"dyld_v1 i386\0" => (Architecture::I386, Endianness::Little),
b"dyld_v1 x86_64\0" => (Architecture::X86_64, Endianness::Little),
b"dyld_v1 x86_64h\0" => (Architecture::X86_64, Endianness::Little),
b"dyld_v1 ppc\0" => (Architecture::PowerPc, Endianness::Big),
b"dyld_v1 armv6\0" => (Architecture::Arm, Endianness::Little),
b"dyld_v1 armv7\0" => (Architecture::Arm, Endianness::Little),
b"dyld_v1 armv7f\0" => (Architecture::Arm, Endianness::Little),
b"dyld_v1 armv7s\0" => (Architecture::Arm, Endianness::Little),
b"dyld_v1 armv7k\0" => (Architecture::Arm, Endianness::Little),
b"dyld_v1 arm64\0" => (Architecture::Aarch64, Endianness::Little),
b"dyld_v1 arm64e\0" => (Architecture::Aarch64, Endianness::Little),
_ => return None,
})
}
Expand All @@ -103,11 +99,6 @@ where
self.endian.is_little_endian()
}

/// Return true if the file can contain 64-bit addresses.
pub fn is_64(&self) -> bool {
self.is_64
}

/// Iterate over the images in this cache.
pub fn iter_images<'cache>(&'cache self) -> DyldCacheImageIterator<'data, 'cache, E, R> {
let images_offset = self.header.images_offset.get(self.endian) as u64;
Expand Down Expand Up @@ -151,7 +142,6 @@ where
.read_error("Couldn't read macho::DyldCacheImageInfo")?;
Ok(Some(DyldCacheImage {
endian: self.cache.endian,
is_64: self.cache.is_64,
data,
first_mapping_address: self.cache.first_mapping_address,
image_info,
Expand All @@ -167,7 +157,6 @@ where
R: ReadRef<'data>,
{
endian: E,
is_64: bool,
data: R,
first_mapping_address: u64,
image_info: &'data macho::DyldCacheImageInfo<E>,
Expand Down Expand Up @@ -202,13 +191,7 @@ where
}

/// Parse this image into an Object.
pub fn parse_object(&self) -> Result<crate::File<'data, R>> {
if !self.is_64 {
let file = MachOFile32::<Endianness, R>::parse_at_offset(self.data, self.offset())?;
Ok(crate::File::from_macho_32(file))
} else {
let file = MachOFile64::<Endianness, R>::parse_at_offset(self.data, self.offset())?;
Ok(crate::File::from_macho_64(file))
}
pub fn parse_object(&self) -> Result<File<'data, R>> {
File::parse_at(self.data, self.offset())
}
}
4 changes: 2 additions & 2 deletions src/read/macho/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@ where
{
/// Parse the raw Mach-O file data.
pub fn parse(data: R) -> Result<Self> {
Self::parse_at_offset(data, 0)
Self::parse_at(data, 0)
}

/// Parse the raw Mach-O file data at an arbitrary offset inside the input data.
///
/// This can be used for parsing Mach-O images inside the dyld shared cache,
/// where multiple images, located at different offsets, share the same address
/// space.
pub fn parse_at_offset(data: R, header_offset: u64) -> Result<Self> {
pub fn parse_at(data: R, header_offset: u64) -> Result<Self> {
let header = Mach::parse(data, header_offset)?;
let endian = header.endian()?;

Expand Down
7 changes: 6 additions & 1 deletion src/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,13 @@ pub enum FileKind {
impl FileKind {
/// Determine a file kind by parsing the start of the file.
///
/// This is `parse_at` with an offset of zero.
pub fn parse<'data, R: ReadRef<'data>>(data: R) -> Result<FileKind> {
    let start_of_file = 0;
    Self::parse_at(data, start_of_file)
}

/// Determine a file kind by parsing at the given offset.
pub fn parse_at<'data, R: ReadRef<'data>>(data: R, offset: u64) -> Result<FileKind> {
let magic = data
.read_bytes_at(0, 16)
.read_bytes_at(offset, 16)
.read_error("Could not read file magic")?;
if magic.len() < 16 {
return Err(Error("File too short"));
Expand Down