Skip to content

Commit

Permalink
mach: support archive entries in fat binaries (fixes #320)
Browse files Browse the repository at this point in the history
Multi-arch containers can be made up of archives or Mach-O
binaries. This adds support for archives. It is a breaking change
because previously the `MachO` struct was returned and now we're
returning a new enum: `SingleArch`.

This required some refactoring of the `lib.rs` file to share the
required functions and data structures for parsing the hint at the top
of files. `take_hint_bytes`, `Hint` and `HintData` don't require any
special features but I think were inside the `if_everything!` because
that's the only case they were used in. I did expand the API by making
`take_hint_bytes` public; this was mainly because I thought it was a
better solution than trying to maintain the various combinations of
features required to stop the compiler warning about the function
being unused. It's also a function that may be useful for goblin users.

Also add tests for parsing fat binaries.

This tests parsing fat binaries made up of Mach-O binaries and
archives. I've checked in the binaries to make testing easier as
they're quite small (both are built from the hello_world.c file in the
same directory). Above the tests themselves are instructions for how
to compile the binaries they use.
  • Loading branch information
Nick Spain authored Sep 12, 2022
1 parent a20ce47 commit e2b5207
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 64 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ endian_fd = ["alloc"]
elf32 = []
elf64 = []
# for now we will require mach and pe to be alloc + endian_fd
mach32 = ["alloc", "endian_fd"]
mach64 = ["alloc", "endian_fd"]
mach32 = ["alloc", "endian_fd", "archive"]
mach64 = ["alloc", "endian_fd", "archive"]
pe32 = ["alloc", "endian_fd"]
pe64 = ["alloc", "endian_fd"]
archive = ["alloc"]
Expand Down
7 changes: 7 additions & 0 deletions assets/hello_world.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// This is a file used to compile some of the binaries in this
// directory for testing purposes.
#include <stdio.h>

// Prints the greeting "Hello, <name>!" via printf, with no trailing newline.
// `name` is passed straight to the `%s` conversion.
extern void say(char *name) {
printf("Hello, %s!", name);
}
Binary file added assets/hello_world_fat_archives
Binary file not shown.
Binary file added assets/hello_world_fat_binaries
Binary file not shown.
9 changes: 8 additions & 1 deletion examples/dyldinfo.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use goblin::mach;
use goblin::mach::SingleArch;
use std::borrow::Cow;
use std::env;
use std::fs;
Expand Down Expand Up @@ -125,12 +126,18 @@ fn print_multi_arch(
if let Some((cputype, _)) = mach::constants::cputype::get_arch_from_flag(&arch) {
for bin in multi_arch.into_iter() {
match bin {
Ok(bin) => {
Ok(SingleArch::MachO(bin)) => {
if bin.header.cputype == cputype {
print(&bin, bind, lazy_bind);
process::exit(0);
}
}
Ok(SingleArch::Archive(_)) => {
// dyld_info doesn't seem to handle archives
// in fat binaries, so neither do we.
println!("Does not contain specified arches");
process::exit(1);
}
Err(err) => {
println!("err: {:?}", err);
process::exit(1);
Expand Down
76 changes: 29 additions & 47 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,33 @@ pub mod container {
}
}

/// Borrows the leading 16 bytes of `bytes` as a fixed-size array, which is the shape `peek_bytes` expects.
/// Yields `None` when `bytes` holds fewer than 16 bytes.
#[allow(unused)]
fn take_hint_bytes(bytes: &[u8]) -> Option<&[u8; 16]> {
    // Bail out early when the input is too short to contain a full hint.
    let head = bytes.get(..16)?;
    // The slice is exactly 16 bytes long here, so the conversion to an array reference succeeds.
    head.try_into().ok()
}

#[derive(Debug, Default)]
/// Information obtained from a peek `Hint`
pub struct HintData {
/// `true` when the peeked header indicates little-endian
/// (least-significant-byte-first) data.
pub is_lsb: bool,
/// Whether the peeked header indicates a 64-bit container.
/// NOTE(review): `None` presumably means the width could not be determined
/// from the hint bytes — confirm against the ELF branch of `peek_bytes`.
pub is_64: Option<bool>,
}

#[derive(Debug)]
/// A hint at the underlying binary format for 16 bytes of arbitrary data
pub enum Hint {
/// The bytes begin with the ELF magic.
Elf(HintData),
/// The bytes begin with one of the single-architecture Mach-O magics.
Mach(HintData),
/// The bytes begin with the fat (multi-arch) Mach-O magic; carries the
/// number of architectures read as a big-endian `u32` at offset 4.
MachFat(usize),
/// The first two bytes, read little-endian, equal the DOS magic.
PE,
/// The bytes begin with the archive magic.
Archive,
/// No known magic matched; carries the first eight bytes read as a `u64`.
Unknown(u64),
}

macro_rules! if_everything {
($($i:item)*) => ($(
#[cfg(all(feature = "endian_fd", feature = "elf64", feature = "elf32", feature = "pe64", feature = "pe32", feature = "mach64", feature = "mach32", feature = "archive"))]
Expand All @@ -210,28 +237,9 @@ macro_rules! if_everything {

if_everything! {

#[derive(Debug, Default)]
/// Information obtained from a peek `Hint`
pub struct HintData {
pub is_lsb: bool,
pub is_64: Option<bool>,
}

#[derive(Debug)]
/// A hint at the underlying binary format for 16 bytes of arbitrary data
pub enum Hint {
Elf(HintData),
Mach(HintData),
MachFat(usize),
PE,
Archive,
Unknown(u64),
}

/// Peeks at `bytes`, and returns a `Hint`
pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<Hint> {
use scroll::{Pread, LE, BE};
use crate::mach::{fat, header};
use scroll::{Pread, LE};
if &bytes[0..elf::header::SELFMAG] == elf::header::ELFMAG {
let class = bytes[elf::header::EI_CLASS];
let is_lsb = bytes[elf::header::EI_DATA] == elf::header::ELFDATA2LSB;
Expand All @@ -248,23 +256,7 @@ if_everything! {
} else if (&bytes[0..2]).pread_with::<u16>(0, LE)? == pe::header::DOS_MAGIC {
Ok(Hint::PE)
} else {
let (magic, maybe_ctx) = mach::parse_magic_and_ctx(bytes, 0)?;
match magic {
fat::FAT_MAGIC => {
// should probably verify this is always Big Endian...
let narchitectures = bytes.pread_with::<u32>(4, BE)? as usize;
Ok(Hint::MachFat(narchitectures))
},
header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => {
if let Some(ctx) = maybe_ctx {
Ok(Hint::Mach(HintData { is_lsb: ctx.le.is_little(), is_64: Some(ctx.container.is_big()) }))
} else {
Err(error::Error::Malformed(format!("Correct mach magic {:#x} does not have a matching parsing context!", magic)))
}
},
// its something else
_ => Ok(Hint::Unknown(bytes.pread::<u64>(0)?))
}
mach::peek_bytes(bytes)
}
}

Expand All @@ -279,16 +271,6 @@ if_everything! {
peek_bytes(&bytes)
}

/// Takes a reference to the first 16 bytes of the total bytes slice and convert it to an array for `peek_bytes` to use.
/// Returns None if bytes's length is less than 16.
fn take_hint_bytes(bytes: &[u8]) -> Option<&[u8; 16]> {
use core::convert::TryInto;
bytes.get(0..16)
.and_then(|hint_bytes_slice| {
hint_bytes_slice.try_into().ok()
})
}

#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
/// A parseable object that goblin understands
Expand Down
148 changes: 134 additions & 14 deletions src/mach/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use log::debug;
use scroll::ctx::SizeWith;
use scroll::{Pread, BE};

use crate::container;
use crate::error;
use crate::{archive, container};
use crate::{error, take_hint_bytes};

pub mod bind_opcodes;
pub mod constants;
Expand Down Expand Up @@ -296,6 +296,15 @@ pub struct FatArchIterator<'a> {
start: usize,
}

/// A single architecture from a multi-architecture binary container
/// ([MultiArch]).
///
/// An entry is either a Mach-O binary or an archive; which one is decided by
/// peeking at the first bytes of the entry.
#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum SingleArch<'a> {
/// The entry parsed as a Mach-O binary.
MachO(MachO<'a>),
/// The entry parsed as an archive.
Archive(archive::Archive<'a>),
}

impl<'a> Iterator for FatArchIterator<'a> {
type Item = error::Result<fat::FatArch>;
fn next(&mut self) -> Option<Self::Item> {
Expand All @@ -313,16 +322,65 @@ impl<'a> Iterator for FatArchIterator<'a> {
}
}

/// Iterator over every `MachO` binary contained in this `MultiArch` container
pub struct MachOIterator<'a> {
/// Iterator over every entry contained in this `MultiArch` container
///
/// Each item is a `SingleArch` (Mach-O binary or archive) or the error
/// produced while reading or parsing that entry.
pub struct SingleArchIterator<'a> {
// Position of the next entry to yield; iteration stops once it reaches `narches`.
index: usize,
// The bytes of the whole container; arch headers are read from it and each
// entry is sliced out of it.
data: &'a [u8],
// Number of architectures in the container.
narches: usize,
// NOTE(review): presumably the byte offset at which the fat arch table
// begins (as in `MultiArch::get`) — confirm against `next`.
start: usize,
}

impl<'a> Iterator for MachOIterator<'a> {
type Item = error::Result<MachO<'a>>;
pub fn peek_bytes(bytes: &[u8; 16]) -> error::Result<crate::Hint> {
if &bytes[0..archive::SIZEOF_MAGIC] == archive::MAGIC {
Ok(crate::Hint::Archive)
} else {
let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, 0)?;
match magic {
header::MH_CIGAM_64 | header::MH_CIGAM | header::MH_MAGIC_64 | header::MH_MAGIC => {
if let Some(ctx) = maybe_ctx {
Ok(crate::Hint::Mach(crate::HintData {
is_lsb: ctx.le.is_little(),
is_64: Some(ctx.container.is_big()),
}))
} else {
Err(error::Error::Malformed(format!(
"Correct mach magic {:#x} does not have a matching parsing context!",
magic
)))
}
}
fat::FAT_MAGIC => {
// should probably verify this is always Big Endian...
let narchitectures = bytes.pread_with::<u32>(4, BE)? as usize;
Ok(crate::Hint::MachFat(narchitectures))
}
_ => Ok(crate::Hint::Unknown(bytes.pread::<u64>(0)?)),
}
}
}

fn extract_multi_entry(bytes: &[u8]) -> error::Result<SingleArch> {
if let Some(hint_bytes) = take_hint_bytes(bytes) {
match peek_bytes(hint_bytes)? {
crate::Hint::Mach(_) => {
let binary = MachO::parse(bytes, 0)?;
Ok(SingleArch::MachO(binary))
}
crate::Hint::Archive => {
let archive = archive::Archive::parse(bytes)?;
Ok(SingleArch::Archive(archive))
}
_ => Err(error::Error::Malformed(format!(
"multi-arch entry must be a Mach-O binary or an archive"
))),
}
} else {
Err(error::Error::Malformed(format!("Object is too small")))
}
}

impl<'a> Iterator for SingleArchIterator<'a> {
type Item = error::Result<SingleArch<'a>>;
fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.narches {
None
Expand All @@ -333,8 +391,7 @@ impl<'a> Iterator for MachOIterator<'a> {
match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) {
Ok(arch) => {
let bytes = arch.slice(self.data);
let binary = MachO::parse(bytes, 0);
Some(binary)
Some(extract_multi_entry(bytes))
}
Err(e) => Some(Err(e.into())),
}
Expand All @@ -343,10 +400,10 @@ impl<'a> Iterator for MachOIterator<'a> {
}

impl<'a, 'b> IntoIterator for &'b MultiArch<'a> {
type Item = error::Result<MachO<'a>>;
type IntoIter = MachOIterator<'a>;
type Item = error::Result<SingleArch<'a>>;
type IntoIter = SingleArchIterator<'a>;
fn into_iter(self) -> Self::IntoIter {
MachOIterator {
SingleArchIterator {
index: 0,
data: self.data,
narches: self.narches,
Expand Down Expand Up @@ -387,7 +444,7 @@ impl<'a> MultiArch<'a> {
Ok(arches)
}
/// Try to get the entry (a Mach-O binary or an archive) at `index`
pub fn get(&self, index: usize) -> error::Result<MachO<'a>> {
pub fn get(&self, index: usize) -> error::Result<SingleArch<'a>> {
if index >= self.narches {
return Err(error::Error::Malformed(format!(
"Requested the {}-th binary, but there are only {} architectures in this container",
Expand All @@ -397,13 +454,13 @@ impl<'a> MultiArch<'a> {
let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?;
let bytes = arch.slice(self.data);
Ok(MachO::parse(bytes, 0)?)
extract_multi_entry(bytes)
}

pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>(
&'a self,
f: F,
) -> Option<error::Result<MachO<'a>>> {
) -> Option<error::Result<SingleArch<'a>>> {
for (i, arch) in self.iter_arches().enumerate() {
if f(arch) {
return Some(self.get(i));
Expand Down Expand Up @@ -464,3 +521,66 @@ impl<'a> Mach<'a> {
}
}
}

#[cfg(test)]
mod test {
    use super::{Mach, SingleArch};

    /// A fat container made of Mach-O binaries must yield only
    /// `SingleArch::MachO` entries, each with at least one symbol.
    #[test]
    fn parse_multi_arch_of_macho_binaries() {
        // Create via:
        // clang -arch arm64 -shared -o /tmp/hello_world_arm hello_world.c
        // clang -arch x86_64 -shared -o /tmp/hello_world_x86_64 hello_world.c
        // lipo -create -output hello_world_fat_binaries /tmp/hello_world_arm /tmp/hello_world_x86_64
        // strip hello_world_fat_binaries
        let bytes = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/assets/hello_world_fat_binaries"
        ));
        let mach = Mach::parse(bytes).expect("failed to parse input file");
        match mach {
            Mach::Fat(fat) => {
                assert!(fat.into_iter().count() > 0);
                for entry in fat.into_iter() {
                    let entry = entry.expect("failed to read entry");
                    match entry {
                        SingleArch::MachO(macho) => {
                            assert!(macho.symbols().count() > 0);
                        }
                        // The message names the real enum so a failure points at the right type.
                        _ => panic!("expected SingleArch::MachO, got {:?}", entry),
                    }
                }
            }
            Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"),
        }
    }

    /// A fat container made of archives must yield only
    /// `SingleArch::Archive` entries, each with at least one member.
    #[test]
    fn parse_multi_arch_of_archives() {
        // Created with:
        // clang -c -o /tmp/hello_world.o hello_world.c
        // ar -r /tmp/hello_world.a /tmp/hello_world.o
        // lipo -create -output hello_world_fat_archives /tmp/hello_world.a
        // strip hello_world_fat_archives
        let bytes = include_bytes!(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/assets/hello_world_fat_archives"
        ));
        let mach = Mach::parse(bytes).expect("failed to parse input file");
        match mach {
            Mach::Fat(fat) => {
                assert!(fat.into_iter().count() > 0);
                for entry in fat.into_iter() {
                    let entry = entry.expect("failed to read entry");
                    match entry {
                        SingleArch::Archive(archive) => {
                            assert!(!archive.members().is_empty());
                        }
                        // The message names the real enum so a failure points at the right type.
                        _ => panic!("expected SingleArch::Archive, got {:?}", entry),
                    }
                }
            }
            Mach::Binary(_) => panic!("expected Mach::Fat, got Mach::Binary"),
        }
    }
}

0 comments on commit e2b5207

Please sign in to comment.