diff --git a/crates/rewrite/src/main.rs b/crates/rewrite/src/main.rs index 849c9ad8..4c77de45 100644 --- a/crates/rewrite/src/main.rs +++ b/crates/rewrite/src/main.rs @@ -4,6 +4,7 @@ use std::path::{Path, PathBuf}; use anyhow::{anyhow, Context, Result}; use clap::{command, Arg, ArgAction, ArgGroup}; +use log::info; use object_rewrite as rewrite; fn main() -> Result<()> { @@ -123,6 +124,10 @@ fn main() -> Result<()> { .value_name("path") .value_parser(clap::value_parser!(String)) .help("Set the interpreter path in the PT_INTERP segment"), + Arg::new("ignore-unknown-format") + .long("ignore-unknown-format") + .action(ArgAction::SetTrue) + .help("Ignore input files with unknown formats"), Arg::new("verbose") .short('v') .long("verbose") @@ -171,9 +176,18 @@ fn main() -> Result<()> { let in_data = unsafe { memmap2::Mmap::map(&in_file) } .with_context(|| format!("Failed to map input file '{}'", in_path.display()))?; let in_data = &*in_data; - match object::FileKind::parse(in_data) { - Ok(object::FileKind::Elf32) | Ok(object::FileKind::Elf64) => {} - _ => return Ok(()), + + if matches.get_flag("ignore-unknown-format") { + match object::FileKind::parse(in_data) { + Ok(object::FileKind::Elf32) | Ok(object::FileKind::Elf64) => {} + _ => { + info!( + "Ignoring input file '{}' with unknown format", + in_path.display() + ); + return Ok(()); + } + } } let mut rewriter = rewrite::Rewriter::read(in_data) .with_context(|| format!("Failed to parse input file '{}'", in_path.display()))?; diff --git a/src/build/bytes.rs b/src/build/bytes.rs index b83ac630..23635585 100644 --- a/src/build/bytes.rs +++ b/src/build/bytes.rs @@ -20,6 +20,11 @@ impl<'a> Bytes<'a> { pub fn to_mut(&mut self) -> &mut Vec<u8> { self.0.to_mut() } + + /// Get the bytes as a slice. 
+ pub fn as_slice(&self) -> &[u8] { + self.0.as_ref() + } } impl<'a> core::ops::Deref for Bytes<'a> { diff --git a/src/build/elf.rs b/src/build/elf.rs index 26cb5ab5..235b71a3 100644 --- a/src/build/elf.rs +++ b/src/build/elf.rs @@ -179,6 +179,7 @@ impl<'data> Builder<'data> { index, endian, is_mips64el, + section, rels, link, &symbols, @@ -189,6 +190,7 @@ impl<'data> Builder<'data> { index, endian, is_mips64el, + section, rels, link, &symbols, @@ -207,9 +209,10 @@ impl<'data> Builder<'data> { } let data = match section.sh_type(endian) { elf::SHT_NOBITS => SectionData::UninitializedData(section.sh_size(endian).into()), - elf::SHT_PROGBITS | elf::SHT_INIT_ARRAY | elf::SHT_FINI_ARRAY => { - SectionData::Data(section.data(endian, data)?.into()) - } + elf::SHT_PROGBITS + | elf::SHT_INIT_ARRAY + | elf::SHT_FINI_ARRAY + | elf::SHT_PREINIT_ARRAY => SectionData::Data(section.data(endian, data)?.into()), elf::SHT_REL | elf::SHT_RELA => relocations, elf::SHT_SYMTAB => { if index == symbols.section().0 { @@ -272,7 +275,9 @@ impl<'data> Builder<'data> { elf::SHT_GNU_VERNEED => SectionData::GnuVerneed, other => match (builder.header.e_machine, other) { (elf::EM_ARM, elf::SHT_ARM_ATTRIBUTES) - | (elf::EM_AARCH64, elf::SHT_AARCH64_ATTRIBUTES) => { + | (elf::EM_AARCH64, elf::SHT_AARCH64_ATTRIBUTES) + | (elf::EM_CSKY, elf::SHT_CSKY_ATTRIBUTES) + | (elf::EM_RISCV, elf::SHT_RISCV_ATTRIBUTES) => { let attributes = section.attributes(endian, data)?; Self::read_attributes(index, attributes, sections.len(), symbols.len())? 
} @@ -282,7 +287,8 @@ impl<'data> Builder<'data> { (elf::EM_ARM, elf::SHT_ARM_EXIDX) | (elf::EM_IA_64, elf::SHT_IA_64_UNWIND) | (elf::EM_MIPS, elf::SHT_MIPS_REGINFO) - | (elf::EM_MIPS, elf::SHT_MIPS_DWARF) => { + | (elf::EM_MIPS, elf::SHT_MIPS_DWARF) + | (elf::EM_X86_64, elf::SHT_X86_64_UNWIND) => { SectionData::Data(section.data(endian, data)?.into()) } _ => return Err(Error(format!("Unsupported section type {:x}", other))), @@ -362,6 +368,7 @@ impl<'data> Builder<'data> { index: usize, endian: Elf::Endian, is_mips64el: bool, + section: &'data Elf::SectionHeader, rels: &'data [Rel], link: read::SectionIndex, symbols: &read::elf::SymbolTable<'data, Elf, R>, @@ -372,7 +379,27 @@ impl<'data> Builder<'data> { Rel: Copy + Into, R: ReadRef<'data>, { - if link.0 == 0 { + if link == dynamic_symbols.section() { + Self::read_relocations_impl::( + index, + endian, + is_mips64el, + rels, + dynamic_symbols.len(), + ) + .map(SectionData::DynamicRelocation) + } else if link.0 == 0 || section.sh_flags(endian).into() & u64::from(elf::SHF_ALLOC) != 0 { + // If there's no link, then none of the relocations may reference symbols. + // Assume that these are dynamic relocations, but don't use the dynamic + // symbol table when parsing. + // + // Additionally, sometimes there is an allocated section that links to + // the static symbol table. We don't currently support this case in general, + // but if none of the relocation entries reference a symbol then it is + // safe to treat it as a dynamic relocation section. + // + // For both of these cases, if there is a reference to a symbol then + // an error will be returned when parsing the relocations. 
Self::read_relocations_impl::(index, endian, is_mips64el, rels, 0) .map(SectionData::DynamicRelocation) } else if link == symbols.section() { @@ -384,15 +411,6 @@ impl<'data> Builder<'data> { symbols.len(), ) .map(SectionData::Relocation) - } else if link == dynamic_symbols.section() { - Self::read_relocations_impl::( - index, - endian, - is_mips64el, - rels, - dynamic_symbols.len(), - ) - .map(SectionData::DynamicRelocation) } else { return Err(Error(format!( "Invalid sh_link {} in relocation section at index {}", @@ -862,8 +880,16 @@ impl<'data> Builder<'data> { // Assign dynamic symbol indices. let mut out_dynsyms = Vec::with_capacity(self.dynamic_symbols.len()); - let mut gnu_hash_symbol_count = 0; - for symbol in &self.dynamic_symbols { + // Local symbols must come before global. + let local_symbols = self + .dynamic_symbols + .into_iter() + .filter(|symbol| symbol.st_bind() == elf::STB_LOCAL); + let global_symbols = self + .dynamic_symbols + .into_iter() + .filter(|symbol| symbol.st_bind() != elf::STB_LOCAL); + for symbol in local_symbols.chain(global_symbols) { let mut name = None; let mut hash = None; let mut gnu_hash = None; @@ -872,9 +898,8 @@ impl<'data> Builder<'data> { if hash_id.is_some() { hash = Some(elf::hash(&symbol.name)); } - if gnu_hash_id.is_some() && symbol.st_shndx != elf::SHN_UNDEF { + if gnu_hash_id.is_some() && symbol.section.is_some() { gnu_hash = Some(elf::gnu_hash(&symbol.name)); - gnu_hash_symbol_count += 1; } } out_dynsyms.push(DynamicSymbolOut { @@ -884,16 +909,26 @@ impl<'data> Builder<'data> { gnu_hash, }); } + let num_local_dynamic = out_dynsyms + .iter() + .take_while(|sym| self.dynamic_symbols.get(sym.id).st_bind() == elf::STB_LOCAL) + .count(); // We must sort for GNU hash before allocating symbol indices. + let mut gnu_hash_symbol_count = 0; if gnu_hash_id.is_some() { if self.gnu_hash_bucket_count == 0 { return Err(Error::new(".gnu.hash bucket count is zero")); } // TODO: recalculate bucket_count? 
- out_dynsyms.sort_by_key(|sym| match sym.gnu_hash { + out_dynsyms[num_local_dynamic..].sort_by_key(|sym| match sym.gnu_hash { None => (0, 0), Some(hash) => (1, hash % self.gnu_hash_bucket_count), }); + gnu_hash_symbol_count = out_dynsyms + .iter() + .skip(num_local_dynamic) + .skip_while(|sym| sym.gnu_hash.is_none()) + .count() as u32; } let mut out_dynsyms_index = vec![None; self.dynamic_symbols.len()]; if dynsym_id.is_some() { @@ -938,10 +973,10 @@ impl<'data> Builder<'data> { name, }); } - let num_local = 1 + out_syms + let num_local = out_syms .iter() .take_while(|sym| self.symbols.get(sym.id).st_bind() == elf::STB_LOCAL) - .count() as u32; + .count(); let mut out_syms_index = vec![None; self.symbols.len()]; if symtab_id.is_some() { writer.reserve_null_symbol_index(); @@ -1589,7 +1624,13 @@ impl<'data> Builder<'data> { SectionData::Dynamic(dynamics) => { ((1 + dynamics.len()) * self.class().dyn_size()) as u64 } - _ => 0, + SectionData::Attributes(_) => out_section.attributes.len() as u64, + _ => { + return Err(Error(format!( + "Unimplemented size for section type {:x}", + section.sh_type + ))) + } }; let sh_link = if let Some(id) = section.sh_link_section { if let Some(index) = out_sections_index[id.0] { @@ -1634,7 +1675,7 @@ impl<'data> Builder<'data> { writer.write_shstrtab_section_header(); } SectionData::Symbol => { - writer.write_symtab_section_header(num_local); + writer.write_symtab_section_header(1 + num_local as u32); } SectionData::SymbolSectionIndex => { writer.write_symtab_shndx_section_header(); @@ -1646,7 +1687,8 @@ impl<'data> Builder<'data> { writer.write_dynstr_section_header(section.sh_addr); } SectionData::DynamicSymbol => { - writer.write_dynsym_section_header(section.sh_addr, 1); + writer + .write_dynsym_section_header(section.sh_addr, 1 + num_local_dynamic as u32); } SectionData::Hash => { writer.write_hash_section_header(section.sh_addr); @@ -3011,7 +3053,7 @@ pub struct AttributesSubsubsection<'data> { } /// The tag for a sub-subsection 
in an attributes section. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum AttributeTag { /// The attributes apply to the whole file. /// diff --git a/src/elf.rs b/src/elf.rs index 9baf2e4e..7398340f 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -5807,6 +5807,10 @@ pub const EF_RISCV_FLOAT_ABI_QUAD: u32 = 0x0006; pub const EF_RISCV_RVE: u32 = 0x0008; pub const EF_RISCV_TSO: u32 = 0x0010; +// RISC-V values for `SectionHeader*::sh_type`. +/// RISC-V attributes section. +pub const SHT_RISCV_ATTRIBUTES: u32 = SHT_LOPROC + 3; + // RISC-V values `Rel*::r_type`. pub const R_RISCV_NONE: u32 = 0; pub const R_RISCV_32: u32 = 1; diff --git a/src/write/elf/writer.rs b/src/write/elf/writer.rs index 246f4f54..65ef2fdc 100644 --- a/src/write/elf/writer.rs +++ b/src/write/elf/writer.rs @@ -1524,7 +1524,7 @@ impl<'a> Writer<'a> { sh_link: self.dynsym_index.0, sh_info: 0, sh_addralign: self.elf_align as u64, - sh_entsize: 0, + sh_entsize: if self.is_64 { 0 } else { 4 }, }); } diff --git a/tests/build/elf.rs b/tests/build/elf.rs index e5dcfc39..53979ee1 100644 --- a/tests/build/elf.rs +++ b/tests/build/elf.rs @@ -55,8 +55,6 @@ fn test_no_dynstr() { section.name = b".dynsym"[..].into(); section.sh_type = elf::SHT_DYNSYM; section.sh_flags = elf::SHF_ALLOC as u64; - section.sh_addr = 0x1000; - section.sh_offset = 0x1000; section.sh_addralign = 8; section.data = build::elf::SectionData::DynamicSymbol; let dynsym_id = section.id(); @@ -65,8 +63,6 @@ fn test_no_dynstr() { section.name = b".rela.dyn"[..].into(); section.sh_type = elf::SHT_RELA; section.sh_flags = elf::SHF_ALLOC as u64; - section.sh_addr = 0x1100; - section.sh_offset = 0x1100; section.sh_addralign = 8; section.data = build::elf::SectionData::DynamicRelocation(vec![build::elf::DynamicRelocation { @@ -77,17 +73,16 @@ fn test_no_dynstr() { }]); let rela_id = section.id(); + builder.set_section_sizes(); + let segment = builder.segments.add(); segment.p_type = elf::PT_LOAD; segment.p_flags = elf::PF_R; - 
segment.p_offset = 0x1000; - segment.p_vaddr = 0x1000; - segment.p_paddr = 0x1000; - segment.p_filesz = 0x200; - segment.p_memsz = 0x200; + segment.p_filesz = 0x1000; + segment.p_memsz = 0x1000; segment.p_align = 8; - segment.sections.push(dynsym_id); - segment.sections.push(rela_id); + segment.append_section(builder.sections.get_mut(dynsym_id)); + segment.append_section(builder.sections.get_mut(rela_id)); let mut buf = Vec::new(); builder.write(&mut buf).unwrap(); @@ -101,10 +96,159 @@ fn test_no_dynstr() { assert_eq!(section.sh_offset, 0x1000); } build::elf::SectionData::DynamicRelocation(rela) => { - assert_eq!(section.sh_offset, 0x1100); + assert_eq!(section.sh_offset, 0x1018); assert_eq!(rela.len(), 1); } _ => {} } } } + +#[test] +fn test_attribute() { + let mut builder = build::elf::Builder::new(object::Endianness::Little, true); + builder.header.e_type = elf::ET_EXEC; + builder.header.e_machine = elf::EM_X86_64; + builder.header.e_phoff = 0x40; + + let section = builder.sections.add(); + section.name = b".shstrtab"[..].into(); + section.sh_type = elf::SHT_STRTAB; + section.data = build::elf::SectionData::SectionString; + + let attributes = build::elf::AttributesSection { + subsections: vec![build::elf::AttributesSubsection { + vendor: b"GNU"[..].into(), + subsubsections: vec![ + (build::elf::AttributesSubsubsection { + tag: build::elf::AttributeTag::File, + data: b"123"[..].into(), + }), + ], + }], + }; + let section = builder.sections.add(); + section.name = b".gnu.attributes"[..].into(); + section.sh_type = elf::SHT_GNU_ATTRIBUTES; + section.sh_addralign = 8; + section.data = build::elf::SectionData::Attributes(attributes); + + let mut buf = Vec::new(); + builder.write(&mut buf).unwrap(); + + let builder = build::elf::Builder::read(&*buf).unwrap(); + assert_eq!(builder.sections.count(), 2); + for section in &builder.sections { + if let build::elf::SectionData::Attributes(attributes) = &section.data { + assert_eq!(attributes.subsections.len(), 1); + 
assert_eq!(attributes.subsections[0].vendor.as_slice(), b"GNU"); + assert_eq!(attributes.subsections[0].subsubsections.len(), 1); + assert_eq!( + attributes.subsections[0].subsubsections[0].tag, + build::elf::AttributeTag::File + ); + assert_eq!( + attributes.subsections[0].subsubsections[0].data.as_slice(), + b"123" + ); + } + } +} + +#[test] +fn test_dynsym() { + let mut builder = build::elf::Builder::new(object::Endianness::Little, true); + builder.header.e_type = elf::ET_EXEC; + builder.header.e_machine = elf::EM_X86_64; + builder.header.e_phoff = 0x40; + + let section = builder.sections.add(); + section.name = b".shstrtab"[..].into(); + section.sh_type = elf::SHT_STRTAB; + section.data = build::elf::SectionData::SectionString; + + let section = builder.sections.add(); + section.name = b".text"[..].into(); + section.sh_type = elf::SHT_PROGBITS; + section.sh_flags = (elf::SHF_ALLOC | elf::SHF_EXECINSTR) as u64; + section.sh_addralign = 16; + section.data = build::elf::SectionData::Data(vec![0xcc; 100].into()); + let text_id = section.id(); + + let section = builder.sections.add(); + section.name = b".dynsym"[..].into(); + section.sh_type = elf::SHT_DYNSYM; + section.sh_flags = elf::SHF_ALLOC as u64; + section.sh_addralign = 8; + section.data = build::elf::SectionData::DynamicSymbol; + let dynsym_id = section.id(); + + let section = builder.sections.add(); + section.name = b".dynstr"[..].into(); + section.sh_type = elf::SHT_STRTAB; + section.sh_flags = elf::SHF_ALLOC as u64; + section.sh_addralign = 1; + section.data = build::elf::SectionData::DynamicString; + let dynstr_id = section.id(); + + let section = builder.sections.add(); + section.name = b".gnu.hash"[..].into(); + section.sh_type = elf::SHT_GNU_HASH; + section.sh_flags = elf::SHF_ALLOC as u64; + section.sh_addralign = 8; + section.data = build::elf::SectionData::GnuHash; + let gnu_hash_id = section.id(); + builder.gnu_hash_bloom_shift = 1; + builder.gnu_hash_bloom_count = 1; + 
builder.gnu_hash_bucket_count = 1; + + let symbol = builder.dynamic_symbols.add(); + symbol.name = b"global"[..].into(); + symbol.set_st_info(elf::STB_GLOBAL, elf::STT_FUNC); + symbol.section = Some(text_id); + + let symbol = builder.dynamic_symbols.add(); + symbol.name = b"undefined"[..].into(); + symbol.set_st_info(elf::STB_GLOBAL, elf::STT_NOTYPE); + + let symbol = builder.dynamic_symbols.add(); + symbol.name = b"local"[..].into(); + symbol.set_st_info(elf::STB_LOCAL, elf::STT_FUNC); + symbol.section = Some(text_id); + + builder.set_section_sizes(); + + let segment = builder.segments.add(); + segment.p_type = elf::PT_LOAD; + segment.p_flags = elf::PF_R; + segment.p_filesz = 0x1000; + segment.p_memsz = 0x1000; + segment.p_align = 8; + segment.append_section(builder.sections.get_mut(text_id)); + segment.append_section(builder.sections.get_mut(dynsym_id)); + segment.append_section(builder.sections.get_mut(dynstr_id)); + segment.append_section(builder.sections.get_mut(gnu_hash_id)); + + let mut buf = Vec::new(); + builder.write(&mut buf).unwrap(); + + let builder = build::elf::Builder::read(&*buf).unwrap(); + assert_eq!(builder.sections.count(), 5); + assert_eq!(builder.dynamic_symbols.count(), 3); + // Check that the dynamic symbol table sorting handles + // local and undefined symbols correctly. + assert_eq!( + builder + .dynamic_symbols + .iter() + .map(|s| s.name.as_slice()) + .collect::<Vec<_>>(), + vec![&b"local"[..], &b"undefined"[..], &b"global"[..]] + ); + for section in &builder.sections { + if let build::elf::SectionData::DynamicSymbol = &section.data { + // Check that sh_info includes the number of local symbols. + assert_eq!(section.sh_info, 2); + } + } +}