diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..32f22e0 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,135 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "flate2" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "idb-rs" +version = "0.1.0" +dependencies = [ + "anyhow", + "bincode", + "flate2", + "serde", + "serde_repr", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.209" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.209" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_repr" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/resources/Readme.md b/resources/Readme.md new file mode 100644 index 0000000..c9411d7 --- /dev/null +++ b/resources/Readme.md @@ -0,0 +1,2 @@ +Put the `*.idb` and `*.i64` files in the `idbs` folder and the `*.til` files in the `tils` folder. +These files are used to test the parser when running `cargo test`. 
diff --git a/resources/idbs/idb_and_i64_files_here b/resources/idbs/idb_and_i64_files_here new file mode 100644 index 0000000..e69de29 diff --git a/resources/tils/til_files_here b/resources/tils/til_files_here new file mode 100644 index 0000000..e69de29 diff --git a/src/id0.rs b/src/id0.rs new file mode 100644 index 0000000..cfbeeac --- /dev/null +++ b/src/id0.rs @@ -0,0 +1,2390 @@ +use std::ffi::CStr; +use std::io::{BufRead, Cursor, ErrorKind, Read, Seek, SeekFrom}; +use std::num::NonZeroU32; + +use crate::{read_bytes_len_u16, read_c_string_raw, IDBHeader, IDBSectionCompression}; + +use anyhow::{anyhow, ensure, Result}; + +#[derive(Debug, Clone, Copy)] +enum ID0Version { + V15, + V16, + V20, +} + +impl ID0Version { + pub(crate) fn read(input: &mut I) -> Result { + let value = read_c_string_raw(input)?; + match &value[..] { + b"B-tree v 1.5 (C) Pol 1990" => Ok(Self::V15), + b"B-tree v 1.6 (C) Pol 1990" => Ok(Self::V16), + b"B-tree v2" => Ok(Self::V20), + name => Err(anyhow!( + "Unknown B-tree version: {}", + String::from_utf8_lossy(name) + )), + } + } +} + +#[derive(Debug, Clone, Copy)] +struct ID0Header { + // TODO handle the next_free_offset being the fist free page + _next_free_offset: Option, + page_size: u16, + // assuming None here means there are no entries in this ID0 + root_page: Option, + record_count: u32, + page_count: u32, + //unk12: u8, + version: ID0Version, +} + +impl ID0Header { + pub(crate) fn read(input: &mut I, buf: &mut Vec) -> Result { + buf.resize(64, 0); + input.read_exact(buf)?; + // TODO handle the 15 version of the header: + // { + // let next_free_offset: u16 = bincode::deserialize_from(&mut *input)?; + // let page_size: u16 = bincode::deserialize_from(&mut *input)?; + // let root_page: u16 = bincode::deserialize_from(&mut *input)?; + // let record_count: u32 = bincode::deserialize_from(&mut *input)?; + // let page_count: u16 = bincode::deserialize_from(&mut *input)?; + // let unk12: u8 = bincode::deserialize_from(&mut *input)?; + // let 
version = ID0Version::read(input)?; + // } + + let mut cursor = Cursor::new(&buf); + let next_free_offset: u32 = bincode::deserialize_from(&mut cursor)?; + let page_size: u16 = bincode::deserialize_from(&mut cursor)?; + let root_page: u32 = bincode::deserialize_from(&mut cursor)?; + let record_count: u32 = bincode::deserialize_from(&mut cursor)?; + let page_count: u32 = bincode::deserialize_from(&mut cursor)?; + let _unk12: u8 = bincode::deserialize_from(&mut cursor)?; + let version = ID0Version::read(&mut cursor)?; + // TODO move this code out of here and use seek instead + // read the rest of the page + ensure!(page_size >= 64); + buf.resize(page_size.into(), 0); + input.read_exact(&mut buf[64..])?; + // the rest of the header should be only zeros + ensure!( + buf[64..].iter().all(|b| *b == 0), + "Extra data on the header was not parsed" + ); + Ok(ID0Header { + _next_free_offset: NonZeroU32::new(next_free_offset), + page_size, + root_page: NonZeroU32::new(root_page), + record_count, + page_count, + version, + }) + } +} + +#[derive(Debug, Clone)] +pub struct ID0Section { + is_64: bool, + pub entries: Vec, +} + +#[derive(Debug, Clone)] +pub struct ID0Entry { + pub key: Vec, + pub value: Vec, +} + +impl ID0Section { + pub(crate) fn read( + input: &mut I, + header: &IDBHeader, + compress: IDBSectionCompression, + ) -> Result { + match compress { + IDBSectionCompression::None => Self::read_inner(input, header), + IDBSectionCompression::Zlib => { + let mut input = flate2::read::ZlibDecoder::new(input); + Self::read_inner(&mut input, header) + } + } + } + + // NOTE this was written this way to validate the data in each file, so it's clear that no + // data is being parsed incorrectly or is left unparsed. There way too many validations + // and non-necessary parsing is done on delete data. 
+ // TODO This is probably much more efficient if written with , this + // way it's not necessary to read and cache the unused/deleted pages, if you are sure this + // implementation is correct, you could rewrite this function to do that. + fn read_inner(input: &mut I, idb_header: &IDBHeader) -> Result { + // pages size are usually around that size + let mut buf = Vec::with_capacity(0x2000); + let header = ID0Header::read(&mut *input, &mut buf)?; + buf.resize(header.page_size.into(), 0); + // NOTE sometimes deleted pages are included here, seems to happen specially if a + // index is deleted with all it's leafs, leaving the now-empty index and the + // now-disconnected children + let mut pages = Vec::with_capacity(header.page_count.try_into().unwrap()); + loop { + let read = read_exact_or_nothing(&mut *input, &mut buf)?; + if read == 0 { + // no more data, hit eof + break; + } + if read != header.page_size.into() { + // only read part of the page + return Err(anyhow!("Found EoF in the middle of the page")); + } + // read the full page + let page = ID0TreeEntrRaw::read(&buf, &header)?; + pages.push(Some(page)); + } + + // verify for duplicated entries + let pages_tree = Self::create_tree(header.root_page, &mut pages)?; + + // verify that the correct number of pages were consumed and added to the tree + let in_tree_pages = pages + .iter() + .map(Option::as_ref) + .filter(Option::is_none) + .count(); + ensure!(in_tree_pages == header.page_count.try_into().unwrap()); + + // make sure only empty pages are left out-of-the-tree + for page in pages.into_iter().flatten() { + match page { + ID0TreeEntrRaw::Leaf(leaf) if leaf.is_empty() => {} + ID0TreeEntrRaw::Index { entries, .. 
} if entries.is_empty() => {} + ID0TreeEntrRaw::Index { + preceeding, + entries, + } => { + return Err(anyhow!( + "Extra Index preceeding {}, with {} entries", + preceeding.get(), + entries.len() + )) + } + ID0TreeEntrRaw::Leaf(entries) => { + let entries_len = entries + .iter() + .filter(|e| !e.key.is_empty() || !e.value.is_empty()) + .count(); + if entries_len != 0 { + return Err(anyhow!("Extra Leaf with {} entry", entries_len)); + } + } + } + } + + // put it all in order on the vector + let mut entries = Vec::with_capacity(header.record_count.try_into().unwrap()); + Self::tree_to_vec(pages_tree, &mut entries); + + // make sure the vector is sorted + ensure!(entries.windows(2).all(|win| { + let [a, b] = win else { unreachable!() }; + a.key < b.key + })); + + // make sure the right number of entries are in the final vector + ensure!(entries.len() == header.record_count.try_into().unwrap()); + + Ok(ID0Section { + is_64: idb_header.magic_version.is_64(), + entries, + }) + } + + fn create_tree( + index: Option, + pages: &mut Vec>, + ) -> Result { + let Some(index) = index else { + return Ok(ID0TreeEntry::Leaf(vec![])); + }; + + let index = usize::try_from(index.get()).unwrap() - 1; + let entry = pages + .get_mut(index) + .ok_or_else(|| anyhow!("invalid page index: {index}"))? 
+ .take() + .ok_or_else(|| anyhow!("page index {index} is referenciated multiple times"))?; + match entry { + ID0TreeEntrRaw::Leaf(leaf) => Ok(ID0TreeEntry::Leaf(leaf)), + ID0TreeEntrRaw::Index { + preceeding, + entries, + } => { + let preceeding = Self::create_tree(Some(preceeding), &mut *pages)?; + let index = entries + .into_iter() + .map(|e| { + let page = Self::create_tree(e.page, &mut *pages)?; + Ok(ID0TreeIndex { + page: Box::new(page), + key: e.key, + value: e.value, + }) + }) + .collect::>()?; + Ok(ID0TreeEntry::Index { + preceeding: Box::new(preceeding), + index, + }) + } + } + } + + fn tree_to_vec(entry: ID0TreeEntry, output: &mut Vec) { + match entry { + ID0TreeEntry::Index { preceeding, index } => { + Self::tree_to_vec(*preceeding, &mut *output); + for ID0TreeIndex { page, key, value } in index { + output.push(ID0Entry { key, value }); + Self::tree_to_vec(*page, &mut *output); + } + } + ID0TreeEntry::Leaf(entries) => output.extend(entries), + } + } + + fn binary_search(&self, key: impl AsRef<[u8]>) -> Result { + let key = key.as_ref(); + self.entries.binary_search_by_key(&key, |b| &b.key[..]) + } + + pub fn get(&self, key: impl AsRef<[u8]>) -> Option<&ID0Entry> { + self.binary_search(key).ok().map(|i| &self.entries[i]) + } + + pub fn sub_values(&self, key: Vec) -> impl Iterator { + let start = self.binary_search(&key); + let start = match start { + Ok(pos) => pos, + Err(start) => start, + }; + + self.entries[start..] + .iter() + .take_while(move |e| e.key.starts_with(&key)) + } + + pub fn segments(&self) -> Result> + '_> { + let entry = self + .get("N$ segs") + .ok_or_else(|| anyhow!("Unable to find entry segs"))?; + let key: Vec = b"." 
+ .iter() + .chain(entry.value.iter().rev()) + .chain(b"S") + .copied() + .collect(); + Ok(self + .sub_values(key) + .map(|e| Segment::read(&e.value, self.is_64))) + } + + pub fn loader_name(&self) -> Result>> { + let entry = self + .get("N$ loader name") + .ok_or_else(|| anyhow!("Unable to find entry loader name"))?; + // TODO check that keys are 0 => plugin, or 1 => format + let key: Vec = b"." + .iter() + .chain(entry.value.iter().rev()) + .chain(b"S") + .copied() + .collect(); + Ok(self + .sub_values(key) + .map(|e| Ok(CStr::from_bytes_with_nul(&e.value)?.to_str()?))) + } + + pub fn root_info(&self) -> Result>> { + let entry = self + .get("NRoot Node") + .ok_or_else(|| anyhow!("Unable to find entry Root Node"))?; + let key: Vec = b"." + .iter() + .chain(entry.value.iter().rev()) + .copied() + .collect(); + let key_len = key.len(); + Ok(self.sub_values(key).map(move |entry| { + let sub_key = &entry.key[key_len..]; + let Some(sub_type) = sub_key.first().copied() else { + return Ok(IDBRootInfo::Unknown(entry)); + }; + match (sub_type, sub_key.len()) { + (b'N', 1) => { + ensure!( + parse_maybe_cstr(&entry.value) == Some("Root Node"), + "Invalid Root Node Name" + ); + return Ok(IDBRootInfo::RootNodeName); + } + // TODO filenames can be non-utf-8, but are they always CStr? 
+ (b'V', 1) => return Ok(IDBRootInfo::InputFile(&entry.value)), + _ => {} + } + let Some(value) = parse_number(&sub_key[1..], true, self.is_64) else { + return Ok(IDBRootInfo::Unknown(entry)); + }; + match (sub_type, value as i64) { + (b'A', -6) => parse_number(&entry.value, false, self.is_64) + .ok_or_else(|| anyhow!("Unable to parse imagebase value")) + .map(IDBRootInfo::ImageBase), + (b'A', -5) => parse_number(&entry.value, false, self.is_64) + .ok_or_else(|| anyhow!("Unable to parse crc value")) + .map(IDBRootInfo::Crc), + (b'A', -4) => parse_number(&entry.value, false, self.is_64) + .ok_or_else(|| anyhow!("Unable to parse open_count value")) + .map(IDBRootInfo::OpenCount), + (b'A', -2) => parse_number(&entry.value, false, self.is_64) + .ok_or_else(|| anyhow!("Unable to parse CreatedDate value")) + .map(IDBRootInfo::CreatedDate), + (b'A', -1) => parse_number(&entry.value, false, self.is_64) + .ok_or_else(|| anyhow!("Unable to parse Version value")) + .map(IDBRootInfo::Version), + (b'S', 1302) => entry + .value + .as_slice() + .try_into() + .map(IDBRootInfo::Md5) + .map_err(|_| anyhow!("Value Md5 with invalid len")), + (b'S', 1303) => parse_maybe_cstr(&entry.value) + .map(IDBRootInfo::VersionString) + .ok_or_else(|| anyhow!("Unable to parse VersionString string")), + (b'S', 1349) => entry + .value + .as_slice() + .try_into() + .map(IDBRootInfo::Sha256) + .map_err(|_| anyhow!("Value Sha256 with invalid len")), + (b'S', 0x41b994) => IDBParam::read(&entry.value, self.is_64) + .map(Box::new) + .map(IDBRootInfo::IDAInfo), + _ => Ok(IDBRootInfo::Unknown(entry)), + } + })) + } + + pub fn ida_info(&self) -> Result { + // TODO Root Node is always the last one? + let entry = self + .get("NRoot Node") + .ok_or_else(|| anyhow!("Unable to find entry Root Node"))?; + let sub_key = if self.is_64 { + 0x41B994u64.to_be_bytes().to_vec() + } else { + 0x41B994u32.to_be_bytes().to_vec() + }; + let key: Vec = b"." 
+ .iter() + .chain(entry.value.iter().rev()) + .chain(b"S") + .chain(sub_key.iter()) + .copied() + .collect(); + let description = self + .sub_values(key) + .next() + .ok_or_else(|| anyhow!("Unable to find id_params inside Root Node"))?; + IDBParam::read(&description.value, self.is_64) + } + + pub fn file_regions( + &self, + version: u16, + ) -> Result> + '_> { + let entry = self + .get("N$ fileregions") + .ok_or_else(|| anyhow!("Unable to find fileregions"))?; + let key: Vec = b"." + .iter() + .chain(entry.value.iter().rev()) + .chain(b"S") + .copied() + .collect(); + let key_len = key.len(); + // TODO find the meaning of "$ fileregions" b'V' entries + Ok(self.sub_values(key).map(move |e| { + let key = &e.key[key_len..]; + IDBFileRegions::read(key, &e.value, version, self.is_64) + })) + } + + pub fn functions_and_comments( + &self, + ) -> Result>> { + let entry = self + .get("N$ funcs") + .ok_or_else(|| anyhow!("Unable to find functions"))?; + let key: Vec = b"." + .iter() + .chain(entry.value.iter().rev()) + .copied() + .collect(); + let key_len = key.len(); + Ok(self.sub_values(key).map(move |e| { + let key = &e.key[key_len..]; + FunctionsAndComments::read(key, &e.value, self.is_64) + })) + } + + // TODO implement $ fixups + // TODO implement $ segsstrings + // TODO implement $ imports + // TODO implement $ scriptsnippets + + pub fn entry_points(&self) -> Result>> { + let entry = self + .get("N$ entry points") + .ok_or_else(|| anyhow!("Unable to find functions"))?; + let key: Vec = b"." + .iter() + .chain(entry.value.iter().rev()) + .copied() + .collect(); + let key_len = key.len(); + Ok(self.sub_values(key).map(move |e| { + let key = &e.key[key_len..]; + EntryPoint::read(key, &e.value, self.is_64) + })) + } +} + +#[derive(Clone, Debug)] +pub struct Segment { + pub startea: u64, + pub size: u64, + pub name_id: u64, + pub class_id: u64, + /// This field is IDP dependent. 
+ /// You may keep your information about the segment here + pub orgbase: u64, + /// See more at [flags](https://hex-rays.com//products/ida/support/sdkdoc/group___s_f_l__.html) + pub flags: u32, + /// [Segment alignment codes](https://hex-rays.com//products/ida/support/sdkdoc/group__sa__.html) + pub align: u32, + /// [Segment combination codes](https://hex-rays.com//products/ida/support/sdkdoc/group__sc__.html) + pub comb: u32, + /// [Segment permissions](https://hex-rays.com//products/ida/support/sdkdoc/group___s_e_g_p_e_r_m__.html) (0 means no information) + pub perm: u32, + /// Number of bits in the segment addressing. + /// 0: 16 bits + /// 1: 32 bits + /// 2: 64 bits + pub bitness: u32, + /// Segment type (see [Segment types](https://hex-rays.com//products/ida/support/sdkdoc/group___s_e_g__.html)). + /// The kernel treats different segment types differently. Segments marked with '*' contain no instructions or data and are not declared as 'segments' in the disassembly. + pub seg_type: u32, + /// Segment selector - should be unique. + /// You can't change this field after creating the segment. + /// Exception: 16bit OMF files may have several segments with the same selector, + /// but this is not good (no way to denote a segment exactly) so it should be fixed in + /// the future. + pub selector: u64, + /// Default segment register values. 
+ /// First element of this array keeps information about value of [processor_t::reg_first_sreg](https://hex-rays.com//products/ida/support/sdkdoc/structprocessor__t.html#a4206e35bf99d211c18d53bd1035eb2e3) + pub defsr: [u64; 16], + /// the segment color + pub color: u32, +} + +impl Segment { + fn read(value: &[u8], is_64: bool) -> Result { + let mut cursor = Cursor::new(value); + let startea = parse_word(&mut cursor, is_64)?; + let size = parse_word(&mut cursor, is_64)?; + let name_id = parse_word(&mut cursor, is_64)?; + let class_id = parse_word(&mut cursor, is_64)?; + let orgbase = parse_word(&mut cursor, is_64)?; + let flags = unpack_dd(&mut cursor)?; + let align = unpack_dd(&mut cursor)?; + let comb = unpack_dd(&mut cursor)?; + let perm = unpack_dd(&mut cursor)?; + let bitness = unpack_dd(&mut cursor)?; + let seg_type = unpack_dd(&mut cursor)?; + let selector = parse_word(&mut cursor, is_64)?; + let defsr: Vec<_> = (0..16) + .map(|_| parse_word(&mut cursor, is_64)) + .collect::>()?; + let color = unpack_dd(&mut cursor)?; + + // TODO maybe new versions include extra information and thid check fails + ensure!(cursor.position() == value.len().try_into().unwrap()); + Ok(Segment { + startea, + size, + name_id, + class_id, + orgbase, + flags, + align, + comb, + perm, + bitness, + seg_type, + selector, + defsr: defsr.try_into().unwrap(), + color, + }) + } +} + +#[derive(Clone, Debug)] +pub enum IDBRootInfo<'a> { + /// it's just the "Root Node" String + RootNodeName, + InputFile(&'a [u8]), + Crc(u64), + ImageBase(u64), + OpenCount(u64), + CreatedDate(u64), + Version(u64), + Md5(&'a [u8; 16]), + VersionString(&'a str), + Sha256(&'a [u8; 32]), + IDAInfo(Box), + Unknown(&'a ID0Entry), +} + +#[derive(Clone, Debug)] +pub enum IDBParam { + V1(IDBParam1), + V2(IDBParam2), +} + +#[derive(Clone, Debug)] +pub struct IDBParam1 { + pub version: u16, + pub cpu: String, + pub lflags: u8, + pub demnames: u8, + pub filetype: u16, + pub fcoresize: u64, + pub corestart: u64, + pub 
ostype: u16, + pub apptype: u16, + pub startsp: u64, + pub af: u16, + pub startip: u64, + pub startea: u64, + pub minea: u64, + pub maxea: u64, + pub ominea: u64, + pub omaxea: u64, + pub lowoff: u64, + pub highoff: u64, + pub maxref: u64, + pub ascii_break: u8, + pub wide_high_byte_first: u8, + pub indent: u8, + pub comment: u8, + pub xrefnum: u8, + pub entab: u8, + pub specsegs: u8, + pub voids: u8, + pub showauto: u8, + pub auto: u8, + pub border: u8, + pub null: u8, + pub genflags: u8, + pub showpref: u8, + pub prefseg: u8, + pub asmtype: u8, + pub baseaddr: u64, + pub xrefs: u8, + pub binpref: u16, + pub cmtflag: u8, + pub nametype: u8, + pub showbads: u8, + pub prefflag: u8, + pub packbase: u8, + pub asciiflags: u8, + pub listnames: u8, + pub asciiprefs: [u8; 16], + pub asciisernum: u64, + pub asciizeroes: u8, + pub tribyte_order: u8, + pub mf: u8, + pub org: u8, + pub assume: u8, + pub checkarg: u8, + // offset 131 + pub start_ss: u64, + pub start_cs: u64, + pub main: u64, + pub short_dn: u64, + pub long_dn: u64, + pub datatypes: u64, + pub strtype: u64, + pub af2: u16, + pub namelen: u16, + pub margin: u16, + pub lenxref: u16, + pub lprefix: [u8; 16], + pub lprefixlen: u8, + pub compiler: u8, + pub model: u8, + pub sizeof_int: u8, + pub sizeof_bool: u8, + pub sizeof_enum: u8, + pub sizeof_algn: u8, + pub sizeof_short: u8, + pub sizeof_long: u8, + pub sizeof_llong: u8, + pub change_counter: u32, + pub sizeof_ldbl: u8, + pub abiname: [u8; 16], + pub abibits: u32, + pub refcmts: u8, +} + +#[derive(Clone, Debug)] +pub struct IDBParam2 { + pub version: u16, + pub cpu: String, + pub genflags: Inffl, + pub lflags: Lflg, + pub database_change_count: u32, + pub filetype: FileType, + pub ostype: u16, + pub apptype: u16, + pub asmtype: u8, + pub specsegs: u8, + pub af: Af, + pub baseaddr: u64, + pub start_ss: u64, + pub start_cs: u64, + pub start_ip: u64, + pub start_ea: u64, + pub start_sp: u64, + pub main: u64, + pub min_ea: u64, + pub max_ea: u64, + pub omin_ea: 
u64, + pub omax_ea: u64, + pub lowoff: u64, + pub highoff: u64, + pub maxref: u64, + pub privrange_start_ea: u64, + pub privrange_end_ea: u64, + pub netdelta: u64, + pub xrefnum: u8, + pub type_xrefnum: u8, + pub refcmtnum: u8, + pub xrefflag: XRef, + pub max_autoname_len: u16, + pub nametype: NameType, + pub short_demnames: u32, + pub long_demnames: u32, + pub demnames: DemName, + pub listnames: ListName, + pub indent: u8, + pub cmt_ident: u8, + pub margin: u16, + pub lenxref: u16, + pub outflags: OutputFlags, + pub cmtflg: CommentOptions, + pub limiter: DelimiterOptions, + pub bin_prefix_size: u16, + pub prefflag: LinePrefixOptions, + pub strlit_flags: StrLiteralFlags, + pub strlit_break: u8, + pub strlit_zeroes: u8, + pub strtype: u32, + pub strlit_pref: String, + pub strlit_sernum: u64, + pub datatypes: u64, + pub cc_id: Compiler, + pub cc_cm: u8, + pub cc_size_i: u8, + pub cc_size_b: u8, + pub cc_size_e: u8, + pub cc_defalign: u8, + pub cc_size_s: u8, + pub cc_size_l: u8, + pub cc_size_ll: u8, + pub cc_size_ldbl: u8, + pub abibits: AbiOptions, + pub appcall_options: u32, +} + +impl IDBParam { + pub(crate) fn read(data: &[u8], is_64: bool) -> Result { + let mut input = Cursor::new(data); + let magic: [u8; 3] = bincode::deserialize_from(&mut input)?; + let magic_old = match &magic[..] { + b"ida" => { + let zero: u8 = bincode::deserialize_from(&mut input)?; + ensure!(zero == 0); + true + } + b"IDA" => false, + _ => return Err(anyhow!("Invalid IDBParam Magic")), + }; + let version: u16 = bincode::deserialize_from(&mut input)?; + + let cpu_len = match (magic_old, version) { + (_, ..700) => 8, + (true, 700..) => 16, + (false, 700..) 
=> { + let cpu_len: u8 = bincode::deserialize_from(&mut input)?; + cpu_len.into() + } + }; + let mut cpu = vec![0; cpu_len]; + input.read_exact(&mut cpu)?; + // remove any \x00 that marks the end of the str + if let Some(end_cpu_str) = cpu.iter().position(|b| *b == 0) { + // make sure there is no data after the \x00 in the string + ensure!(cpu[end_cpu_str..].iter().all(|b| *b == 0)); + cpu.truncate(end_cpu_str); + } + let cpu = String::from_utf8(cpu)?; + + // TODO tight those ranges up + let param = match version { + ..700 => Self::read_v1(&mut input, is_64, version, cpu)?, + 700.. => Self::read_v2(&mut input, is_64, magic_old, version, cpu)?, + }; + match version { + // TODO old version may contain extra data at the end with unknown purpose + ..700 => {} + 700.. => ensure!( + input.position() == data.len().try_into().unwrap(), + "Data left after the IDBParam: {}", + u64::try_from(data.len()).unwrap() - input.position() + ), + } + Ok(param) + } + + pub(crate) fn read_v1( + mut input: I, + is_64: bool, + version: u16, + cpu: String, + ) -> Result { + let lflags: u8 = bincode::deserialize_from(&mut input)?; + let demnames: u8 = bincode::deserialize_from(&mut input)?; + let filetype: u16 = bincode::deserialize_from(&mut input)?; + let fcoresize: u64 = read_word(&mut input, is_64)?; + let corestart: u64 = read_word(&mut input, is_64)?; + let ostype: u16 = bincode::deserialize_from(&mut input)?; + let apptype: u16 = bincode::deserialize_from(&mut input)?; + let startsp: u64 = read_word(&mut input, is_64)?; + let af: u16 = bincode::deserialize_from(&mut input)?; + let startip: u64 = read_word(&mut input, is_64)?; + let startea: u64 = read_word(&mut input, is_64)?; + let minea: u64 = read_word(&mut input, is_64)?; + let maxea: u64 = read_word(&mut input, is_64)?; + let ominea: u64 = read_word(&mut input, is_64)?; + let omaxea: u64 = read_word(&mut input, is_64)?; + let lowoff: u64 = read_word(&mut input, is_64)?; + let highoff: u64 = read_word(&mut input, is_64)?; + let 
maxref: u64 = read_word(&mut input, is_64)?; + let ascii_break: u8 = bincode::deserialize_from(&mut input)?; + let wide_high_byte_first: u8 = bincode::deserialize_from(&mut input)?; + let indent: u8 = bincode::deserialize_from(&mut input)?; + let comment: u8 = bincode::deserialize_from(&mut input)?; + let xrefnum: u8 = bincode::deserialize_from(&mut input)?; + let entab: u8 = bincode::deserialize_from(&mut input)?; + let specsegs: u8 = bincode::deserialize_from(&mut input)?; + let voids: u8 = bincode::deserialize_from(&mut input)?; + let _unkownw: u8 = bincode::deserialize_from(&mut input)?; + let showauto: u8 = bincode::deserialize_from(&mut input)?; + let auto: u8 = bincode::deserialize_from(&mut input)?; + let border: u8 = bincode::deserialize_from(&mut input)?; + let null: u8 = bincode::deserialize_from(&mut input)?; + let genflags: u8 = bincode::deserialize_from(&mut input)?; + let showpref: u8 = bincode::deserialize_from(&mut input)?; + let prefseg: u8 = bincode::deserialize_from(&mut input)?; + let asmtype: u8 = bincode::deserialize_from(&mut input)?; + let baseaddr: u64 = read_word(&mut input, is_64)?; + let xrefs: u8 = bincode::deserialize_from(&mut input)?; + let binpref: u16 = bincode::deserialize_from(&mut input)?; + let cmtflag: u8 = bincode::deserialize_from(&mut input)?; + let nametype: u8 = bincode::deserialize_from(&mut input)?; + let showbads: u8 = bincode::deserialize_from(&mut input)?; + let prefflag: u8 = bincode::deserialize_from(&mut input)?; + let packbase: u8 = bincode::deserialize_from(&mut input)?; + let asciiflags: u8 = bincode::deserialize_from(&mut input)?; + let listnames: u8 = bincode::deserialize_from(&mut input)?; + let asciiprefs: [u8; 16] = bincode::deserialize_from(&mut input)?; + let asciisernum: u64 = read_word(&mut input, is_64)?; + let asciizeroes: u8 = bincode::deserialize_from(&mut input)?; + let _unknown2: u16 = bincode::deserialize_from(&mut input)?; + let tribyte_order: u8 = bincode::deserialize_from(&mut input)?; + let 
mf: u8 = bincode::deserialize_from(&mut input)?; + let org: u8 = bincode::deserialize_from(&mut input)?; + let assume: u8 = bincode::deserialize_from(&mut input)?; + let checkarg: u8 = bincode::deserialize_from(&mut input)?; + // offset 131 + let start_ss: u64 = read_word(&mut input, is_64)?; + let start_cs: u64 = read_word(&mut input, is_64)?; + let main: u64 = read_word(&mut input, is_64)?; + let short_dn: u64 = read_word(&mut input, is_64)?; + let long_dn: u64 = read_word(&mut input, is_64)?; + let datatypes: u64 = read_word(&mut input, is_64)?; + let strtype: u64 = read_word(&mut input, is_64)?; + let af2: u16 = bincode::deserialize_from(&mut input)?; + let namelen: u16 = bincode::deserialize_from(&mut input)?; + let margin: u16 = bincode::deserialize_from(&mut input)?; + let lenxref: u16 = bincode::deserialize_from(&mut input)?; + let lprefix: [u8; 16] = bincode::deserialize_from(&mut input)?; + let lprefixlen: u8 = bincode::deserialize_from(&mut input)?; + let compiler: u8 = bincode::deserialize_from(&mut input)?; + let model: u8 = bincode::deserialize_from(&mut input)?; + let sizeof_int: u8 = bincode::deserialize_from(&mut input)?; + let sizeof_bool: u8 = bincode::deserialize_from(&mut input)?; + let sizeof_enum: u8 = bincode::deserialize_from(&mut input)?; + let sizeof_algn: u8 = bincode::deserialize_from(&mut input)?; + let sizeof_short: u8 = bincode::deserialize_from(&mut input)?; + let sizeof_long: u8 = bincode::deserialize_from(&mut input)?; + let sizeof_llong: u8 = bincode::deserialize_from(&mut input)?; + let change_counter: u32 = bincode::deserialize_from(&mut input)?; + let sizeof_ldbl: u8 = bincode::deserialize_from(&mut input)?; + let _unknown_3: u32 = bincode::deserialize_from(&mut input)?; + let abiname: [u8; 16] = bincode::deserialize_from(&mut input)?; + let abibits: u32 = bincode::deserialize_from(&mut input)?; + let refcmts: u8 = bincode::deserialize_from(&mut input)?; + + Ok(IDBParam::V1(IDBParam1 { + version, + cpu, + lflags, + demnames, + 
filetype, + fcoresize, + corestart, + ostype, + apptype, + startsp, + af, + startip, + startea, + minea, + maxea, + ominea, + omaxea, + lowoff, + highoff, + maxref, + ascii_break, + wide_high_byte_first, + indent, + comment, + xrefnum, + entab, + specsegs, + voids, + showauto, + auto, + border, + null, + genflags, + showpref, + prefseg, + asmtype, + baseaddr, + xrefs, + binpref, + cmtflag, + nametype, + showbads, + prefflag, + packbase, + asciiflags, + listnames, + asciiprefs, + asciisernum, + asciizeroes, + tribyte_order, + mf, + org, + assume, + checkarg, + start_ss, + start_cs, + main, + short_dn, + long_dn, + datatypes, + strtype, + af2, + namelen, + margin, + lenxref, + lprefix, + lprefixlen, + compiler, + model, + sizeof_int, + sizeof_bool, + sizeof_enum, + sizeof_algn, + sizeof_short, + sizeof_long, + sizeof_llong, + change_counter, + sizeof_ldbl, + abiname, + abibits, + refcmts, + })) + } + + pub(crate) fn read_v2( + mut input: I, + is_64: bool, + magic_old: bool, + version: u16, + cpu: String, + ) -> Result { + // NOTE in this version parse_* functions are used + let genflags = Inffl::new(unpack_dw(&mut input)?)?; + let lflags = Lflg::new(unpack_dd(&mut input)?)?; + let database_change_count = unpack_dd(&mut input)?; + let filetype = FileType::from_value(unpack_dw(&mut input)?) 
+ .ok_or_else(|| anyhow!("Invalid FileType value"))?; + let ostype = unpack_dw(&mut input)?; + let apptype = unpack_dw(&mut input)?; + let asmtype = parse_u8(&mut input)?; + let specsegs = parse_u8(&mut input)?; + let af1 = unpack_dd(&mut input)?; + let af2 = unpack_dd(&mut input)?; + let af = Af::new(af1, af2)?; + let baseaddr = parse_word(&mut input, is_64)?; + let start_ss = parse_word(&mut input, is_64)?; + let start_cs = parse_word(&mut input, is_64)?; + let start_ip = parse_word(&mut input, is_64)?; + let start_ea = parse_word(&mut input, is_64)?; + let start_sp = parse_word(&mut input, is_64)?; + let main = parse_word(&mut input, is_64)?; + let min_ea = parse_word(&mut input, is_64)?; + let max_ea = parse_word(&mut input, is_64)?; + let omin_ea = parse_word(&mut input, is_64)?; + let omax_ea = parse_word(&mut input, is_64)?; + let lowoff = parse_word(&mut input, is_64)?; + let highoff = parse_word(&mut input, is_64)?; + let maxref = parse_word(&mut input, is_64)?; + let privrange_start_ea = parse_word(&mut input, is_64)?; + let privrange_end_ea = parse_word(&mut input, is_64)?; + let netdelta = parse_word(&mut input, is_64)?; + let xrefnum = parse_u8(&mut input)?; + let type_xrefnum = parse_u8(&mut input)?; + let refcmtnum = parse_u8(&mut input)?; + let xrefflag = XRef::new(parse_u8(&mut input)?)?; + let max_autoname_len = unpack_dw(&mut input)?; + + if magic_old { + let _unknown: [u8; 17] = bincode::deserialize_from(&mut input)?; + } + + let nametype = parse_u8(&mut input)?; + let nametype = NameType::new(nametype).ok_or_else(|| anyhow!("Invalid NameType value"))?; + let short_demnames = unpack_dd(&mut input)?; + let long_demnames = unpack_dd(&mut input)?; + let demnames = DemName::new(parse_u8(&mut input)?)?; + let listnames = ListName::new(parse_u8(&mut input)?)?; + let indent = parse_u8(&mut input)?; + let cmt_ident = parse_u8(&mut input)?; + let margin = unpack_dw(&mut input)?; + let lenxref = unpack_dw(&mut input)?; + let outflags = 
OutputFlags::new(unpack_dd(&mut input)?)?; + let cmtflg = CommentOptions::new(parse_u8(&mut input)?); + let limiter = DelimiterOptions::new(parse_u8(&mut input)?)?; + let bin_prefix_size = unpack_dw(&mut input)?; + let prefflag = LinePrefixOptions::new(parse_u8(&mut input)?)?; + let strlit_flags = StrLiteralFlags::new(parse_u8(&mut input)?)?; + let strlit_break = parse_u8(&mut input)?; + let strlit_zeroes = parse_u8(&mut input)?; + let strtype = unpack_dd(&mut input)?; + + // TODO read the len and the ignore it? + let strlit_pref_len = parse_u8(&mut input)?; + let strlit_pref_len = if magic_old { 16 } else { strlit_pref_len }; + let mut strlit_pref = vec![0; strlit_pref_len.into()]; + input.read_exact(&mut strlit_pref)?; + let strlit_pref = String::from_utf8(strlit_pref)?; + + let strlit_sernum = parse_word(&mut input, is_64)?; + let datatypes = parse_word(&mut input, is_64)?; + let cc_id = Compiler::from_value(parse_u8(&mut input)?) + .ok_or_else(|| anyhow!("invalid Compiler ID Value"))?; + let cc_cm = parse_u8(&mut input)?; + let cc_size_i = parse_u8(&mut input)?; + let cc_size_b = parse_u8(&mut input)?; + let cc_size_e = parse_u8(&mut input)?; + let cc_defalign = parse_u8(&mut input)?; + let cc_size_s = parse_u8(&mut input)?; + let cc_size_l = parse_u8(&mut input)?; + let cc_size_ll = parse_u8(&mut input)?; + let cc_size_ldbl = parse_u8(&mut input)?; + let abibits = AbiOptions::new(unpack_dd(&mut input)?)?; + let appcall_options = unpack_dd(&mut input)?; + + Ok(IDBParam::V2(IDBParam2 { + version, + cpu, + genflags, + lflags, + database_change_count, + filetype, + ostype, + apptype, + asmtype, + specsegs, + af, + baseaddr, + start_ss, + start_cs, + start_ip, + start_ea, + start_sp, + main, + min_ea, + max_ea, + omin_ea, + omax_ea, + lowoff, + highoff, + maxref, + privrange_start_ea, + privrange_end_ea, + netdelta, + xrefnum, + type_xrefnum, + refcmtnum, + xrefflag, + max_autoname_len, + nametype, + short_demnames, + long_demnames, + demnames, + listnames, + 
indent, + cmt_ident, + margin, + lenxref, + outflags, + cmtflg, + limiter, + bin_prefix_size, + prefflag, + strlit_flags, + strlit_break, + strlit_zeroes, + strtype, + strlit_pref, + strlit_sernum, + datatypes, + cc_id, + cc_cm, + cc_size_i, + cc_size_b, + cc_size_e, + cc_defalign, + cc_size_s, + cc_size_l, + cc_size_ll, + cc_size_ldbl, + abibits, + appcall_options, + })) + } +} + +/// General idainfo flags +#[derive(Debug, Clone, Copy)] +pub struct Inffl(u8); +impl Inffl { + fn new(value: u16) -> Result { + ensure!(value < 0x100, "Invalid INFFL flag"); + // TODO check for unused flags? + Ok(Self(value as u8)) + } + + /// Autoanalysis is enabled? + pub fn is_auto_analysis_enabled(&self) -> bool { + self.0 & 0x01 != 0 + } + /// May use constructs not supported by the target assembler + pub fn maybe_not_supported(&self) -> bool { + self.0 & 0x02 != 0 + } + /// loading an idc file that contains database info + pub fn is_database_info_in_idc(&self) -> bool { + self.0 & 0x04 != 0 + } + /// do not store user info in the database + pub fn is_user_info_not_in_database(&self) -> bool { + self.0 & 0x08 != 0 + } + /// (internal) temporary interdiction to modify the database + pub fn is_read_only(&self) -> bool { + self.0 & 0x10 != 0 + } + /// check manual operands? (unused) + pub fn is_manual_operands(&self) -> bool { + self.0 & 0x20 != 0 + } + /// allow non-matched operands? (unused) + pub fn is_non_matched_operands(&self) -> bool { + self.0 & 0x40 != 0 + } + /// currently using graph options + pub fn is_using_graph(&self) -> bool { + self.0 & 0x80 != 0 + } +} + +#[derive(Debug, Clone, Copy)] +pub struct Lflg(u16); +impl Lflg { + fn new(value: u32) -> Result { + ensure!(value < 0x1000, "Invalid LFLG flag"); + Ok(Self(value as u16)) + } + + /// decode floating point processor instructions? + pub fn is_decode_float(&self) -> bool { + self.0 & 0x0001 != 0 + } + /// 32-bit program (or higher)? 
+ pub fn is_program_32b_or_bigger(&self) -> bool { + self.0 & 0x0002 != 0 + } + /// 64-bit program? + pub fn is_program_64b(&self) -> bool { + self.0 & 0x0004 != 0 + } + /// Is dynamic library? + pub fn is_dyn_lib(&self) -> bool { + self.0 & 0x0008 != 0 + } + /// treat ::REF_OFF32 as 32-bit offset for 16bit segments (otherwise try SEG16:OFF16) + pub fn is_flat_off32(&self) -> bool { + self.0 & 0x0010 != 0 + } + /// Byte order: is MSB first? + pub fn is_big_endian(&self) -> bool { + self.0 & 0x0020 != 0 + } + /// Bit order of wide bytes: high byte first? + pub fn is_wide_byte_first(&self) -> bool { + self.0 & 0x0040 != 0 + } + /// do not store input full path in debugger process options + pub fn is_dbg_non_fullpath(&self) -> bool { + self.0 & 0x0080 != 0 + } + /// memory snapshot was taken? + pub fn is_snapshot_taken(&self) -> bool { + self.0 & 0x0100 != 0 + } + /// pack the database? + pub fn is_database_pack(&self) -> bool { + self.0 & 0x0200 != 0 + } + /// compress the database? + pub fn is_database_compress(&self) -> bool { + self.0 & 0x0400 != 0 + } + /// is kernel mode binary? 
+ pub fn is_kernel_mode(&self) -> bool { + self.0 & 0x0800 != 0 + } +} + +#[derive(Debug, Clone, Copy)] +pub struct Af(u32, u8); +impl Af { + fn new(value1: u32, value2: u32) -> Result { + ensure!(value2 < 0x8, "Invalid AF2 value {value2:#x}"); + Ok(Self(value1, value2 as u8)) + } + + /// Trace execution flow + pub fn is_code(&self) -> bool { + self.0 & 0x00000001 != 0 + } + /// Mark typical code sequences as code + pub fn is_markcode(&self) -> bool { + self.0 & 0x00000002 != 0 + } + /// Locate and create jump tables + pub fn is_jumptbl(&self) -> bool { + self.0 & 0x00000004 != 0 + } + /// Control flow to data segment is ignored + pub fn is_purdat(&self) -> bool { + self.0 & 0x00000008 != 0 + } + /// Analyze and create all xrefs + pub fn is_used(&self) -> bool { + self.0 & 0x00000010 != 0 + } + /// Delete instructions with no xrefs + pub fn is_unk(&self) -> bool { + self.0 & 0x00000020 != 0 + } + + /// Create function if data xref data->code32 exists + pub fn is_procptr(&self) -> bool { + self.0 & 0x00000040 != 0 + } + /// Create functions if call is present + pub fn is_proc(&self) -> bool { + self.0 & 0x00000080 != 0 + } + /// Create function tails + pub fn is_ftail(&self) -> bool { + self.0 & 0x00000100 != 0 + } + /// Create stack variables + pub fn is_lvar(&self) -> bool { + self.0 & 0x00000200 != 0 + } + /// Propagate stack argument information + pub fn is_stkarg(&self) -> bool { + self.0 & 0x00000400 != 0 + } + /// Propagate register argument information + pub fn is_regarg(&self) -> bool { + self.0 & 0x00000800 != 0 + } + /// Trace stack pointer + pub fn is_trace(&self) -> bool { + self.0 & 0x00001000 != 0 + } + /// Perform full SP-analysis. 
+ pub fn is_versp(&self) -> bool { + self.0 & 0x00002000 != 0 + } + /// Perform 'no-return' analysis + pub fn is_anoret(&self) -> bool { + self.0 & 0x00004000 != 0 + } + /// Try to guess member function types + pub fn is_memfunc(&self) -> bool { + self.0 & 0x00008000 != 0 + } + /// Truncate functions upon code deletion + pub fn is_trfunc(&self) -> bool { + self.0 & 0x00010000 != 0 + } + + /// Create string literal if data xref exists + pub fn is_strlit(&self) -> bool { + self.0 & 0x00020000 != 0 + } + /// Check for unicode strings + pub fn is_chkuni(&self) -> bool { + self.0 & 0x00040000 != 0 + } + /// Create offsets and segments using fixup info + pub fn is_fixup(&self) -> bool { + self.0 & 0x00080000 != 0 + } + /// Create offset if data xref to seg32 exists + pub fn is_drefoff(&self) -> bool { + self.0 & 0x00100000 != 0 + } + /// Convert 32bit instruction operand to offset + pub fn is_immoff(&self) -> bool { + self.0 & 0x00200000 != 0 + } + /// Automatically convert data to offsets + pub fn is_datoff(&self) -> bool { + self.0 & 0x00400000 != 0 + } + + /// Use flirt signatures + pub fn is_flirt(&self) -> bool { + self.0 & 0x00800000 != 0 + } + /// Append a signature name comment for recognized anonymous library functions + pub fn is_sigcmt(&self) -> bool { + self.0 & 0x01000000 != 0 + } + /// Allow recognition of several copies of the same function + pub fn is_sigmlt(&self) -> bool { + self.0 & 0x02000000 != 0 + } + /// Automatically hide library functions + pub fn is_hflirt(&self) -> bool { + self.0 & 0x04000000 != 0 + } + + /// Rename jump functions as j_... + pub fn is_jfunc(&self) -> bool { + self.0 & 0x08000000 != 0 + } + /// Rename empty functions as nullsub_... 
+ pub fn is_nullsub(&self) -> bool { + self.0 & 0x10000000 != 0 + } + + /// Coagulate data segs at the final pass + pub fn is_dodata(&self) -> bool { + self.0 & 0x20000000 != 0 + } + /// Coagulate code segs at the final pass + pub fn is_docode(&self) -> bool { + self.0 & 0x40000000 != 0 + } + /// Final pass of analysis + pub fn is_final(&self) -> bool { + self.0 & 0x80000000 != 0 + } + + /// Handle EH information + pub fn is_doeh(&self) -> bool { + self.1 & 0x1 != 0 + } + /// Handle RTTI information + pub fn is_dortti(&self) -> bool { + self.1 & 0x2 != 0 + } + /// Try to combine several instructions + pub fn is_macro(&self) -> bool { + self.1 & 0x4 != 0 + } +} + +#[derive(Debug, Clone, Copy)] +pub struct XRef(u8); +impl XRef { + fn new(value: u8) -> Result { + ensure!(value < 0x10, "Invalid XRef flag"); + Ok(Self(value)) + } + /// show segments in xrefs? + pub fn is_segxrf(&self) -> bool { + self.0 & 0x01 != 0 + } + /// show xref type marks? + pub fn is_xrfmrk(&self) -> bool { + self.0 & 0x02 != 0 + } + /// show function offsets? + pub fn is_xrffnc(&self) -> bool { + self.0 & 0x04 != 0 + } + /// show xref values? 
(otherwise-"...") + pub fn is_xrfval(&self) -> bool { + self.0 & 0x08 != 0 + } +} + +#[derive(Debug, Clone, Copy)] +pub enum NameType { + RelOff, + PtrOff, + NamOff, + RelEa, + PtrEa, + NamEa, + Ea, + Ea4, + Ea8, + Short, + Serial, +} + +// InnerRef: 8e6e20 +impl NameType { + fn new(value: u8) -> Option { + Some(match value { + 0 => Self::RelOff, + 1 => Self::PtrOff, + 2 => Self::NamOff, + 3 => Self::RelEa, + 4 => Self::PtrEa, + 5 => Self::NamEa, + 6 => Self::Ea, + 7 => Self::Ea4, + 8 => Self::Ea8, + 9 => Self::Short, + 10 => Self::Serial, + _ => return None, + }) + } +} + +// InnerRef: 8e6de0 +#[derive(Debug, Clone, Copy)] +pub enum DemNamesForm { + /// display demangled names as comments + Cmnt, + /// display demangled names as regular names + Name, + /// don't display demangled names + None, +} + +#[derive(Clone, Copy, Debug)] +pub struct DemName(u8); +impl DemName { + fn new(value: u8) -> Result { + ensure!(value < 0x10, "Invalid DemName flag"); + ensure!(value != 0x3); + Ok(Self(value)) + } + pub fn name_form(&self) -> DemNamesForm { + match self.0 & 0x3 { + 0 => DemNamesForm::Cmnt, + 1 => DemNamesForm::Name, + 2 => DemNamesForm::None, + _ => unreachable!(), + } + } + + /// assume gcc3 names (valid for gnu compiler) + pub fn is_gcc3(&self) -> bool { + self.0 & 0x4 != 0 + } + + /// override type info + pub fn override_type_info(&self) -> bool { + self.0 & 0x8 != 0 + } +} + +#[derive(Clone, Copy, Debug)] +pub struct ListName(u8); +impl ListName { + fn new(value: u8) -> Result { + ensure!(value < 0x10, "Invalid ListName flag"); + Ok(Self(value)) + } + /// include normal names + pub fn is_normal(&self) -> bool { + self.0 & 0x01 != 0 + } + /// include public names + pub fn is_public(&self) -> bool { + self.0 & 0x02 != 0 + } + /// include autogenerated names + pub fn is_auto(&self) -> bool { + self.0 & 0x04 != 0 + } + /// include weak names + pub fn is_weak(&self) -> bool { + self.0 & 0x08 != 0 + } +} + +#[derive(Clone, Copy, Debug)] +pub struct OutputFlags(u16); 
+impl OutputFlags { + fn new(value: u32) -> Result { + ensure!(value < 0x800); + Ok(Self(value as u16)) + } + /// Display void marks? + pub fn show_void(&self) -> bool { + self.0 & 0x002 != 0 + } + /// Display autoanalysis indicator? + pub fn show_auto(&self) -> bool { + self.0 & 0x004 != 0 + } + /// Generate empty lines? + pub fn gen_null(&self) -> bool { + self.0 & 0x010 != 0 + } + /// Show line prefixes? + pub fn show_pref(&self) -> bool { + self.0 & 0x020 != 0 + } + /// line prefixes with segment name? + pub fn is_pref_seg(&self) -> bool { + self.0 & 0x040 != 0 + } + /// generate leading zeroes in numbers + pub fn gen_lzero(&self) -> bool { + self.0 & 0x080 != 0 + } + /// Generate 'org' directives? + pub fn gen_org(&self) -> bool { + self.0 & 0x100 != 0 + } + /// Generate 'assume' directives? + pub fn gen_assume(&self) -> bool { + self.0 & 0x200 != 0 + } + /// Generate try/catch directives? + pub fn gen_tryblks(&self) -> bool { + self.0 & 0x400 != 0 + } +} + +#[derive(Clone, Copy, Debug)] +pub struct CommentOptions(u8); +impl CommentOptions { + fn new(value: u8) -> Self { + Self(value) + } + /// show repeatable comments? + pub fn is_rptcmt(&self) -> bool { + self.0 & 0x01 != 0 + } + /// comment all lines? 
+ pub fn is_allcmt(&self) -> bool { + self.0 & 0x02 != 0 + } + /// no comments at all + pub fn is_nocmt(&self) -> bool { + self.0 & 0x04 != 0 + } + /// show source line numbers + pub fn is_linnum(&self) -> bool { + self.0 & 0x08 != 0 + } + /// testida.idc is running + pub fn is_testmode(&self) -> bool { + self.0 & 0x10 != 0 + } + /// show hidden instructions + pub fn is_shhid_item(&self) -> bool { + self.0 & 0x20 != 0 + } + /// show hidden functions + pub fn is_shhid_func(&self) -> bool { + self.0 & 0x40 != 0 + } + /// show hidden segments + pub fn is_shhid_segm(&self) -> bool { + self.0 & 0x80 != 0 + } +} + +#[derive(Clone, Copy, Debug)] +pub struct DelimiterOptions(u8); +impl DelimiterOptions { + fn new(value: u8) -> Result { + ensure!(value < 0x08); + Ok(Self(value)) + } + /// thin borders + pub fn is_thin(&self) -> bool { + self.0 & 0x01 != 0 + } + /// thick borders + pub fn is_thick(&self) -> bool { + self.0 & 0x02 != 0 + } + /// empty lines at the end of basic blocks + pub fn is_empty(&self) -> bool { + self.0 & 0x04 != 0 + } +} + +#[derive(Clone, Copy, Debug)] +pub struct LinePrefixOptions(u8); +impl LinePrefixOptions { + fn new(value: u8) -> Result { + ensure!(value < 0x10, "Invalid LinePrefixOptions"); + Ok(Self(value)) + } + /// show segment addresses? + pub fn is_segadr(&self) -> bool { + self.0 & 0x01 != 0 + } + /// show function offsets? + pub fn is_fncoff(&self) -> bool { + self.0 & 0x02 != 0 + } + /// show stack pointer? + pub fn is_stack(&self) -> bool { + self.0 & 0x04 != 0 + } + /// truncate instruction bytes if they would need more than 1 line + pub fn is_pfxtrunc(&self) -> bool { + self.0 & 0x08 != 0 + } +} + +#[derive(Clone, Copy, Debug)] +pub struct StrLiteralFlags(u8); +impl StrLiteralFlags { + fn new(value: u8) -> Result { + ensure!(value < 0x40); + Ok(Self(value)) + } + /// generate names? + pub fn is_gen(&self) -> bool { + self.0 & 0x01 != 0 + } + /// names have 'autogenerated' bit? 
+ pub fn is_auto(&self) -> bool { + self.0 & 0x02 != 0 + } + /// generate serial names? + pub fn is_serial(&self) -> bool { + self.0 & 0x04 != 0 + } + /// unicode strings are present? + pub fn is_unicode(&self) -> bool { + self.0 & 0x08 != 0 + } + /// generate auto comment for string references? + pub fn is_comment(&self) -> bool { + self.0 & 0x10 != 0 + } + /// preserve case of strings for identifiers + pub fn is_savecase(&self) -> bool { + self.0 & 0x20 != 0 + } +} + +#[derive(Clone, Copy, Debug)] +pub struct AbiOptions(u16); +impl AbiOptions { + fn new(value: u32) -> Result { + ensure!(value < 0x400); + Ok(Self(value as u16)) + } + /// 4 byte alignment for 8byte scalars (__int64/double) inside structures? + pub fn is_8align4(&self) -> bool { + self.0 & 0x001 != 0 + } + /// do not align stack arguments to stack slots + pub fn is_pack_stkargs(&self) -> bool { + self.0 & 0x002 != 0 + } + /// use natural type alignment for argument if the alignment exceeds native word size. + /// (e.g. __int64 argument should be 8byte aligned on some 32bit platforms) + pub fn is_bigarg_align(&self) -> bool { + self.0 & 0x004 != 0 + } + /// long double arguments are passed on stack + pub fn is_stack_ldbl(&self) -> bool { + self.0 & 0x008 != 0 + } + /// varargs are always passed on stack (even when there are free registers) + pub fn is_stack_varargs(&self) -> bool { + self.0 & 0x010 != 0 + } + /// use the floating-point register set + pub fn is_hard_float(&self) -> bool { + self.0 & 0x020 != 0 + } + /// compiler/abi were set by user flag and require SETCOMP_BY_USER flag to be changed + pub fn is_set_by_user(&self) -> bool { + self.0 & 0x040 != 0 + } + /// use gcc layout for udts (used for mingw) + pub fn is_gcc_layout(&self) -> bool { + self.0 & 0x080 != 0 + } + /// register arguments are mapped to stack area (and consume stack slots) + pub fn is_map_stkargs(&self) -> bool { + self.0 & 0x100 != 0 + } + /// use natural type alignment for an argument even if its alignment exceeds double 
+ /// native word size (the default is to use double word max). + /// e.g. if this bit is set, __int128 has 16-byte alignment. + /// This bit is not used by ida yet + pub fn is_hugearg_align(&self) -> bool { + self.0 & 0x200 != 0 + } +} + +// InnerRef: 8e6ee0 +#[derive(Debug, Clone)] +pub enum FileType { + Raw, + MsdosDriver, + Ne, + IntelHex, + Mex, + Lx, + Le, + Nlm, + Coff, + Pe, + Omf, + RRecords, + Zip, + Omflib, + Ar, + LoaderSpecific, + Elf, + W32run, + Aout, + Palmpilot, + MsdosExe, + MsdosCom, + Aixar, + Macho, + Psxobj, +} + +impl FileType { + fn from_value(value: u16) -> Option { + Some(match value { + 0x2 => Self::Raw, + 0x3 => Self::MsdosDriver, + 0x4 => Self::Ne, + 0x5 => Self::IntelHex, + 0x6 => Self::Mex, + 0x7 => Self::Lx, + 0x8 => Self::Le, + 0x9 => Self::Nlm, + 0xA => Self::Coff, + 0xB => Self::Pe, + 0xC => Self::Omf, + 0xD => Self::RRecords, + 0xE => Self::Zip, + 0xF => Self::Omflib, + 0x10 => Self::Ar, + 0x11 => Self::LoaderSpecific, + 0x12 => Self::Elf, + 0x13 => Self::W32run, + 0x14 => Self::Aout, + 0x15 => Self::Palmpilot, + 0x16 => Self::MsdosExe, + 0x17 => Self::MsdosCom, + 0x18 => Self::Aixar, + 0x19 => Self::Macho, + 0x1A => Self::Psxobj, + _ => return None, + }) + } +} + +// InnerRef: 8e6cc0 +#[derive(Debug, Clone)] +pub enum Compiler { + Unknown, + VisualStudio, + Borland, + Watcom, + Gnu, + VisualAge, + Delphi, +} + +impl Compiler { + pub fn from_value(value: u8) -> Option { + Some(match value { + 0x0 => Self::Unknown, + 0x1 => Self::VisualStudio, + 0x2 => Self::Borland, + 0x3 => Self::Watcom, + 0x6 => Self::Gnu, + 0x7 => Self::VisualAge, + 0x8 => Self::Delphi, + _ => return None, + }) + } +} + +#[derive(Debug, Clone)] +enum ID0TreeEntry { + Index { + preceeding: Box, + index: Vec, + }, + Leaf(Vec), +} + +#[derive(Debug, Clone)] +struct ID0TreeIndex { + page: Box, + key: Vec, + value: Vec, +} + +#[derive(Debug, Clone)] +enum ID0TreeEntrRaw { + Index { + preceeding: NonZeroU32, + entries: Vec, + }, + Leaf(Vec), +} + +#[derive(Debug, 
Clone)] +struct ID0TreeIndexRaw { + page: Option, + key: Vec, + value: Vec, +} + +impl ID0TreeEntrRaw { + fn read(page: &[u8], header: &ID0Header) -> Result { + match header.version { + ID0Version::V15 => Self::read_xx( + page, + header, + 4, + Self::header_4, + Self::index_header_4, + Self::leaf_header_v15, + Self::index_value_v1x, + Self::leaf_value_v1x, + Self::freeptr_v1x, + ), + ID0Version::V16 => Self::read_xx( + page, + header, + 6, + Self::header_6, + Self::index_header_6, + Self::leaf_header_v16, + Self::index_value_v1x, + Self::leaf_value_v1x, + Self::freeptr_v1x, + ), + ID0Version::V20 => Self::read_xx( + page, + header, + 6, + Self::header_6, + Self::index_header_6, + Self::leaf_header_v20, + Self::index_value_v20, + Self::leaf_value_v20, + Self::freeptr_v20, + ), + } + } + + #[allow(clippy::type_complexity)] + #[allow(clippy::too_many_arguments)] + fn read_xx( + page: &[u8], + id0_header: &ID0Header, + entry_len: u16, + header: fn(&mut Cursor<&[u8]>) -> Result<(Option, u16)>, + index_header: fn(&mut Cursor<&[u8]>) -> Result<(Option, u16)>, + leaf_header: fn(&mut Cursor<&[u8]>) -> Result<(u16, u16)>, + index_value: fn(&mut Cursor<&[u8]>) -> Result<(Vec, Vec)>, + leaf_value: fn(&mut Cursor<&[u8]>) -> Result<(Vec, Vec)>, + freeptr: fn(&mut Cursor<&[u8]>) -> Result, + ) -> Result { + let mut input = Cursor::new(page); + let (preceeding, count) = header(&mut input)?; + let min_data_pos = entry_len + .checked_mul(count + 2) + .ok_or_else(|| anyhow!("Invalid number of entries"))?; + ensure!(min_data_pos <= id0_header.page_size); + + let mut data_offsets = (entry_len..).step_by(entry_len.into()); + let entry_offsets = (&mut data_offsets).take(count.into()); + let entry = if let Some(preceeding) = preceeding { + // index + let entries = entry_offsets + .map(|offset| { + input.seek(SeekFrom::Start(offset.into())).unwrap(); + let (page, recofs) = index_header(&mut input)?; + ensure!( + recofs >= min_data_pos, + "Invalid recofs value {recofs} >= {min_data_pos}" + 
); + ensure!(recofs < id0_header.page_size); + input.seek(SeekFrom::Start(recofs.into())).unwrap(); + let (key, value) = index_value(&mut input)?; + Ok(ID0TreeIndexRaw { page, key, value }) + }) + .collect::, _>>()?; + ID0TreeEntrRaw::Index { + preceeding, + entries, + } + } else { + // leaf + // keys are usually very similar to one another, so it reuses the last key + // value to build the next + let mut last_key = Vec::new(); + let entry = entry_offsets + .map(|offset| { + input.seek(SeekFrom::Start(offset.into())).unwrap(); + let (indent, recofs) = leaf_header(&mut input)?; + if recofs == 0 { + // TODO this only happen in deleted entries? + // TODO have an option to diferenciate? + return Ok(ID0Entry { + key: vec![], + value: vec![], + }); + } + ensure!( + recofs >= min_data_pos, + "Invalid recofs value {recofs} >= {min_data_pos}" + ); + ensure!(recofs < id0_header.page_size); + input.seek(SeekFrom::Start(recofs.into())).unwrap(); + let (ext_key, value) = leaf_value(&mut input)?; + + // keys may reutilize the start of the last key + let reused_key = last_key + .get(..indent.into()) + .ok_or_else(|| anyhow!("key indent is too small"))?; + let key: Vec = reused_key.iter().copied().chain(ext_key).collect(); + + // update the last key + last_key.clear(); + last_key.extend(&key); + + Ok(ID0Entry { key, value }) + }) + .collect::, _>>()?; + ID0TreeEntrRaw::Leaf(entry) + }; + + input + .seek(SeekFrom::Start(data_offsets.next().unwrap().into())) + .unwrap(); + // TODO what is the freeptr? 
+ let _freeptr = freeptr(&mut input)?; + Ok(entry) + } + + fn header_4(input: &mut Cursor<&[u8]>) -> Result<(Option, u16)> { + let preceeding: u16 = bincode::deserialize_from(&mut *input)?; + let count: u16 = bincode::deserialize_from(input)?; + Ok((NonZeroU32::new(preceeding.into()), count)) + } + + fn header_6(input: &mut Cursor<&[u8]>) -> Result<(Option, u16)> { + let preceeding: u32 = bincode::deserialize_from(&mut *input)?; + let count: u16 = bincode::deserialize_from(input)?; + Ok((NonZeroU32::new(preceeding), count)) + } + + fn index_header_4(input: &mut Cursor<&[u8]>) -> Result<(Option, u16)> { + let page: u16 = bincode::deserialize_from(&mut *input)?; + let recofs: u16 = bincode::deserialize_from(input)?; + Ok((NonZeroU32::new(page.into()), recofs)) + } + + fn index_header_6(input: &mut Cursor<&[u8]>) -> Result<(Option, u16)> { + let page: u32 = bincode::deserialize_from(&mut *input)?; + let recofs: u16 = bincode::deserialize_from(input)?; + Ok((NonZeroU32::new(page), recofs)) + } + + fn leaf_header_v15(input: &mut Cursor<&[u8]>) -> Result<(u16, u16)> { + let indent: u8 = bincode::deserialize_from(&mut *input)?; + let _unknown1: u8 = bincode::deserialize_from(&mut *input)?; + let recofs: u16 = bincode::deserialize_from(input)?; + Ok((indent.into(), recofs)) + } + + fn leaf_header_v16(input: &mut Cursor<&[u8]>) -> Result<(u16, u16)> { + let indent: u8 = bincode::deserialize_from(&mut *input)?; + // TODO is this _unknown1 just part of indent (u16)? 
+ let _unknown1: u8 = bincode::deserialize_from(&mut *input)?; + let _unknown2: u16 = bincode::deserialize_from(&mut *input)?; + let recofs: u16 = bincode::deserialize_from(input)?; + Ok((indent.into(), recofs)) + } + + fn leaf_header_v20(input: &mut Cursor<&[u8]>) -> Result<(u16, u16)> { + let indent: u16 = bincode::deserialize_from(&mut *input)?; + let _unknown1: u16 = bincode::deserialize_from(&mut *input)?; + let recofs: u16 = bincode::deserialize_from(input)?; + Ok((indent, recofs)) + } + + fn index_value_v1x(input: &mut Cursor<&[u8]>) -> Result<(Vec, Vec)> { + let _unknown: u8 = bincode::deserialize_from(&mut *input)?; + let key = read_bytes_len_u16(&mut *input)?; + let value = read_bytes_len_u16(input)?; + Ok((key, value)) + } + + fn index_value_v20(input: &mut Cursor<&[u8]>) -> Result<(Vec, Vec)> { + let key = read_bytes_len_u16(&mut *input)?; + let value = read_bytes_len_u16(input)?; + Ok((key, value)) + } + + fn leaf_value_v1x(input: &mut Cursor<&[u8]>) -> Result<(Vec, Vec)> { + let _unknown: u8 = bincode::deserialize_from(&mut *input)?; + let key = read_bytes_len_u16(&mut *input)?; + let value = read_bytes_len_u16(input)?; + Ok((key, value)) + } + + fn leaf_value_v20(input: &mut Cursor<&[u8]>) -> Result<(Vec, Vec)> { + let key = read_bytes_len_u16(&mut *input)?; + let value = read_bytes_len_u16(input)?; + Ok((key, value)) + } + + fn freeptr_v1x(input: &mut Cursor<&[u8]>) -> Result { + let _unknown: u16 = bincode::deserialize_from(&mut *input)?; + let freeptr: u16 = bincode::deserialize_from(input)?; + Ok(freeptr) + } + + fn freeptr_v20(input: &mut Cursor<&[u8]>) -> Result { + let _unknown: u32 = bincode::deserialize_from(&mut *input)?; + let freeptr: u16 = bincode::deserialize_from(input)?; + Ok(freeptr) + } +} + +#[derive(Clone, Debug)] +pub struct IDBFileRegions { + pub start: u64, + pub end: u64, + pub eva: u64, +} + +impl IDBFileRegions { + fn read(key: &[u8], data: &[u8], version: u16, is_64: bool) -> Result { + let mut input = Cursor::new(data); + 
// TODO detect versions with more accuracy + let (start, end, eva) = match version { + ..700 => { + let start = read_word(&mut input, is_64)?; + let end = read_word(&mut input, is_64)?; + let rva: u32 = bincode::deserialize_from(&mut input)?; + (start, end, rva.into()) + } + 700.. => { + let start = parse_word(&mut input, is_64)?; + let end = start + .checked_add(parse_word(&mut input, is_64)?) + .ok_or_else(|| anyhow!("Overflow address in File Regions"))?; + let rva = parse_word(&mut input, is_64)?; + // TODO some may include an extra 0 byte at the end? + if let Ok(_unknown) = parse_word(&mut input, is_64) { + ensure!(_unknown == 0); + } + (start, end, rva) + } + }; + let key_offset = + parse_number(key, true, is_64).ok_or_else(|| anyhow!("Invalid IDB File Key Offset"))?; + ensure!(key_offset == start); + ensure!(input.position() == u64::try_from(data.len()).unwrap()); + Ok(Self { start, end, eva }) + } +} + +#[derive(Clone, Debug)] +pub enum FunctionsAndComments<'a> { + // It's just the name "$ funcs" + Name, + Function(IDBFunction), + Comment { address: u64, value: &'a str }, + RepeatableComment { address: u64, value: &'a str }, + Unknown { key: &'a [u8], value: &'a [u8] }, +} + +impl<'a> FunctionsAndComments<'a> { + fn read(key: &'a [u8], value: &'a [u8], is_64: bool) -> Result { + let [key_type, sub_key @ ..] 
= key else { + return Err(anyhow!("invalid Funcs subkey")); + }; + match *key_type { + b'N' => { + ensure!(parse_maybe_cstr(value) == Some("$ funcs")); + Ok(Self::Name) + } + b'S' => IDBFunction::read(sub_key, value, is_64).map(Self::Function), + b'C' => { + let address = parse_number(sub_key, true, is_64) + .ok_or_else(|| anyhow!("Invalid Comment address"))?; + parse_maybe_cstr(value) + .map(|value| Self::Comment { address, value }) + .ok_or_else(|| anyhow!("Invalid Comment string")) + } + b'R' => { + let address = parse_number(sub_key, true, is_64) + .ok_or_else(|| anyhow!("Invalid Repetable Comment address"))?; + parse_maybe_cstr(value) + .map(|value| Self::RepeatableComment { address, value }) + .ok_or_else(|| anyhow!("Invalid Repetable Comment string")) + } + // TODO find the meaning of "$ funcs" b'V' entries + _ => Ok(Self::Unknown { key, value }), + } + } +} + +#[derive(Clone, Debug)] +pub struct IDBFunction { + pub start: u64, + pub end: u64, + pub flags: u16, + pub extra: Option, +} + +#[derive(Clone, Debug)] +pub enum IDBFunctionExtra { + NonTail { + frame: u64, + frsize: u64, + frregs: u16, + argsize: u64, + }, + Tail { + /// offset of the function owner in relation to the function start + owner: i64, + refqty: u32, + }, +} + +impl IDBFunction { + fn read(key: &[u8], value: &[u8], is_64: bool) -> Result { + let key_address = parse_number(key, true, is_64) + .ok_or_else(|| anyhow!("Invalid IDB FileRefion Key Offset"))?; + let mut input = Cursor::new(value); + let start = parse_word(&mut input, is_64)?; + ensure!(key_address == start); + let end = start + .checked_add(parse_word(&mut input, is_64)?) 
+ .ok_or_else(|| anyhow!("Function range overflows"))?; + let flags = unpack_dw(&mut input)?; + + // CONST migrate this to mod flags + const FUNC_TAIL: u16 = 0x8000; + let extra = if flags & FUNC_TAIL != 0 { + Self::read_extra_tail(&mut input, is_64) + } else { + Self::read_extra_regular(&mut input, is_64) + }; + // TODO make sure all the data is parsed + //ensure!(input.position() == u64::try_from(data.len()).unwrap()); + Ok(Self { + start, + end, + flags, + extra, + }) + } + + // TODO make sure all the data is parsed + fn read_extra_regular(input: &mut impl Read, is_64: bool) -> Option { + let frame = parse_word(&mut *input, is_64).ok()?; + let frsize = parse_word(&mut *input, is_64).ok()?; + let frregs = unpack_dw(&mut *input).ok()?; + let argsize = parse_word(&mut *input, is_64).ok()?; + Some(IDBFunctionExtra::NonTail { + frame, + frsize, + frregs, + argsize, + }) + } + + fn read_extra_tail(input: &mut impl Read, is_64: bool) -> Option { + let owner = parse_word(&mut *input, is_64).ok()? as i64; + let refqty = unpack_dd(&mut *input).ok()?; + Some(IDBFunctionExtra::Tail { owner, refqty }) + } +} + +#[derive(Clone, Debug)] +pub enum EntryPoint<'a> { + Name, + Function { + key: u64, + address: u64, + }, + Ordinal { + key: u64, + ordinal: u64, + }, + ForwardedSymbol { + key: u64, + symbol: &'a str, + }, + FunctionName { + key: u64, + name: &'a str, + }, + Unknown { + key_type: u8, + key: u64, + value: &'a [u8], + }, +} + +impl<'a> EntryPoint<'a> { + fn read(key: &'a [u8], value: &'a [u8], is_64: bool) -> Result { + let [key_type, sub_key @ ..] 
= key else { + return Err(anyhow!("invalid Funcs subkey")); + }; + if *key_type == b'N' { + ensure!(parse_maybe_cstr(value) == Some("$ entry points")); + return Ok(Self::Name); + } + let key = read_word(sub_key, is_64)?; + match *key_type { + b'A' => read_word(value, is_64) + .map(|address| Self::Function { key, address }) + .map_err(|_| anyhow!("Invalid Function address")), + b'I' => read_word(value, is_64) + .map(|ordinal| Self::Ordinal { key, ordinal }) + .map_err(|_| anyhow!("Invalid Ordinal value")), + b'F' => parse_maybe_cstr(value) + .map(|symbol| Self::ForwardedSymbol { key, symbol }) + .ok_or_else(|| anyhow!("Invalid Forwarded symbol name")), + b'S' => parse_maybe_cstr(value) + .map(|name| Self::FunctionName { key, name }) + .ok_or_else(|| anyhow!("Invalid Function name")), + // TODO find the meaning of "$ funcs" b'V' entries + key_type => Ok(Self::Unknown { + key_type, + key, + value, + }), + } + } +} + +fn read_exact_or_nothing( + this: &mut R, + mut buf: &mut [u8], +) -> std::io::Result { + let len = buf.len(); + while !buf.is_empty() { + match this.read(buf) { + Ok(0) => break, + Ok(n) => { + buf = &mut buf[n..]; + } + Err(ref e) if e.kind() == ErrorKind::Interrupted => {} + Err(e) => return Err(e), + } + } + Ok(len - buf.len()) +} + +fn read_word(input: I, is_64: bool) -> Result { + if is_64 { + Ok(bincode::deserialize_from(input)?) + } else { + Ok(bincode::deserialize_from::<_, u32>(input).map(u64::from)?) + } +} + +fn parse_word(input: &mut I, is_64: bool) -> Result { + if is_64 { + unpack_dq(input) + } else { + unpack_dd(input).map(u64::from) + } +} + +fn parse_u8(input: &mut I) -> Result { + Ok(bincode::deserialize_from(&mut *input)?) +} + +// InnerRef: unpack_dw +// NOTE the orignal implementation never fails, if input hit EoF it a partial result or 0 +/// Reads 1 to 3 bytes. 
/// Decodes one packed 16-bit value, mirroring the reference `unpack_dw`.
///
/// The top bits of the first byte select how many bytes follow:
/// `0xxxxxxx` is a 7-bit value stored inline, `10xxxxxx` carries the high
/// 6 bits of a 14-bit value followed by one more byte, and `11xxxxxx` is a
/// marker followed by a full big-endian `u16`.
///
/// # Errors
/// Unlike the reference implementation, which never fails, this returns an
/// error when `input` ends before the value is complete.
fn unpack_dw<I: Read + ?Sized>(input: &mut I) -> Result<u16> {
    let [b1]: [u8; 1] = read_packed_bytes(&mut *input)?;
    match b1 {
        // 7 bit value
        // [0xxx xxxx]
        0x00..=0x7F => Ok(b1.into()),
        // 14 bit value
        // [10xx xxxx] xxxx xxxx
        0x80..=0xBF => {
            let [lo]: [u8; 1] = read_packed_bytes(&mut *input)?;
            Ok(u16::from_be_bytes([b1 & 0x3F, lo]))
        }
        // 16 bit value
        // [11XX XXXX] xxxx xxxx xxxx xxxx
        0xC0..=0xFF => {
            // NOTE the remaining 6 bits of the first byte seem to be ignored,
            // so they are deliberately not validated here.
            Ok(u16::from_be_bytes(read_packed_bytes(&mut *input)?))
        }
    }
}

// InnerRef: unpack_dd
// NOTE the original implementation never fails; if the input hits EoF it
// returns a partial result or 0.
/// Reads 1 to 5 bytes.
///
/// Decodes one packed 32-bit value. The top bits of the first byte select
/// the width: `0xxxxxxx` is a 7-bit value stored inline, `10xxxxxx` starts a
/// 14-bit value (one more byte), `110xxxxx` starts a 29-bit value (three
/// more bytes) and `111xxxxx` is a marker followed by a full big-endian
/// `u32`.
///
/// # Errors
/// Returns an error when `input` ends before the value is complete.
fn unpack_dd<I: Read + ?Sized>(input: &mut I) -> Result<u32> {
    let [b1]: [u8; 1] = read_packed_bytes(&mut *input)?;
    match b1 {
        // 7 bit value
        // [0xxx xxxx]
        0x00..=0x7F => Ok(b1.into()),
        // 14 bit value
        // [10xx xxxx] xxxx xxxx
        0x80..=0xBF => {
            let [lo]: [u8; 1] = read_packed_bytes(&mut *input)?;
            Ok(u32::from_be_bytes([0, 0, b1 & 0x3F, lo]))
        }
        // 29 bit value
        // [110x xxxx] xxxx xxxx xxxx xxxx xxxx xxxx
        0xC0..=0xDF => {
            let [b2, b3, b4]: [u8; 3] = read_packed_bytes(&mut *input)?;
            Ok(u32::from_be_bytes([b1 & 0x1F, b2, b3, b4]))
        }
        // 32 bit value
        // [111X XXXX] xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
        0xE0..=0xFF => {
            // NOTE the remaining 5 bits of the first byte seem to be ignored,
            // so they are deliberately not validated here.
            Ok(u32::from_be_bytes(read_packed_bytes(&mut *input)?))
        }
    }
}

/// Reads exactly `N` bytes from `input`, failing if the input ends early.
fn read_packed_bytes<I: Read + ?Sized, const N: usize>(
    input: &mut I,
) -> std::io::Result<[u8; N]> {
    let mut bytes = [0u8; N];
    input.read_exact(&mut bytes)?;
    Ok(bytes)
}

// InnerRef: unpack_dq
// NOTE the original implementation never fails; if the input hits EoF it
// returns a partial result or 0.
// Reads 2 to 10 bytes.
+fn unpack_dq(input: &mut I) -> Result { + let lo = unpack_dd(&mut *input)?; + let hi = unpack_dd(&mut *input)?; + Ok((u64::from(hi) << 32) | u64::from(lo)) +} + +// InnerRef: unpack_ds +// NOTE the orignal implementation never fails, if input hit EoF it a partial result or 0 +fn unpack_ds(input: &mut I) -> Result> { + let len = unpack_dd(&mut *input)?; + let mut result = vec![0; len.try_into().unwrap()]; + input.read_exact(&mut result)?; + Ok(result) +} + +fn parse_number(data: &[u8], big_endian: bool, is_64: bool) -> Option { + Some(match (data.len(), is_64, big_endian) { + (8, true, true) => u64::from_be_bytes(data.try_into().unwrap()), + (8, true, false) => u64::from_le_bytes(data.try_into().unwrap()), + (4, false, true) => u32::from_be_bytes(data.try_into().unwrap()).into(), + (4, false, false) => u32::from_le_bytes(data.try_into().unwrap()).into(), + _ => return None, + }) +} + +// parse a string that maybe is finilized with \x00 +fn parse_maybe_cstr(data: &[u8]) -> Option<&str> { + // find the end of the string + let end_pos = data.iter().position(|b| *b == 0).unwrap_or(data.len()); + // make sure there is no data after the \x00 + if data[end_pos..].iter().any(|b| *b != 0) { + return None; + } + core::str::from_utf8(&data[..end_pos]).ok() +} diff --git a/src/id1.rs b/src/id1.rs new file mode 100644 index 0000000..7ec9133 --- /dev/null +++ b/src/id1.rs @@ -0,0 +1,258 @@ +use anyhow::{anyhow, ensure, Result}; + +use std::io::{Cursor, Read}; +use std::ops::Range; + +use crate::{IDBHeader, IDBSectionCompression, VaVersion}; + +#[derive(Clone, Debug)] +pub struct ID1Section { + pub seglist: Vec, +} + +#[derive(Clone, Debug)] +pub struct SegInfo { + pub offset: u64, + pub data: Vec, + // TODO find a way to decode this data + _flags: Vec, +} + +impl ID1Section { + pub(crate) fn read( + input: &mut I, + header: &IDBHeader, + compress: IDBSectionCompression, + ) -> Result { + match compress { + IDBSectionCompression::None => Self::read_inner(input, header), + 
IDBSectionCompression::Zlib => { + let mut input = flate2::read::ZlibDecoder::new(input); + Self::read_inner(&mut input, header) + } + } + } + + fn read_inner(input: &mut I, header: &IDBHeader) -> Result { + // TODO pages are always 0x2000? + const PAGE_SIZE: usize = 0x2000; + let mut buf = vec![0; PAGE_SIZE]; + input.read_exact(&mut buf[..])?; + let mut header_page = Cursor::new(&buf); + let version = VaVersion::read(&mut header_page)?; + let (npages, seglist_raw) = match version { + VaVersion::Va0 | VaVersion::Va1 | VaVersion::Va2 | VaVersion::Va3 | VaVersion::Va4 => { + let nsegments: u16 = bincode::deserialize_from(&mut header_page)?; + let npages: u16 = bincode::deserialize_from(&mut header_page)?; + ensure!( + npages > 0, + "Invalid number of pages, net at least one for the header" + ); + // TODO section_size / npages == 0x2000 + + // TODO the reference code uses the magic version, should it use + // the version itself instead? + let seglist: Vec = if header.magic_version.is_64() { + (0..nsegments) + .map(|_| { + let start: u64 = bincode::deserialize_from(&mut header_page)?; + let end: u64 = bincode::deserialize_from(&mut header_page)?; + ensure!(start <= end); + let offset: u64 = bincode::deserialize_from(&mut header_page)?; + Ok(SegInfoVaNRaw { + address: start..end, + offset, + }) + }) + .collect::>()? + } else { + (0..nsegments) + .map(|_| { + let start: u32 = bincode::deserialize_from(&mut header_page)?; + let end: u32 = bincode::deserialize_from(&mut header_page)?; + ensure!(start <= end); + let offset: u32 = bincode::deserialize_from(&mut header_page)?; + Ok(SegInfoVaNRaw { + address: start.into()..end.into(), + offset: offset.into(), + }) + }) + .collect::>()? 
+ }; + (u32::from(npages), SegInfoRaw::VaN(seglist)) + } + VaVersion::VaX => { + let unknown_always3: u32 = bincode::deserialize_from(&mut header_page)?; + ensure!(unknown_always3 == 3); + let nsegments: u32 = bincode::deserialize_from(&mut header_page)?; + let unknown_always2048: u32 = bincode::deserialize_from(&mut header_page)?; + ensure!(unknown_always2048 == 2048); + let npages: u32 = bincode::deserialize_from(&mut header_page)?; + + let seglist: Vec> = (0..nsegments) + // TODO the reference code uses the magic version, should it use + // the version itself instead? + .map(|_| { + let (start, end) = match header.magic_version { + crate::IDBMagic::IDA0 | crate::IDBMagic::IDA1 => { + let startea: u32 = bincode::deserialize_from(&mut header_page)?; + let endea: u32 = bincode::deserialize_from(&mut header_page)?; + (startea.into(), endea.into()) + } + crate::IDBMagic::IDA2 => ( + bincode::deserialize_from(&mut header_page)?, + bincode::deserialize_from(&mut header_page)?, + ), + }; + ensure!(start <= end); + Ok(start..end) + }) + .collect::>()?; + (npages, SegInfoRaw::VaX(seglist)) + } + }; + // make sure the unused values a all zero + ensure!(buf[header_page.position().try_into().unwrap()..] 
+ .iter() + .all(|b| *b == 0)); + + // sort segments by address + let mut overlay_check = match &seglist_raw { + SegInfoRaw::VaN(segs) => segs.iter().map(|s| s.address.clone()).collect(), + SegInfoRaw::VaX(segs) => segs.clone(), + }; + overlay_check.sort_unstable_by_key(|s| s.start); + + // make sure segments don't overlap + let overlap = overlay_check.windows(2).any(|segs| { + let [seg1, seg2] = segs else { unreachable!() }; + seg1.end >= seg2.start + }); + ensure!(!overlap); + + // make sure the data fits the available pages + let required_size: u64 = overlay_check.iter().map(|s| (s.end - s.start) * 4).sum(); + let required_pages = required_size.div_ceil(u64::try_from(PAGE_SIZE).unwrap()); + // TODO if the extra data at the end of the section is identified, review replacing <= with == + // -1 because the first page is always the header + ensure!(required_pages <= u64::from(npages - 1)); + + // populated the seglist data using the pages + let seglist = match seglist_raw { + SegInfoRaw::VaN(mut segs) => { + // sort it by disk offset, so we can read one after the other + segs.sort_unstable_by_key(|s| s.offset); + let mut current_offset = u64::try_from(PAGE_SIZE).unwrap(); + segs.into_iter() + .map(|seg| { + // skip any gaps + match seg.offset.cmp(¤t_offset) { + std::cmp::Ordering::Less => return Err(anyhow!("invalid offset")), + std::cmp::Ordering::Greater => { + // TODO can be any deleted sector contains randon data? + // skip intermidiate bytes, also ensuring they are all zeros + ensure_all_bytes_are_zero( + input.take(seg.offset - current_offset), + &mut buf, + )?; + current_offset = seg.offset; + } + std::cmp::Ordering::Equal => {} + } + let len = seg.address.end - seg.address.start; + let (data, _flags) = split_flags_data(&mut *input, len)?; + current_offset += len * 4; + Ok(SegInfo { + offset: seg.address.start, + data, + _flags, + }) + }) + .collect::>()? 
+ } + SegInfoRaw::VaX(segs) => { + // the data for the segments are stored sequentialy in disk + segs.into_iter() + .map(|address| { + let (data, _flags) = + split_flags_data(&mut *input, address.end - address.start)?; + Ok(SegInfo { + offset: address.start, + data, + _flags, + }) + }) + .collect::>()? + } + }; + + //// ensure the rest of the data (page alignment) is just zeros + //ensure_all_bytes_are_zero(input, &mut buf)?; + // TODO sometimes there some extra data with unknown meaning, maybe it's just a + // deleted segment + ignore_bytes(input, &mut buf)?; + + Ok(Self { seglist }) + } +} + +#[derive(Clone, Debug)] +enum SegInfoRaw { + VaN(Vec), + VaX(Vec>), +} + +#[derive(Clone, Debug)] +struct SegInfoVaNRaw { + address: Range, + offset: u64, +} + +fn ensure_all_bytes_are_zero(mut input: I, buf: &mut [u8]) -> Result<()> { + loop { + match input.read(buf) { + // found EoF + Ok(0) => break, + // read something + Ok(n) => ensure!(&buf[..n].iter().all(|b| *b == 0)), + // ignore interrupts + Err(ref e) if matches!(e.kind(), std::io::ErrorKind::Interrupted) => {} + Err(e) => return Err(e.into()), + }; + } + Ok(()) +} + +fn ignore_bytes(mut input: I, buf: &mut [u8]) -> Result<()> { + loop { + match input.read(buf) { + // found EoF + Ok(0) => break, + // read something + Ok(_n) => {} + // ignore interrupts + Err(ref e) if matches!(e.kind(), std::io::ErrorKind::Interrupted) => {} + Err(e) => return Err(e.into()), + }; + } + Ok(()) +} + +fn split_flags_data(mut input: I, len: u64) -> Result<(Vec, Vec)> { + let len = usize::try_from(len).unwrap(); + let mut flags = vec![0u32; len]; + // SAFETY: don't worry &mut[u32] is compatible with &mut[u8] with len * 4 + input.read_exact(unsafe { + &mut *core::slice::from_raw_parts_mut(flags.as_mut_ptr() as *mut u8, len * 4) + })?; + // extract the bytes into other vector and leave the flags there + let data = flags + .iter_mut() + .map(|b| { + let value = (*b & 0xFF) as u8; + *b >>= 8; + value + }) + .collect(); + Ok((data, flags)) 
+} diff --git a/src/lib.rs b/src/lib.rs index 57e1082..611912f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,17 @@ +pub mod id0; +pub mod id1; +pub mod nam; pub mod til; use std::fmt::Debug; use std::io::{BufRead, Read, Seek, SeekFrom}; use std::num::NonZeroU64; +use id0::ID0Section; use serde::Deserialize; +use crate::id1::ID1Section; +use crate::nam::NamSection; use crate::til::section::TILSection; use anyhow::{anyhow, ensure, Result}; @@ -15,6 +21,15 @@ pub struct IDBParser { header: IDBHeader, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ID0Offset(NonZeroU64); + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ID1Offset(NonZeroU64); + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct NamOffset(NonZeroU64); + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct TILOffset(NonZeroU64); @@ -24,25 +39,63 @@ impl IDBParser { Ok(Self { input, header }) } + pub fn id0_section_offset(&self) -> Option { + self.header.id0_offset.map(ID0Offset) + } + + pub fn id1_section_offset(&self) -> Option { + self.header.id1_offset.map(ID1Offset) + } + + pub fn nam_section_offset(&self) -> Option { + self.header.nam_offset.map(NamOffset) + } + pub fn til_section_offset(&self) -> Option { self.header.til_offset.map(TILOffset) } + pub fn read_id0_section(&mut self, id0: ID0Offset) -> Result { + read_section(&mut self.input, &self.header, id0.0.get(), ID0Section::read) + } + + pub fn read_id1_section(&mut self, id1: ID1Offset) -> Result { + read_section(&mut self.input, &self.header, id1.0.get(), ID1Section::read) + } + + pub fn read_nam_section(&mut self, nam: NamOffset) -> Result { + read_section(&mut self.input, &self.header, nam.0.get(), NamSection::read) + } + pub fn read_til_section(&mut self, til: TILOffset) -> Result { - self.input.seek(SeekFrom::Start(til.0.get()))?; + read_section( + &mut self.input, + &self.header, + til.0.get(), + |input, _, compress| TILSection::read(input, compress), + ) + } + + #[cfg(test)] 
+ pub(crate) fn decompress_section( + &mut self, + offset: u64, + output: &mut impl std::io::Write, + ) -> Result<()> { + self.input.seek(SeekFrom::Start(offset))?; let section_header = IDBSectionHeader::read(&self.header, &mut self.input)?; // makes sure the reader doesn't go out-of-bounds let mut input = Read::take(&mut self.input, section_header.len); - let result = TILSection::read(&mut input, section_header.compress)?; - - // TODO seems its normal to have a few extra bytes at the end of the sector, maybe - // because of the compressions stuff, anyway verify that - ensure!( - input.limit() <= 16, - "Sector have more data then expected, left {} bytes", - input.limit() - ); - Ok(result) + match section_header.compress { + IDBSectionCompression::Zlib => { + let mut input = flate2::read::ZlibDecoder::new(input); + let _ = std::io::copy(&mut input, output)?; + } + IDBSectionCompression::None => { + let _ = std::io::copy(&mut input, output)?; + } + } + Ok(()) } #[cfg(test)] @@ -59,6 +112,36 @@ impl IDBParser { } } +fn read_section<'a, I, T, F>( + input: &'a mut I, + header: &IDBHeader, + offset: u64, + mut process: F, +) -> Result +where + I: BufRead + Seek, + F: FnMut(&mut std::io::Take<&'a mut I>, &IDBHeader, IDBSectionCompression) -> Result, +{ + input.seek(SeekFrom::Start(offset))?; + let section_header = IDBSectionHeader::read(header, &mut *input)?; + // makes sure the reader doesn't go out-of-bounds + let mut input = Read::take(input, section_header.len); + let result = process(&mut input, header, section_header.compress)?; + + // TODO seems its normal to have a few extra bytes at the end of the sector, maybe + // because of the compressions stuff, anyway verify that + ensure!( + if matches!(section_header.compress, IDBSectionCompression::None) { + input.limit() == 0 + } else { + input.limit() <= 16 + }, + "Sector have more data then expected, left {} bytes", + input.limit() + ); + Ok(result) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] enum IDBMagic 
{ IDA0, @@ -78,8 +161,19 @@ impl TryFrom<[u8; 4]> for IDBMagic { } } } + +impl IDBMagic { + fn is_64(&self) -> bool { + match self { + IDBMagic::IDA0 | IDBMagic::IDA1 => false, + IDBMagic::IDA2 => true, + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] enum IDBVersion { + // TODO add other versions V1, V4, V5, @@ -88,6 +182,7 @@ enum IDBVersion { #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] struct IDBHeader { + magic_version: IDBMagic, version: IDBVersion, id0_offset: Option, id1_offset: Option, @@ -155,22 +250,27 @@ struct IDBHeaderRaw { impl IDBHeader { pub fn read(input: &mut I) -> Result { let header_raw: IDBHeaderRaw = bincode::deserialize_from(&mut *input)?; - let _magic = IDBMagic::try_from(header_raw.magic)?; + let magic = IDBMagic::try_from(header_raw.magic)?; ensure!( header_raw.signature == 0xAABB_CCDD, "Invalid header signature {:#x}", header_raw.signature ); + // TODO associate header.version and magic? match header_raw.version { - 1 => Self::read_v1(&header_raw, input), - 4 => Self::read_v4(&header_raw, input), - 5 => Self::read_v5(&header_raw, input), - 6 => Self::read_v6(&header_raw, input), + 1 => Self::read_v1(&header_raw, magic, input), + 4 => Self::read_v4(&header_raw, magic, input), + 5 => Self::read_v5(&header_raw, magic, input), + 6 => Self::read_v6(&header_raw, magic, input), v => Err(anyhow!("Unable to parse version `{v}`")), } } - fn read_v1(header_raw: &IDBHeaderRaw, input: I) -> Result { + fn read_v1( + header_raw: &IDBHeaderRaw, + magic: IDBMagic, + input: I, + ) -> Result { #[derive(Debug, Deserialize)] struct V1Raw { id2_offset: u32, @@ -186,6 +286,7 @@ impl IDBHeader { ensure!(v1_raw.unk38_zeroed == [0; 6], "unk38 is not zeroed"); Ok(Self { + magic_version: magic, version: IDBVersion::V1, id0_offset: NonZeroU64::new(header_raw.offsets[0].into()), id1_offset: NonZeroU64::new(header_raw.offsets[1].into()), @@ -199,7 +300,11 @@ impl IDBHeader { }) } - fn read_v4(header_raw: &IDBHeaderRaw, input: I) -> Result { + fn 
read_v4( + header_raw: &IDBHeaderRaw, + magic: IDBMagic, + input: I, + ) -> Result { #[derive(Debug, Deserialize)] struct V4Raw { id2_offset: u32, @@ -223,6 +328,7 @@ impl IDBHeader { ensure!(v4_raw.unk5c_zeroed == [[0; 16]; 8], "unk5c is not zeroed"); Ok(Self { + magic_version: magic, version: IDBVersion::V4, id0_offset: NonZeroU64::new(header_raw.offsets[0].into()), id1_offset: NonZeroU64::new(header_raw.offsets[1].into()), @@ -236,7 +342,7 @@ impl IDBHeader { }) } - fn read_v5(header_raw: &IDBHeaderRaw, input: impl Read) -> Result { + fn read_v5(header_raw: &IDBHeaderRaw, magic: IDBMagic, input: impl Read) -> Result { #[derive(Debug, Deserialize)] struct V5Raw { nam_offset: u64, @@ -268,6 +374,7 @@ impl IDBHeader { ensure!(v5_raw.unk3_zeroed == [[0; 16]; 8], "unk3 is not zeroed"); Ok(Self { + magic_version: magic, version: IDBVersion::V5, id0_offset: NonZeroU64::new(id0_offset), id1_offset: NonZeroU64::new(id1_offset), @@ -282,7 +389,7 @@ impl IDBHeader { }) } - fn read_v6(header_raw: &IDBHeaderRaw, input: impl Read) -> Result { + fn read_v6(header_raw: &IDBHeaderRaw, magic: IDBMagic, input: impl Read) -> Result { #[derive(Debug, Deserialize)] struct V6Raw { nam_offset: u64, @@ -311,6 +418,7 @@ impl IDBHeader { ensure!(v6_raw.unk3_zeroed == [[0; 16]; 8], "unk3 is not zeroed"); Ok(Self { + magic_version: magic, version: IDBVersion::V6, id0_offset: NonZeroU64::new(id0_offset), id1_offset: NonZeroU64::new(id1_offset), @@ -364,6 +472,39 @@ impl IDBSectionHeader { } } +#[derive(Clone, Copy, Debug)] +enum VaVersion { + Va0, + Va1, + Va2, + Va3, + Va4, + VaX, +} + +impl VaVersion { + fn read(input: &mut I) -> Result { + let mut magic: [u8; 4] = [0; 4]; + input.read_exact(&mut magic)?; + match &magic[..] 
{ + b"Va0\x00" => Ok(Self::Va0), + b"Va1\x00" => Ok(Self::Va1), + b"Va2\x00" => Ok(Self::Va2), + b"Va3\x00" => Ok(Self::Va3), + b"Va4\x00" => Ok(Self::Va4), + b"VA*\x00" => Ok(Self::VaX), + other_magic => Err(anyhow!("Invalid Va magic: {other_magic:?}")), + } + } +} +fn read_bytes_len_u16(mut input: I) -> Result> { + let mut len = [0, 0]; + input.read_exact(&mut len)?; + let mut bytes = vec![0u8; u16::from_le_bytes(len).into()]; + input.read_exact(&mut bytes)?; + Ok(bytes) +} + fn read_bytes_len_u8(mut input: I) -> Result> { let mut len = [0]; input.read_exact(&mut len)?; @@ -439,6 +580,12 @@ mod test { use std::io::{BufReader, BufWriter}; use std::path::{Path, PathBuf}; + #[test] + fn parse_idb_param() { + let param = b"IDA\xbc\x02\x06metapc#\x8a\x03\x03\x02\x00\x00\x00\x00\xff_\xff\xff\xf7\x03\x00\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x0d\x00\x0d \x0d\x10\xff\xff\x00\x00\x00\xc0\x80\x00\x00\x00\x02\x02\x01\x0f\x0f\x06\xce\xa3\xbeg\xc6@\x00\x07\x00\x07\x10(FP\x87t\x09\x03\x00\x01\x13\x0a\x00\x00\x01a\x00\x07\x00\x13\x04\x04\x04\x00\x02\x04\x08\x00\x00\x00"; + let _parsed = id0::IDBParam::read(param, false).unwrap(); + } + #[test] fn parse_idbs() { let files = find_all("resources/idbs".as_ref(), &["idb".as_ref(), "i64".as_ref()]).unwrap(); @@ -446,20 +593,92 @@ mod test { println!("{}", filename.to_str().unwrap()); let file = BufReader::new(File::open(&filename).unwrap()); let mut parser = IDBParser::new(file).unwrap(); - let til = parser.read_til_section(parser.til_section_offset().unwrap()); + match parser.read_id0_section(parser.id0_section_offset().unwrap()) { + Err(error) => { + let mut output = BufWriter::new(File::create("/tmp/lasterror.id0").unwrap()); + parser + .decompress_section( + parser.id0_section_offset().unwrap().0.get(), + &mut output, + ) + .unwrap(); + panic!("id0 {error:?}") + } + Ok(id0) => { + let _segments: Vec<()> = id0 + .segments() + .unwrap() + .map(Result::unwrap) + 
.map(|_| ()) + .collect(); + + let _loader_names: Vec<()> = id0 + .loader_name() + .unwrap() + .map(Result::unwrap) + .map(|_| ()) + .collect(); + + let _root_info: Vec<()> = id0 + .root_info() + .unwrap() + .map(Result::unwrap) + .map(|_| ()) + .collect(); + + let _ida_info = id0.ida_info().unwrap(); + let version = match _ida_info { + id0::IDBParam::V1(x) => x.version, + id0::IDBParam::V2(x) => x.version, + }; + + let _file_regions: Vec<()> = id0 + .file_regions(version) + .unwrap() + .map(Result::unwrap) + .map(|_| ()) + .collect(); + + let _functions: Vec<()> = id0 + .functions_and_comments() + .unwrap() + .map(Result::unwrap) + .map(|_| ()) + .collect(); + + let _entry_points: Vec<()> = id0 + .entry_points() + .unwrap() + .map(Result::unwrap) + .map(|_| ()) + .collect(); + } + } + + if let Err(error) = parser.read_id1_section(parser.id1_section_offset().unwrap()) { + let mut output = BufWriter::new(File::create("/tmp/lasterror.id1").unwrap()); + parser + .decompress_section(parser.id1_section_offset().unwrap().0.get(), &mut output) + .unwrap(); + panic!("id1 {error:?}") + } + + if let Err(error) = parser.read_nam_section(parser.nam_section_offset().unwrap()) { + let mut output = BufWriter::new(File::create("/tmp/lasterror.nam").unwrap()); + parser + .decompress_section(parser.nam_section_offset().unwrap().0.get(), &mut output) + .unwrap(); + panic!("nam {error:?}") + } // if success, parse next file - let error = match til { - Ok(_til) => continue, - Err(e) => e, - }; - - //otherwise create a decompress version of the file for more testing - let mut output = BufWriter::new(File::create("/tmp/lasterror.til").unwrap()); - parser - .decompress_til_section(parser.til_section_offset().unwrap(), &mut output) - .unwrap(); - panic!("{error:?}") + if let Err(error) = parser.read_til_section(parser.til_section_offset().unwrap()) { + let mut output = BufWriter::new(File::create("/tmp/lasterror.til").unwrap()); + parser + 
.decompress_til_section(parser.til_section_offset().unwrap(), &mut output) + .unwrap(); + panic!("til {error:?}") + } } } diff --git a/src/nam.rs b/src/nam.rs new file mode 100644 index 0000000..f35d3ac --- /dev/null +++ b/src/nam.rs @@ -0,0 +1,130 @@ +use anyhow::{ensure, Result}; +use std::io::{Cursor, Read}; + +use crate::{IDBHeader, IDBSectionCompression, VaVersion}; + +#[derive(Debug, Clone)] +pub struct NamSection { + pub names: Vec, +} + +impl NamSection { + pub(crate) fn read( + input: &mut I, + header: &IDBHeader, + compress: IDBSectionCompression, + ) -> Result { + match compress { + IDBSectionCompression::None => Self::read_inner(input, header), + IDBSectionCompression::Zlib => { + let mut input = flate2::read::ZlibDecoder::new(input); + Self::read_inner(&mut input, header) + } + } + } + pub(crate) fn read_inner(input: &mut I, header: &IDBHeader) -> Result { + // NOTE 64 should be enougth for all version, if a new version is implemented + // review this value + const MAX_HEADER_LEN: usize = 64; + const DEFAULT_PAGE_SIZE: usize = 0x2000; + //assert!(MAX_HEADER_LEN < DEFAULT_PAGE_SIZE); + + let mut buf = vec![0; MAX_HEADER_LEN]; + input.read_exact(&mut buf[..])?; + let mut header_page = Cursor::new(&buf); + let version = VaVersion::read(&mut header_page)?; + + let (npages, nnames, pagesize) = match version { + VaVersion::Va0 | VaVersion::Va1 | VaVersion::Va2 | VaVersion::Va3 | VaVersion::Va4 => { + let always1: u16 = bincode::deserialize_from(&mut header_page)?; + ensure!(always1 == 1); + let npages: u64 = if header.magic_version.is_64() { + bincode::deserialize_from(&mut header_page)? + } else { + bincode::deserialize_from::<_, u32>(&mut header_page)?.into() + }; + let always0: u16 = bincode::deserialize_from(&mut header_page)?; + ensure!(always0 == 0); + let nnames: u64 = if header.magic_version.is_64() { + // TODO nnames / 2? Why? + bincode::deserialize_from::<_, u64>(&mut header_page)? 
/ 2 + } else { + bincode::deserialize_from::<_, u32>(&mut header_page)?.into() + }; + let pagesize: u32 = bincode::deserialize_from(&mut header_page)?; + ensure!(pagesize >= 64); + (npages, nnames, pagesize) + } + VaVersion::VaX => { + let always3: u32 = bincode::deserialize_from(&mut header_page)?; + ensure!(always3 == 3); + let one_or_zero: u32 = bincode::deserialize_from(&mut header_page)?; + ensure!([0, 1].contains(&one_or_zero)); + // TODO always2048 have some relation to pagesize? + let always2048: u32 = bincode::deserialize_from(&mut header_page)?; + ensure!(always2048 == 2048); + let npages: u64 = if header.magic_version.is_64() { + bincode::deserialize_from(&mut header_page)? + } else { + bincode::deserialize_from::<_, u32>(&mut header_page)?.into() + }; + let always0: u32 = bincode::deserialize_from(&mut header_page)?; + ensure!(always0 == 0); + let nnames: u64 = if header.magic_version.is_64() { + // TODO nnames / 2? Why? + bincode::deserialize_from::<_, u64>(&mut header_page)? 
/ 2 + } else { + bincode::deserialize_from::<_, u32>(&mut header_page)?.into() + }; + (npages, nnames, DEFAULT_PAGE_SIZE.try_into().unwrap()) + } + }; + ensure!( + npages >= 1, + "Invalid number of pages, need at least one page for the header" + ); + + // read the rest of the header page and ensure it's all zeros + buf.resize(pagesize.try_into().unwrap(), 0); + input.read_exact(&mut buf[64..])?; + ensure!(buf[64..].iter().all(|b| *b == 0)); + + let name_len = if header.magic_version.is_64() { 8 } else { 4 }; + // ensure pages dont break a name + ensure!(pagesize % name_len == 0); + // names fit inside the pages + let size_required = nnames * u64::from(name_len); + let available_data = (npages - 1) * u64::from(pagesize); + ensure!( + size_required <= available_data, + "there is no enough size required {size_required} <= {available_data}" + ); + + let mut names = Vec::with_capacity(nnames.try_into().unwrap()); + let mut current_nnames = nnames; + for _page in 1..npages { + input.read_exact(&mut buf)?; + let mut input = Cursor::new(&buf[..]); + loop { + if current_nnames == 0 { + break; + }; + let name = if header.magic_version.is_64() { + bincode::deserialize_from::<_, u64>(&mut input) + } else { + bincode::deserialize_from::<_, u32>(&mut input).map(u64::from) + }; + let Ok(name) = name else { + break; + }; + names.push(name); + current_nnames -= 1; + } + // if anything is left, make sure it's all zeros + ensure!(input.bytes().all(|b| b.unwrap() == 0)); + } + + assert!(current_nnames == 0); + Ok(Self { names }) + } +}