From ea8805f540d752cc1a2e8eaa5cba08a2e689f395 Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Mon, 25 Mar 2019 19:15:29 -0500 Subject: [PATCH] Move cranelift-debug from wasmtime; update dependencies --- Cargo.toml | 4 +- cranelift-debug/.gitignore | 3 + cranelift-debug/Cargo.toml | 33 ++ cranelift-debug/LICENSE | 220 ++++++++ cranelift-debug/README.md | 4 + cranelift-debug/src/address_transform.rs | 141 +++++ cranelift-debug/src/data.rs | 41 ++ cranelift-debug/src/lib.rs | 179 +++++++ cranelift-debug/src/read_debuginfo.rs | 136 +++++ cranelift-debug/src/transform.rs | 641 +++++++++++++++++++++++ cranelift-debug/src/write_debuginfo.rs | 209 ++++++++ 11 files changed, 1610 insertions(+), 1 deletion(-) create mode 100644 cranelift-debug/.gitignore create mode 100644 cranelift-debug/Cargo.toml create mode 100644 cranelift-debug/LICENSE create mode 100644 cranelift-debug/README.md create mode 100644 cranelift-debug/src/address_transform.rs create mode 100644 cranelift-debug/src/data.rs create mode 100644 cranelift-debug/src/lib.rs create mode 100644 cranelift-debug/src/read_debuginfo.rs create mode 100644 cranelift-debug/src/transform.rs create mode 100644 cranelift-debug/src/write_debuginfo.rs diff --git a/Cargo.toml b/Cargo.toml index f6d338428..76e1c39c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ path = "src/clif-util.rs" [dependencies] cfg-if = "0.1" cranelift-codegen = { path = "cranelift-codegen", version = "0.29.0" } +cranelift-debug = { path = "cranelift-debug", version = "0.29.0", optional = true } cranelift-entity = { path = "cranelift-entity", version = "0.29.0" } cranelift-reader = { path = "cranelift-reader", version = "0.29.0" } cranelift-frontend = { path = "cranelift-frontend", version = "0.29.0" } @@ -43,8 +44,9 @@ pretty_env_logger = "0.3.0" file-per-thread-logger = "0.1.2" [features] -default = ["disas", "wasm"] +default = ["disas", "wasm", "dwarf"] disas = ["capstone"] +dwarf = ["cranelift-debug", "wasm"] wasm = ["wabt", "cranelift-wasm"] # We want debug symbols on release binaries by default since it allows profiling diff --git a/cranelift-debug/.gitignore b/cranelift-debug/.gitignore new file mode 100644 index 000000000..4308d8220 --- /dev/null +++ b/cranelift-debug/.gitignore @@ -0,0 +1,3 @@ +target/ +**/*.rs.bk +Cargo.lock diff --git a/cranelift-debug/Cargo.toml b/cranelift-debug/Cargo.toml new file mode 100644 index 000000000..7f95a49f8 --- /dev/null +++ b/cranelift-debug/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "cranelift-debug" +version = "0.29.0" +authors = ["The Wasmtime Project Developers"] +description = "Debug utils for WebAsssembly code in Cranelift" +repository = "https://github.com/CraneStation/cranelift" +documentation = "https://docs.rs/cranelift-debug/" +categories = ["wasm"] +keywords = ["webassembly", "wasm", "debuginfo"] +license = "Apache-2.0 WITH LLVM-exception" +readme = "README.md" +edition = "2018" + +[dependencies] +gimli = "0.17.0" +wasmparser = { version = "0.28.0", default-features = false } +cranelift-codegen = { path = "../cranelift-codegen", version = "0.29.0", default-features = false } +cranelift-entity = { path = "../cranelift-entity", version = "0.29.0", default-features = false } +cranelift-wasm = { path = "../cranelift-wasm", version = "0.29.0", default-features = false } +hashmap_core = { version = "0.1.9", optional = true } +faerie = "0.9.1" +target-lexicon = { version = "0.3.0", default-features = false } +failure = { version = "0.1.3", default-features = false } +failure_derive = { version = "0.1.3", 
default-features = false } + +[features] +default = ["std"] +std = ["cranelift-codegen/std", "cranelift-wasm/std", "wasmparser/std"] +core = ["hashmap_core", "cranelift-codegen/core", "cranelift-wasm/core", "wasmparser/core"] + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "CraneStation/wasmtime" } diff --git a/cranelift-debug/LICENSE b/cranelift-debug/LICENSE new file mode 100644 index 000000000..f9d81955f --- /dev/null +++ b/cranelift-debug/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + diff --git a/cranelift-debug/README.md b/cranelift-debug/README.md new file mode 100644 index 000000000..21514c885 --- /dev/null +++ b/cranelift-debug/README.md @@ -0,0 +1,4 @@ +This is the `cranelift-debug` crate, which provides functionality to +read, transform, and write DWARF section. + +[`cranelift-debug`]: https://crates.io/crates/cranelift-debug diff --git a/cranelift-debug/src/address_transform.rs b/cranelift-debug/src/address_transform.rs new file mode 100644 index 000000000..f7c3e6365 --- /dev/null +++ b/cranelift-debug/src/address_transform.rs @@ -0,0 +1,141 @@ +//! Utility data structures for WebAssembly address space transformation. 
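+//!
+//! `AddressTransform` builds a lookup from Wasm bytecode offsets (relative to
+//! the start of the code section) to offsets in the generated machine code, so
+//! DWARF addresses can be rewritten as function symbols plus addends.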
+ +use crate::data::ModuleAddressMap; +use crate::read_debuginfo::WasmFileInfo; +use cranelift_entity::{EntityRef, PrimaryMap}; +use cranelift_wasm::DefinedFuncIndex; +use gimli::write; +use std::collections::BTreeMap; +use std::ops::Bound::{Included, Unbounded}; +use std::vec::Vec; + +pub type GeneratedAddress = usize; +pub type WasmAddress = u64; +pub type SymbolIndex = usize; + +#[derive(Debug)] +pub struct AddressMap { + pub generated: GeneratedAddress, + pub wasm: WasmAddress, +} + +#[derive(Debug)] +pub struct FunctionMap { + pub offset: GeneratedAddress, + pub len: GeneratedAddress, + pub addresses: Box<[AddressMap]>, +} + +#[derive(Debug)] +pub struct AddressTransform { + lookup: BTreeMap, + map: PrimaryMap, + func_ranges: Vec<(usize, usize)>, +} + +impl AddressTransform { + pub fn new(at: &ModuleAddressMap, wasm_file: &WasmFileInfo) -> Self { + let code_section_offset = wasm_file.code_section_offset; + let function_offsets = &wasm_file.function_offsets_and_sizes; + let mut lookup = BTreeMap::new(); + let mut map = PrimaryMap::new(); + let mut func_ranges = Vec::new(); + for (i, ft) in at { + let index = i.index(); + let (fn_offset, fn_size) = function_offsets[index]; + assert!(code_section_offset <= fn_offset); + let fn_offset: WasmAddress = fn_offset - code_section_offset; + let fn_size = fn_size as WasmAddress; + func_ranges.push((ft.body_offset, ft.body_offset + ft.body_len)); + lookup.insert( + fn_offset as WasmAddress, + (index, ft.body_offset, ft.body_offset), + ); + let mut fn_map = Vec::new(); + for t in &ft.instructions { + if t.srcloc.is_default() { + // TODO extend some range if possible + continue; + } + // src_offset is a wasm bytecode offset in the code section + let src_offset = t.srcloc.bits() as WasmAddress - code_section_offset; + assert!(fn_offset <= src_offset && src_offset <= fn_offset + fn_size); + lookup.insert( + src_offset, + (index, t.code_offset, t.code_offset + t.code_len), + ); + fn_map.push(AddressMap { + generated: t.code_offset, + wasm: src_offset, + }); + } + let last_addr = ft.body_offset + ft.body_len; + lookup.insert(fn_offset + fn_size, (index, last_addr, last_addr)); + fn_map.sort_by(|a, b| a.generated.cmp(&b.generated)); + map.push(FunctionMap { + offset: ft.body_offset, + len: ft.body_len, + addresses: fn_map.into_boxed_slice(), + }); + } + AddressTransform { + lookup, + map, + func_ranges, + } + } + + pub fn translate(&self, addr: u64) -> Option { + if addr == 0 { + // It's normally 0 for debug info without the linked code. + return None; + } + let search = self.lookup.range((Unbounded, Included(addr))); + if let Some((_, value)) = search.last() { + return Some(write::Address::Relative { + symbol: value.0, + addend: value.1 as i64, + }); + } + // Address was not found: function was not compiled? 
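+        // (The range query above finds the nearest mapping at or below `addr`;
+        // if there is none, the owning function has no generated code.)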
+ None + } + + pub fn diff(&self, addr1: u64, addr2: u64) -> Option { + let t1 = self.translate(addr1); + let t2 = self.translate(addr2); + if t1.is_none() || t2.is_none() { + return None; + } + if let ( + Some(write::Address::Relative { + symbol: s1, + addend: a, + }), + Some(write::Address::Relative { + symbol: s2, + addend: b, + }), + ) = (t1, t2) + { + if s1 != s2 { + panic!("different symbol"); + } + Some((b - a) as u64) + } else { + unreachable!(); + } + } + + pub fn delta(&self, addr1: u64, u: u64) -> Option { + self.diff(addr1, addr1 + u) + } + + pub fn map(&self) -> &PrimaryMap { + &self.map + } + + pub fn func_range(&self, index: usize) -> (usize, usize) { + self.func_ranges[index] + } +} diff --git a/cranelift-debug/src/data.rs b/cranelift-debug/src/data.rs new file mode 100644 index 000000000..985030a9e --- /dev/null +++ b/cranelift-debug/src/data.rs @@ -0,0 +1,41 @@ +//! External data structures needed for address transform. + +use cranelift_codegen::ir; +use cranelift_entity::PrimaryMap; +use cranelift_wasm::DefinedFuncIndex; + +/// Single wasm source location to generated address mapping. +#[derive(Debug)] +pub struct InstructionAddressMap { + /// Original source location. + pub srcloc: ir::SourceLoc, + + /// Generated instructions offset. + pub code_offset: usize, + + /// Generated instructions length. + pub code_len: usize, +} + +/// Function and its instructions addresses mappings. +#[derive(Debug)] +pub struct FunctionAddressMap { + /// Instructions maps. + /// The array is sorted by the InstructionAddressMap::code_offset field. + pub instructions: Vec, + + /// Function start source location (normally declaration). + pub start_srcloc: ir::SourceLoc, + + /// Function end source location. + pub end_srcloc: ir::SourceLoc, + + /// Generated function body offset if applicable, otherwise 0. + pub body_offset: usize, + + /// Generated function body length. + pub body_len: usize, +} + +/// Module functions addresses mappings. +pub type ModuleAddressMap = PrimaryMap; diff --git a/cranelift-debug/src/lib.rs b/cranelift-debug/src/lib.rs new file mode 100644 index 000000000..3a2d65dbe --- /dev/null +++ b/cranelift-debug/src/lib.rs @@ -0,0 +1,179 @@ +//! Debug utils for WebAssembly using Cranelift. 
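+//!
+//! The pipeline: `read_debuginfo` parses the DWARF sections embedded in a Wasm
+//! module, `transform_dwarf` rewrites their addresses against the generated
+//! code using a `ModuleAddressMap`, and `emit_dwarf` / `emit_debugsections`
+//! write the result into a `faerie::Artifact`.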
+ +use cranelift_codegen::isa::TargetFrontendConfig; +use faerie::{Artifact, Decl}; +use failure::Error; +use target_lexicon::{BinaryFormat, Triple}; + +pub use crate::data::*; +pub use crate::read_debuginfo::{read_debuginfo, DebugInfoData}; +pub use crate::transform::transform_dwarf; +pub use crate::write_debuginfo::{emit_dwarf, ResolvedSymbol, SymbolResolver}; + +mod address_transform; +mod data; +mod read_debuginfo; +mod transform; +mod write_debuginfo; + +#[macro_use] +extern crate failure_derive; + +struct FunctionRelocResolver {} +impl SymbolResolver for FunctionRelocResolver { + fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol { + let name = format!("_wasm_function_{}", symbol); + ResolvedSymbol::Reloc { name, addend } + } +} + +pub fn emit_debugsections( + obj: &mut Artifact, + target_config: &TargetFrontendConfig, + debuginfo_data: &DebugInfoData, + at: &ModuleAddressMap, +) -> Result<(), Error> { + let dwarf = transform_dwarf(target_config, debuginfo_data, at)?; + let resolver = FunctionRelocResolver {}; + emit_dwarf(obj, dwarf, &resolver); + Ok(()) +} + +struct ImageRelocResolver<'a> { + func_offsets: &'a Vec, +} + +impl<'a> SymbolResolver for ImageRelocResolver<'a> { + fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol { + let func_start = self.func_offsets[symbol]; + ResolvedSymbol::PhysicalAddress(func_start + addend as u64) + } +} + +pub fn emit_debugsections_image( + triple: Triple, + target_config: &TargetFrontendConfig, + debuginfo_data: &DebugInfoData, + at: &ModuleAddressMap, + funcs: &Vec<(*const u8, usize)>, +) -> Result, Error> { + let ref func_offsets = funcs + .iter() + .map(|(ptr, _)| *ptr as u64) + .collect::>(); + let mut obj = Artifact::new(triple, String::from("module")); + let dwarf = transform_dwarf(target_config, debuginfo_data, at)?; + let resolver = ImageRelocResolver { func_offsets }; + + // Assuming all functions in the same code block, looking min/max of its range. + assert!(funcs.len() > 0); + let mut segment_body: (usize, usize) = (!0, 0); + for (body_ptr, body_len) in funcs.iter() { + segment_body.0 = ::std::cmp::min(segment_body.0, *body_ptr as usize); + segment_body.1 = ::std::cmp::max(segment_body.1, *body_ptr as usize + body_len); + } + let segment_body = (segment_body.0 as *const u8, segment_body.1 - segment_body.0); + + let body = unsafe { ::std::slice::from_raw_parts(segment_body.0, segment_body.1) }; + obj.declare_with("all", Decl::function(), body.to_vec())?; + + emit_dwarf(&mut obj, dwarf, &resolver); + + // LLDB is too "magical" about mach-o, generating elf + let mut bytes = obj.emit_as(BinaryFormat::Elf)?; + // elf is still missing details... 
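+    // faerie emits a relocatable ELF with an empty program header table; the
+    // fixup below synthesizes a PT_LOAD segment and patches the section
+    // address so debuggers can treat the buffer as a loadable image.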
+ convert_faerie_elf_to_loadable_file(&mut bytes, segment_body.0); + + Ok(bytes) +} + +fn convert_faerie_elf_to_loadable_file(bytes: &mut Vec, code_ptr: *const u8) { + use std::ffi::CStr; + use std::os::raw::c_char; + + assert!( + bytes[0x4] == 2 && bytes[0x5] == 1, + "bits and endianess in .ELF" + ); + let e_phoff = unsafe { *(bytes.as_ptr().offset(0x20) as *const u64) }; + let e_phnum = unsafe { *(bytes.as_ptr().offset(0x38) as *const u16) }; + assert!( + e_phoff == 0 && e_phnum == 0, + "program header table is empty" + ); + let e_phentsize = unsafe { *(bytes.as_ptr().offset(0x36) as *const u16) }; + assert!(e_phentsize == 0x38, "size of ph"); + let e_shentsize = unsafe { *(bytes.as_ptr().offset(0x3A) as *const u16) }; + assert!(e_shentsize == 0x40, "size of sh"); + + let e_shoff = unsafe { *(bytes.as_ptr().offset(0x28) as *const u64) }; + let e_shnum = unsafe { *(bytes.as_ptr().offset(0x3C) as *const u16) }; + let mut shstrtab_off = 0; + let mut segment = None; + for i in 0..e_shnum { + let off = e_shoff as isize + i as isize * e_shentsize as isize; + let sh_type = unsafe { *(bytes.as_ptr().offset(off + 0x4) as *const u32) }; + if sh_type == /* SHT_SYMTAB */ 3 { + shstrtab_off = unsafe { *(bytes.as_ptr().offset(off + 0x18) as *const u64) }; + } + if sh_type != /* SHT_PROGBITS */ 1 { + continue; + } + // It is a SHT_PROGBITS, but we need to check sh_name to ensure it is our function + let sh_name = unsafe { + let sh_name_off = *(bytes.as_ptr().offset(off) as *const u32); + CStr::from_ptr( + bytes + .as_ptr() + .offset((shstrtab_off + sh_name_off as u64) as isize) + as *const c_char, + ) + .to_str() + .expect("name") + }; + if sh_name != ".text.all" { + continue; + } + + assert!(segment.is_none()); + // Functions was added at emit_debugsections_image as .text.all. + // Patch vaddr, and save file location and its size. + unsafe { + *(bytes.as_ptr().offset(off + 0x10) as *mut u64) = code_ptr as u64; + }; + let sh_offset = unsafe { *(bytes.as_ptr().offset(off + 0x18) as *const u64) }; + let sh_size = unsafe { *(bytes.as_ptr().offset(off + 0x20) as *const u64) }; + segment = Some((sh_offset, code_ptr, sh_size)); + // Fix name too: cut it to just ".text" + unsafe { + let sh_name_off = *(bytes.as_ptr().offset(off) as *const u32); + bytes[(shstrtab_off + sh_name_off as u64) as usize + ".text".len()] = 0; + } + } + + // LLDB wants segment with virtual address set, placing them at the end of ELF. + let ph_off = bytes.len(); + if let Some((sh_offset, v_offset, sh_size)) = segment { + let mut segment = Vec::with_capacity(0x38); + segment.resize(0x38, 0); + unsafe { + *(segment.as_ptr() as *mut u32) = /* PT_LOAD */ 0x1; + *(segment.as_ptr().offset(0x8) as *mut u64) = sh_offset; + *(segment.as_ptr().offset(0x10) as *mut u64) = v_offset as u64; + *(segment.as_ptr().offset(0x18) as *mut u64) = v_offset as u64; + *(segment.as_ptr().offset(0x20) as *mut u64) = sh_size; + *(segment.as_ptr().offset(0x28) as *mut u64) = sh_size; + } + bytes.extend_from_slice(&segment); + } else { + unreachable!(); + } + + // It is somewhat loadable ELF file at this moment. + // Update e_flags, e_phoff and e_phnum. 
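+    // (ELF64 header layout: offset 0x10 is e_type, 0x20 is e_phoff, 0x38 is e_phnum.)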
+ unsafe { + *(bytes.as_ptr().offset(0x10) as *mut u16) = /* ET_DYN */ 3; + *(bytes.as_ptr().offset(0x20) as *mut u64) = ph_off as u64; + *(bytes.as_ptr().offset(0x38) as *mut u16) = 1 as u16; + } +} diff --git a/cranelift-debug/src/read_debuginfo.rs b/cranelift-debug/src/read_debuginfo.rs new file mode 100644 index 000000000..4af51f166 --- /dev/null +++ b/cranelift-debug/src/read_debuginfo.rs @@ -0,0 +1,136 @@ +//! Reading of WebAssembly DWARF sections. + +use std::collections::HashMap; +use wasmparser::{ModuleReader, SectionCode}; + +use gimli; + +use gimli::{ + DebugAbbrev, DebugAddr, DebugInfo, DebugLine, DebugLineStr, DebugLoc, DebugLocLists, + DebugRanges, DebugRngLists, DebugStr, DebugStrOffsets, DebugTypes, EndianSlice, LittleEndian, + LocationLists, RangeLists, +}; + +trait Reader: gimli::Reader {} + +impl<'input> Reader for gimli::EndianSlice<'input, LittleEndian> {} + +pub type Dwarf<'input> = gimli::Dwarf>; + +#[derive(Debug)] +pub struct WasmFileInfo { + pub code_section_offset: u64, + pub function_offsets_and_sizes: Box<[(u64, u32)]>, +} + +#[derive(Debug)] +pub struct DebugInfoData<'a> { + pub dwarf: Dwarf<'a>, + pub wasm_file: WasmFileInfo, +} + +fn convert_sections<'a>(sections: HashMap<&str, &'a [u8]>) -> Dwarf<'a> { + let endian = LittleEndian; + let debug_str = DebugStr::new(sections[".debug_str"], endian); + let debug_abbrev = DebugAbbrev::new(sections[".debug_abbrev"], endian); + let debug_info = DebugInfo::new(sections[".debug_info"], endian); + let debug_line = DebugLine::new(sections[".debug_line"], endian); + + if sections.contains_key(".debug_addr") { + panic!("Unexpected .debug_addr"); + } + + let debug_addr = DebugAddr::from(EndianSlice::new(&[], endian)); + + if sections.contains_key(".debug_line_str") { + panic!("Unexpected .debug_line_str"); + } + + let debug_line_str = DebugLineStr::from(EndianSlice::new(&[], endian)); + let debug_str_sup = DebugStr::from(EndianSlice::new(&[], endian)); + + if sections.contains_key(".debug_rnglists") { + panic!("Unexpected .debug_rnglists"); + } + + let debug_ranges = match sections.get(".debug_ranges") { + Some(section) => DebugRanges::new(section, endian), + None => DebugRanges::new(&[], endian), + }; + let debug_rnglists = DebugRngLists::new(&[], endian); + let ranges = RangeLists::new(debug_ranges, debug_rnglists); + + if sections.contains_key(".debug_loclists") { + panic!("Unexpected .debug_loclists"); + } + + let debug_loc = match sections.get(".debug_loc") { + Some(section) => DebugLoc::new(section, endian), + None => DebugLoc::new(&[], endian), + }; + let debug_loclists = DebugLocLists::new(&[], endian); + let locations = LocationLists::new(debug_loc, debug_loclists); + + if sections.contains_key(".debug_str_offsets") { + panic!("Unexpected .debug_str_offsets"); + } + + let debug_str_offsets = DebugStrOffsets::from(EndianSlice::new(&[], endian)); + + if sections.contains_key(".debug_types") { + panic!("Unexpected .debug_types"); + } + + let debug_types = DebugTypes::from(EndianSlice::new(&[], endian)); + + Dwarf { + debug_abbrev, + debug_addr, + debug_info, + debug_line, + debug_line_str, + debug_str, + debug_str_offsets, + debug_str_sup, + debug_types, + locations, + ranges, + } +} + +pub fn read_debuginfo(data: &[u8]) -> DebugInfoData { + let mut reader = ModuleReader::new(data).expect("reader"); + let mut sections = HashMap::new(); + let mut code_section_offset = 0; + let mut function_offsets_and_sizes = Vec::new(); + while !reader.eof() { + let section = reader.read().expect("section"); + if let 
SectionCode::Custom { name, .. } = section.code { + if name.starts_with(".debug_") { + let mut reader = section.get_binary_reader(); + let len = reader.bytes_remaining(); + sections.insert(name, reader.read_bytes(len).expect("bytes")); + } + } + if let SectionCode::Code = section.code { + code_section_offset = section.range().start as u64; + // TODO remove me later + let mut reader = section.get_code_section_reader().expect("code reader"); + for _ in 0..reader.get_count() { + let body = reader.read().expect("function body read"); + let range = body.range(); + let fn_body_size = range.end - range.start; + let fn_body_offset = range.start; + function_offsets_and_sizes.push((fn_body_offset as u64, fn_body_size as u32)); + } + } + } + let function_offsets_and_sizes = function_offsets_and_sizes.into_boxed_slice(); + DebugInfoData { + dwarf: convert_sections(sections), + wasm_file: WasmFileInfo { + code_section_offset, + function_offsets_and_sizes, + }, + } +} diff --git a/cranelift-debug/src/transform.rs b/cranelift-debug/src/transform.rs new file mode 100644 index 000000000..72822adb9 --- /dev/null +++ b/cranelift-debug/src/transform.rs @@ -0,0 +1,641 @@ +//! Transformation logic of WebAssembly DWARF into native format. + +use crate::address_transform::AddressTransform; +use crate::data::ModuleAddressMap; +pub use crate::read_debuginfo::DebugInfoData; +use cranelift_codegen::isa::TargetFrontendConfig; +use cranelift_entity::EntityRef; +use failure::Error; +use std::collections::{BTreeMap, HashMap}; +use std::ops::Bound::{Included, Unbounded}; + +use gimli; + +use gimli::{ + AttributeValue, CompilationUnitHeader, DebugAbbrev, DebugAddr, DebugAddrBase, DebugLine, + DebugLineOffset, DebugStr, DebuggingInformationEntry, LineEncoding, LocationLists, RangeLists, + UnitOffset, +}; + +use gimli::write; + +trait Reader: gimli::Reader {} + +impl<'input, Endian> Reader for gimli::EndianSlice<'input, Endian> where Endian: gimli::Endianity {} + +#[derive(Fail, Debug)] +#[fail(display = "Debug info transform error: {}", _0)] +pub struct TransformError(&'static str); + +pub struct TransformedDwarf { + pub encoding: gimli::Encoding, + pub strings: write::StringTable, + pub units: write::UnitTable, + pub line_strings: write::LineStringTable, + pub range_lists: write::RangeListTable, +} + +struct DebugInputContext<'a, R> +where + R: Reader, +{ + debug_abbrev: &'a DebugAbbrev, + debug_str: &'a DebugStr, + debug_line: &'a DebugLine, + debug_addr: &'a DebugAddr, + debug_addr_base: DebugAddrBase, + rnglists: &'a RangeLists, + loclists: &'a LocationLists, +} + +type PendingDieRef = (write::UnitEntryId, gimli::DwAt, UnitOffset); + +enum FileAttributeContext<'a> { + Root(Option), + Children(&'a Vec), +} + +fn clone_die_attributes<'a, R>( + entry: &DebuggingInformationEntry, + context: &DebugInputContext, + addr_tr: &'a AddressTransform, + unit_encoding: &gimli::Encoding, + current_scope: &mut write::DebuggingInformationEntry, + current_scope_id: write::UnitEntryId, + subprogram_range: Option<(write::Address, u64)>, + out_strings: &mut write::StringTable, + die_ref_map: &HashMap, + pending_die_refs: &mut Vec, + file_context: FileAttributeContext<'a>, +) -> Result<(), Error> +where + R: Reader, +{ + let _tag = &entry.tag(); + let mut attrs = entry.attrs(); + let mut low_pc = None; + while let Some(attr) = attrs.next()? 
{ + let attr_value = match attr.value() { + AttributeValue::Addr(_) + if attr.name() == gimli::DW_AT_low_pc && subprogram_range.is_some() => + { + write::AttributeValue::Address(subprogram_range.unwrap().0) + } + AttributeValue::Udata(_) + if attr.name() == gimli::DW_AT_high_pc && subprogram_range.is_some() => + { + write::AttributeValue::Udata(subprogram_range.unwrap().1) + } + AttributeValue::Addr(u) => { + let addr = addr_tr.translate(u).unwrap_or(write::Address::Absolute(0)); + if attr.name() == gimli::DW_AT_low_pc { + low_pc = Some((u, addr)); + } + write::AttributeValue::Address(addr) + } + AttributeValue::Udata(u) => { + if attr.name() != gimli::DW_AT_high_pc || low_pc.is_none() { + write::AttributeValue::Udata(u) + } else { + let u = addr_tr.delta(low_pc.unwrap().0, u).unwrap_or(0); + write::AttributeValue::Udata(u) + } + } + AttributeValue::Data1(d) => write::AttributeValue::Data1(d), + AttributeValue::Data2(d) => write::AttributeValue::Data2(d), + AttributeValue::Data4(d) => write::AttributeValue::Data4(d), + AttributeValue::Sdata(d) => write::AttributeValue::Sdata(d), + AttributeValue::Flag(f) => write::AttributeValue::Flag(f), + AttributeValue::DebugLineRef(line_program_offset) => { + if let FileAttributeContext::Root(o) = file_context { + if o != Some(line_program_offset) { + return Err(TransformError("invalid debug_line offset").into()); + } + write::AttributeValue::LineProgramRef + } else { + return Err(TransformError("unexpected debug_line index attribute").into()); + } + } + AttributeValue::FileIndex(i) => { + if let FileAttributeContext::Children(file_map) = file_context { + write::AttributeValue::FileIndex(Some(file_map[(i - 1) as usize])) + } else { + return Err(TransformError("unexpected file index attribute").into()); + } + } + AttributeValue::DebugStrRef(str_offset) => { + let s = context.debug_str.get_str(str_offset)?.to_slice()?.to_vec(); + write::AttributeValue::StringRef(out_strings.add(s)) + } + AttributeValue::RangeListsRef(r) => { + let low_pc = 0; + let mut ranges = context.rnglists.ranges( + r, + *unit_encoding, + low_pc, + &context.debug_addr, + context.debug_addr_base, + )?; + let mut _result = Vec::new(); + while let Some(range) = ranges.next()? { + assert!(range.begin <= range.end); + _result.push((range.begin as i64, range.end as i64)); + } + // FIXME _result contains invalid code offsets; translate_address + continue; // ignore attribute + } + AttributeValue::LocationListsRef(r) => { + let low_pc = 0; + let mut locs = context.loclists.locations( + r, + *unit_encoding, + low_pc, + &context.debug_addr, + context.debug_addr_base, + )?; + let mut _result = Vec::new(); + while let Some(loc) = locs.next()? 
{ + _result.push((loc.range.begin as i64, loc.range.end as i64, loc.data.0)); + } + // FIXME _result contains invalid expressions and code offsets + continue; // ignore attribute + } + AttributeValue::Exprloc(ref _expr) => { + // FIXME _expr contains invalid expression + continue; // ignore attribute + } + AttributeValue::Encoding(e) => write::AttributeValue::Encoding(e), + AttributeValue::DecimalSign(e) => write::AttributeValue::DecimalSign(e), + AttributeValue::Endianity(e) => write::AttributeValue::Endianity(e), + AttributeValue::Accessibility(e) => write::AttributeValue::Accessibility(e), + AttributeValue::Visibility(e) => write::AttributeValue::Visibility(e), + AttributeValue::Virtuality(e) => write::AttributeValue::Virtuality(e), + AttributeValue::Language(e) => write::AttributeValue::Language(e), + AttributeValue::AddressClass(e) => write::AttributeValue::AddressClass(e), + AttributeValue::IdentifierCase(e) => write::AttributeValue::IdentifierCase(e), + AttributeValue::CallingConvention(e) => write::AttributeValue::CallingConvention(e), + AttributeValue::Inline(e) => write::AttributeValue::Inline(e), + AttributeValue::Ordering(e) => write::AttributeValue::Ordering(e), + AttributeValue::UnitRef(ref offset) => { + if let Some(unit_id) = die_ref_map.get(offset) { + write::AttributeValue::ThisUnitEntryRef(*unit_id) + } else { + pending_die_refs.push((current_scope_id, attr.name(), *offset)); + continue; + } + } + // AttributeValue::DebugInfoRef(_) => { + // continue; + // } + _ => panic!(), //write::AttributeValue::StringRef(out_strings.add("_")), + }; + current_scope.set(attr.name(), attr_value); + } + Ok(()) +} + +fn clone_attr_string( + attr_value: &AttributeValue, + form: gimli::DwForm, + debug_str: &DebugStr, + out_strings: &mut write::StringTable, +) -> Result +where + R: Reader, +{ + let content = match attr_value { + AttributeValue::DebugStrRef(str_offset) => { + debug_str.get_str(*str_offset)?.to_slice()?.to_vec() + } + AttributeValue::String(b) => b.to_slice()?.to_vec(), + _ => panic!("Unexpected attribute value"), + }; + Ok(match form { + gimli::DW_FORM_strp => { + let id = out_strings.add(content); + write::LineString::StringRef(id) + } + gimli::DW_FORM_string => write::LineString::String(content), + _ => panic!("DW_FORM_line_strp or other not supported"), + }) +} + +#[derive(Debug)] +enum SavedLineProgramRow { + Normal { + address: u64, + op_index: u64, + file_index: u64, + line: u64, + column: u64, + discriminator: u64, + is_stmt: bool, + basic_block: bool, + prologue_end: bool, + epilogue_begin: bool, + isa: u64, + }, + EndOfSequence(u64), +} + +#[derive(Debug, Eq, PartialEq)] +enum ReadLineProgramState { + SequenceEnded, + ReadSequence, + IgnoreSequence, +} + +fn clone_line_program( + unit: &CompilationUnitHeader, + root: &DebuggingInformationEntry, + addr_tr: &AddressTransform, + out_encoding: &gimli::Encoding, + debug_str: &DebugStr, + debug_line: &DebugLine, + out_strings: &mut write::StringTable, +) -> Result<(write::LineProgram, DebugLineOffset, Vec), Error> +where + R: Reader, +{ + let offset = match root.attr_value(gimli::DW_AT_stmt_list)? 
{ + Some(gimli::AttributeValue::DebugLineRef(offset)) => offset, + _ => { + return Err(TransformError("Debug line offset is not found").into()); + } + }; + let comp_dir = root.attr_value(gimli::DW_AT_comp_dir)?; + let comp_name = root.attr_value(gimli::DW_AT_name)?; + let out_comp_dir = clone_attr_string( + comp_dir.as_ref().expect("comp_dir"), + gimli::DW_FORM_strp, + debug_str, + out_strings, + )?; + let out_comp_name = clone_attr_string( + comp_name.as_ref().expect("comp_name"), + gimli::DW_FORM_strp, + debug_str, + out_strings, + )?; + + let program = debug_line.program( + offset, + unit.address_size(), + comp_dir.and_then(|val| val.string_value(&debug_str)), + comp_name.and_then(|val| val.string_value(&debug_str)), + ); + if let Ok(program) = program { + let header = program.header(); + assert!(header.version() <= 4, "not supported 5"); + let line_encoding = LineEncoding { + minimum_instruction_length: header.minimum_instruction_length(), + maximum_operations_per_instruction: header.maximum_operations_per_instruction(), + default_is_stmt: header.default_is_stmt(), + line_base: header.line_base(), + line_range: header.line_range(), + }; + let mut out_program = write::LineProgram::new( + *out_encoding, + line_encoding, + out_comp_dir, + out_comp_name, + None, + ); + let mut dirs = Vec::new(); + dirs.push(out_program.default_directory()); + for dir_attr in header.include_directories() { + let dir_id = out_program.add_directory(clone_attr_string( + dir_attr, + gimli::DW_FORM_string, + debug_str, + out_strings, + )?); + dirs.push(dir_id); + } + let mut files = Vec::new(); + for file_entry in header.file_names() { + let dir_id = dirs[file_entry.directory_index() as usize]; + let file_id = out_program.add_file( + clone_attr_string( + &file_entry.path_name(), + gimli::DW_FORM_string, + debug_str, + out_strings, + )?, + dir_id, + None, + ); + files.push(file_id); + } + + let mut rows = program.rows(); + let mut saved_rows = BTreeMap::new(); + let mut state = ReadLineProgramState::SequenceEnded; + while let Some((_header, row)) = rows.next_row()? { + if state == ReadLineProgramState::IgnoreSequence { + if row.end_sequence() { + state = ReadLineProgramState::SequenceEnded; + } + continue; + } + let saved_row = if row.end_sequence() { + state = ReadLineProgramState::SequenceEnded; + SavedLineProgramRow::EndOfSequence(row.address()) + } else { + if state == ReadLineProgramState::SequenceEnded { + // Discard sequences for non-existent code. 
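+                    // (An address of 0 normally indicates debug info whose
+                    // code was not linked in; see `AddressTransform::translate`.)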
+ if row.address() == 0 { + state = ReadLineProgramState::IgnoreSequence; + continue; + } + state = ReadLineProgramState::ReadSequence; + } + SavedLineProgramRow::Normal { + address: row.address(), + op_index: row.op_index(), + file_index: row.file_index(), + line: row.line().unwrap_or(0), + column: match row.column() { + gimli::ColumnType::LeftEdge => 0, + gimli::ColumnType::Column(val) => val, + }, + discriminator: row.discriminator(), + is_stmt: row.is_stmt(), + basic_block: row.basic_block(), + prologue_end: row.prologue_end(), + epilogue_begin: row.epilogue_begin(), + isa: row.isa(), + } + }; + saved_rows.insert(row.address(), saved_row); + } + + for (i, map) in addr_tr.map() { + let symbol = i.index(); + let base_addr = map.offset; + out_program.begin_sequence(Some(write::Address::Relative { symbol, addend: 0 })); + // TODO track and place function declaration line here + let mut last_address = None; + for addr_map in map.addresses.iter() { + let mut saved_row = saved_rows.get(&addr_map.wasm); + if saved_row.is_none() { + // No direct match -- repeat search with range. + saved_row = saved_rows + .range((Unbounded, Included(addr_map.wasm))) + .last() + .map(|p| p.1); + } + if let Some(SavedLineProgramRow::Normal { + address, + op_index, + file_index, + line, + column, + discriminator, + is_stmt, + basic_block, + prologue_end, + epilogue_begin, + isa, + }) = saved_row + { + // Ignore duplicates + if Some(*address) != last_address { + let address_offset = if last_address.is_none() { + // Extend first entry to the function declaration + // TODO use the function declaration line instead + 0 + } else { + (addr_map.generated - base_addr) as u64 + }; + out_program.row().address_offset = address_offset; + out_program.row().op_index = *op_index; + out_program.row().file = files[(file_index - 1) as usize]; + out_program.row().line = *line; + out_program.row().column = *column; + out_program.row().discriminator = *discriminator; + out_program.row().is_statement = *is_stmt; + out_program.row().basic_block = *basic_block; + out_program.row().prologue_end = *prologue_end; + out_program.row().epilogue_begin = *epilogue_begin; + out_program.row().isa = *isa; + out_program.generate_row(); + last_address = Some(*address); + } + } + } + let end_addr = (map.offset + map.len - 1) as u64; + out_program.end_sequence(end_addr); + } + Ok((out_program, offset, files)) + } else { + Err(TransformError("Valid line program not found").into()) + } +} + +fn get_subprogram_range<'a, R>( + entry: &DebuggingInformationEntry, + addr_tr: &'a AddressTransform, +) -> Result, Error> +where + R: Reader, +{ + let low_pc = entry.attr_value(gimli::DW_AT_low_pc)?; + if let Some(AttributeValue::Addr(addr)) = low_pc { + let transformed = addr_tr.translate(addr); + if let Some(write::Address::Relative { symbol, .. 
}) = transformed { + let range = addr_tr.func_range(symbol); + let addr = write::Address::Relative { + symbol, + addend: range.0 as i64, + }; + let len = (range.1 - range.0) as u64; + return Ok(Some((addr, len))); + } + } + Ok(None) +} + +fn clone_unit<'a, R>( + unit: &CompilationUnitHeader, + context: &DebugInputContext, + addr_tr: &'a AddressTransform, + out_encoding: &gimli::Encoding, + out_units: &mut write::UnitTable, + out_strings: &mut write::StringTable, +) -> Result<(), Error> +where + R: Reader, +{ + let abbrevs = unit.abbreviations(context.debug_abbrev)?; + + let mut die_ref_map = HashMap::new(); + let mut pending_die_refs = Vec::new(); + let mut stack = Vec::new(); + + // Iterate over all of this compilation unit's entries. + let mut entries = unit.entries(&abbrevs); + let (comp_unit, file_map) = if let Some((depth_delta, entry)) = entries.next_dfs()? { + assert!(depth_delta == 0); + let (out_line_program, debug_line_offset, file_map) = clone_line_program( + unit, + entry, + addr_tr, + out_encoding, + context.debug_str, + context.debug_line, + out_strings, + )?; + + if entry.tag() == gimli::DW_TAG_compile_unit { + let unit_id = out_units.add(write::Unit::new(*out_encoding, out_line_program)); + let comp_unit = out_units.get_mut(unit_id); + + let root_id = comp_unit.root(); + die_ref_map.insert(entry.offset(), root_id); + + clone_die_attributes( + entry, + context, + addr_tr, + &unit.encoding(), + comp_unit.get_mut(root_id), + root_id, + None, + out_strings, + &die_ref_map, + &mut pending_die_refs, + FileAttributeContext::Root(Some(debug_line_offset)), + )?; + + stack.push(root_id); + (comp_unit, file_map) + } else { + return Err(TransformError("Unexpected unit header").into()); + } + } else { + return Ok(()); // empty + }; + let mut skip_at_depth = None; + while let Some((depth_delta, entry)) = entries.next_dfs()? { + let depth_delta = if let Some(depth) = skip_at_depth { + let new_depth = depth + depth_delta; + if new_depth >= 0 { + skip_at_depth = Some(new_depth); + continue; + } + skip_at_depth = None; + new_depth + } else { + depth_delta + }; + let range = if entry.tag() == gimli::DW_TAG_subprogram { + let range = get_subprogram_range(entry, addr_tr)?; + if range.is_none() { + // Subprogram was not compiled: discarding all its info. 
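+                // `skip_at_depth` records the relative depth of the skipped
+                // subtree so that all child DIEs are dropped as well.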
+ skip_at_depth = Some(0); + continue; + } + range + } else { + None + }; + + if depth_delta <= 0 { + for _ in depth_delta..1 { + stack.pop(); + } + } else { + assert!(depth_delta == 1); + } + let parent = stack.last().unwrap(); + let die_id = comp_unit.add(*parent, entry.tag()); + let current_scope = comp_unit.get_mut(die_id); + + stack.push(die_id); + die_ref_map.insert(entry.offset(), die_id); + + clone_die_attributes( + entry, + context, + addr_tr, + &unit.encoding(), + current_scope, + die_id, + range, + out_strings, + &die_ref_map, + &mut pending_die_refs, + FileAttributeContext::Children(&file_map), + )?; + } + for (die_id, attr_name, offset) in pending_die_refs { + let die = comp_unit.get_mut(die_id); + let unit_id = die_ref_map[&offset]; + die.set(attr_name, write::AttributeValue::ThisUnitEntryRef(unit_id)); + } + Ok(()) +} + +pub fn transform_dwarf( + target_config: &TargetFrontendConfig, + di: &DebugInfoData, + at: &ModuleAddressMap, +) -> Result { + let context = DebugInputContext { + debug_abbrev: &di.dwarf.debug_abbrev, + debug_str: &di.dwarf.debug_str, + debug_line: &di.dwarf.debug_line, + debug_addr: &di.dwarf.debug_addr, + debug_addr_base: DebugAddrBase(0), + rnglists: &di.dwarf.ranges, + loclists: &di.dwarf.locations, + }; + + let out_encoding = gimli::Encoding { + format: gimli::Format::Dwarf32, + // TODO: this should be configurable + // macOS doesn't seem to support DWARF > 3 + version: 3, + address_size: target_config.pointer_bytes(), + }; + + let addr_tr = AddressTransform::new(at, &di.wasm_file); + + let mut out_strings = write::StringTable::default(); + let mut out_units = write::UnitTable::default(); + + let out_range_lists = write::RangeListTable::default(); + let out_line_strings = write::LineStringTable::default(); + + let mut iter = di.dwarf.debug_info.units(); + while let Some(ref unit) = iter.next().unwrap_or(None) { + clone_unit( + unit, + &context, + &addr_tr, + &out_encoding, + &mut out_units, + &mut out_strings, + )?; + } + + // let unit_range_list = write::RangeList(Vec::new()); + // let unit_range_list_id = out_range_lists.add(unit_range_list.clone()); + // let unit = dwarf.units.get_mut(self.unit_id); + // let root = unit.root(); + // let root = unit.get_mut(root); + // root.set( + // gimli::DW_AT_ranges, + // AttributeValue::RangeListRef(unit_range_list_id), + // ); + + //println!("{:?} \n====\n {:?}", di, at); + Ok(TransformedDwarf { + encoding: out_encoding, + strings: out_strings, + units: out_units, + line_strings: out_line_strings, + range_lists: out_range_lists, + }) +} diff --git a/cranelift-debug/src/write_debuginfo.rs b/cranelift-debug/src/write_debuginfo.rs new file mode 100644 index 000000000..0512e5e62 --- /dev/null +++ b/cranelift-debug/src/write_debuginfo.rs @@ -0,0 +1,209 @@ +//! Writing native DWAFT sections. + +use crate::transform::TransformedDwarf; + +use gimli::write::{ + Address, DebugAbbrev, DebugInfo, DebugLine, DebugLineStr, DebugRanges, DebugRngLists, DebugStr, + EndianVec, Result, SectionId, Sections, Writer, +}; +use gimli::RunTimeEndian; + +use faerie::artifact::Decl; +use faerie::*; + +struct DebugReloc { + offset: u32, + size: u8, + name: String, + addend: i64, +} + +macro_rules! decl_section { + ($artifact:ident . $section:ident = $name:expr) => { + $artifact + .declare_with( + SectionId::$section.name(), + Decl::debug_section(), + $name.0.writer.into_vec(), + ) + .unwrap(); + }; +} + +macro_rules! sect_relocs { + ($artifact:ident . 
$section:ident = $name:expr) => { + for reloc in $name.0.relocs { + $artifact + .link_with( + faerie::Link { + from: SectionId::$section.name(), + to: &reloc.name, + at: u64::from(reloc.offset), + }, + faerie::Reloc::Debug { + size: reloc.size, + addend: reloc.addend as i32, + }, + ) + .expect("faerie relocation error"); + } + }; +} + +pub enum ResolvedSymbol { + PhysicalAddress(u64), + Reloc { name: String, addend: i64 }, +} + +pub trait SymbolResolver { + fn resolve_symbol(&self, symbol: usize, addend: i64) -> ResolvedSymbol; +} + +pub fn emit_dwarf( + artifact: &mut Artifact, + mut dwarf: TransformedDwarf, + symbol_resolver: &SymbolResolver, +) { + let endian = RunTimeEndian::Little; + let debug_abbrev = DebugAbbrev::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_info = DebugInfo::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_str = DebugStr::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_line = DebugLine::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_ranges = DebugRanges::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_rnglists = DebugRngLists::from(WriterRelocate::new(endian, symbol_resolver)); + let debug_line_str = DebugLineStr::from(WriterRelocate::new(endian, symbol_resolver)); + + let mut sections = Sections { + debug_abbrev, + debug_info, + debug_line, + debug_line_str, + debug_ranges, + debug_rnglists, + debug_str, + }; + + let debug_str_offsets = dwarf.strings.write(&mut sections.debug_str).unwrap(); + let debug_line_str_offsets = dwarf + .line_strings + .write(&mut sections.debug_line_str) + .unwrap(); + dwarf + .units + .write(&mut sections, &debug_line_str_offsets, &debug_str_offsets) + .unwrap(); + + decl_section!(artifact.DebugAbbrev = sections.debug_abbrev); + decl_section!(artifact.DebugInfo = sections.debug_info); + decl_section!(artifact.DebugStr = sections.debug_str); + decl_section!(artifact.DebugLine = sections.debug_line); + + let debug_ranges_not_empty = !sections.debug_ranges.0.writer.slice().is_empty(); + if debug_ranges_not_empty { + decl_section!(artifact.DebugRanges = sections.debug_ranges); + } + + let debug_rnglists_not_empty = !sections.debug_rnglists.0.writer.slice().is_empty(); + if debug_rnglists_not_empty { + decl_section!(artifact.DebugRngLists = sections.debug_rnglists); + } + + sect_relocs!(artifact.DebugAbbrev = sections.debug_abbrev); + sect_relocs!(artifact.DebugInfo = sections.debug_info); + sect_relocs!(artifact.DebugStr = sections.debug_str); + sect_relocs!(artifact.DebugLine = sections.debug_line); + + if debug_ranges_not_empty { + sect_relocs!(artifact.DebugRanges = sections.debug_ranges); + } + + if debug_rnglists_not_empty { + sect_relocs!(artifact.DebugRngLists = sections.debug_rnglists); + } +} + +struct WriterRelocate<'a> { + relocs: Vec, + writer: EndianVec, + symbol_resolver: &'a SymbolResolver, +} + +impl<'a> WriterRelocate<'a> { + fn new(endian: RunTimeEndian, symbol_resolver: &'a SymbolResolver) -> Self { + WriterRelocate { + relocs: Vec::new(), + writer: EndianVec::new(endian), + symbol_resolver, + } + } +} + +impl<'a> Writer for WriterRelocate<'a> { + type Endian = RunTimeEndian; + + fn endian(&self) -> Self::Endian { + self.writer.endian() + } + + fn len(&self) -> usize { + self.writer.len() + } + + fn write(&mut self, bytes: &[u8]) -> Result<()> { + self.writer.write(bytes) + } + + fn write_at(&mut self, offset: usize, bytes: &[u8]) -> Result<()> { + self.writer.write_at(offset, bytes) + } + + fn write_address(&mut self, address: Address, 
size: u8) -> Result<()> { + match address { + Address::Absolute(val) => self.write_word(val, size), + Address::Relative { symbol, addend } => { + match self.symbol_resolver.resolve_symbol(symbol, addend as i64) { + ResolvedSymbol::PhysicalAddress(addr) => self.write_word(addr, size), + ResolvedSymbol::Reloc { name, addend } => { + let offset = self.len() as u64; + self.relocs.push(DebugReloc { + offset: offset as u32, + size, + name, + addend, + }); + self.write_word(addend as u64, size) + } + } + } + } + } + + fn write_offset(&mut self, val: usize, section: SectionId, size: u8) -> Result<()> { + let offset = self.len() as u32; + let name = section.name().to_string(); + self.relocs.push(DebugReloc { + offset, + size, + name, + addend: val as i64, + }); + self.write_word(val as u64, size) + } + + fn write_offset_at( + &mut self, + offset: usize, + val: usize, + section: SectionId, + size: u8, + ) -> Result<()> { + let name = section.name().to_string(); + self.relocs.push(DebugReloc { + offset: offset as u32, + size, + name, + addend: val as i64, + }); + self.write_word_at(offset, val as u64, size) + } +}