From dd6c5b3f283b9b7a049411a3b510490f0b497787 Mon Sep 17 00:00:00 2001 From: Yury Delendik Date: Mon, 1 Jul 2019 15:40:38 -0500 Subject: [PATCH] Transform ranges and simple expressions --- Cargo.toml | 2 + src/wasm2obj.rs | 41 +- wasmtime-debug/src/address_transform.rs | 158 ----- wasmtime-debug/src/gc.rs | 2 +- wasmtime-debug/src/lib.rs | 13 +- wasmtime-debug/src/read_debuginfo.rs | 13 - wasmtime-debug/src/transform.rs | 641 ----------------- .../src/transform/address_transform.rs | 655 ++++++++++++++++++ wasmtime-debug/src/transform/attr.rs | 295 ++++++++ wasmtime-debug/src/transform/expression.rs | 486 +++++++++++++ wasmtime-debug/src/transform/line_program.rs | 233 +++++++ wasmtime-debug/src/transform/mod.rs | 102 +++ .../src/transform/range_info_builder.rs | 225 ++++++ wasmtime-debug/src/transform/unit.rs | 515 ++++++++++++++ wasmtime-environ/src/address_map.rs | 21 + wasmtime-environ/src/cache.rs | 22 +- wasmtime-environ/src/compilation.rs | 13 +- wasmtime-environ/src/cranelift.rs | 92 ++- wasmtime-environ/src/lib.rs | 4 +- wasmtime-jit/src/compiler.rs | 33 +- 20 files changed, 2700 insertions(+), 866 deletions(-) delete mode 100644 wasmtime-debug/src/address_transform.rs delete mode 100644 wasmtime-debug/src/transform.rs create mode 100644 wasmtime-debug/src/transform/address_transform.rs create mode 100644 wasmtime-debug/src/transform/attr.rs create mode 100644 wasmtime-debug/src/transform/expression.rs create mode 100644 wasmtime-debug/src/transform/line_program.rs create mode 100644 wasmtime-debug/src/transform/mod.rs create mode 100644 wasmtime-debug/src/transform/range_info_builder.rs create mode 100644 wasmtime-debug/src/transform/unit.rs diff --git a/Cargo.toml b/Cargo.toml index a459727865ef..bb3fa8fb266b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,8 @@ path = "src/wasm2obj.rs" [dependencies] cranelift-codegen = { version = "0.38.0", features = ["enable-serde"] } cranelift-native = "0.38.0" +cranelift-entity = { version = "0.38.0", features 
= ["enable-serde"] } +cranelift-wasm = { version = "0.38.0", features = ["enable-serde"] } wasmtime-debug = { path = "wasmtime-debug" } wasmtime-environ = { path = "wasmtime-environ" } wasmtime-runtime = { path = "wasmtime-runtime" } diff --git a/src/wasm2obj.rs b/src/wasm2obj.rs index bc99441bd885..5958077e3bdb 100644 --- a/src/wasm2obj.rs +++ b/src/wasm2obj.rs @@ -32,7 +32,9 @@ use cranelift_codegen::isa; use cranelift_codegen::settings; use cranelift_codegen::settings::Configurable; +use cranelift_entity::EntityRef; use cranelift_native; +use cranelift_wasm::DefinedMemoryIndex; use docopt::Docopt; use faerie::Artifact; use serde::Deserialize; @@ -49,7 +51,9 @@ use std::str::FromStr; use target_lexicon::Triple; use wasmtime_debug::{emit_debugsections, read_debuginfo}; use wasmtime_environ::cache_conf; -use wasmtime_environ::{Compiler, Cranelift, ModuleEnvironment, Tunables}; +use wasmtime_environ::{ + Compiler, Cranelift, ModuleEnvironment, ModuleVmctxInfo, Tunables, VMOffsets, +}; use wasmtime_obj::emit_module; mod utils; @@ -183,13 +187,23 @@ fn handle_module( ) }; - let (compilation, relocations, address_transform) = Cranelift::compile_module( - &module, - lazy_function_body_inputs, - &*isa, - generate_debug_info, - ) - .map_err(|e| e.to_string())?; + let (compilation, relocations, address_transform, value_ranges, stack_slots) = + Cranelift::compile_module( + &module, + lazy_function_body_inputs, + &*isa, + generate_debug_info, + ) + .map_err(|e| e.to_string())?; + + let module_vmctx_info = { + let ofs = VMOffsets::new(target_config.pointer_bytes(), &module); + let memory_offset = ofs.vmctx_vmmemory_definition_base(DefinedMemoryIndex::new(0)) as i64; + ModuleVmctxInfo { + memory_offset, + stack_slots, + } + }; emit_module( &mut obj, @@ -202,8 +216,15 @@ fn handle_module( if generate_debug_info { let debug_data = read_debuginfo(&data); - emit_debugsections(&mut obj, &target_config, &debug_data, &address_transform) - .map_err(|e| e.to_string())?; + 
emit_debugsections( + &mut obj, + &module_vmctx_info, + &target_config, + &debug_data, + &address_transform, + &value_ranges, + ) + .map_err(|e| e.to_string())?; } // FIXME: Make the format a parameter. diff --git a/wasmtime-debug/src/address_transform.rs b/wasmtime-debug/src/address_transform.rs deleted file mode 100644 index 6f5599d585ce..000000000000 --- a/wasmtime-debug/src/address_transform.rs +++ /dev/null @@ -1,158 +0,0 @@ -use crate::read_debuginfo::WasmFileInfo; -use cranelift_entity::{EntityRef, PrimaryMap}; -use cranelift_wasm::DefinedFuncIndex; -use gimli::write; -use std::collections::BTreeMap; -use std::iter::FromIterator; -use std::vec::Vec; -use wasmtime_environ::ModuleAddressMap; - -pub type GeneratedAddress = usize; -pub type WasmAddress = u64; -pub type SymbolIndex = usize; - -#[derive(Debug)] -pub struct AddressMap { - pub generated: GeneratedAddress, - pub wasm: WasmAddress, -} - -#[derive(Debug)] -pub struct FunctionMap { - pub offset: GeneratedAddress, - pub len: GeneratedAddress, - pub addresses: Box<[AddressMap]>, -} - -#[derive(Debug)] -pub struct AddressTransform { - lookup: Vec<( - WasmAddress, - (SymbolIndex, GeneratedAddress, GeneratedAddress), - )>, - map: PrimaryMap, - func_ranges: Vec<(usize, usize)>, -} - -impl AddressTransform { - pub fn new(at: &ModuleAddressMap, wasm_file: &WasmFileInfo) -> Self { - let code_section_offset = wasm_file.code_section_offset; - let function_offsets = &wasm_file.function_offsets_and_sizes; - let mut lookup = BTreeMap::new(); - let mut map = PrimaryMap::new(); - let mut func_ranges = Vec::new(); - for (i, ft) in at { - let index = i.index(); - let (fn_offset, fn_size) = function_offsets[index]; - assert!(code_section_offset <= fn_offset); - let fn_offset: WasmAddress = fn_offset - code_section_offset; - let fn_size = fn_size as WasmAddress; - func_ranges.push((ft.body_offset, ft.body_offset + ft.body_len)); - lookup.insert( - fn_offset as WasmAddress, - (index, ft.body_offset, ft.body_offset), - ); - 
let mut fn_map = Vec::new(); - for t in &ft.instructions { - if t.srcloc.is_default() { - // TODO extend some range if possible - continue; - } - // src_offset is a wasm bytecode offset in the code section - let src_offset = t.srcloc.bits() as WasmAddress - code_section_offset; - assert!(fn_offset <= src_offset && src_offset <= fn_offset + fn_size); - lookup.insert( - src_offset, - (index, t.code_offset, t.code_offset + t.code_len), - ); - fn_map.push(AddressMap { - generated: t.code_offset, - wasm: src_offset, - }); - } - let last_addr = ft.body_offset + ft.body_len; - lookup.insert(fn_offset + fn_size, (index, last_addr, last_addr)); - fn_map.sort_by(|a, b| a.generated.cmp(&b.generated)); - map.push(FunctionMap { - offset: ft.body_offset, - len: ft.body_len, - addresses: fn_map.into_boxed_slice(), - }); - } - - let lookup = Vec::from_iter(lookup.into_iter()); - - AddressTransform { - lookup, - map, - func_ranges, - } - } - - pub fn can_translate_address(&self, addr: u64) -> bool { - self.translate(addr).is_some() - } - - pub fn translate(&self, addr: u64) -> Option { - if addr == 0 { - // It's normally 0 for debug info without the linked code. - return None; - } - let found = match self.lookup.binary_search_by(|entry| entry.0.cmp(&addr)) { - Ok(i) => Some(&self.lookup[i].1), - Err(i) => { - if i > 0 { - Some(&self.lookup[i - 1].1) - } else { - None - } - } - }; - if let Some(value) = found { - return Some(write::Address::Symbol { - symbol: value.0, - addend: value.1 as i64, - }); - } - // Address was not found: function was not compiled? 
- None - } - - pub fn diff(&self, addr1: u64, addr2: u64) -> Option { - let t1 = self.translate(addr1); - let t2 = self.translate(addr2); - if t1.is_none() || t2.is_none() { - return None; - } - if let ( - Some(write::Address::Symbol { - symbol: s1, - addend: a, - }), - Some(write::Address::Symbol { - symbol: s2, - addend: b, - }), - ) = (t1, t2) - { - if s1 != s2 { - panic!("different symbol"); - } - Some((b - a) as u64) - } else { - unreachable!(); - } - } - - pub fn delta(&self, addr1: u64, u: u64) -> Option { - self.diff(addr1, addr1 + u) - } - - pub fn map(&self) -> &PrimaryMap { - &self.map - } - - pub fn func_range(&self, index: usize) -> (usize, usize) { - self.func_ranges[index] - } -} diff --git a/wasmtime-debug/src/gc.rs b/wasmtime-debug/src/gc.rs index 16a54d1a63a2..f97a7ef0933b 100644 --- a/wasmtime-debug/src/gc.rs +++ b/wasmtime-debug/src/gc.rs @@ -1,4 +1,4 @@ -use crate::address_transform::AddressTransform; +use crate::transform::AddressTransform; use gimli::constants; use gimli::read; use gimli::{Reader, UnitSectionOffset}; diff --git a/wasmtime-debug/src/lib.rs b/wasmtime-debug/src/lib.rs index cc6e3e6b49ae..9ed090e69c1d 100644 --- a/wasmtime-debug/src/lib.rs +++ b/wasmtime-debug/src/lib.rs @@ -3,13 +3,12 @@ use cranelift_codegen::isa::TargetFrontendConfig; use faerie::{Artifact, Decl}; use failure::Error; use target_lexicon::{BinaryFormat, Triple}; -use wasmtime_environ::ModuleAddressMap; +use wasmtime_environ::{ModuleAddressMap, ModuleVmctxInfo, ValueLabelsRanges}; pub use crate::read_debuginfo::{read_debuginfo, DebugInfoData, WasmFileInfo}; -pub use crate::transform::{transform_dwarf, ModuleVmctxInfo, ValueLabelsRanges}; +pub use crate::transform::transform_dwarf; pub use crate::write_debuginfo::{emit_dwarf, ResolvedSymbol, SymbolResolver}; -mod address_transform; mod gc; mod read_debuginfo; mod transform; @@ -28,12 +27,14 @@ impl SymbolResolver for FunctionRelocResolver { pub fn emit_debugsections( obj: &mut Artifact, + vmctx_info: 
&ModuleVmctxInfo, target_config: &TargetFrontendConfig, debuginfo_data: &DebugInfoData, at: &ModuleAddressMap, + ranges: &ValueLabelsRanges, ) -> Result<(), Error> { let resolver = FunctionRelocResolver {}; - let dwarf = transform_dwarf(target_config, debuginfo_data, at)?; + let dwarf = transform_dwarf(target_config, debuginfo_data, at, vmctx_info, ranges)?; emit_dwarf(obj, dwarf, &resolver)?; Ok(()) } @@ -53,7 +54,9 @@ pub fn emit_debugsections_image( triple: Triple, target_config: &TargetFrontendConfig, debuginfo_data: &DebugInfoData, + vmctx_info: &ModuleVmctxInfo, at: &ModuleAddressMap, + ranges: &ValueLabelsRanges, funcs: &Vec<(*const u8, usize)>, ) -> Result, Error> { let ref func_offsets = funcs @@ -62,7 +65,7 @@ pub fn emit_debugsections_image( .collect::>(); let mut obj = Artifact::new(triple, String::from("module")); let resolver = ImageRelocResolver { func_offsets }; - let dwarf = transform_dwarf(target_config, debuginfo_data, at)?; + let dwarf = transform_dwarf(target_config, debuginfo_data, at, vmctx_info, ranges)?; // Assuming all functions in the same code block, looking min/max of its range. assert!(funcs.len() > 0); diff --git a/wasmtime-debug/src/read_debuginfo.rs b/wasmtime-debug/src/read_debuginfo.rs index 70e88fb36f3b..c39f1d1eb94e 100644 --- a/wasmtime-debug/src/read_debuginfo.rs +++ b/wasmtime-debug/src/read_debuginfo.rs @@ -18,7 +18,6 @@ pub type Dwarf<'input> = gimli::Dwarf>; #[derive(Debug)] pub struct WasmFileInfo { pub code_section_offset: u64, - pub function_offsets_and_sizes: Box<[(u64, u32)]>, } #[derive(Debug)] @@ -100,7 +99,6 @@ pub fn read_debuginfo(data: &[u8]) -> DebugInfoData { let mut reader = ModuleReader::new(data).expect("reader"); let mut sections = HashMap::new(); let mut code_section_offset = 0; - let mut function_offsets_and_sizes = Vec::new(); while !reader.eof() { let section = reader.read().expect("section"); if let SectionCode::Custom { name, .. 
} = section.code { @@ -112,23 +110,12 @@ pub fn read_debuginfo(data: &[u8]) -> DebugInfoData { } if let SectionCode::Code = section.code { code_section_offset = section.range().start as u64; - // TODO remove me later - let mut reader = section.get_code_section_reader().expect("code reader"); - for _ in 0..reader.get_count() { - let body = reader.read().expect("function body read"); - let range = body.range(); - let fn_body_size = range.end - range.start; - let fn_body_offset = range.start; - function_offsets_and_sizes.push((fn_body_offset as u64, fn_body_size as u32)); - } } } - let function_offsets_and_sizes = function_offsets_and_sizes.into_boxed_slice(); DebugInfoData { dwarf: convert_sections(sections), wasm_file: WasmFileInfo { code_section_offset, - function_offsets_and_sizes, }, } } diff --git a/wasmtime-debug/src/transform.rs b/wasmtime-debug/src/transform.rs deleted file mode 100644 index 9af0daca0872..000000000000 --- a/wasmtime-debug/src/transform.rs +++ /dev/null @@ -1,641 +0,0 @@ -use crate::address_transform::AddressTransform; -use crate::gc::build_dependencies; -pub use crate::read_debuginfo::DebugInfoData; -use cranelift_codegen::ir; -use cranelift_codegen::isa::TargetFrontendConfig; -use cranelift_entity::{EntityRef, PrimaryMap}; -use cranelift_wasm::DefinedFuncIndex; -use failure::Error; -use std::collections::{BTreeMap, HashMap, HashSet}; -use std::iter::FromIterator; -use wasmtime_environ::ModuleAddressMap; - -use gimli; - -use gimli::{ - AttributeValue, DebugAddr, DebugAddrBase, DebugLine, DebugLineOffset, DebugStr, - DebuggingInformationEntry, LineEncoding, LocationLists, RangeLists, Unit, UnitOffset, - UnitSectionOffset, -}; - -use gimli::write; - -trait Reader: gimli::Reader {} - -impl<'input, Endian> Reader for gimli::EndianSlice<'input, Endian> where Endian: gimli::Endianity {} - -#[derive(Fail, Debug)] -#[fail(display = "Debug info transform error: {}", _0)] -pub struct TransformError(&'static str); - -/// Module `vmctx` related info. 
-pub struct ModuleVmctxInfo { - pub memory_offset: i64, - pub stack_slots: PrimaryMap, -} - -/// Value ranges for functions. -pub type ValueLabelsRanges = PrimaryMap; - -struct DebugInputContext<'a, R> -where - R: Reader, -{ - debug_str: &'a DebugStr, - debug_line: &'a DebugLine, - debug_addr: &'a DebugAddr, - debug_addr_base: DebugAddrBase, - rnglists: &'a RangeLists, - loclists: &'a LocationLists, - reachable: HashSet, -} - -type PendingDieRef = (write::UnitEntryId, gimli::DwAt, UnitOffset); - -enum FileAttributeContext<'a> { - Root(Option), - Children(&'a Vec), -} - -fn clone_die_attributes<'a, R>( - entry: &DebuggingInformationEntry, - context: &DebugInputContext, - addr_tr: &'a AddressTransform, - unit_encoding: &gimli::Encoding, - current_scope: &mut write::DebuggingInformationEntry, - current_scope_id: write::UnitEntryId, - subprogram_range: Option<(write::Address, u64)>, - out_strings: &mut write::StringTable, - die_ref_map: &HashMap, - pending_die_refs: &mut Vec, - file_context: FileAttributeContext<'a>, -) -> Result<(), Error> -where - R: Reader, -{ - let _tag = &entry.tag(); - let mut attrs = entry.attrs(); - let mut low_pc = None; - while let Some(attr) = attrs.next()? 
{ - let attr_value = match attr.value() { - AttributeValue::Addr(_) - if attr.name() == gimli::DW_AT_low_pc && subprogram_range.is_some() => - { - write::AttributeValue::Address(subprogram_range.unwrap().0) - } - AttributeValue::Udata(_) - if attr.name() == gimli::DW_AT_high_pc && subprogram_range.is_some() => - { - write::AttributeValue::Udata(subprogram_range.unwrap().1) - } - AttributeValue::Addr(u) => { - let addr = addr_tr.translate(u).unwrap_or(write::Address::Constant(0)); - if attr.name() == gimli::DW_AT_low_pc { - low_pc = Some((u, addr)); - } - write::AttributeValue::Address(addr) - } - AttributeValue::Udata(u) => { - if attr.name() != gimli::DW_AT_high_pc || low_pc.is_none() { - write::AttributeValue::Udata(u) - } else { - let u = addr_tr.delta(low_pc.unwrap().0, u).unwrap_or(0); - write::AttributeValue::Udata(u) - } - } - AttributeValue::Data1(d) => write::AttributeValue::Data1(d), - AttributeValue::Data2(d) => write::AttributeValue::Data2(d), - AttributeValue::Data4(d) => write::AttributeValue::Data4(d), - AttributeValue::Sdata(d) => write::AttributeValue::Sdata(d), - AttributeValue::Flag(f) => write::AttributeValue::Flag(f), - AttributeValue::DebugLineRef(line_program_offset) => { - if let FileAttributeContext::Root(o) = file_context { - if o != Some(line_program_offset) { - return Err(TransformError("invalid debug_line offset").into()); - } - write::AttributeValue::LineProgramRef - } else { - return Err(TransformError("unexpected debug_line index attribute").into()); - } - } - AttributeValue::FileIndex(i) => { - if let FileAttributeContext::Children(file_map) = file_context { - write::AttributeValue::FileIndex(Some(file_map[(i - 1) as usize])) - } else { - return Err(TransformError("unexpected file index attribute").into()); - } - } - AttributeValue::DebugStrRef(str_offset) => { - let s = context.debug_str.get_str(str_offset)?.to_slice()?.to_vec(); - write::AttributeValue::StringRef(out_strings.add(s)) - } - AttributeValue::RangeListsRef(r) => { - 
let low_pc = 0; - let mut ranges = context.rnglists.ranges( - r, - *unit_encoding, - low_pc, - &context.debug_addr, - context.debug_addr_base, - )?; - let mut _result = Vec::new(); - while let Some(range) = ranges.next()? { - assert!(range.begin <= range.end); - _result.push((range.begin as i64, range.end as i64)); - } - // FIXME _result contains invalid code offsets; translate_address - continue; // ignore attribute - } - AttributeValue::LocationListsRef(r) => { - let low_pc = 0; - let mut locs = context.loclists.locations( - r, - *unit_encoding, - low_pc, - &context.debug_addr, - context.debug_addr_base, - )?; - let mut _result = Vec::new(); - while let Some(loc) = locs.next()? { - _result.push((loc.range.begin as i64, loc.range.end as i64, loc.data.0)); - } - // FIXME _result contains invalid expressions and code offsets - continue; // ignore attribute - } - AttributeValue::Exprloc(ref _expr) => { - // FIXME _expr contains invalid expression - continue; // ignore attribute - } - AttributeValue::Encoding(e) => write::AttributeValue::Encoding(e), - AttributeValue::DecimalSign(e) => write::AttributeValue::DecimalSign(e), - AttributeValue::Endianity(e) => write::AttributeValue::Endianity(e), - AttributeValue::Accessibility(e) => write::AttributeValue::Accessibility(e), - AttributeValue::Visibility(e) => write::AttributeValue::Visibility(e), - AttributeValue::Virtuality(e) => write::AttributeValue::Virtuality(e), - AttributeValue::Language(e) => write::AttributeValue::Language(e), - AttributeValue::AddressClass(e) => write::AttributeValue::AddressClass(e), - AttributeValue::IdentifierCase(e) => write::AttributeValue::IdentifierCase(e), - AttributeValue::CallingConvention(e) => write::AttributeValue::CallingConvention(e), - AttributeValue::Inline(e) => write::AttributeValue::Inline(e), - AttributeValue::Ordering(e) => write::AttributeValue::Ordering(e), - AttributeValue::UnitRef(ref offset) => { - if let Some(unit_id) = die_ref_map.get(offset) { - 
write::AttributeValue::ThisUnitEntryRef(*unit_id) - } else { - pending_die_refs.push((current_scope_id, attr.name(), *offset)); - continue; - } - } - // AttributeValue::DebugInfoRef(_) => { - // continue; - // } - _ => panic!(), //write::AttributeValue::StringRef(out_strings.add("_")), - }; - current_scope.set(attr.name(), attr_value); - } - Ok(()) -} - -fn clone_attr_string( - attr_value: &AttributeValue, - form: gimli::DwForm, - debug_str: &DebugStr, - out_strings: &mut write::StringTable, -) -> Result -where - R: Reader, -{ - let content = match attr_value { - AttributeValue::DebugStrRef(str_offset) => { - debug_str.get_str(*str_offset)?.to_slice()?.to_vec() - } - AttributeValue::String(b) => b.to_slice()?.to_vec(), - _ => panic!("Unexpected attribute value"), - }; - Ok(match form { - gimli::DW_FORM_strp => { - let id = out_strings.add(content); - write::LineString::StringRef(id) - } - gimli::DW_FORM_string => write::LineString::String(content), - _ => panic!("DW_FORM_line_strp or other not supported"), - }) -} - -#[derive(Debug)] -enum SavedLineProgramRow { - Normal { - address: u64, - op_index: u64, - file_index: u64, - line: u64, - column: u64, - discriminator: u64, - is_stmt: bool, - basic_block: bool, - prologue_end: bool, - epilogue_begin: bool, - isa: u64, - }, - EndOfSequence(u64), -} - -#[derive(Debug, Eq, PartialEq)] -enum ReadLineProgramState { - SequenceEnded, - ReadSequence, - IgnoreSequence, -} - -fn clone_line_program( - unit: &Unit, - root: &DebuggingInformationEntry, - addr_tr: &AddressTransform, - out_encoding: &gimli::Encoding, - debug_str: &DebugStr, - debug_line: &DebugLine, - out_strings: &mut write::StringTable, -) -> Result<(write::LineProgram, DebugLineOffset, Vec), Error> -where - R: Reader, -{ - let offset = match root.attr_value(gimli::DW_AT_stmt_list)? 
{ - Some(gimli::AttributeValue::DebugLineRef(offset)) => offset, - _ => { - return Err(TransformError("Debug line offset is not found").into()); - } - }; - let comp_dir = root.attr_value(gimli::DW_AT_comp_dir)?; - let comp_name = root.attr_value(gimli::DW_AT_name)?; - let out_comp_dir = clone_attr_string( - comp_dir.as_ref().expect("comp_dir"), - gimli::DW_FORM_strp, - debug_str, - out_strings, - )?; - let out_comp_name = clone_attr_string( - comp_name.as_ref().expect("comp_name"), - gimli::DW_FORM_strp, - debug_str, - out_strings, - )?; - - let program = debug_line.program( - offset, - unit.header.address_size(), - comp_dir.and_then(|val| val.string_value(&debug_str)), - comp_name.and_then(|val| val.string_value(&debug_str)), - ); - if let Ok(program) = program { - let header = program.header(); - assert!(header.version() <= 4, "not supported 5"); - let line_encoding = LineEncoding { - minimum_instruction_length: header.minimum_instruction_length(), - maximum_operations_per_instruction: header.maximum_operations_per_instruction(), - default_is_stmt: header.default_is_stmt(), - line_base: header.line_base(), - line_range: header.line_range(), - }; - let mut out_program = write::LineProgram::new( - *out_encoding, - line_encoding, - out_comp_dir, - out_comp_name, - None, - ); - let mut dirs = Vec::new(); - dirs.push(out_program.default_directory()); - for dir_attr in header.include_directories() { - let dir_id = out_program.add_directory(clone_attr_string( - dir_attr, - gimli::DW_FORM_string, - debug_str, - out_strings, - )?); - dirs.push(dir_id); - } - let mut files = Vec::new(); - for file_entry in header.file_names() { - let dir_id = dirs[file_entry.directory_index() as usize]; - let file_id = out_program.add_file( - clone_attr_string( - &file_entry.path_name(), - gimli::DW_FORM_string, - debug_str, - out_strings, - )?, - dir_id, - None, - ); - files.push(file_id); - } - - let mut rows = program.rows(); - let mut saved_rows = BTreeMap::new(); - let mut state = 
ReadLineProgramState::SequenceEnded; - while let Some((_header, row)) = rows.next_row()? { - if state == ReadLineProgramState::IgnoreSequence { - if row.end_sequence() { - state = ReadLineProgramState::SequenceEnded; - } - continue; - } - let saved_row = if row.end_sequence() { - state = ReadLineProgramState::SequenceEnded; - SavedLineProgramRow::EndOfSequence(row.address()) - } else { - if state == ReadLineProgramState::SequenceEnded { - // Discard sequences for non-existent code. - if row.address() == 0 { - state = ReadLineProgramState::IgnoreSequence; - continue; - } - state = ReadLineProgramState::ReadSequence; - } - SavedLineProgramRow::Normal { - address: row.address(), - op_index: row.op_index(), - file_index: row.file_index(), - line: row.line().unwrap_or(0), - column: match row.column() { - gimli::ColumnType::LeftEdge => 0, - gimli::ColumnType::Column(val) => val, - }, - discriminator: row.discriminator(), - is_stmt: row.is_stmt(), - basic_block: row.basic_block(), - prologue_end: row.prologue_end(), - epilogue_begin: row.epilogue_begin(), - isa: row.isa(), - } - }; - saved_rows.insert(row.address(), saved_row); - } - - let saved_rows = Vec::from_iter(saved_rows.into_iter()); - - for (i, map) in addr_tr.map() { - let symbol = i.index(); - let base_addr = map.offset; - out_program.begin_sequence(Some(write::Address::Symbol { symbol, addend: 0 })); - // TODO track and place function declaration line here - let mut last_address = None; - for addr_map in map.addresses.iter() { - let saved_row = - match saved_rows.binary_search_by(|entry| entry.0.cmp(&addr_map.wasm)) { - Ok(i) => Some(&saved_rows[i].1), - Err(i) => { - if i > 0 { - Some(&saved_rows[i - 1].1) - } else { - None - } - } - }; - if let Some(SavedLineProgramRow::Normal { - address, - op_index, - file_index, - line, - column, - discriminator, - is_stmt, - basic_block, - prologue_end, - epilogue_begin, - isa, - }) = saved_row - { - // Ignore duplicates - if Some(*address) != last_address { - let 
address_offset = if last_address.is_none() { - // Extend first entry to the function declaration - // TODO use the function declaration line instead - 0 - } else { - (addr_map.generated - base_addr) as u64 - }; - out_program.row().address_offset = address_offset; - out_program.row().op_index = *op_index; - out_program.row().file = files[(file_index - 1) as usize]; - out_program.row().line = *line; - out_program.row().column = *column; - out_program.row().discriminator = *discriminator; - out_program.row().is_statement = *is_stmt; - out_program.row().basic_block = *basic_block; - out_program.row().prologue_end = *prologue_end; - out_program.row().epilogue_begin = *epilogue_begin; - out_program.row().isa = *isa; - out_program.generate_row(); - last_address = Some(*address); - } - } - } - let end_addr = (map.offset + map.len - 1) as u64; - out_program.end_sequence(end_addr); - } - Ok((out_program, offset, files)) - } else { - Err(TransformError("Valid line program not found").into()) - } -} - -fn get_subprogram_range<'a, R>( - entry: &DebuggingInformationEntry, - addr_tr: &'a AddressTransform, -) -> Result, Error> -where - R: Reader, -{ - let low_pc = entry.attr_value(gimli::DW_AT_low_pc)?; - if let Some(AttributeValue::Addr(addr)) = low_pc { - let transformed = addr_tr.translate(addr); - if let Some(write::Address::Symbol { symbol, .. 
}) = transformed { - let range = addr_tr.func_range(symbol); - let addr = write::Address::Symbol { - symbol, - addend: range.0 as i64, - }; - let len = (range.1 - range.0) as u64; - return Ok(Some((addr, len))); - } - } - Ok(None) -} - -fn clone_unit<'a, R>( - unit: Unit, - context: &DebugInputContext, - addr_tr: &'a AddressTransform, - out_encoding: &gimli::Encoding, - out_units: &mut write::UnitTable, - out_strings: &mut write::StringTable, -) -> Result<(), Error> -where - R: Reader, -{ - let mut die_ref_map = HashMap::new(); - let mut pending_die_refs = Vec::new(); - let mut stack = Vec::new(); - - // Iterate over all of this compilation unit's entries. - let mut entries = unit.entries(); - let (comp_unit, file_map) = if let Some((depth_delta, entry)) = entries.next_dfs()? { - assert!(depth_delta == 0); - let (out_line_program, debug_line_offset, file_map) = clone_line_program( - &unit, - entry, - addr_tr, - out_encoding, - context.debug_str, - context.debug_line, - out_strings, - )?; - - if entry.tag() == gimli::DW_TAG_compile_unit { - let unit_id = out_units.add(write::Unit::new(*out_encoding, out_line_program)); - let comp_unit = out_units.get_mut(unit_id); - - let root_id = comp_unit.root(); - die_ref_map.insert(entry.offset(), root_id); - - clone_die_attributes( - entry, - context, - addr_tr, - &unit.encoding(), - comp_unit.get_mut(root_id), - root_id, - None, - out_strings, - &die_ref_map, - &mut pending_die_refs, - FileAttributeContext::Root(Some(debug_line_offset)), - )?; - - stack.push(root_id); - (comp_unit, file_map) - } else { - return Err(TransformError("Unexpected unit header").into()); - } - } else { - return Ok(()); // empty - }; - let mut skip_at_depth = None; - while let Some((depth_delta, entry)) = entries.next_dfs()? 
{ - let depth_delta = if let Some((depth, cached)) = skip_at_depth { - let new_depth = depth + depth_delta; - if new_depth > 0 { - skip_at_depth = Some((new_depth, cached)); - continue; - } - skip_at_depth = None; - new_depth + cached - } else { - depth_delta - }; - if !context - .reachable - .contains(&entry.offset().to_unit_section_offset(&unit)) - { - // entry is not reachable: discarding all its info. - skip_at_depth = Some((0, depth_delta)); - continue; - } - - let range = if entry.tag() == gimli::DW_TAG_subprogram { - get_subprogram_range(entry, addr_tr)? - } else { - None - }; - - if depth_delta <= 0 { - for _ in depth_delta..1 { - stack.pop(); - } - } else { - assert!(depth_delta == 1); - } - let parent = stack.last().unwrap(); - let die_id = comp_unit.add(*parent, entry.tag()); - let current_scope = comp_unit.get_mut(die_id); - - stack.push(die_id); - die_ref_map.insert(entry.offset(), die_id); - - clone_die_attributes( - entry, - context, - addr_tr, - &unit.encoding(), - current_scope, - die_id, - range, - out_strings, - &die_ref_map, - &mut pending_die_refs, - FileAttributeContext::Children(&file_map), - )?; - } - for (die_id, attr_name, offset) in pending_die_refs { - let die = comp_unit.get_mut(die_id); - // TODO we probably loosing DW_AT_abstract_origin and DW_AT_type references - // here, find out if we drop stuff we don't need to. 
- if let Some(unit_id) = die_ref_map.get(&offset) { - die.set(attr_name, write::AttributeValue::ThisUnitEntryRef(*unit_id)); - } - } - Ok(()) -} - -pub fn transform_dwarf( - target_config: &TargetFrontendConfig, - di: &DebugInfoData, - at: &ModuleAddressMap, -) -> Result { - let addr_tr = AddressTransform::new(at, &di.wasm_file); - let reachable = build_dependencies(&di.dwarf, &addr_tr)?.get_reachable(); - - let context = DebugInputContext { - debug_str: &di.dwarf.debug_str, - debug_line: &di.dwarf.debug_line, - debug_addr: &di.dwarf.debug_addr, - debug_addr_base: DebugAddrBase(0), - rnglists: &di.dwarf.ranges, - loclists: &di.dwarf.locations, - reachable, - }; - - let out_encoding = gimli::Encoding { - format: gimli::Format::Dwarf32, - // TODO: this should be configurable - // macOS doesn't seem to support DWARF > 3 - version: 3, - address_size: target_config.pointer_bytes(), - }; - - let mut out_strings = write::StringTable::default(); - let mut out_units = write::UnitTable::default(); - - let out_line_strings = write::LineStringTable::default(); - - let mut iter = di.dwarf.debug_info.units(); - while let Some(unit) = iter.next().unwrap_or(None) { - let unit = di.dwarf.unit(unit)?; - clone_unit( - unit, - &context, - &addr_tr, - &out_encoding, - &mut out_units, - &mut out_strings, - )?; - } - - Ok(write::Dwarf { - units: out_units, - line_programs: vec![], - line_strings: out_line_strings, - strings: out_strings, - }) -} diff --git a/wasmtime-debug/src/transform/address_transform.rs b/wasmtime-debug/src/transform/address_transform.rs new file mode 100644 index 000000000000..8e6715c82539 --- /dev/null +++ b/wasmtime-debug/src/transform/address_transform.rs @@ -0,0 +1,655 @@ +use crate::WasmFileInfo; +use cranelift_codegen::ir::SourceLoc; +use cranelift_entity::{EntityRef, PrimaryMap}; +use cranelift_wasm::DefinedFuncIndex; +use gimli::write; +use std::boxed::Box; +use std::collections::{BTreeMap, HashMap}; +use std::iter::FromIterator; +use std::vec::Vec; +use 
wasmtime_environ::{FunctionAddressMap, ModuleAddressMap}; + +pub type GeneratedAddress = usize; +pub type WasmAddress = u64; + +/// Contains mapping of the generated address to its original +/// source location. +#[derive(Debug)] +pub struct AddressMap { + pub generated: GeneratedAddress, + pub wasm: WasmAddress, +} + +/// Information about generated function code: its body start, +/// length, and instructions addresses. +#[derive(Debug)] +pub struct FunctionMap { + pub offset: GeneratedAddress, + pub len: GeneratedAddress, + pub wasm_start: WasmAddress, + pub wasm_end: WasmAddress, + pub addresses: Box<[AddressMap]>, +} + +/// Mapping of the source location to its generated code range. +#[derive(Debug)] +struct Position { + wasm_pos: WasmAddress, + gen_start: GeneratedAddress, + gen_end: GeneratedAddress, +} + +/// Mapping of continuous range of source locations to its generated +/// code. The positions are always in ascending order for search. +#[derive(Debug)] +struct Range { + wasm_start: WasmAddress, + wasm_end: WasmAddress, + gen_start: GeneratedAddress, + gen_end: GeneratedAddress, + positions: Box<[Position]>, +} + +/// Helper function address lookup data. Contains ranges start positions +/// index and ranges data. The multiple ranges can include the same +/// original source position. The index (B-Tree) uses range start +/// position as a key. +#[derive(Debug)] +struct FuncLookup { + index: Vec<(WasmAddress, Box<[usize]>)>, + ranges: Box<[Range]>, +} + +/// Mapping of original functions to generated code locations/ranges. +#[derive(Debug)] +struct FuncTransform { + start: WasmAddress, + end: WasmAddress, + index: DefinedFuncIndex, + lookup: FuncLookup, +} + +/// Module functions mapping to generated code. +#[derive(Debug)] +pub struct AddressTransform { + map: PrimaryMap, + func: Vec<(WasmAddress, FuncTransform)>, +} + +/// Returns a wasm bytecode offset in the code section from SourceLoc. 
+pub fn get_wasm_code_offset(loc: SourceLoc, code_section_offset: u64) -> WasmAddress { + // Code section size <= 4GB, allow wrapped SourceLoc to recover the overflow. + loc.bits().wrapping_sub(code_section_offset as u32) as WasmAddress +} + +fn build_function_lookup( + ft: &FunctionAddressMap, + code_section_offset: u64, +) -> (WasmAddress, WasmAddress, FuncLookup) { + assert!(code_section_offset <= ft.start_srcloc.bits() as u64); + let fn_start = get_wasm_code_offset(ft.start_srcloc, code_section_offset); + let fn_end = get_wasm_code_offset(ft.end_srcloc, code_section_offset); + assert!(fn_start <= fn_end); + + // Build ranges of continuous source locations. The new ranges starts when + // non-descending order is interrupted. Assuming the same origin location can + // be present in multiple ranges. + let mut range_wasm_start = fn_start; + let mut range_gen_start = ft.body_offset; + let mut last_wasm_pos = range_wasm_start; + let mut ranges = Vec::new(); + let mut ranges_index = BTreeMap::new(); + let mut current_range = Vec::new(); + for t in &ft.instructions { + if t.srcloc.is_default() { + continue; + } + + let offset = get_wasm_code_offset(t.srcloc, code_section_offset); + assert!(fn_start <= offset && offset <= fn_end); + + let inst_gen_start = t.code_offset; + let inst_gen_end = t.code_offset + t.code_len; + + if last_wasm_pos > offset { + // Start new range. + ranges_index.insert(range_wasm_start, ranges.len()); + ranges.push(Range { + wasm_start: range_wasm_start, + wasm_end: last_wasm_pos, + gen_start: range_gen_start, + gen_end: inst_gen_start, + positions: current_range.into_boxed_slice(), + }); + range_wasm_start = offset; + range_gen_start = inst_gen_start; + current_range = Vec::new(); + } + // Continue existing range: add new wasm->generated code position. 
+ current_range.push(Position { + wasm_pos: offset, + gen_start: inst_gen_start, + gen_end: inst_gen_end, + }); + last_wasm_pos = offset; + } + let last_gen_addr = ft.body_offset + ft.body_len; + ranges_index.insert(range_wasm_start, ranges.len()); + ranges.push(Range { + wasm_start: range_wasm_start, + wasm_end: fn_end, + gen_start: range_gen_start, + gen_end: last_gen_addr, + positions: current_range.into_boxed_slice(), + }); + + // Making ranges lookup faster by building index: B-tree with every range + // start position that maps into list of active ranges at this position. + let ranges = ranges.into_boxed_slice(); + let mut active_ranges = Vec::new(); + let mut index = BTreeMap::new(); + let mut last_wasm_pos = None; + for (wasm_start, range_index) in ranges_index { + if Some(wasm_start) == last_wasm_pos { + active_ranges.push(range_index); + continue; + } + if last_wasm_pos.is_some() { + index.insert( + last_wasm_pos.unwrap(), + active_ranges.clone().into_boxed_slice(), + ); + } + active_ranges.retain(|r| ranges[*r].wasm_end.cmp(&wasm_start) != std::cmp::Ordering::Less); + active_ranges.push(range_index); + last_wasm_pos = Some(wasm_start); + } + index.insert(last_wasm_pos.unwrap(), active_ranges.into_boxed_slice()); + let index = Vec::from_iter(index.into_iter()); + (fn_start, fn_end, FuncLookup { index, ranges }) +} + +fn build_function_addr_map( + at: &ModuleAddressMap, + code_section_offset: u64, +) -> PrimaryMap { + let mut map = PrimaryMap::new(); + for (_, ft) in at { + let mut fn_map = Vec::new(); + for t in &ft.instructions { + if t.srcloc.is_default() { + continue; + } + let offset = get_wasm_code_offset(t.srcloc, code_section_offset); + fn_map.push(AddressMap { + generated: t.code_offset, + wasm: offset, + }); + } + + if cfg!(debug) { + // fn_map is sorted by the generated field -- see FunctionAddressMap::instructions. 
+ for i in 1..fn_map.len() { + assert!(fn_map[i - 1].generated <= fn_map[i].generated); + } + } + + map.push(FunctionMap { + offset: ft.body_offset, + len: ft.body_len, + wasm_start: get_wasm_code_offset(ft.start_srcloc, code_section_offset), + wasm_end: get_wasm_code_offset(ft.end_srcloc, code_section_offset), + addresses: fn_map.into_boxed_slice(), + }); + } + map +} + +struct TransformRangeIter<'a> { + addr: u64, + indicies: &'a [usize], + ranges: &'a [Range], +} + +impl<'a> TransformRangeIter<'a> { + fn new(func: &'a FuncTransform, addr: u64) -> Self { + let found = match func + .lookup + .index + .binary_search_by(|entry| entry.0.cmp(&addr)) + { + Ok(i) => Some(&func.lookup.index[i].1), + Err(i) => { + if i > 0 { + Some(&func.lookup.index[i - 1].1) + } else { + None + } + } + }; + if let Some(range_indices) = found { + TransformRangeIter { + addr, + indicies: range_indices, + ranges: &func.lookup.ranges, + } + } else { + unreachable!(); + } + } +} +impl<'a> Iterator for TransformRangeIter<'a> { + type Item = (usize, usize); + fn next(&mut self) -> Option { + if let Some((first, tail)) = self.indicies.split_first() { + let range_index = *first; + let range = &self.ranges[range_index]; + self.indicies = tail; + let address = match range + .positions + .binary_search_by(|a| a.wasm_pos.cmp(&self.addr)) + { + Ok(i) => range.positions[i].gen_start, + Err(i) => { + if i == 0 { + range.gen_start + } else { + range.positions[i - 1].gen_end + } + } + }; + Some((address, range_index)) + } else { + None + } + } +} + +struct TransformRangeEndIter<'a> { + addr: u64, + indicies: &'a [usize], + ranges: &'a [Range], +} + +impl<'a> TransformRangeEndIter<'a> { + fn new(func: &'a FuncTransform, addr: u64) -> Self { + let found = match func + .lookup + .index + .binary_search_by(|entry| entry.0.cmp(&addr)) + { + Ok(i) => Some(&func.lookup.index[i].1), + Err(i) => { + if i > 0 { + Some(&func.lookup.index[i - 1].1) + } else { + None + } + } + }; + if let Some(range_indices) = found 
{ + TransformRangeEndIter { + addr, + indicies: range_indices, + ranges: &func.lookup.ranges, + } + } else { + unreachable!(); + } + } +} + +impl<'a> Iterator for TransformRangeEndIter<'a> { + type Item = (usize, usize); + fn next(&mut self) -> Option { + while let Some((first, tail)) = self.indicies.split_first() { + let range_index = *first; + let range = &self.ranges[range_index]; + if range.wasm_start >= self.addr { + continue; + } + self.indicies = tail; + let address = match range + .positions + .binary_search_by(|a| a.wasm_pos.cmp(&self.addr)) + { + Ok(i) => range.positions[i].gen_end, + Err(i) => { + if i == range.positions.len() { + range.gen_end + } else { + range.positions[i].gen_start + } + } + }; + return Some((address, range_index)); + } + None + } +} + +impl AddressTransform { + pub fn new(at: &ModuleAddressMap, wasm_file: &WasmFileInfo) -> Self { + let code_section_offset = wasm_file.code_section_offset; + + let mut func = BTreeMap::new(); + for (i, ft) in at { + let (fn_start, fn_end, lookup) = build_function_lookup(ft, code_section_offset); + + func.insert( + fn_start, + FuncTransform { + start: fn_start, + end: fn_end, + index: i, + lookup, + }, + ); + } + + let map = build_function_addr_map(at, code_section_offset); + let func = Vec::from_iter(func.into_iter()); + AddressTransform { map, func } + } + + fn find_func(&self, addr: u64) -> Option<&FuncTransform> { + // TODO check if we need to include end address + let func = match self.func.binary_search_by(|entry| entry.0.cmp(&addr)) { + Ok(i) => &self.func[i].1, + Err(i) => { + if i > 0 { + &self.func[i - 1].1 + } else { + return None; + } + } + }; + if addr >= func.start { + return Some(func); + } + None + } + + pub fn find_func_index(&self, addr: u64) -> Option { + self.find_func(addr).map(|f| f.index) + } + + pub fn translate_raw(&self, addr: u64) -> Option<(DefinedFuncIndex, GeneratedAddress)> { + if addr == 0 { + // It's normally 0 for debug info without the linked code. 
+ return None; + } + if let Some(func) = self.find_func(addr) { + if addr == func.end { + // Clamp last address to the end to extend translation to the end + // of the function. + let map = &self.map[func.index]; + return Some((func.index, map.len)); + } + let first_result = TransformRangeIter::new(func, addr).next(); + first_result.map(|(address, _)| (func.index, address)) + } else { + // Address was not found: function was not compiled? + None + } + } + + pub fn can_translate_address(&self, addr: u64) -> bool { + self.translate(addr).is_some() + } + + pub fn translate(&self, addr: u64) -> Option { + self.translate_raw(addr) + .map(|(func_index, address)| write::Address::Symbol { + symbol: func_index.index(), + addend: address as i64, + }) + } + + pub fn translate_ranges_raw( + &self, + start: u64, + end: u64, + ) -> Option<(DefinedFuncIndex, Vec<(GeneratedAddress, GeneratedAddress)>)> { + if start == 0 { + // It's normally 0 for debug info without the linked code. + return None; + } + if let Some(func) = self.find_func(start) { + let mut starts: HashMap = + HashMap::from_iter(TransformRangeIter::new(func, start).map(|(a, r)| (r, a))); + let mut result = Vec::new(); + TransformRangeEndIter::new(func, end).for_each(|(a, r)| { + let range_start = if let Some(range_start) = starts.get(&r) { + let range_start = *range_start; + starts.remove(&r); + range_start + } else { + let range = &func.lookup.ranges[r]; + range.gen_start + }; + result.push((range_start, a)); + }); + for (r, range_start) in starts { + let range = &func.lookup.ranges[r]; + result.push((range_start, range.gen_end)); + } + return Some((func.index, result)); + } + // Address was not found: function was not compiled? 
+ None + } + + pub fn translate_ranges(&self, start: u64, end: u64) -> Vec<(write::Address, u64)> { + self.translate_ranges_raw(start, end) + .map_or(vec![], |(func_index, ranges)| { + ranges + .iter() + .map(|(start, end)| { + ( + write::Address::Symbol { + symbol: func_index.index(), + addend: *start as i64, + }, + (*end - *start) as u64, + ) + }) + .collect::>() + }) + } + + pub fn map(&self) -> &PrimaryMap { + &self.map + } + + pub fn func_range(&self, index: DefinedFuncIndex) -> (GeneratedAddress, GeneratedAddress) { + let map = &self.map[index]; + (map.offset, map.offset + map.len) + } + + pub fn func_source_range(&self, index: DefinedFuncIndex) -> (WasmAddress, WasmAddress) { + let map = &self.map[index]; + (map.wasm_start, map.wasm_end) + } + + pub fn convert_to_code_range( + &self, + addr: write::Address, + len: u64, + ) -> (GeneratedAddress, GeneratedAddress) { + let start = if let write::Address::Symbol { addend, .. } = addr { + // TODO subtract self.map[symbol].offset ? + addend as GeneratedAddress + } else { + unreachable!(); + }; + (start, start + len as GeneratedAddress) + } +} + +#[cfg(test)] +mod tests { + use super::{build_function_lookup, get_wasm_code_offset, AddressTransform}; + use crate::read_debuginfo::WasmFileInfo; + use cranelift_codegen::ir::SourceLoc; + use cranelift_entity::PrimaryMap; + use gimli::write::Address; + use std::iter::FromIterator; + use wasmtime_environ::{FunctionAddressMap, InstructionAddressMap, ModuleAddressMap}; + + #[test] + fn test_get_wasm_code_offset() { + let offset = get_wasm_code_offset(SourceLoc::new(3), 1); + assert_eq!(2, offset); + let offset = get_wasm_code_offset(SourceLoc::new(16), 0xF000_0000); + assert_eq!(0x1000_0010, offset); + let offset = get_wasm_code_offset(SourceLoc::new(1), 0x20_8000_0000); + assert_eq!(0x8000_0001, offset); + } + + fn create_simple_func(wasm_offset: u32) -> FunctionAddressMap { + FunctionAddressMap { + instructions: vec![ + InstructionAddressMap { + srcloc: 
SourceLoc::new(wasm_offset + 2), + code_offset: 5, + code_len: 3, + }, + InstructionAddressMap { + srcloc: SourceLoc::new(wasm_offset + 7), + code_offset: 15, + code_len: 8, + }, + ], + start_srcloc: SourceLoc::new(wasm_offset), + end_srcloc: SourceLoc::new(wasm_offset + 10), + body_offset: 0, + body_len: 30, + } + } + + fn create_simple_module(func: FunctionAddressMap) -> ModuleAddressMap { + PrimaryMap::from_iter(vec![func]) + } + + #[test] + fn test_build_function_lookup_simple() { + let input = create_simple_func(11); + let (start, end, lookup) = build_function_lookup(&input, 1); + assert_eq!(10, start); + assert_eq!(20, end); + + assert_eq!(1, lookup.index.len()); + let index_entry = lookup.index.into_iter().next().unwrap(); + assert_eq!((10u64, vec![0].into_boxed_slice()), index_entry); + assert_eq!(1, lookup.ranges.len()); + let range = &lookup.ranges[0]; + assert_eq!(10, range.wasm_start); + assert_eq!(20, range.wasm_end); + assert_eq!(0, range.gen_start); + assert_eq!(30, range.gen_end); + let positions = &range.positions; + assert_eq!(2, positions.len()); + assert_eq!(12, positions[0].wasm_pos); + assert_eq!(5, positions[0].gen_start); + assert_eq!(8, positions[0].gen_end); + assert_eq!(17, positions[1].wasm_pos); + assert_eq!(15, positions[1].gen_start); + assert_eq!(23, positions[1].gen_end); + } + + #[test] + fn test_build_function_lookup_two_ranges() { + let mut input = create_simple_func(11); + // append instruction with same srcloc as input.instructions[0] + input.instructions.push(InstructionAddressMap { + srcloc: SourceLoc::new(11 + 2), + code_offset: 23, + code_len: 3, + }); + let (start, end, lookup) = build_function_lookup(&input, 1); + assert_eq!(10, start); + assert_eq!(20, end); + + assert_eq!(2, lookup.index.len()); + let index_entries = Vec::from_iter(lookup.index.into_iter()); + assert_eq!((10u64, vec![0].into_boxed_slice()), index_entries[0]); + assert_eq!((12u64, vec![0, 1].into_boxed_slice()), index_entries[1]); + assert_eq!(2, 
lookup.ranges.len()); + + let range = &lookup.ranges[0]; + assert_eq!(10, range.wasm_start); + assert_eq!(17, range.wasm_end); + assert_eq!(0, range.gen_start); + assert_eq!(23, range.gen_end); + let positions = &range.positions; + assert_eq!(2, positions.len()); + assert_eq!(12, positions[0].wasm_pos); + assert_eq!(5, positions[0].gen_start); + assert_eq!(8, positions[0].gen_end); + assert_eq!(17, positions[1].wasm_pos); + assert_eq!(15, positions[1].gen_start); + assert_eq!(23, positions[1].gen_end); + + let range = &lookup.ranges[1]; + assert_eq!(12, range.wasm_start); + assert_eq!(20, range.wasm_end); + assert_eq!(23, range.gen_start); + assert_eq!(30, range.gen_end); + let positions = &range.positions; + assert_eq!(1, positions.len()); + assert_eq!(12, positions[0].wasm_pos); + assert_eq!(23, positions[0].gen_start); + assert_eq!(26, positions[0].gen_end); + } + + #[test] + fn test_addr_translate() { + let input = create_simple_module(create_simple_func(11)); + let at = AddressTransform::new( + &input, + &WasmFileInfo { + code_section_offset: 1, + }, + ); + + let addr = at.translate(10); + assert_eq!( + Some(Address::Symbol { + symbol: 0, + addend: 0, + }), + addr + ); + + let addr = at.translate(20); + assert_eq!( + Some(Address::Symbol { + symbol: 0, + addend: 30, + }), + addr + ); + + let addr = at.translate(0); + assert_eq!(None, addr); + + let addr = at.translate(12); + assert_eq!( + Some(Address::Symbol { + symbol: 0, + addend: 5, + }), + addr + ); + + let addr = at.translate(18); + assert_eq!( + Some(Address::Symbol { + symbol: 0, + addend: 23, + }), + addr + ); + } +} diff --git a/wasmtime-debug/src/transform/attr.rs b/wasmtime-debug/src/transform/attr.rs new file mode 100644 index 000000000000..166a7cf9cf1c --- /dev/null +++ b/wasmtime-debug/src/transform/attr.rs @@ -0,0 +1,295 @@ +use failure::Error; +use std::collections::HashMap; + +use gimli; + +use gimli::{AttributeValue, DebugLineOffset, DebugStr, DebuggingInformationEntry, UnitOffset}; + +use 
gimli::write; + +use super::address_transform::AddressTransform; +use super::expression::{compile_expression, CompiledExpression, FunctionFrameInfo}; +use super::range_info_builder::RangeInfoBuilder; +use super::unit::PendingDieRef; +use super::{DebugInputContext, Reader, TransformError}; + +pub(crate) enum FileAttributeContext<'a> { + Root(Option), + Children(&'a Vec, Option<&'a CompiledExpression>), +} + +fn is_exprloc_to_loclist_allowed(attr_name: gimli::constants::DwAt) -> bool { + match attr_name { + gimli::DW_AT_location + | gimli::DW_AT_string_length + | gimli::DW_AT_return_addr + | gimli::DW_AT_data_member_location + | gimli::DW_AT_frame_base + | gimli::DW_AT_segment + | gimli::DW_AT_static_link + | gimli::DW_AT_use_location + | gimli::DW_AT_vtable_elem_location => true, + _ => false, + } +} + +pub(crate) fn clone_die_attributes<'a, R>( + entry: &DebuggingInformationEntry, + context: &DebugInputContext, + addr_tr: &'a AddressTransform, + frame_info: Option<&FunctionFrameInfo>, + unit_encoding: &gimli::Encoding, + out_unit: &mut write::Unit, + current_scope_id: write::UnitEntryId, + subprogram_range_builder: Option, + scope_ranges: Option<&Vec<(u64, u64)>>, + cu_low_pc: u64, + out_strings: &mut write::StringTable, + die_ref_map: &HashMap, + pending_die_refs: &mut Vec, + file_context: FileAttributeContext<'a>, +) -> Result<(), Error> +where + R: Reader, +{ + let _tag = &entry.tag(); + let endian = gimli::RunTimeEndian::Little; + + let range_info = if let Some(subprogram_range_builder) = subprogram_range_builder { + subprogram_range_builder + } else { + if entry.tag() == gimli::DW_TAG_compile_unit { + // FIXME currently address_transform operate on a single func range, + // once it is fixed we can properly set DW_AT_ranges attribute. + // Using for now DW_AT_low_pc = 0. + RangeInfoBuilder::Position(0) + } else { + RangeInfoBuilder::from(entry, context, unit_encoding, cu_low_pc)? 
+ } + }; + range_info.build(addr_tr, out_unit, current_scope_id); + + let mut attrs = entry.attrs(); + while let Some(attr) = attrs.next()? { + let attr_value = match attr.value() { + AttributeValue::Addr(_) if attr.name() == gimli::DW_AT_low_pc => { + continue; + } + AttributeValue::Udata(_) if attr.name() == gimli::DW_AT_high_pc => { + continue; + } + AttributeValue::RangeListsRef(_) if attr.name() == gimli::DW_AT_ranges => { + continue; + } + AttributeValue::Exprloc(_) if attr.name() == gimli::DW_AT_frame_base => { + continue; + } + + AttributeValue::Addr(u) => { + let addr = addr_tr.translate(u).unwrap_or(write::Address::Constant(0)); + write::AttributeValue::Address(addr) + } + AttributeValue::Udata(u) => write::AttributeValue::Udata(u), + AttributeValue::Data1(d) => write::AttributeValue::Data1(d), + AttributeValue::Data2(d) => write::AttributeValue::Data2(d), + AttributeValue::Data4(d) => write::AttributeValue::Data4(d), + AttributeValue::Sdata(d) => write::AttributeValue::Sdata(d), + AttributeValue::Flag(f) => write::AttributeValue::Flag(f), + AttributeValue::DebugLineRef(line_program_offset) => { + if let FileAttributeContext::Root(o) = file_context { + if o != Some(line_program_offset) { + return Err(TransformError("invalid debug_line offset").into()); + } + write::AttributeValue::LineProgramRef + } else { + return Err(TransformError("unexpected debug_line index attribute").into()); + } + } + AttributeValue::FileIndex(i) => { + if let FileAttributeContext::Children(file_map, _) = file_context { + write::AttributeValue::FileIndex(Some(file_map[(i - 1) as usize])) + } else { + return Err(TransformError("unexpected file index attribute").into()); + } + } + AttributeValue::DebugStrRef(str_offset) => { + let s = context.debug_str.get_str(str_offset)?.to_slice()?.to_vec(); + write::AttributeValue::StringRef(out_strings.add(s)) + } + AttributeValue::RangeListsRef(r) => { + let range_info = + RangeInfoBuilder::from_ranges_ref(r, context, unit_encoding, 
cu_low_pc)?; + let range_list_id = range_info.build_ranges(addr_tr, &mut out_unit.ranges); + write::AttributeValue::RangeListRef(range_list_id) + } + AttributeValue::LocationListsRef(r) => { + let low_pc = 0; + let mut locs = context.loclists.locations( + r, + *unit_encoding, + low_pc, + &context.debug_addr, + context.debug_addr_base, + )?; + let frame_base = if let FileAttributeContext::Children(_, frame_base) = file_context + { + frame_base + } else { + None + }; + let mut result = None; + while let Some(loc) = locs.next()? { + if let Some(expr) = compile_expression(&loc.data, unit_encoding, frame_base)? { + if result.is_none() { + result = Some(Vec::new()); + } + for (start, len, expr) in expr.build_with_locals( + &[(loc.range.begin, loc.range.end)], + addr_tr, + frame_info, + endian, + ) { + if len == 0 { + // Ignore empty range + continue; + } + result.as_mut().unwrap().push(write::Location::StartLength { + begin: start, + length: len, + data: expr, + }); + } + } else { + // FIXME _expr contains invalid expression + continue; // ignore entry + } + } + if result.is_none() { + continue; // no valid locations + } + let list_id = out_unit.locations.add(write::LocationList(result.unwrap())); + write::AttributeValue::LocationListRef(list_id) + } + AttributeValue::Exprloc(ref expr) => { + let frame_base = if let FileAttributeContext::Children(_, frame_base) = file_context + { + frame_base + } else { + None + }; + if let Some(expr) = compile_expression(expr, unit_encoding, frame_base)? { + if expr.is_simple() { + if let Some(expr) = expr.build() { + write::AttributeValue::Exprloc(expr) + } else { + continue; + } + } else { + // Conversion to loclist is required. + if let Some(scope_ranges) = scope_ranges { + let exprs = + expr.build_with_locals(scope_ranges, addr_tr, frame_info, endian); + if exprs.len() == 0 { + continue; + } + let found_single_expr = { + // Micro-optimization all expressions alike, use one exprloc. 
+ let mut found_expr: Option = None; + for (_, _, expr) in &exprs { + if let Some(ref prev_expr) = found_expr { + if expr.0.eq(&prev_expr.0) { + continue; // the same expression + } + found_expr = None; + break; + } + found_expr = Some(expr.clone()) + } + found_expr + }; + if found_single_expr.is_some() { + write::AttributeValue::Exprloc(found_single_expr.unwrap()) + } else if is_exprloc_to_loclist_allowed(attr.name()) { + // Converting exprloc to loclist. + let mut locs = Vec::new(); + for (begin, length, data) in exprs { + if length == 0 { + // Ignore empty range + continue; + } + locs.push(write::Location::StartLength { + begin, + length, + data, + }); + } + let list_id = out_unit.locations.add(write::LocationList(locs)); + write::AttributeValue::LocationListRef(list_id) + } else { + continue; + } + } else { + continue; + } + } + } else { + // FIXME _expr contains invalid expression + continue; // ignore attribute + } + } + AttributeValue::Encoding(e) => write::AttributeValue::Encoding(e), + AttributeValue::DecimalSign(e) => write::AttributeValue::DecimalSign(e), + AttributeValue::Endianity(e) => write::AttributeValue::Endianity(e), + AttributeValue::Accessibility(e) => write::AttributeValue::Accessibility(e), + AttributeValue::Visibility(e) => write::AttributeValue::Visibility(e), + AttributeValue::Virtuality(e) => write::AttributeValue::Virtuality(e), + AttributeValue::Language(e) => write::AttributeValue::Language(e), + AttributeValue::AddressClass(e) => write::AttributeValue::AddressClass(e), + AttributeValue::IdentifierCase(e) => write::AttributeValue::IdentifierCase(e), + AttributeValue::CallingConvention(e) => write::AttributeValue::CallingConvention(e), + AttributeValue::Inline(e) => write::AttributeValue::Inline(e), + AttributeValue::Ordering(e) => write::AttributeValue::Ordering(e), + AttributeValue::UnitRef(ref offset) => { + if let Some(unit_id) = die_ref_map.get(offset) { + write::AttributeValue::ThisUnitEntryRef(*unit_id) + } else { + 
pending_die_refs.push((current_scope_id, attr.name(), *offset)); + continue; + } + } + // AttributeValue::DebugInfoRef(_) => { + // continue; + // } + _ => panic!(), //write::AttributeValue::StringRef(out_strings.add("_")), + }; + let current_scope = out_unit.get_mut(current_scope_id); + current_scope.set(attr.name(), attr_value); + } + Ok(()) +} + +pub(crate) fn clone_attr_string( + attr_value: &AttributeValue, + form: gimli::DwForm, + debug_str: &DebugStr, + out_strings: &mut write::StringTable, +) -> Result +where + R: Reader, +{ + let content = match attr_value { + AttributeValue::DebugStrRef(str_offset) => { + debug_str.get_str(*str_offset)?.to_slice()?.to_vec() + } + AttributeValue::String(b) => b.to_slice()?.to_vec(), + _ => panic!("Unexpected attribute value"), + }; + Ok(match form { + gimli::DW_FORM_strp => { + let id = out_strings.add(content); + write::LineString::StringRef(id) + } + gimli::DW_FORM_string => write::LineString::String(content), + _ => panic!("DW_FORM_line_strp or other not supported"), + }) +} diff --git a/wasmtime-debug/src/transform/expression.rs b/wasmtime-debug/src/transform/expression.rs new file mode 100644 index 000000000000..19f360f0db26 --- /dev/null +++ b/wasmtime-debug/src/transform/expression.rs @@ -0,0 +1,486 @@ +use cranelift_codegen::ir::{StackSlots, ValueLabel, ValueLoc}; +use cranelift_codegen::isa::RegUnit; +use cranelift_codegen::ValueLabelsRanges; +use cranelift_entity::EntityRef; +use cranelift_wasm::{get_vmctx_value_label, DefinedFuncIndex}; +use failure::Error; +use gimli::write; +use gimli::{self, Expression, Operation, Reader, ReaderOffset, Register, X86_64}; +use std::collections::{HashMap, HashSet}; + +use super::address_transform::AddressTransform; + +#[derive(Debug)] +pub struct FunctionFrameInfo<'a> { + pub value_ranges: &'a ValueLabelsRanges, + pub memory_offset: i64, + pub stack_slots: &'a StackSlots, +} + +#[derive(Debug)] +enum CompiledExpressionPart { + Code(Vec), + Local(ValueLabel), + Deref, +} + 
+#[derive(Debug)] +pub struct CompiledExpression { + parts: Vec, + need_deref: bool, +} + +impl Clone for CompiledExpressionPart { + fn clone(&self) -> Self { + match self { + CompiledExpressionPart::Code(c) => CompiledExpressionPart::Code(c.clone()), + CompiledExpressionPart::Local(i) => CompiledExpressionPart::Local(*i), + CompiledExpressionPart::Deref => CompiledExpressionPart::Deref, + } + } +} + +impl CompiledExpression { + pub fn vmctx() -> CompiledExpression { + CompiledExpression { + parts: vec![ + CompiledExpressionPart::Local(get_vmctx_value_label()), + CompiledExpressionPart::Code(vec![gimli::constants::DW_OP_stack_value.0 as u8]), + ], + need_deref: false, + } + } +} + +fn map_reg(reg: RegUnit) -> Register { + static mut REG_X86_MAP: Option> = None; + // FIXME lazy initialization? + unsafe { + if REG_X86_MAP.is_none() { + REG_X86_MAP = Some(HashMap::new()); + } + if let Some(val) = REG_X86_MAP.as_mut().unwrap().get(®) { + return *val; + } + let result = match reg { + 0 => X86_64::RAX, + 1 => X86_64::RCX, + 2 => X86_64::RDX, + 3 => X86_64::RBX, + 4 => X86_64::RSP, + 5 => X86_64::RBP, + 6 => X86_64::RSI, + 7 => X86_64::RDI, + 8 => X86_64::R8, + 9 => X86_64::R9, + 10 => X86_64::R10, + 11 => X86_64::R11, + 12 => X86_64::R12, + 13 => X86_64::R13, + 14 => X86_64::R14, + 15 => X86_64::R15, + 16 => X86_64::XMM0, + 17 => X86_64::XMM1, + 18 => X86_64::XMM2, + 19 => X86_64::XMM3, + 20 => X86_64::XMM4, + 21 => X86_64::XMM5, + 22 => X86_64::XMM6, + 23 => X86_64::XMM7, + _ => panic!("{}", reg), + }; + REG_X86_MAP.as_mut().unwrap().insert(reg, result); + result + } +} + +fn translate_loc(loc: ValueLoc, frame_info: Option<&FunctionFrameInfo>) -> Option> { + match loc { + ValueLoc::Reg(reg) => { + let machine_reg = map_reg(reg).0 as u8; + assert!(machine_reg < 32); // FIXME + Some(vec![gimli::constants::DW_OP_reg0.0 + machine_reg]) + } + ValueLoc::Stack(ss) => { + if let Some(frame_info) = frame_info { + if let Some(ss_offset) = frame_info.stack_slots[ss].offset { + use 
gimli::write::Writer; + let endian = gimli::RunTimeEndian::Little; + let mut writer = write::EndianVec::new(endian); + writer + .write_u8(gimli::constants::DW_OP_breg0.0 + X86_64::RBP.0 as u8) + .expect("bp wr"); + writer.write_sleb128(ss_offset as i64 + 16).expect("ss wr"); + writer + .write_u8(gimli::constants::DW_OP_deref.0 as u8) + .expect("bp wr"); + let buf = writer.into_vec(); + return Some(buf); + } + } + None + } + _ => None, + } +} + +fn append_memory_deref( + buf: &mut Vec, + frame_info: &FunctionFrameInfo, + vmctx_loc: ValueLoc, + endian: gimli::RunTimeEndian, +) -> write::Result { + use gimli::write::Writer; + let mut writer = write::EndianVec::new(endian); + match vmctx_loc { + ValueLoc::Reg(vmctx_reg) => { + let reg = map_reg(vmctx_reg); + writer.write_u8(gimli::constants::DW_OP_breg0.0 + reg.0 as u8)?; + writer.write_sleb128(frame_info.memory_offset)?; + } + ValueLoc::Stack(ss) => { + if let Some(ss_offset) = frame_info.stack_slots[ss].offset { + writer.write_u8(gimli::constants::DW_OP_breg0.0 + X86_64::RBP.0 as u8)?; + writer.write_sleb128(ss_offset as i64 + 16)?; + writer.write_u8(gimli::constants::DW_OP_deref.0 as u8)?; + + writer.write_u8(gimli::constants::DW_OP_consts.0 as u8)?; + writer.write_sleb128(frame_info.memory_offset)?; + writer.write_u8(gimli::constants::DW_OP_plus.0 as u8)?; + } else { + return Ok(false); + } + } + _ => { + return Ok(false); + } + } + writer.write_u8(gimli::constants::DW_OP_deref.0 as u8)?; + writer.write_u8(gimli::constants::DW_OP_swap.0 as u8)?; + writer.write_u8(gimli::constants::DW_OP_stack_value.0 as u8)?; + writer.write_u8(gimli::constants::DW_OP_constu.0 as u8)?; + writer.write_uleb128(0xffff_ffff)?; + writer.write_u8(gimli::constants::DW_OP_and.0 as u8)?; + writer.write_u8(gimli::constants::DW_OP_plus.0 as u8)?; + buf.extend_from_slice(writer.slice()); + Ok(true) +} + +impl CompiledExpression { + pub fn is_simple(&self) -> bool { + if let [CompiledExpressionPart::Code(_)] = self.parts.as_slice() { + true + } 
else { + self.parts.len() == 0 + } + } + + pub fn build(&self) -> Option { + if let [CompiledExpressionPart::Code(code)] = self.parts.as_slice() { + return Some(write::Expression(code.to_vec())); + } + // locals found, not supported + None + } + + pub fn build_with_locals( + &self, + scope: &[(u64, u64)], // wasm ranges + addr_tr: &AddressTransform, + frame_info: Option<&FunctionFrameInfo>, + endian: gimli::RunTimeEndian, + ) -> std::vec::Vec<(write::Address, u64, write::Expression)> { + if scope.len() == 0 { + return vec![]; + } + + if let [CompiledExpressionPart::Code(code)] = self.parts.as_slice() { + let mut result_scope = Vec::new(); + for s in scope { + for (addr, len) in addr_tr.translate_ranges(s.0, s.1) { + result_scope.push((addr, len, write::Expression(code.to_vec()))); + } + } + return result_scope; + } + + let vmctx_label = get_vmctx_value_label(); + + // Some locals are present, preparing and divided ranges based on the scope + // and frame_info data. + let mut ranges_builder = ValueLabelRangesBuilder::new(scope, addr_tr, frame_info); + for p in &self.parts { + match p { + CompiledExpressionPart::Code(_) => (), + CompiledExpressionPart::Local(label) => ranges_builder.process_label(*label), + CompiledExpressionPart::Deref => ranges_builder.process_label(vmctx_label), + } + } + if self.need_deref { + ranges_builder.process_label(vmctx_label); + } + ranges_builder.remove_incomplete_ranges(); + let ranges = ranges_builder.ranges; + + let mut result = Vec::new(); + 'range: for CachedValueLabelRange { + func_index, + start, + end, + label_location, + } in ranges + { + // build expression + let mut code_buf = Vec::new(); + for part in &self.parts { + match part { + CompiledExpressionPart::Code(c) => code_buf.extend_from_slice(c.as_slice()), + CompiledExpressionPart::Local(label) => { + let loc = *label_location.get(&label).expect("loc"); + if let Some(expr) = translate_loc(loc, frame_info) { + code_buf.extend_from_slice(&expr) + } else { + continue 'range; + 
} + } + CompiledExpressionPart::Deref => { + if let (Some(vmctx_loc), Some(frame_info)) = + (label_location.get(&vmctx_label), frame_info) + { + if !append_memory_deref(&mut code_buf, frame_info, *vmctx_loc, endian) + .expect("append_memory_deref") + { + continue 'range; + } + } else { + continue 'range; + }; + } + } + } + if self.need_deref { + if let (Some(vmctx_loc), Some(frame_info)) = + (label_location.get(&vmctx_label), frame_info) + { + if !append_memory_deref(&mut code_buf, frame_info, *vmctx_loc, endian) + .expect("append_memory_deref") + { + continue 'range; + } + } else { + continue 'range; + }; + } + result.push(( + write::Address::Symbol { + symbol: func_index.index(), + addend: start as i64, + }, + (end - start) as u64, + write::Expression(code_buf), + )); + } + + return result; + } +} + +pub fn compile_expression( + expr: &Expression, + encoding: &gimli::Encoding, + frame_base: Option<&CompiledExpression>, +) -> Result, Error> +where + R: Reader, +{ + let mut parts = Vec::new(); + let mut need_deref = false; + if let Some(frame_base) = frame_base { + parts.extend_from_slice(&frame_base.parts); + need_deref = frame_base.need_deref; + } + let base_len = parts.len(); + let mut pc = expr.0.clone(); + let mut code_chunk = Vec::new(); + let buf = expr.0.to_slice()?; + while !pc.is_empty() { + let next = buf[pc.offset_from(&expr.0).into_u64() as usize]; + need_deref = true; + if next == 0xED { + // WebAssembly DWARF extension + pc.read_u8()?; + let ty = pc.read_uleb128()?; + assert_eq!(ty, 0); + let index = pc.read_sleb128()?; + pc.read_u8()?; // consume 159 + if code_chunk.len() > 0 { + parts.push(CompiledExpressionPart::Code(code_chunk)); + code_chunk = Vec::new(); + } + let label = ValueLabel::from_u32(index as u32); + parts.push(CompiledExpressionPart::Local(label)); + } else { + let pos = pc.offset_from(&expr.0).into_u64() as usize; + let op = Operation::parse(&mut pc, &expr.0, *encoding)?; + match op { + Operation::Literal { .. 
} | Operation::PlusConstant { .. } => (), + Operation::StackValue => { + need_deref = false; + } + Operation::Deref { .. } => { + if code_chunk.len() > 0 { + parts.push(CompiledExpressionPart::Code(code_chunk)); + code_chunk = Vec::new(); + } + parts.push(CompiledExpressionPart::Deref); + } + _ => { + return Ok(None); + } + } + let chunk = &buf[pos..pc.offset_from(&expr.0).into_u64() as usize]; + code_chunk.extend_from_slice(chunk); + } + } + + if code_chunk.len() > 0 { + parts.push(CompiledExpressionPart::Code(code_chunk)); + } + + if base_len > 0 && base_len + 1 < parts.len() { + // see if we can glue two adjacent code chunks: parts[base_len] and parts[base_len + 1] + if let [CompiledExpressionPart::Code(cc1), CompiledExpressionPart::Code(cc2)] = + &parts[base_len..base_len + 2] + { + let mut combined = cc1.clone(); + combined.extend_from_slice(cc2); + parts[base_len] = CompiledExpressionPart::Code(combined); + parts.remove(base_len + 1); + } + } + + Ok(Some(CompiledExpression { parts, need_deref })) +} + +#[derive(Debug, Clone)] +struct CachedValueLabelRange { + func_index: DefinedFuncIndex, + start: usize, + end: usize, + label_location: HashMap, +} + +struct ValueLabelRangesBuilder<'a, 'b> { + ranges: Vec, + addr_tr: &'a AddressTransform, + frame_info: Option<&'a FunctionFrameInfo<'b>>, + processed_labels: HashSet, +} + +impl<'a, 'b> ValueLabelRangesBuilder<'a, 'b> { + fn new( + scope: &[(u64, u64)], // wasm ranges + addr_tr: &'a AddressTransform, + frame_info: Option<&'a FunctionFrameInfo<'b>>, + ) -> Self { + let mut ranges = Vec::new(); + for s in scope { + if let Some((func_index, tr)) = addr_tr.translate_ranges_raw(s.0, s.1) { + for (start, end) in tr { + ranges.push(CachedValueLabelRange { + func_index, + start, + end, + label_location: HashMap::new(), + }) + } + } + } + ranges.sort_unstable_by(|a, b| a.start.cmp(&b.start)); + ValueLabelRangesBuilder { + ranges, + addr_tr, + frame_info, + processed_labels: HashSet::new(), + } + } + + fn process_label(&mut self, label: ValueLabel) { + if 
self.processed_labels.contains(&label) { + return; + } + self.processed_labels.insert(label); + + let value_ranges = if let Some(frame_info) = self.frame_info { + &frame_info.value_ranges + } else { + return; + }; + + let ranges = &mut self.ranges; + if let Some(local_ranges) = value_ranges.get(&label) { + for local_range in local_ranges { + let wasm_start = local_range.start; + let wasm_end = local_range.end; + let loc = local_range.loc; + // Find all native ranges for the value label ranges. + for (addr, len) in self + .addr_tr + .translate_ranges(wasm_start as u64, wasm_end as u64) + { + let (range_start, range_end) = self.addr_tr.convert_to_code_range(addr, len); + if range_start == range_end { + continue; + } + assert!(range_start < range_end); + // Find acceptable scope of ranges to intersect with. + let i = match ranges.binary_search_by(|s| s.start.cmp(&range_start)) { + Ok(i) => i, + Err(i) => { + if i > 0 && range_start < ranges[i - 1].end { + i - 1 + } else { + i + } + } + }; + let j = match ranges.binary_search_by(|s| s.start.cmp(&range_end)) { + Ok(i) | Err(i) => i, + }; + // Starting from the end, intersect (range_start..range_end) with + // self.ranges array; reverse order keeps unvisited indices valid across inserts. + for i in (i..j).rev() { + if range_end <= ranges[i].start || ranges[i].end <= range_start { + continue; + } + if range_end < ranges[i].end { + // Cutting some of the range from the end. + let mut tail = ranges[i].clone(); + ranges[i].end = range_end; + tail.start = range_end; + ranges.insert(i + 1, tail); + } + assert!(ranges[i].end <= range_end); + if range_start <= ranges[i].start { + ranges[i].label_location.insert(label, loc); + continue; + } + // Cutting some of the range from the start. + let mut tail = ranges[i].clone(); + ranges[i].end = range_start; + tail.start = range_start; + tail.label_location.insert(label, loc); + ranges.insert(i + 1, tail); + } + } + } + } + } + + fn remove_incomplete_ranges(&mut self) { + // Ranges with not-enough labels are discarded. 
+ let processed_labels_len = self.processed_labels.len(); + self.ranges + .retain(|r| r.label_location.len() == processed_labels_len); + } +} diff --git a/wasmtime-debug/src/transform/line_program.rs b/wasmtime-debug/src/transform/line_program.rs new file mode 100644 index 000000000000..812d183feddc --- /dev/null +++ b/wasmtime-debug/src/transform/line_program.rs @@ -0,0 +1,233 @@ +use cranelift_entity::EntityRef; +use failure::Error; +use std::collections::BTreeMap; +use std::iter::FromIterator; + +use gimli; + +use gimli::{DebugLine, DebugLineOffset, DebugStr, DebuggingInformationEntry, LineEncoding, Unit}; + +use gimli::write; + +use super::address_transform::AddressTransform; +use super::attr::clone_attr_string; +use super::{Reader, TransformError}; + +#[derive(Debug)] +enum SavedLineProgramRow { + Normal { + address: u64, + op_index: u64, + file_index: u64, + line: u64, + column: u64, + discriminator: u64, + is_stmt: bool, + basic_block: bool, + prologue_end: bool, + epilogue_begin: bool, + isa: u64, + }, + EndOfSequence(u64), +} + +#[derive(Debug, Eq, PartialEq)] +enum ReadLineProgramState { + SequenceEnded, + ReadSequence, + IgnoreSequence, +} + +pub(crate) fn clone_line_program( + unit: &Unit, + root: &DebuggingInformationEntry, + addr_tr: &AddressTransform, + out_encoding: &gimli::Encoding, + debug_str: &DebugStr, + debug_line: &DebugLine, + out_strings: &mut write::StringTable, +) -> Result<(write::LineProgram, DebugLineOffset, Vec), Error> +where + R: Reader, +{ + let offset = match root.attr_value(gimli::DW_AT_stmt_list)? 
{ + Some(gimli::AttributeValue::DebugLineRef(offset)) => offset, + _ => { + return Err(TransformError("Debug line offset is not found").into()); + } + }; + let comp_dir = root.attr_value(gimli::DW_AT_comp_dir)?; + let comp_name = root.attr_value(gimli::DW_AT_name)?; + let out_comp_dir = clone_attr_string( + comp_dir.as_ref().expect("comp_dir"), + gimli::DW_FORM_strp, + debug_str, + out_strings, + )?; + let out_comp_name = clone_attr_string( + comp_name.as_ref().expect("comp_name"), + gimli::DW_FORM_strp, + debug_str, + out_strings, + )?; + + let program = debug_line.program( + offset, + unit.header.address_size(), + comp_dir.and_then(|val| val.string_value(&debug_str)), + comp_name.and_then(|val| val.string_value(&debug_str)), + ); + if let Ok(program) = program { + let header = program.header(); + assert!(header.version() <= 4, "not supported 5"); + let line_encoding = LineEncoding { + minimum_instruction_length: header.minimum_instruction_length(), + maximum_operations_per_instruction: header.maximum_operations_per_instruction(), + default_is_stmt: header.default_is_stmt(), + line_base: header.line_base(), + line_range: header.line_range(), + }; + let mut out_program = write::LineProgram::new( + *out_encoding, + line_encoding, + out_comp_dir, + out_comp_name, + None, + ); + let mut dirs = Vec::new(); + dirs.push(out_program.default_directory()); + for dir_attr in header.include_directories() { + let dir_id = out_program.add_directory(clone_attr_string( + dir_attr, + gimli::DW_FORM_string, + debug_str, + out_strings, + )?); + dirs.push(dir_id); + } + let mut files = Vec::new(); + for file_entry in header.file_names() { + let dir_id = dirs[file_entry.directory_index() as usize]; + let file_id = out_program.add_file( + clone_attr_string( + &file_entry.path_name(), + gimli::DW_FORM_string, + debug_str, + out_strings, + )?, + dir_id, + None, + ); + files.push(file_id); + } + + let mut rows = program.rows(); + let mut saved_rows = BTreeMap::new(); + let mut state = 
ReadLineProgramState::SequenceEnded; + while let Some((_header, row)) = rows.next_row()? { + if state == ReadLineProgramState::IgnoreSequence { + if row.end_sequence() { + state = ReadLineProgramState::SequenceEnded; + } + continue; + } + let saved_row = if row.end_sequence() { + state = ReadLineProgramState::SequenceEnded; + SavedLineProgramRow::EndOfSequence(row.address()) + } else { + if state == ReadLineProgramState::SequenceEnded { + // Discard sequences for non-existent code. + if row.address() == 0 { + state = ReadLineProgramState::IgnoreSequence; + continue; + } + state = ReadLineProgramState::ReadSequence; + } + SavedLineProgramRow::Normal { + address: row.address(), + op_index: row.op_index(), + file_index: row.file_index(), + line: row.line().unwrap_or(0), + column: match row.column() { + gimli::ColumnType::LeftEdge => 0, + gimli::ColumnType::Column(val) => val, + }, + discriminator: row.discriminator(), + is_stmt: row.is_stmt(), + basic_block: row.basic_block(), + prologue_end: row.prologue_end(), + epilogue_begin: row.epilogue_begin(), + isa: row.isa(), + } + }; + saved_rows.insert(row.address(), saved_row); + } + + let saved_rows = Vec::from_iter(saved_rows.into_iter()); + for (i, map) in addr_tr.map() { + if map.len == 0 { + continue; // no code generated + } + let symbol = i.index(); + let base_addr = map.offset; + out_program.begin_sequence(Some(write::Address::Symbol { symbol, addend: 0 })); + // TODO track and place function declaration line here + let mut last_address = None; + for addr_map in map.addresses.iter() { + let saved_row = match saved_rows.binary_search_by_key(&addr_map.wasm, |i| i.0) { + Ok(i) => Some(&saved_rows[i].1), + Err(i) => { + if i > 0 { + Some(&saved_rows[i - 1].1) + } else { + None + } + } + }; + if let Some(SavedLineProgramRow::Normal { + address, + op_index, + file_index, + line, + column, + discriminator, + is_stmt, + basic_block, + prologue_end, + epilogue_begin, + isa, + }) = saved_row + { + // Ignore duplicates + if 
Some(*address) != last_address { + let address_offset = if last_address.is_none() { + // Extend first entry to the function declaration + // TODO use the function declaration line instead + 0 + } else { + (addr_map.generated - base_addr) as u64 + }; + out_program.row().address_offset = address_offset; + out_program.row().op_index = *op_index; + out_program.row().file = files[(file_index - 1) as usize]; + out_program.row().line = *line; + out_program.row().column = *column; + out_program.row().discriminator = *discriminator; + out_program.row().is_statement = *is_stmt; + out_program.row().basic_block = *basic_block; + out_program.row().prologue_end = *prologue_end; + out_program.row().epilogue_begin = *epilogue_begin; + out_program.row().isa = *isa; + out_program.generate_row(); + last_address = Some(*address); + } + } + } + let end_addr = (map.offset + map.len - 1) as u64; + out_program.end_sequence(end_addr); + } + Ok((out_program, offset, files)) + } else { + Err(TransformError("Valid line program not found").into()) + } +} diff --git a/wasmtime-debug/src/transform/mod.rs b/wasmtime-debug/src/transform/mod.rs new file mode 100644 index 000000000000..94f80908afd9 --- /dev/null +++ b/wasmtime-debug/src/transform/mod.rs @@ -0,0 +1,102 @@ +use crate::gc::build_dependencies; +use crate::DebugInfoData; +use cranelift_codegen::isa::TargetFrontendConfig; +use failure::Error; +use std::collections::HashSet; +use wasmtime_environ::{ModuleAddressMap, ModuleVmctxInfo, ValueLabelsRanges}; + +use gimli; + +use gimli::{ + DebugAddr, DebugAddrBase, DebugLine, DebugStr, LocationLists, RangeLists, UnitSectionOffset, +}; + +use gimli::write; + +pub use address_transform::AddressTransform; + +use unit::clone_unit; + +mod address_transform; +mod attr; +mod expression; +mod line_program; +mod range_info_builder; +mod unit; + +pub(crate) trait Reader: gimli::Reader {} + +impl<'input, Endian> Reader for gimli::EndianSlice<'input, Endian> where Endian: gimli::Endianity {} + 
+#[derive(Fail, Debug)] +#[fail(display = "Debug info transform error: {}", _0)] +pub struct TransformError(&'static str); + +pub(crate) struct DebugInputContext<'a, R> +where + R: Reader, +{ + debug_str: &'a DebugStr, + debug_line: &'a DebugLine, + debug_addr: &'a DebugAddr, + debug_addr_base: DebugAddrBase, + rnglists: &'a RangeLists, + loclists: &'a LocationLists, + reachable: &'a HashSet, +} + +pub fn transform_dwarf( + target_config: &TargetFrontendConfig, + di: &DebugInfoData, + at: &ModuleAddressMap, + vmctx_info: &ModuleVmctxInfo, + ranges: &ValueLabelsRanges, +) -> Result { + let addr_tr = AddressTransform::new(at, &di.wasm_file); + let reachable = build_dependencies(&di.dwarf, &addr_tr)?.get_reachable(); + + let context = DebugInputContext { + debug_str: &di.dwarf.debug_str, + debug_line: &di.dwarf.debug_line, + debug_addr: &di.dwarf.debug_addr, + debug_addr_base: DebugAddrBase(0), + rnglists: &di.dwarf.ranges, + loclists: &di.dwarf.locations, + reachable: &reachable, + }; + + let out_encoding = gimli::Encoding { + format: gimli::Format::Dwarf32, + // TODO: this should be configurable + // macOS doesn't seem to support DWARF > 3 + version: 3, + address_size: target_config.pointer_bytes(), + }; + + let mut out_strings = write::StringTable::default(); + let mut out_units = write::UnitTable::default(); + + let out_line_strings = write::LineStringTable::default(); + + let mut iter = di.dwarf.debug_info.units(); + while let Some(unit) = iter.next().unwrap_or(None) { + let unit = di.dwarf.unit(unit)?; + clone_unit( + unit, + &context, + &addr_tr, + &ranges, + &out_encoding, + &vmctx_info, + &mut out_units, + &mut out_strings, + )?; + } + + Ok(write::Dwarf { + units: out_units, + line_programs: vec![], + line_strings: out_line_strings, + strings: out_strings, + }) +} diff --git a/wasmtime-debug/src/transform/range_info_builder.rs b/wasmtime-debug/src/transform/range_info_builder.rs new file mode 100644 index 000000000000..bc751555644c --- /dev/null +++ 
b/wasmtime-debug/src/transform/range_info_builder.rs @@ -0,0 +1,225 @@ +use cranelift_entity::EntityRef; +use cranelift_wasm::DefinedFuncIndex; +use failure::Error; + +use gimli; + +use gimli::{AttributeValue, DebuggingInformationEntry, RangeListsOffset}; + +use gimli::write; + +use super::address_transform::AddressTransform; +use super::DebugInputContext; +use super::Reader; + +pub(crate) enum RangeInfoBuilder { + Undefined, + Position(u64), + Ranges(Vec<(u64, u64)>), + Function(DefinedFuncIndex), +} + +impl RangeInfoBuilder { + pub(crate) fn from( + entry: &DebuggingInformationEntry, + context: &DebugInputContext, + unit_encoding: &gimli::Encoding, + cu_low_pc: u64, + ) -> Result + where + R: Reader, + { + if let Some(AttributeValue::RangeListsRef(r)) = entry.attr_value(gimli::DW_AT_ranges)? { + return RangeInfoBuilder::from_ranges_ref(r, context, unit_encoding, cu_low_pc); + }; + + let low_pc = + if let Some(AttributeValue::Addr(addr)) = entry.attr_value(gimli::DW_AT_low_pc)? { + addr + } else { + return Ok(RangeInfoBuilder::Undefined); + }; + + Ok( + if let Some(AttributeValue::Udata(u)) = entry.attr_value(gimli::DW_AT_high_pc)? { + RangeInfoBuilder::Ranges(vec![(low_pc, low_pc + u)]) + } else { + RangeInfoBuilder::Position(low_pc) + }, + ) + } + + pub(crate) fn from_ranges_ref( + ranges: RangeListsOffset, + context: &DebugInputContext, + unit_encoding: &gimli::Encoding, + cu_low_pc: u64, + ) -> Result + where + R: Reader, + { + let mut ranges = context.rnglists.ranges( + ranges, + *unit_encoding, + cu_low_pc, + &context.debug_addr, + context.debug_addr_base, + )?; + let mut result = Vec::new(); + while let Some(range) = ranges.next()? 
{ + if range.begin >= range.end { + // ignore empty ranges + } + result.push((range.begin, range.end)); + } + + return Ok(if result.len() > 0 { + RangeInfoBuilder::Ranges(result) + } else { + RangeInfoBuilder::Undefined + }); + } + + pub(crate) fn from_subprogram_die( + entry: &DebuggingInformationEntry, + context: &DebugInputContext, + unit_encoding: &gimli::Encoding, + addr_tr: &AddressTransform, + cu_low_pc: u64, + ) -> Result + where + R: Reader, + { + let addr = + if let Some(AttributeValue::Addr(addr)) = entry.attr_value(gimli::DW_AT_low_pc)? { + addr + } else if let Some(AttributeValue::RangeListsRef(r)) = + entry.attr_value(gimli::DW_AT_ranges)? + { + let mut ranges = context.rnglists.ranges( + r, + *unit_encoding, + cu_low_pc, + &context.debug_addr, + context.debug_addr_base, + )?; + if let Some(range) = ranges.next()? { + range.begin + } else { + return Ok(RangeInfoBuilder::Undefined); + } + } else { + return Ok(RangeInfoBuilder::Undefined); + }; + + let index = addr_tr.find_func_index(addr); + if index.is_none() { + return Ok(RangeInfoBuilder::Undefined); + } + Ok(RangeInfoBuilder::Function(index.unwrap())) + } + + pub(crate) fn build( + &self, + addr_tr: &AddressTransform, + out_unit: &mut write::Unit, + current_scope_id: write::UnitEntryId, + ) { + match self { + RangeInfoBuilder::Undefined => (), + RangeInfoBuilder::Position(pc) => { + let addr = addr_tr + .translate(*pc) + .unwrap_or(write::Address::Constant(0)); + let current_scope = out_unit.get_mut(current_scope_id); + current_scope.set(gimli::DW_AT_low_pc, write::AttributeValue::Address(addr)); + } + RangeInfoBuilder::Ranges(ranges) => { + let mut result = Vec::new(); + for (begin, end) in ranges { + for tr in addr_tr.translate_ranges(*begin, *end) { + if tr.1 == 0 { + // Ignore empty range + continue; + } + result.push(tr); + } + } + if result.len() != 1 { + let range_list = result + .iter() + .map(|tr| write::Range::StartLength { + begin: tr.0, + length: tr.1, + }) + .collect::>(); + let 
range_list_id = out_unit.ranges.add(write::RangeList(range_list)); + let current_scope = out_unit.get_mut(current_scope_id); + current_scope.set( + gimli::DW_AT_ranges, + write::AttributeValue::RangeListRef(range_list_id), + ); + } else { + let current_scope = out_unit.get_mut(current_scope_id); + current_scope.set( + gimli::DW_AT_low_pc, + write::AttributeValue::Address(result[0].0), + ); + current_scope.set( + gimli::DW_AT_high_pc, + write::AttributeValue::Udata(result[0].1), + ); + } + } + RangeInfoBuilder::Function(index) => { + let range = addr_tr.func_range(*index); + let symbol = index.index(); + let addr = write::Address::Symbol { + symbol, + addend: range.0 as i64, + }; + let len = (range.1 - range.0) as u64; + let current_scope = out_unit.get_mut(current_scope_id); + current_scope.set(gimli::DW_AT_low_pc, write::AttributeValue::Address(addr)); + current_scope.set(gimli::DW_AT_high_pc, write::AttributeValue::Udata(len)); + } + } + } + + pub(crate) fn get_ranges(&self, addr_tr: &AddressTransform) -> Vec<(u64, u64)> { + match self { + RangeInfoBuilder::Undefined | RangeInfoBuilder::Position(_) => vec![], + RangeInfoBuilder::Ranges(ranges) => ranges.clone(), + RangeInfoBuilder::Function(index) => { + let range = addr_tr.func_source_range(*index); + vec![(range.0, range.1)] + } + } + } + + pub(crate) fn build_ranges( + &self, + addr_tr: &AddressTransform, + out_range_lists: &mut write::RangeListTable, + ) -> write::RangeListId { + if let RangeInfoBuilder::Ranges(ranges) = self { + let mut range_list = Vec::new(); + for (begin, end) in ranges { + assert!(begin < end); + for tr in addr_tr.translate_ranges(*begin, *end) { + if tr.1 == 0 { + // Ignore empty range + continue; + } + range_list.push(write::Range::StartLength { + begin: tr.0, + length: tr.1, + }); + } + } + out_range_lists.add(write::RangeList(range_list)) + } else { + unreachable!(); + } + } +} diff --git a/wasmtime-debug/src/transform/unit.rs b/wasmtime-debug/src/transform/unit.rs new file mode 
100644 index 000000000000..f76c9cb15975 --- /dev/null +++ b/wasmtime-debug/src/transform/unit.rs @@ -0,0 +1,515 @@ +use cranelift_wasm::DefinedFuncIndex; +use failure::Error; +use std::collections::HashMap; +use wasmtime_environ::{ModuleVmctxInfo, ValueLabelsRanges}; + +use gimli; + +use gimli::{AttributeValue, DebuggingInformationEntry, Unit, UnitOffset}; + +use gimli::write; + +use super::address_transform::AddressTransform; +use super::attr::{clone_die_attributes, FileAttributeContext}; +use super::expression::{compile_expression, CompiledExpression, FunctionFrameInfo}; +use super::line_program::clone_line_program; +use super::range_info_builder::RangeInfoBuilder; +use super::{DebugInputContext, Reader, TransformError}; + +pub(crate) type PendingDieRef = (write::UnitEntryId, gimli::DwAt, UnitOffset); + +struct InheritedAttr { + stack: Vec<(usize, T)>, +} + +impl InheritedAttr { + fn new() -> Self { + InheritedAttr { stack: Vec::new() } + } + + fn update(&mut self, depth: usize) { + while !self.stack.is_empty() && self.stack.last().unwrap().0 >= depth { + self.stack.pop(); + } + } + + fn push(&mut self, depth: usize, value: T) { + self.stack.push((depth, value)); + } + + fn top(&self) -> Option<&T> { + self.stack.last().map(|entry| &entry.1) + } + + fn is_empty(&self) -> bool { + self.stack.is_empty() + } +} + +fn get_function_frame_info<'a, 'b, 'c>( + module_info: &'b ModuleVmctxInfo, + func_index: DefinedFuncIndex, + value_ranges: &'c ValueLabelsRanges, +) -> Option> +where + 'b: 'a, + 'c: 'a, +{ + if let Some(value_ranges) = value_ranges.get(func_index) { + let frame_info = FunctionFrameInfo { + value_ranges, + memory_offset: module_info.memory_offset, + stack_slots: &module_info.stack_slots[func_index], + }; + Some(frame_info) + } else { + None + } +} + +fn add_internal_types( + comp_unit: &mut write::Unit, + root_id: write::UnitEntryId, + out_strings: &mut write::StringTable, + module_info: &ModuleVmctxInfo, +) -> (write::UnitEntryId, write::UnitEntryId) { + 
let wp_die_id = comp_unit.add(root_id, gimli::DW_TAG_base_type); + let wp_die = comp_unit.get_mut(wp_die_id); + wp_die.set( + gimli::DW_AT_name, + write::AttributeValue::StringRef(out_strings.add("WebAssemblyPtr")), + ); + wp_die.set(gimli::DW_AT_byte_size, write::AttributeValue::Data1(4)); + wp_die.set( + gimli::DW_AT_encoding, + write::AttributeValue::Encoding(gimli::DW_ATE_unsigned), + ); + + let memory_byte_die_id = comp_unit.add(root_id, gimli::DW_TAG_base_type); + let memory_byte_die = comp_unit.get_mut(memory_byte_die_id); + memory_byte_die.set( + gimli::DW_AT_name, + write::AttributeValue::StringRef(out_strings.add("u8")), + ); + memory_byte_die.set( + gimli::DW_AT_encoding, + write::AttributeValue::Encoding(gimli::DW_ATE_unsigned), + ); + memory_byte_die.set(gimli::DW_AT_byte_size, write::AttributeValue::Data1(1)); + + let memory_bytes_die_id = comp_unit.add(root_id, gimli::DW_TAG_pointer_type); + let memory_bytes_die = comp_unit.get_mut(memory_bytes_die_id); + memory_bytes_die.set( + gimli::DW_AT_name, + write::AttributeValue::StringRef(out_strings.add("u8*")), + ); + memory_bytes_die.set( + gimli::DW_AT_type, + write::AttributeValue::ThisUnitEntryRef(memory_byte_die_id), + ); + + let memory_offset = module_info.memory_offset; + let vmctx_die_id = comp_unit.add(root_id, gimli::DW_TAG_structure_type); + let vmctx_die = comp_unit.get_mut(vmctx_die_id); + vmctx_die.set( + gimli::DW_AT_name, + write::AttributeValue::StringRef(out_strings.add("WasmtimeVMContext")), + ); + vmctx_die.set( + gimli::DW_AT_byte_size, + write::AttributeValue::Data4(memory_offset as u32 + 8), + ); + + let m_die_id = comp_unit.add(vmctx_die_id, gimli::DW_TAG_member); + let m_die = comp_unit.get_mut(m_die_id); + m_die.set( + gimli::DW_AT_name, + write::AttributeValue::StringRef(out_strings.add("memory")), + ); + m_die.set( + gimli::DW_AT_type, + write::AttributeValue::ThisUnitEntryRef(memory_bytes_die_id), + ); + m_die.set( + gimli::DW_AT_data_member_location, + 
write::AttributeValue::Udata(memory_offset as u64), + ); + + let vmctx_ptr_die_id = comp_unit.add(root_id, gimli::DW_TAG_pointer_type); + let vmctx_ptr_die = comp_unit.get_mut(vmctx_ptr_die_id); + vmctx_ptr_die.set( + gimli::DW_AT_name, + write::AttributeValue::StringRef(out_strings.add("WasmtimeVMContext*")), + ); + vmctx_ptr_die.set( + gimli::DW_AT_type, + write::AttributeValue::ThisUnitEntryRef(vmctx_die_id), + ); + + (wp_die_id, vmctx_ptr_die_id) +} + +fn get_base_type_name( + type_entry: &DebuggingInformationEntry, + unit: &Unit, + context: &DebugInputContext, +) -> Result +where + R: Reader, +{ + // FIXME remove recursion. + match type_entry.attr_value(gimli::DW_AT_type)? { + Some(AttributeValue::UnitRef(ref offset)) => { + let mut entries = unit.entries_at_offset(*offset)?; + entries.next_entry()?; + if let Some(die) = entries.current() { + if let Some(AttributeValue::DebugStrRef(str_offset)) = + die.attr_value(gimli::DW_AT_name)? + { + return Ok(String::from( + context.debug_str.get_str(str_offset)?.to_string()?, + )); + } + match die.tag() { + gimli::DW_TAG_const_type => { + return Ok(format!("const {}", get_base_type_name(die, unit, context)?)); + } + gimli::DW_TAG_pointer_type => { + return Ok(format!("{}*", get_base_type_name(die, unit, context)?)); + } + gimli::DW_TAG_reference_type => { + return Ok(format!("{}&", get_base_type_name(die, unit, context)?)); + } + gimli::DW_TAG_array_type => { + return Ok(format!("{}[]", get_base_type_name(die, unit, context)?)); + } + _ => (), + } + } + } + _ => (), + }; + Ok(String::from("??")) +} + +fn replace_pointer_type( + parent_id: write::UnitEntryId, + comp_unit: &mut write::Unit, + wp_die_id: write::UnitEntryId, + entry: &DebuggingInformationEntry, + unit: &Unit, + context: &DebugInputContext, + out_strings: &mut write::StringTable, + pending_die_refs: &mut Vec<(write::UnitEntryId, gimli::DwAt, UnitOffset)>, +) -> Result +where + R: Reader, +{ + let die_id = comp_unit.add(parent_id, 
gimli::DW_TAG_structure_type); + let die = comp_unit.get_mut(die_id); + + let name = format!( + "WebAssemblyPtrWrapper<{}>", + get_base_type_name(entry, unit, context)? + ); + die.set( + gimli::DW_AT_name, + write::AttributeValue::StringRef(out_strings.add(name.as_str())), + ); + die.set(gimli::DW_AT_byte_size, write::AttributeValue::Data1(4)); + + let p_die_id = comp_unit.add(die_id, gimli::DW_TAG_template_type_parameter); + let p_die = comp_unit.get_mut(p_die_id); + p_die.set( + gimli::DW_AT_name, + write::AttributeValue::StringRef(out_strings.add("T")), + ); + p_die.set( + gimli::DW_AT_type, + write::AttributeValue::ThisUnitEntryRef(wp_die_id), + ); + match entry.attr_value(gimli::DW_AT_type)? { + Some(AttributeValue::UnitRef(ref offset)) => { + pending_die_refs.push((p_die_id, gimli::DW_AT_type, *offset)) + } + _ => (), + } + + let m_die_id = comp_unit.add(die_id, gimli::DW_TAG_member); + let m_die = comp_unit.get_mut(m_die_id); + m_die.set( + gimli::DW_AT_name, + write::AttributeValue::StringRef(out_strings.add("__ptr")), + ); + m_die.set( + gimli::DW_AT_type, + write::AttributeValue::ThisUnitEntryRef(wp_die_id), + ); + m_die.set( + gimli::DW_AT_data_member_location, + write::AttributeValue::Data1(0), + ); + Ok(die_id) +} + +fn append_vmctx_info( + comp_unit: &mut write::Unit, + parent_id: write::UnitEntryId, + vmctx_die_id: write::UnitEntryId, + addr_tr: &AddressTransform, + frame_info: Option<&FunctionFrameInfo>, + scope_ranges: &[(u64, u64)], + out_strings: &mut write::StringTable, +) -> Result<(), Error> { + let loc = { + let endian = gimli::RunTimeEndian::Little; + + let expr = CompiledExpression::vmctx(); + let mut locs = Vec::new(); + for (begin, length, data) in + expr.build_with_locals(scope_ranges, addr_tr, frame_info, endian) + { + locs.push(write::Location::StartLength { + begin, + length, + data, + }); + } + let list_id = comp_unit.locations.add(write::LocationList(locs)); + write::AttributeValue::LocationListRef(list_id) + }; + + let var_die_id = 
comp_unit.add(parent_id, gimli::DW_TAG_variable); + let var_die = comp_unit.get_mut(var_die_id); + var_die.set( + gimli::DW_AT_name, + write::AttributeValue::StringRef(out_strings.add("__vmctx")), + ); + var_die.set( + gimli::DW_AT_type, + write::AttributeValue::ThisUnitEntryRef(vmctx_die_id), + ); + var_die.set(gimli::DW_AT_location, loc); + + Ok(()) +} + +pub(crate) fn clone_unit<'a, R>( + unit: Unit, + context: &DebugInputContext, + addr_tr: &'a AddressTransform, + value_ranges: &'a ValueLabelsRanges, + out_encoding: &gimli::Encoding, + module_info: &ModuleVmctxInfo, + out_units: &mut write::UnitTable, + out_strings: &mut write::StringTable, +) -> Result<(), Error> +where + R: Reader, +{ + let mut die_ref_map = HashMap::new(); + let mut pending_die_refs = Vec::new(); + let mut stack = Vec::new(); + + // Iterate over all of this compilation unit's entries. + let mut entries = unit.entries(); + let (mut comp_unit, file_map, cu_low_pc, wp_die_id, vmctx_die_id) = + if let Some((depth_delta, entry)) = entries.next_dfs()? { + assert!(depth_delta == 0); + let (out_line_program, debug_line_offset, file_map) = clone_line_program( + &unit, + entry, + addr_tr, + out_encoding, + context.debug_str, + context.debug_line, + out_strings, + )?; + + if entry.tag() == gimli::DW_TAG_compile_unit { + let unit_id = out_units.add(write::Unit::new(*out_encoding, out_line_program)); + let comp_unit = out_units.get_mut(unit_id); + + let root_id = comp_unit.root(); + die_ref_map.insert(entry.offset(), root_id); + + let cu_low_pc = if let Some(AttributeValue::Addr(addr)) = + entry.attr_value(gimli::DW_AT_low_pc)? + { + addr + } else { + // FIXME? 
return Err(TransformError("No low_pc for unit header").into()); + 0 + }; + + clone_die_attributes( + entry, + context, + addr_tr, + None, + &unit.encoding(), + comp_unit, + root_id, + None, + None, + cu_low_pc, + out_strings, + &die_ref_map, + &mut pending_die_refs, + FileAttributeContext::Root(Some(debug_line_offset)), + )?; + + let (wp_die_id, vmctx_die_id) = + add_internal_types(comp_unit, root_id, out_strings, module_info); + + stack.push(root_id); + (comp_unit, file_map, cu_low_pc, wp_die_id, vmctx_die_id) + } else { + return Err(TransformError("Unexpected unit header").into()); + } + } else { + return Ok(()); // empty + }; + let mut skip_at_depth = None; + let mut current_frame_base = InheritedAttr::new(); + let mut current_value_range = InheritedAttr::new(); + let mut current_scope_ranges = InheritedAttr::new(); + while let Some((depth_delta, entry)) = entries.next_dfs()? { + let depth_delta = if let Some((depth, cached)) = skip_at_depth { + let new_depth = depth + depth_delta; + if new_depth > 0 { + skip_at_depth = Some((new_depth, cached)); + continue; + } + skip_at_depth = None; + new_depth + cached + } else { + depth_delta + }; + + if !context + .reachable + .contains(&entry.offset().to_unit_section_offset(&unit)) + { + // entry is not reachable: discarding all its info. 
+ skip_at_depth = Some((0, depth_delta)); + continue; + } + + let new_stack_len = stack.len().wrapping_add(depth_delta as usize); + current_frame_base.update(new_stack_len); + current_scope_ranges.update(new_stack_len); + current_value_range.update(new_stack_len); + let range_builder = if entry.tag() == gimli::DW_TAG_subprogram { + let range_builder = RangeInfoBuilder::from_subprogram_die( + entry, + context, + &unit.encoding(), + addr_tr, + cu_low_pc, + )?; + if let RangeInfoBuilder::Function(func_index) = range_builder { + if let Some(frame_info) = + get_function_frame_info(module_info, func_index, value_ranges) + { + current_value_range.push(new_stack_len, frame_info); + } + current_scope_ranges.push(new_stack_len, range_builder.get_ranges(addr_tr)); + Some(range_builder) + } else { + // FIXME current_scope_ranges.push() + None + } + } else { + let high_pc = entry.attr_value(gimli::DW_AT_high_pc)?; + let ranges = entry.attr_value(gimli::DW_AT_ranges)?; + if high_pc.is_some() || ranges.is_some() { + let range_builder = + RangeInfoBuilder::from(entry, context, &unit.encoding(), cu_low_pc)?; + current_scope_ranges.push(new_stack_len, range_builder.get_ranges(addr_tr)); + Some(range_builder) + } else { + None + } + }; + + if depth_delta <= 0 { + for _ in depth_delta..1 { + stack.pop(); + } + } else { + assert!(depth_delta == 1); + } + + if let Some(AttributeValue::Exprloc(expr)) = entry.attr_value(gimli::DW_AT_frame_base)? { + if let Some(expr) = compile_expression(&expr, &unit.encoding(), None)? { + current_frame_base.push(new_stack_len, expr); + } + } + + let parent = stack.last().unwrap(); + + if entry.tag() == gimli::DW_TAG_pointer_type { + // Wrap pointer types. + // TODO reference types? 
+ let die_id = replace_pointer_type( + *parent, + comp_unit, + wp_die_id, + entry, + &unit, + context, + out_strings, + &mut pending_die_refs, + )?; + stack.push(die_id); + assert!(stack.len() == new_stack_len); + die_ref_map.insert(entry.offset(), die_id); + continue; + } + + let die_id = comp_unit.add(*parent, entry.tag()); + + stack.push(die_id); + assert!(stack.len() == new_stack_len); + die_ref_map.insert(entry.offset(), die_id); + + clone_die_attributes( + entry, + context, + addr_tr, + current_value_range.top(), + &unit.encoding(), + &mut comp_unit, + die_id, + range_builder, + current_scope_ranges.top(), + cu_low_pc, + out_strings, + &die_ref_map, + &mut pending_die_refs, + FileAttributeContext::Children(&file_map, current_frame_base.top()), + )?; + + if entry.tag() == gimli::DW_TAG_subprogram && !current_scope_ranges.is_empty() { + append_vmctx_info( + comp_unit, + die_id, + vmctx_die_id, + addr_tr, + current_value_range.top(), + current_scope_ranges.top().expect("range"), + out_strings, + )?; + } + } + for (die_id, attr_name, offset) in pending_die_refs { + let die = comp_unit.get_mut(die_id); + if let Some(unit_id) = die_ref_map.get(&offset) { + die.set(attr_name, write::AttributeValue::ThisUnitEntryRef(*unit_id)); + } else { + // TODO check why loosing DIEs + } + } + Ok(()) +} diff --git a/wasmtime-environ/src/address_map.rs b/wasmtime-environ/src/address_map.rs index 5295c9d0ad28..cf4876e435bd 100644 --- a/wasmtime-environ/src/address_map.rs +++ b/wasmtime-environ/src/address_map.rs @@ -27,6 +27,12 @@ pub struct FunctionAddressMap { /// The array is sorted by the InstructionAddressMap::code_offset field. pub instructions: Vec, + /// Function start source location (normally declaration). + pub start_srcloc: ir::SourceLoc, + + /// Function end source location. + pub end_srcloc: ir::SourceLoc, + /// Generated function body offset if applicable, otherwise 0. 
pub body_offset: usize, @@ -36,3 +42,18 @@ pub struct FunctionAddressMap { /// Module functions addresses mappings. pub type ModuleAddressMap = PrimaryMap; + +/// Value ranges for functions. +pub type ValueLabelsRanges = PrimaryMap; + +/// Stack slots for functions. +pub type StackSlots = PrimaryMap; + +/// Module `vmctx` related info. +pub struct ModuleVmctxInfo { + /// The memory definition offset in the VMContext structure. + pub memory_offset: i64, + + /// The functions stack slots. + pub stack_slots: StackSlots, +} diff --git a/wasmtime-environ/src/cache.rs b/wasmtime-environ/src/cache.rs index a32f2e7d88e6..2c101e8a5781 100644 --- a/wasmtime-environ/src/cache.rs +++ b/wasmtime-environ/src/cache.rs @@ -1,4 +1,4 @@ -use crate::address_map::ModuleAddressMap; +use crate::address_map::{ModuleAddressMap, ValueLabelsRanges}; use crate::compilation::{CodeAndJTOffsets, Compilation, Relocations}; use crate::module::Module; use crate::module_environ::FunctionBodyData; @@ -102,9 +102,17 @@ pub struct ModuleCacheData { compilation: Compilation, relocations: Relocations, address_transforms: ModuleAddressMap, + value_ranges: ValueLabelsRanges, + stack_slots: PrimaryMap, } -type ModuleCacheDataTupleType = (Compilation, Relocations, ModuleAddressMap); +type ModuleCacheDataTupleType = ( + Compilation, + Relocations, + ModuleAddressMap, + ValueLabelsRanges, + PrimaryMap, +); struct Sha256Hasher(Sha256); @@ -225,11 +233,19 @@ impl ModuleCacheData { compilation: data.0, relocations: data.1, address_transforms: data.2, + value_ranges: data.3, + stack_slots: data.4, } } pub fn to_tuple(self) -> ModuleCacheDataTupleType { - (self.compilation, self.relocations, self.address_transforms) + ( + self.compilation, + self.relocations, + self.address_transforms, + self.value_ranges, + self.stack_slots, + ) } } diff --git a/wasmtime-environ/src/compilation.rs b/wasmtime-environ/src/compilation.rs index 1ceb67c015a9..c17d4b12c845 100644 --- a/wasmtime-environ/src/compilation.rs +++ 
b/wasmtime-environ/src/compilation.rs @@ -1,7 +1,7 @@ //! A `Compilation` contains the compiled function bodies for a WebAssembly //! module. -use crate::address_map::ModuleAddressMap; +use crate::address_map::{ModuleAddressMap, ValueLabelsRanges}; use crate::module; use crate::module_environ::FunctionBodyData; use cranelift_codegen::{binemit, ir, isa, CodegenError}; @@ -149,5 +149,14 @@ pub trait Compiler { function_body_inputs: PrimaryMap>, isa: &dyn isa::TargetIsa, generate_debug_info: bool, - ) -> Result<(Compilation, Relocations, ModuleAddressMap), CompileError>; + ) -> Result< + ( + Compilation, + Relocations, + ModuleAddressMap, + ValueLabelsRanges, + PrimaryMap, + ), + CompileError, + >; } diff --git a/wasmtime-environ/src/cranelift.rs b/wasmtime-environ/src/cranelift.rs index ba8b49c76b0a..7017fd341ac6 100644 --- a/wasmtime-environ/src/cranelift.rs +++ b/wasmtime-environ/src/cranelift.rs @@ -1,6 +1,8 @@ //! Support for compiling with Cranelift. -use crate::address_map::{FunctionAddressMap, InstructionAddressMap, ModuleAddressMap}; +use crate::address_map::{ + FunctionAddressMap, InstructionAddressMap, ModuleAddressMap, ValueLabelsRanges, +}; use crate::cache::{ModuleCacheData, ModuleCacheEntry}; use crate::compilation::{ CodeAndJTOffsets, Compilation, CompileError, Relocation, RelocationTarget, Relocations, @@ -90,8 +92,13 @@ impl RelocSink { } } -fn get_address_transform(context: &Context, isa: &isa::TargetIsa) -> Vec { - let mut result = Vec::new(); +fn get_function_address_map<'data>( + context: &Context, + data: &FunctionBodyData<'data>, + body_len: usize, + isa: &isa::TargetIsa, +) -> FunctionAddressMap { + let mut instructions = Vec::new(); let func = &context.func; let mut ebbs = func.layout.ebbs().collect::>(); @@ -101,14 +108,27 @@ fn get_address_transform(context: &Context, isa: &isa::TargetIsa) -> Vec>, isa: &dyn isa::TargetIsa, generate_debug_info: bool, - ) -> Result<(Compilation, Relocations, ModuleAddressMap), CompileError> { + ) -> Result< 
+ ( + Compilation, + Relocations, + ModuleAddressMap, + ValueLabelsRanges, + PrimaryMap, + ), + CompileError, + > { let cache_entry = ModuleCacheEntry::new( module, &function_body_inputs, @@ -138,6 +167,8 @@ impl crate::compilation::Compiler for Cranelift { let mut functions = PrimaryMap::with_capacity(function_body_inputs.len()); let mut relocations = PrimaryMap::with_capacity(function_body_inputs.len()); let mut address_transforms = PrimaryMap::with_capacity(function_body_inputs.len()); + let mut value_ranges = PrimaryMap::with_capacity(function_body_inputs.len()); + let mut stack_slots = PrimaryMap::with_capacity(function_body_inputs.len()); function_body_inputs .into_iter() @@ -149,6 +180,9 @@ impl crate::compilation::Compiler for Cranelift { context.func.name = get_func_name(func_index); context.func.signature = module.signatures[module.functions[func_index]].clone(); + if generate_debug_info { + context.func.collect_debug_info(); + } let mut trans = FuncTranslator::new(); trans @@ -171,36 +205,48 @@ impl crate::compilation::Compiler for Cranelift { let address_transform = if generate_debug_info { let body_len = code_buf.len(); - let at = get_address_transform(&context, isa); + Some(get_function_address_map(&context, input, body_len, isa)) + } else { + None + }; - Some(FunctionAddressMap { - instructions: at, - body_offset: 0, - body_len, - }) + let ranges = if generate_debug_info { + Some( + context + .build_value_labels_ranges(isa) + .map_err(CompileError::Codegen)?, + ) } else { None }; + let stack_slots = context.func.stack_slots.clone(); + Ok(( code_buf, jt_offsets, reloc_sink.func_relocs, address_transform, + ranges, + stack_slots, )) }) .collect::, CompileError>>()? 
.into_iter() - .for_each(|(function, func_jt_offsets, relocs, address_transform)| { - functions.push(CodeAndJTOffsets { - body: function, - jt_offsets: func_jt_offsets, - }); - relocations.push(relocs); - if let Some(address_transform) = address_transform { - address_transforms.push(address_transform); - } - }); + .for_each( + |(function, func_jt_offsets, relocs, address_transform, ranges, sss)| { + functions.push(CodeAndJTOffsets { + body: function, + jt_offsets: func_jt_offsets, + }); + relocations.push(relocs); + if let Some(address_transform) = address_transform { + address_transforms.push(address_transform); + } + value_ranges.push(ranges.unwrap_or(std::collections::HashMap::new())); + stack_slots.push(sss); + }, + ); // TODO: Reorganize where we create the Vec for the resolved imports. @@ -208,6 +254,8 @@ impl crate::compilation::Compiler for Cranelift { Compilation::new(functions), relocations, address_transforms, + value_ranges, + stack_slots, )); cache_entry.update_data(&data); data diff --git a/wasmtime-environ/src/lib.rs b/wasmtime-environ/src/lib.rs index 4dbeca9c62ca..5b9c3abf8b6e 100644 --- a/wasmtime-environ/src/lib.rs +++ b/wasmtime-environ/src/lib.rs @@ -51,7 +51,9 @@ pub mod cranelift; #[cfg(feature = "lightbeam")] pub mod lightbeam; -pub use crate::address_map::{FunctionAddressMap, InstructionAddressMap, ModuleAddressMap}; +pub use crate::address_map::{ + FunctionAddressMap, InstructionAddressMap, ModuleAddressMap, ModuleVmctxInfo, ValueLabelsRanges, +}; pub use crate::cache::conf as cache_conf; pub use crate::compilation::{ Compilation, CompileError, Compiler, Relocation, RelocationTarget, Relocations, diff --git a/wasmtime-jit/src/compiler.rs b/wasmtime-jit/src/compiler.rs index 7d4c456dac81..01501100a092 100644 --- a/wasmtime-jit/src/compiler.rs +++ b/wasmtime-jit/src/compiler.rs @@ -8,15 +8,16 @@ use cranelift_codegen::ir::InstBuilder; use cranelift_codegen::isa::{TargetFrontendConfig, TargetIsa}; use cranelift_codegen::Context; use 
cranelift_codegen::{binemit, ir}; -use cranelift_entity::PrimaryMap; +use cranelift_entity::{EntityRef, PrimaryMap}; use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext}; -use cranelift_wasm::DefinedFuncIndex; +use cranelift_wasm::{DefinedFuncIndex, DefinedMemoryIndex}; use std::boxed::Box; use std::string::String; use std::vec::Vec; use wasmtime_debug::{emit_debugsections_image, DebugInfoData}; use wasmtime_environ::{ - Compilation, CompileError, Compiler as _C, FunctionBodyData, Module, Relocations, Tunables, + Compilation, CompileError, Compiler as _C, FunctionBodyData, Module, ModuleVmctxInfo, + Relocations, Tunables, VMOffsets, }; use wasmtime_runtime::{InstantiationError, SignatureRegistry, VMFunctionBody}; @@ -83,13 +84,14 @@ impl Compiler { ), SetupError, > { - let (compilation, relocations, address_transform) = DefaultCompiler::compile_module( - module, - function_body_inputs, - &*self.isa, - debug_data.is_some(), - ) - .map_err(SetupError::Compile)?; + let (compilation, relocations, address_transform, value_ranges, stack_slots) = + DefaultCompiler::compile_module( + module, + function_body_inputs, + &*self.isa, + debug_data.is_some(), + ) + .map_err(SetupError::Compile)?; let allocated_functions = allocate_functions(&mut self.code_memory, &compilation).map_err(|message| { @@ -108,11 +110,22 @@ impl Compiler { let body_len = compilation.get(i).body.len(); funcs.push((ptr, body_len)); } + let module_vmctx_info = { + let ofs = VMOffsets::new(target_config.pointer_bytes(), &module); + let memory_offset = + ofs.vmctx_vmmemory_definition_base(DefinedMemoryIndex::new(0)) as i64; + ModuleVmctxInfo { + memory_offset, + stack_slots, + } + }; let bytes = emit_debugsections_image( triple, &target_config, &debug_data, + &module_vmctx_info, &address_transform, + &value_ranges, &funcs, ) .map_err(|e| SetupError::DebugInfo(e))?;