Skip to content

Commit

Permalink
read: add Dwarf::populate_abbreviations_cache (#679)
Browse files Browse the repository at this point in the history
  • Loading branch information
philipc authored Sep 14, 2023
1 parent 2d3ad32 commit 23ebfc8
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 200 deletions.
2 changes: 2 additions & 0 deletions crates/examples/src/bin/dwarfdump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,8 @@ where
dwarf.load_sup(&mut load_sup_section)?;
}

dwarf.populate_abbreviations_cache(gimli::AbbreviationsCacheStrategy::All);

if flags.eh_frame {
let eh_frame = gimli::EhFrame::load(&mut load_section).unwrap();
dump_eh_frame(w, file, eh_frame)?;
Expand Down
157 changes: 85 additions & 72 deletions src/read/abbrev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ use core::ops::Deref;
use crate::common::{DebugAbbrevOffset, Encoding, SectionId};
use crate::constants;
use crate::endianity::Endianity;
use crate::read::lazy::LazyArc;
use crate::read::{EndianSlice, Error, Reader, ReaderOffset, Result, Section, UnitHeader};
use crate::read::{
DebugInfoUnitHeadersIter, EndianSlice, Error, Reader, ReaderOffset, Result, Section, UnitHeader,
};

/// The `DebugAbbrev` struct represents the abbreviations describing
/// `DebuggingInformationEntry`s' attribute names and forms found in the
Expand Down Expand Up @@ -102,14 +103,24 @@ impl<R> From<R> for DebugAbbrev<R> {
}
}

/// The strategy to use for caching abbreviations.
///
/// This selects which abbreviations offsets are parsed and stored when the
/// cache is populated from the unit headers of a `.debug_info` section.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum AbbreviationsCacheStrategy {
/// Cache abbreviations that are used more than once.
///
/// Only abbreviations offsets that are referenced by at least two units are cached.
///
/// This is useful if the units in the `.debug_info` section will be parsed only once.
Duplicates,
/// Cache all abbreviations.
///
/// Every distinct abbreviations offset referenced by a unit is cached.
///
/// This is useful if the units in the `.debug_info` section will be parsed more than once.
All,
}

/// A cache of previously parsed `Abbreviations`.
///
/// Currently this only caches the abbreviations for offset 0,
/// since this is a common case in which abbreviations are reused.
/// This strategy may change in future if there is sufficient need.
#[derive(Debug, Default)]
pub struct AbbreviationsCache {
abbreviations: LazyArc<Abbreviations>,
abbreviations: btree_map::BTreeMap<u64, Result<Arc<Abbreviations>>>,
}

impl AbbreviationsCache {
Expand All @@ -118,19 +129,81 @@ impl AbbreviationsCache {
Self::default()
}

/// Rebuild the cache from the abbreviations referenced by the given units.
///
/// The unit headers are walked to collect their abbreviations offsets, the
/// offsets are filtered according to `strategy`, and the abbreviations at each
/// remaining offset are parsed and stored. The previous contents of the cache
/// are replaced.
///
/// A failure to parse abbreviations is stored in the cache as an error entry
/// for that offset. A failure while iterating over the units is not reported:
/// iteration simply stops at the first error.
pub fn populate<R: Reader>(
    &mut self,
    strategy: AbbreviationsCacheStrategy,
    debug_abbrev: &DebugAbbrev<R>,
    mut units: DebugInfoUnitHeadersIter<R>,
) {
    // Gather the abbreviations offset of every unit that can be read.
    let mut offsets = Vec::new();
    while let Ok(Some(header)) = units.next() {
        offsets.push(header.debug_abbrev_offset());
    }
    // Sorting groups equal offsets together so both strategies can work on runs.
    offsets.sort_unstable_by_key(|offset| offset.0);

    match strategy {
        AbbreviationsCacheStrategy::Duplicates => {
            // Keep exactly one element from each run of length two or more:
            // the second element of the run.
            let mut run_offset = R::Offset::from_u8(0);
            let mut run_len = 0;
            offsets.retain(|offset| {
                if run_len != 0 && run_offset == offset.0 {
                    run_len += 1;
                } else {
                    run_offset = offset.0;
                    run_len = 1;
                }
                run_len == 2
            });
        }
        AbbreviationsCacheStrategy::All => {
            // Keep one element from every run.
            offsets.dedup();
        }
    }

    // Parse each selected offset, keeping parse errors as cache entries too.
    self.abbreviations = offsets
        .into_iter()
        .map(|offset| {
            let key = offset.0.into_u64();
            let parsed = debug_abbrev.abbreviations(offset).map(Arc::new);
            (key, parsed)
        })
        .collect();
}

/// Store already-parsed abbreviations in the cache under the given offset.
///
/// Any entry previously stored for that offset is replaced.
/// This is only needed when populating the cache manually.
pub fn set<R: Reader>(
    &mut self,
    offset: DebugAbbrevOffset<R::Offset>,
    abbreviations: Arc<Abbreviations>,
) {
    let key = offset.0.into_u64();
    let entry = Ok(abbreviations);
    self.abbreviations.insert(key, entry);
}

/// Parse the abbreviations at the given offset.
///
/// This uses or updates the cache as required.
/// This uses the cache if possible, but does not update it.
pub fn get<R: Reader>(
&self,
debug_abbrev: &DebugAbbrev<R>,
offset: DebugAbbrevOffset<R::Offset>,
) -> Result<Arc<Abbreviations>> {
if offset.0 != R::Offset::from_u8(0) {
return debug_abbrev.abbreviations(offset).map(Arc::new);
match self.abbreviations.get(&offset.0.into_u64()) {
Some(entry) => entry.clone(),
None => debug_abbrev.abbreviations(offset).map(Arc::new),
}
self.abbreviations
.get(|| debug_abbrev.abbreviations(offset))
}
}

Expand Down Expand Up @@ -1026,64 +1099,4 @@ pub mod tests {
.unwrap();
assert!(abbrevs.get(0).is_none());
}

// Checks that `AbbreviationsCache::get` returns the expected abbreviations for two
// different offsets, and that repeated lookups at offset 0 share one `Arc`.
#[test]
fn abbreviations_cache() {
// Two abbreviation tables placed back to back; both use abbreviation code 1.
// The second table is the one read at `DebugAbbrevOffset(8)` below.
#[rustfmt::skip]
let buf = Section::new()
.abbrev(1, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no)
.abbrev_attr(constants::DW_AT_name, constants::DW_FORM_string)
.abbrev_attr_null()
.abbrev_null()
.abbrev(1, constants::DW_TAG_compile_unit, constants::DW_CHILDREN_yes)
.abbrev_attr(constants::DW_AT_producer, constants::DW_FORM_strp)
.abbrev_attr(constants::DW_AT_language, constants::DW_FORM_data2)
.abbrev_attr_null()
.abbrev_null()
.get_contents()
.unwrap();

// Expected contents of code 1 in the first table.
let abbrev1 = Abbreviation::new(
1,
constants::DW_TAG_subprogram,
constants::DW_CHILDREN_no,
vec![AttributeSpecification::new(
constants::DW_AT_name,
constants::DW_FORM_string,
None,
)]
.into(),
);

// Expected contents of code 1 in the second table.
let abbrev2 = Abbreviation::new(
1,
constants::DW_TAG_compile_unit,
constants::DW_CHILDREN_yes,
vec![
AttributeSpecification::new(
constants::DW_AT_producer,
constants::DW_FORM_strp,
None,
),
AttributeSpecification::new(
constants::DW_AT_language,
constants::DW_FORM_data2,
None,
),
]
.into(),
);

let debug_abbrev = DebugAbbrev::new(&buf, LittleEndian);
let cache = AbbreviationsCache::new();
// Look up offset 0, then offset 8, then offset 0 again.
let abbrevs1 = cache.get(&debug_abbrev, DebugAbbrevOffset(0)).unwrap();
assert_eq!(abbrevs1.get(1), Some(&abbrev1));
let abbrevs2 = cache.get(&debug_abbrev, DebugAbbrevOffset(8)).unwrap();
assert_eq!(abbrevs2.get(1), Some(&abbrev2));
let abbrevs3 = cache.get(&debug_abbrev, DebugAbbrevOffset(0)).unwrap();
assert_eq!(abbrevs3.get(1), Some(&abbrev1));

// The two offset-0 results must be the same allocation; the offset-8 result must not be.
assert!(!Arc::ptr_eq(&abbrevs1, &abbrevs2));
assert!(Arc::ptr_eq(&abbrevs1, &abbrevs3));
}
}
2 changes: 1 addition & 1 deletion src/read/cfi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3142,7 +3142,7 @@ pub enum CallFrameInstruction<R: Reader> {
/// >
/// > AArch64 Extension
/// >
/// > The DW_CFA_AARCH64_negate_ra_state operation negates bit[0] of the
/// > The DW_CFA_AARCH64_negate_ra_state operation negates bit 0 of the
/// > RA_SIGN_STATE pseudo-register. It does not take any operands. The
/// > DW_CFA_AARCH64_negate_ra_state must not be mixed with other DWARF Register
/// > Rule Instructions on the RA_SIGN_STATE pseudo-register in one Common
Expand Down
29 changes: 21 additions & 8 deletions src/read/dwarf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ use crate::common::{
};
use crate::constants;
use crate::read::{
Abbreviations, AbbreviationsCache, AttributeValue, DebugAbbrev, DebugAddr, DebugAranges,
DebugCuIndex, DebugInfo, DebugInfoUnitHeadersIter, DebugLine, DebugLineStr, DebugLoc,
DebugLocLists, DebugRngLists, DebugStr, DebugStrOffsets, DebugTuIndex, DebugTypes,
DebugTypesUnitHeadersIter, DebuggingInformationEntry, EntriesCursor, EntriesRaw, EntriesTree,
Error, IncompleteLineProgram, LocListIter, LocationLists, Range, RangeLists, RawLocListIter,
RawRngListIter, Reader, ReaderOffset, ReaderOffsetId, Result, RngListIter, Section, UnitHeader,
UnitIndex, UnitIndexSectionIterator, UnitOffset, UnitType,
Abbreviations, AbbreviationsCache, AbbreviationsCacheStrategy, AttributeValue, DebugAbbrev,
DebugAddr, DebugAranges, DebugCuIndex, DebugInfo, DebugInfoUnitHeadersIter, DebugLine,
DebugLineStr, DebugLoc, DebugLocLists, DebugRngLists, DebugStr, DebugStrOffsets, DebugTuIndex,
DebugTypes, DebugTypesUnitHeadersIter, DebuggingInformationEntry, EntriesCursor, EntriesRaw,
EntriesTree, Error, IncompleteLineProgram, LocListIter, LocationLists, Range, RangeLists,
RawLocListIter, RawRngListIter, Reader, ReaderOffset, ReaderOffsetId, Result, RngListIter,
Section, UnitHeader, UnitIndex, UnitIndexSectionIterator, UnitOffset, UnitType,
};

/// All of the commonly used DWARF sections, and other common information.
Expand Down Expand Up @@ -172,6 +172,18 @@ impl<T> Dwarf<T> {
}

impl<R: Reader> Dwarf<R> {
/// Fill the abbreviations cache from the units in `self.debug_info`.
///
/// The unit headers in `self.debug_info` are iterated to find the abbreviations
/// offsets, and the abbreviations are parsed from `self.debug_abbrev` according
/// to the given `strategy`.
///
/// Errors during parsing abbreviations are also stored in the cache.
/// Errors during iterating over the units are ignored.
pub fn populate_abbreviations_cache(&mut self, strategy: AbbreviationsCacheStrategy) {
    let units = self.debug_info.units();
    let debug_abbrev = &self.debug_abbrev;
    self.abbreviations_cache
        .populate(strategy, debug_abbrev, units);
}

/// Iterate the unit headers in the `.debug_info` section.
///
/// Can be [used with
Expand Down Expand Up @@ -862,7 +874,8 @@ impl<R: Reader> Unit<R> {
/// Construct a new `Unit` from the given unit header and abbreviations.
///
/// The abbreviations for this call can be obtained using `dwarf.abbreviations(&header)`.
/// The caller may implement caching to reuse the Abbreviations across units with the same header.debug_abbrev_offset() value.
/// The caller may implement caching to reuse the `Abbreviations` across units with the
/// same `header.debug_abbrev_offset()` value.
#[inline]
pub fn new_with_abbreviations(
dwarf: &Dwarf<R>,
Expand Down
116 changes: 0 additions & 116 deletions src/read/lazy.rs

This file was deleted.

3 changes: 0 additions & 3 deletions src/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,6 @@ pub use self::aranges::*;
mod index;
pub use self::index::*;

#[cfg(feature = "read")]
mod lazy;

#[cfg(feature = "read")]
mod line;
#[cfg(feature = "read")]
Expand Down

0 comments on commit 23ebfc8

Please sign in to comment.