Skip to content

Commit

Permalink
Add support for reading the dynamic symbol table from PT_DYNAMIC (#11…
Browse files Browse the repository at this point in the history
…6689)

Resubmissions of #112596 with
buildbot fixes.

Allow LLDB to parse the dynamic symbol table from an ELF file or memory
image in an ELF file that has no section headers. This patch uses the
ability to parse the PT_DYNAMIC segment and find the DT_SYMTAB,
DT_SYMENT, DT_HASH or DT_GNU_HASH to find and parse the dynamic symbol
table if the section headers are not present. It also adds a helper
function to read data from a .dynamic key/value pair entry correctly
from the file or from memory.
  • Loading branch information
clayborg authored Nov 19, 2024
1 parent 170e1fe commit 5ae4d50
Show file tree
Hide file tree
Showing 3 changed files with 244 additions and 20 deletions.
180 changes: 160 additions & 20 deletions lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "lldb/Host/LZMA.h"
#include "lldb/Symbol/DWARFCallFrameInfo.h"
#include "lldb/Symbol/SymbolContext.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/SectionLoadList.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/ArchSpec.h"
Expand Down Expand Up @@ -3017,6 +3018,19 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
symbol_id += num_symbols;
m_address_class_map.merge(address_class_map);
} else {
// Try and read the dynamic symbol table from the .dynamic section.
uint32_t dynamic_num_symbols = 0;
std::optional<DataExtractor> symtab_data =
GetDynsymDataFromDynamic(dynamic_num_symbols);
std::optional<DataExtractor> strtab_data = GetDynstrData();
if (symtab_data && strtab_data) {
auto [num_symbols_parsed, address_class_map] = ParseSymbols(
&lldb_symtab, symbol_id, section_list, dynamic_num_symbols,
symtab_data.value(), strtab_data.value());
symbol_id += num_symbols_parsed;
m_address_class_map.merge(address_class_map);
}
}
}

Expand Down Expand Up @@ -3828,6 +3842,32 @@ ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size,
Offset);
}

std::optional<DataExtractor>
ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length,
uint64_t offset) {
// ELFDynamic values contain a "d_ptr" member that will be a load address if
// we have an ELF file read from memory, or it will be a file address if it
// was read from a ELF file. This function will correctly fetch data pointed
// to by the ELFDynamic::d_ptr, or return std::nullopt if the data isn't
// available.
const lldb::addr_t d_ptr_addr = dyn->d_ptr + offset;
if (ProcessSP process_sp = m_process_wp.lock()) {
if (DataBufferSP data_sp = ReadMemory(process_sp, d_ptr_addr, length))
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
} else {
// We have an ELF file with no section headers or we didn't find the
// .dynamic section. Try and find the .dynstr section.
Address addr;
if (!addr.ResolveAddressUsingFileSections(d_ptr_addr, GetSectionList()))
return std::nullopt;
DataExtractor data;
addr.GetSection()->GetSectionData(data);
return DataExtractor(data, d_ptr_addr - addr.GetSection()->GetFileAddress(),
length);
}
return std::nullopt;
}

std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
if (SectionList *section_list = GetSectionList()) {
// Find the SHT_DYNAMIC section.
Expand Down Expand Up @@ -3855,31 +3895,15 @@ std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
// and represent the dynamic symbol tables's string table. These are needed
// by the dynamic loader and we can read them from a process' address space.
//
// When loading and ELF file from memory, only the program headers end up
// being mapped into memory, and we can find these values in the PT_DYNAMIC
// segment.
// When loading and ELF file from memory, only the program headers are
// guaranteed end up being mapped into memory, and we can find these values in
// the PT_DYNAMIC segment.
const ELFDynamic *strtab = FindDynamicSymbol(DT_STRTAB);
const ELFDynamic *strsz = FindDynamicSymbol(DT_STRSZ);
if (strtab == nullptr || strsz == nullptr)
return std::nullopt;

if (ProcessSP process_sp = m_process_wp.lock()) {
if (DataBufferSP data_sp =
ReadMemory(process_sp, strtab->d_ptr, strsz->d_val))
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
} else {
// We have an ELF file with no section headers or we didn't find the
// .dynamic section. Try and find the .dynstr section.
Address addr;
if (addr.ResolveAddressUsingFileSections(strtab->d_ptr, GetSectionList())) {
DataExtractor data;
addr.GetSection()->GetSectionData(data);
return DataExtractor(data,
strtab->d_ptr - addr.GetSection()->GetFileAddress(),
strsz->d_val);
}
}
return std::nullopt;
return ReadDataFromDynamic(strtab, strsz->d_val, /*offset=*/0);
}

std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
Expand Down Expand Up @@ -3912,3 +3936,119 @@ std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
}
return std::nullopt;
}

std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicHash() {
const ELFDynamic *hash = FindDynamicSymbol(DT_HASH);
if (hash == nullptr)
return std::nullopt;

// The DT_HASH header looks like this:
struct DtHashHeader {
uint32_t nbucket;
uint32_t nchain;
};
if (auto data = ReadDataFromDynamic(hash, 8)) {
// We don't need the number of buckets value "nbucket", we just need the
// "nchain" value which contains the number of symbols.
offset_t offset = offsetof(DtHashHeader, nchain);
return data->GetU32(&offset);
}

return std::nullopt;
}

std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicGnuHash() {
const ELFDynamic *gnu_hash = FindDynamicSymbol(DT_GNU_HASH);
if (gnu_hash == nullptr)
return std::nullopt;

// Create a DT_GNU_HASH header
// https://flapenguin.me/elf-dt-gnu-hash
struct DtGnuHashHeader {
uint32_t nbuckets = 0;
uint32_t symoffset = 0;
uint32_t bloom_size = 0;
uint32_t bloom_shift = 0;
};
uint32_t num_symbols = 0;
// Read enogh data for the DT_GNU_HASH header so we can extract the values.
if (auto data = ReadDataFromDynamic(gnu_hash, sizeof(DtGnuHashHeader))) {
offset_t offset = 0;
DtGnuHashHeader header;
header.nbuckets = data->GetU32(&offset);
header.symoffset = data->GetU32(&offset);
header.bloom_size = data->GetU32(&offset);
header.bloom_shift = data->GetU32(&offset);
const size_t addr_size = GetAddressByteSize();
const addr_t buckets_offset =
sizeof(DtGnuHashHeader) + addr_size * header.bloom_size;
std::vector<uint32_t> buckets;
if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4,
buckets_offset)) {
offset = 0;
for (uint32_t i = 0; i < header.nbuckets; ++i)
buckets.push_back(bucket_data->GetU32(&offset));
// Locate the chain that handles the largest index bucket.
uint32_t last_symbol = 0;
for (uint32_t bucket_value : buckets)
last_symbol = std::max(bucket_value, last_symbol);
if (last_symbol < header.symoffset) {
num_symbols = header.symoffset;
} else {
// Walk the bucket's chain to add the chain length to the total.
const addr_t chains_base_offset = buckets_offset + header.nbuckets * 4;
for (;;) {
if (auto chain_entry_data = ReadDataFromDynamic(
gnu_hash, 4,
chains_base_offset + (last_symbol - header.symoffset) * 4)) {
offset = 0;
uint32_t chain_entry = chain_entry_data->GetU32(&offset);
++last_symbol;
// If the low bit is set, this entry is the end of the chain.
if (chain_entry & 1)
break;
} else {
break;
}
}
num_symbols = last_symbol;
}
}
}
if (num_symbols > 0)
return num_symbols;

return std::nullopt;
}

std::optional<DataExtractor>
ObjectFileELF::GetDynsymDataFromDynamic(uint32_t &num_symbols) {
// Every ELF file which represents an executable or shared library has
// mandatory .dynamic entries. The DT_SYMTAB value contains a pointer to the
// symbol table, and DT_SYMENT contains the size of a symbol table entry.
// We then can use either the DT_HASH or DT_GNU_HASH to find the number of
// symbols in the symbol table as the symbol count is not stored in the
// .dynamic section as a key/value pair.
//
// When loading and ELF file from memory, only the program headers end up
// being mapped into memory, and we can find these values in the PT_DYNAMIC
// segment.
num_symbols = 0;
// Get the process in case this is an in memory ELF file.
ProcessSP process_sp(m_process_wp.lock());
const ELFDynamic *symtab = FindDynamicSymbol(DT_SYMTAB);
const ELFDynamic *syment = FindDynamicSymbol(DT_SYMENT);
// DT_SYMTAB and DT_SYMENT are mandatory.
if (symtab == nullptr || syment == nullptr)
return std::nullopt;

if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicHash())
num_symbols = *syms;
else if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicGnuHash())
num_symbols = *syms;
else
return std::nullopt;
if (num_symbols == 0)
return std::nullopt;
return ReadDataFromDynamic(symtab, syment->d_val * num_symbols);
}
40 changes: 40 additions & 0 deletions lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,46 @@ class ObjectFileELF : public lldb_private::ObjectFile {
/// \return The bytes that represent the string table data or \c std::nullopt
/// if an error occured.
std::optional<lldb_private::DataExtractor> GetDynstrData();

/// Read the bytes pointed to by the \a dyn dynamic entry.
///
/// ELFDynamic::d_ptr values contain file addresses if we load the ELF file
/// form a file on disk, or they contain load addresses if they were read
/// from memory. This function will correctly extract the data in both cases
/// if it is available.
///
/// \param[in] dyn The dynamic entry to use to fetch the data from.
///
/// \param[in] length The number of bytes to read.
///
/// \param[in] offset The number of bytes to skip after the d_ptr value
/// before reading data.
///
/// \return The bytes that represent the dynanic entries data or
/// \c std::nullopt if an error occured or the data is not available.
std::optional<lldb_private::DataExtractor>
ReadDataFromDynamic(const elf::ELFDynamic *dyn, uint64_t length,
uint64_t offset = 0);

/// Get the bytes that represent the dynamic symbol table from the .dynamic
/// section from process memory.
///
/// This functon uses the DT_SYMTAB value from the .dynamic section to read
/// the symbols table data from process memory. The number of symbols in the
/// symbol table is calculated by looking at the DT_HASH or DT_GNU_HASH
/// values as the symbol count isn't stored in the .dynamic section.
///
/// \return The bytes that represent the symbol table data from the .dynamic
/// section or section headers or \c std::nullopt if an error
/// occured or if there is no dynamic symbol data available.
std::optional<lldb_private::DataExtractor>
GetDynsymDataFromDynamic(uint32_t &num_symbols);

/// Get the number of symbols from the DT_HASH dynamic entry.
std::optional<uint32_t> GetNumSymbolsFromDynamicHash();

/// Get the number of symbols from the DT_GNU_HASH dynamic entry.
std::optional<uint32_t> GetNumSymbolsFromDynamicGnuHash();
};

#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H
44 changes: 44 additions & 0 deletions lldb/test/Shell/ObjectFile/ELF/elf-dynsym.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// This test verifies that loading an ELF file that has no section headers can
// load the dynamic symbol table using the DT_SYMTAB, DT_SYMENT, DT_HASH or
// the DT_GNU_HASH .dynamic key/value pairs that are loaded via the PT_DYNAMIC
// segment.

// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \
// RUN: -o - - <<<".globl defined, undefined; defined:" | \
// RUN: ld.lld /dev/stdin -o - --hash-style=gnu -export-dynamic -shared \
// RUN: -o %t.gnu
// RUN: llvm-strip --strip-sections %t.gnu
// RUN: %lldb %t.gnu -b \
// RUN: -o "image dump objfile" \
// RUN: | FileCheck %s --dump-input=always --check-prefix=GNU
// GNU: (lldb) image dump objfile
// GNU: Dumping headers for 1 module(s).
// GNU: ObjectFileELF, file =
// GNU: ELF Header
// GNU: e_type = 0x0003 ET_DYN
// Make sure there are no section headers
// GNU: e_shnum = 0x00000000
// Make sure we were able to load the symbols
// GNU: Symtab, file = {{.*}}elf-dynsym.test.tmp.gnu, num_symbols = 2:
// GNU-DAG: undefined
// GNU-DAG: defined

// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \
// RUN: -o - - <<<".globl defined, undefined; defined:" | \
// RUN: ld.lld /dev/stdin -o - --hash-style=sysv -export-dynamic -shared \
// RUN: -o %t.sysv
// RUN: llvm-strip --strip-sections %t.sysv
// RUN: %lldb %t.sysv -b \
// RUN: -o "image dump objfile" \
// RUN: | FileCheck %s --dump-input=always --check-prefix=HASH
// HASH: (lldb) image dump objfile
// HASH: Dumping headers for 1 module(s).
// HASH: ObjectFileELF, file =
// HASH: ELF Header
// HASH: e_type = 0x0003 ET_DYN
// Make sure there are no section headers
// HASH: e_shnum = 0x00000000
// Make sure we were able to load the symbols
// HASH: Symtab, file = {{.*}}elf-dynsym.test.tmp.sysv, num_symbols = 2:
// HASH-DAG: undefined
// HASH-DAG: defined

0 comments on commit 5ae4d50

Please sign in to comment.