diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index 10d09662c0a47a..c5c37aafe505e3 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -44,6 +44,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MipsABIFlags.h" +#include "lldb/Target/Process.h" #define CASE_AND_STREAM(s, def, width) \ case def: \ @@ -2990,9 +2991,10 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) { // section, nomatter if .symtab was already parsed or not. This is because // minidebuginfo normally removes the .symtab symbols which have their // matching .dynsym counterparts. + Section *dynsym = nullptr; if (!symtab || GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) { - Section *dynsym = + dynsym = section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true) .get(); if (dynsym) { @@ -3002,6 +3004,20 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) { m_address_class_map.merge(address_class_map); } } + if (!dynsym) { + // Try and read the dynamic symbol table from the .dynamic section. + uint32_t num_symbols = 0; + std::optional symtab_data = + GetDynsymDataFromDynamic(num_symbols); + std::optional strtab_data = GetDynstrData(); + if (symtab_data && strtab_data) { + auto [num_symbols_parsed, address_class_map] = + ParseSymbols(&lldb_symtab, symbol_id, section_list, num_symbols, + symtab_data.value(), strtab_data.value()); + symbol_id += num_symbols_parsed; + m_address_class_map.merge(address_class_map); + } + } // DT_JMPREL // If present, this entry's d_ptr member holds the address of @@ -3811,6 +3827,33 @@ ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size, Offset); } +std::optional +ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length, + uint64_t offset) { + // ELFDynamic values contain a "d_ptr" member that will be a load address if + // we have an ELF file read from memory, or it will be a file address if it + // was read from a ELF file. This function will correctly fetch data pointed + // to by the ELFDynamic::d_ptr, or return std::nullopt if the data isn't + // available. + const lldb::addr_t d_ptr_addr = dyn->d_ptr + offset; + if (ProcessSP process_sp = m_process_wp.lock()) { + if (DataBufferSP data_sp = ReadMemory(process_sp, d_ptr_addr, length)) + return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize()); + } else { + // We have an ELF file with no section headers or we didn't find the + // .dynamic section. Try and find the .dynstr section. + Address addr; + if (!addr.ResolveAddressUsingFileSections(d_ptr_addr, GetSectionList())) + return std::nullopt; + DataExtractor data; + addr.GetSection()->GetSectionData(data); + return DataExtractor(data, + d_ptr_addr - addr.GetSection()->GetFileAddress(), + length); + } + return std::nullopt; +} + std::optional ObjectFileELF::GetDynstrData() { if (SectionList *section_list = GetSectionList()) { // Find the SHT_DYNAMIC section. @@ -3838,31 +3881,15 @@ std::optional ObjectFileELF::GetDynstrData() { // and represent the dynamic symbol tables's string table. These are needed // by the dynamic loader and we can read them from a process' address space. // - // When loading and ELF file from memory, only the program headers end up - // being mapped into memory, and we can find these values in the PT_DYNAMIC - // segment. + // When loading and ELF file from memory, only the program headers are + // guaranteed end up being mapped into memory, and we can find these values in + // the PT_DYNAMIC segment. const ELFDynamic *strtab = FindDynamicSymbol(DT_STRTAB); const ELFDynamic *strsz = FindDynamicSymbol(DT_STRSZ); if (strtab == nullptr || strsz == nullptr) return std::nullopt; - if (ProcessSP process_sp = m_process_wp.lock()) { - if (DataBufferSP data_sp = - ReadMemory(process_sp, strtab->d_ptr, strsz->d_val)) - return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize()); - } else { - // We have an ELF file with no section headers or we didn't find the - // .dynamic section. Try and find the .dynstr section. - Address addr; - if (addr.ResolveAddressUsingFileSections(strtab->d_ptr, GetSectionList())) { - DataExtractor data; - addr.GetSection()->GetSectionData(data); - return DataExtractor(data, - strtab->d_ptr - addr.GetSection()->GetFileAddress(), - strsz->d_val); - } - } - return std::nullopt; + return ReadDataFromDynamic(strtab, strsz->d_val, /*offset=*/0); } std::optional ObjectFileELF::GetDynamicData() { @@ -3895,3 +3922,116 @@ std::optional ObjectFileELF::GetDynamicData() { } return std::nullopt; } + +std::optional ObjectFileELF::GetNumSymbolsFromDynamicHash() { + const ELFDynamic *hash = FindDynamicSymbol(DT_HASH); + if (hash == nullptr) + return std::nullopt; + + // The DT_HASH header looks like this: + struct DtHashHeader { + uint32_t nbucket; + uint32_t nchain; + }; + if (auto data = ReadDataFromDynamic(hash, 8)) { + // We don't need the number of buckets value "nbucket", we just need the + // "nchain" value which contains the number of symbols. + offset_t offset = offsetof(DtHashHeader, nchain); + return data->GetU32(&offset); + } + + return std::nullopt; +} + +std::optional ObjectFileELF::GetNumSymbolsFromDynamicGnuHash() { + const ELFDynamic *gnu_hash = FindDynamicSymbol(DT_GNU_HASH); + if (gnu_hash == nullptr) + return std::nullopt; + + // Create a DT_GNU_HASH header + // https://flapenguin.me/elf-dt-gnu-hash + struct DtGnuHashHeader { + uint32_t nbuckets = 0; + uint32_t symoffset = 0; + uint32_t bloom_size = 0; + uint32_t bloom_shift = 0; + }; + uint32_t num_symbols = 0; + // Read enogh data for the DT_GNU_HASH header so we can extract the values. + if (auto data = ReadDataFromDynamic(gnu_hash, sizeof(DtGnuHashHeader))) { + offset_t offset = 0; + DtGnuHashHeader header; + header.nbuckets = data->GetU32(&offset); + header.symoffset = data->GetU32(&offset); + header.bloom_size = data->GetU32(&offset); + header.bloom_shift = data->GetU32(&offset); + const size_t addr_size = GetAddressByteSize(); + const addr_t buckets_offset = + sizeof(DtGnuHashHeader) + addr_size * header.bloom_size; + std::vector buckets; + if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4, buckets_offset)) { + offset = 0; + for (uint32_t i = 0; i < header.nbuckets; ++i) + buckets.push_back(bucket_data->GetU32(&offset)); + // Locate the chain that handles the largest index bucket. + uint32_t last_symbol = 0; + for (uint32_t bucket_value : buckets) + last_symbol = std::max(bucket_value, last_symbol); + if (last_symbol < header.symoffset) { + num_symbols = header.symoffset; + } else { + // Walk the bucket's chain to add the chain length to the total. + const addr_t chains_base_offset = buckets_offset + header.nbuckets * 4; + for (;;) { + if (auto chain_entry_data = ReadDataFromDynamic(gnu_hash, 4, chains_base_offset + (last_symbol - header.symoffset) * 4)) { + offset = 0; + uint32_t chain_entry = chain_entry_data->GetU32(&offset); + ++last_symbol; + // If the low bit is set, this entry is the end of the chain. + if (chain_entry & 1) + break; + } else { + break; + } + } + num_symbols = last_symbol; + } + } + } + if (num_symbols > 0) + return num_symbols; + + return std::nullopt; +} + +std::optional +ObjectFileELF::GetDynsymDataFromDynamic(uint32_t &num_symbols) { + // Every ELF file which represents an executable or shared library has + // mandatory .dynamic entries. The DT_SYMTAB value contains a pointer to the + // symbol table, and DT_SYMENT contains the size of a symbol table entry. + // We then can use either the DT_HASH or DT_GNU_HASH to find the number of + // symbols in the symbol table as the symbol count is not stored in the + // .dynamic section as a key/value pair. + // + // When loading and ELF file from memory, only the program headers end up + // being mapped into memory, and we can find these values in the PT_DYNAMIC + // segment. + num_symbols = 0; + // Get the process in case this is an in memory ELF file. + ProcessSP process_sp(m_process_wp.lock()); + const ELFDynamic *symtab = FindDynamicSymbol(DT_SYMTAB); + const ELFDynamic *syment = FindDynamicSymbol(DT_SYMENT); + // DT_SYMTAB and DT_SYMENT are mandatory. + if (symtab == nullptr || syment == nullptr) + return std::nullopt; + + if (std::optional syms = GetNumSymbolsFromDynamicHash()) + num_symbols = *syms; + else if (std::optional syms = GetNumSymbolsFromDynamicGnuHash()) + num_symbols = *syms; + else + return std::nullopt; + if (num_symbols == 0) + return std::nullopt; + return ReadDataFromDynamic(symtab, syment->d_val * num_symbols); +} diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h index aba3a5bfcbf5b6..16c216eb81e729 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h @@ -435,6 +435,47 @@ class ObjectFileELF : public lldb_private::ObjectFile { /// \return The bytes that represent the string table data or \c std::nullopt /// if an error occured. std::optional GetDynstrData(); + + /// Read the bytes pointed to by the \a dyn dynamic entry. + /// + /// ELFDynamic::d_ptr values contain file addresses if we load the ELF file + /// form a file on disk, or they contain load addresses if they were read + /// from memory. This function will correctly extract the data in both cases + /// if it is available. + /// + /// \param[in] dyn The dynamic entry to use to fetch the data from. + /// + /// \param[in] length The number of bytes to read. + /// + /// \param[in] offset The number of bytes to skip after the d_ptr value + /// before reading data. + /// + /// \return The bytes that represent the dynanic entries data or + /// \c std::nullopt if an error occured or the data is not available. + std::optional + ReadDataFromDynamic(const elf::ELFDynamic *dyn, uint64_t length, + uint64_t offset = 0); + + /// Get the bytes that represent the dynamic symbol table from the .dynamic + /// section from process memory. + /// + /// This functon uses the DT_SYMTAB value from the .dynamic section to read + /// the symbols table data from process memory. The number of symbols in the + /// symbol table is calculated by looking at the DT_HASH or DT_GNU_HASH + /// values as the symbol count isn't stored in the .dynamic section. + /// + /// \return The bytes that represent the symbol table data from the .dynamic + /// section or section headers or \c std::nullopt if an error + /// occured or if there is no dynamic symbol data available. + std::optional + GetDynsymDataFromDynamic(uint32_t &num_symbols); + + /// Get the number of symbols from the DT_HASH dynamic entry. + std::optional GetNumSymbolsFromDynamicHash(); + + /// Get the number of symbols from the DT_GNU_HASH dynamic entry. + std::optional GetNumSymbolsFromDynamicGnuHash(); + }; #endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H diff --git a/lldb/test/Shell/ObjectFile/ELF/elf-dynsym.test b/lldb/test/Shell/ObjectFile/ELF/elf-dynsym.test new file mode 100644 index 00000000000000..7d948e2cd225c6 --- /dev/null +++ b/lldb/test/Shell/ObjectFile/ELF/elf-dynsym.test @@ -0,0 +1,42 @@ +// This test verifies that loading an ELF file that has no section headers can +// load the dynamic symbol table using the DT_SYMTAB, DT_SYMENT, DT_HASH or +// the DT_GNU_HASH .dynamic key/value pairs that are loaded via the PT_DYNAMIC +// segment. + +// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \ +// RUN: -o - - <<<".globl defined, undefined; defined:" | \ +// RUN: ld.lld /dev/stdin -o - --hash-style=gnu -export-dynamic -shared \ +// RUN: -z nosectionheader -o %t.gnu +// RUN: %lldb %t.gnu -b \ +// RUN: -o "image dump objfile" \ +// RUN: | FileCheck %s --dump-input=always --check-prefix=GNU +// GNU: (lldb) image dump objfile +// GNU: Dumping headers for 1 module(s). +// GNU: ObjectFileELF, file = +// GNU: ELF Header +// GNU: e_type = 0x0003 ET_DYN +// Make sure there are no section headers +// GNU: e_shnum = 0x00000000 +// Make sure we were able to load the symbols +// GNU: Symtab, file = {{.*}}elf-dynsym.test.tmp.gnu, num_symbols = 2: +// GNU-DAG: undefined +// GNU-DAG: defined + +// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \ +// RUN: -o - - <<<".globl defined, undefined; defined:" | \ +// RUN: ld.lld /dev/stdin -o - --hash-style=sysv -export-dynamic -shared \ +// RUN: -z nosectionheader -o %t.sysv +// RUN: %lldb %t.sysv -b \ +// RUN: -o "image dump objfile" \ +// RUN: | FileCheck %s --dump-input=always --check-prefix=HASH +// HASH: (lldb) image dump objfile +// HASH: Dumping headers for 1 module(s). +// HASH: ObjectFileELF, file = +// HASH: ELF Header +// HASH: e_type = 0x0003 ET_DYN +// Make sure there are no section headers +// HASH: e_shnum = 0x00000000 +// Make sure we were able to load the symbols +// HASH: Symtab, file = {{.*}}elf-dynsym.test.tmp.sysv, num_symbols = 2: +// HASH-DAG: undefined +// HASH-DAG: defined