-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for reading the dynamic symbol table from PT_DYNAMIC #112596
Conversation
@llvm/pr-subscribers-lldb Author: Greg Clayton (clayborg) ChangesAllow LLDB to parse the dynamic symbol table from an ELF file or memory image in an ELF file that has no section headers. This patch uses the ability to parse the PT_DYNAMIC segment and find the DT_SYMTAB, DT_SYMENT, DT_HASH or DT_GNU_HASH to find and parse the dynamic symbol table if the section headers are not present. It also adds a helper function to read data from a .dynamic key/value pair entry correctly from the file or from memory. Patch is 33.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112596.diff 3 Files Affected:
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index 10d09662c0a47a..7374ac10a1e27a 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -44,6 +44,8 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MipsABIFlags.h"
+#include "lldb/Target/Process.h"
+
#define CASE_AND_STREAM(s, def, width) \
case def: \
@@ -2990,18 +2992,34 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
// section, nomatter if .symtab was already parsed or not. This is because
// minidebuginfo normally removes the .symtab symbols which have their
// matching .dynsym counterparts.
+ bool found_dynsym = false;
if (!symtab ||
GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) {
Section *dynsym =
section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
.get();
if (dynsym) {
+ found_dynsym = true;
auto [num_symbols, address_class_map] =
ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
symbol_id += num_symbols;
m_address_class_map.merge(address_class_map);
}
}
+ if (!found_dynsym) {
+ // Try and read the dynamic symbol table from the .dynamic section.
+ uint32_t num_symbols = 0;
+ std::optional<DataExtractor> symtab_data =
+ GetDynsymDataFromDynamic(num_symbols);
+ std::optional<DataExtractor> strtab_data = GetDynstrData();
+ if (symtab_data && strtab_data) {
+ auto [num_symbols_parsed, address_class_map] =
+ ParseSymbols(&lldb_symtab, symbol_id, section_list, num_symbols,
+ symtab_data.value(), strtab_data.value());
+ symbol_id += num_symbols_parsed;
+ m_address_class_map.merge(address_class_map);
+ }
+ }
// DT_JMPREL
// If present, this entry's d_ptr member holds the address of
@@ -3811,6 +3829,33 @@ ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size,
Offset);
}
+std::optional<DataExtractor>
+ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length,
+ uint64_t offset) {
+ // ELFDynamic values contain a "d_ptr" member that will be a load address if
+ // we have an ELF file read from memory, or it will be a file address if it
+ // was read from a ELF file. This function will correctly fetch data pointed
+ // to by the ELFDynamic::d_ptr, or return std::nullopt if the data isn't
+ // available.
+ const lldb::addr_t d_ptr_addr = dyn->d_ptr + offset;
+ if (ProcessSP process_sp = m_process_wp.lock()) {
+ if (DataBufferSP data_sp = ReadMemory(process_sp, d_ptr_addr, length))
+ return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
+ } else {
+ // We have an ELF file with no section headers or we didn't find the
+ // .dynamic section. Try and find the .dynstr section.
+ Address addr;
+ if (!addr.ResolveAddressUsingFileSections(d_ptr_addr, GetSectionList()))
+ return std::nullopt;
+ DataExtractor data;
+ addr.GetSection()->GetSectionData(data);
+ return DataExtractor(data,
+ d_ptr_addr - addr.GetSection()->GetFileAddress(),
+ length);
+ }
+ return std::nullopt;
+}
+
std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
if (SectionList *section_list = GetSectionList()) {
// Find the SHT_DYNAMIC section.
@@ -3846,23 +3891,7 @@ std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
if (strtab == nullptr || strsz == nullptr)
return std::nullopt;
- if (ProcessSP process_sp = m_process_wp.lock()) {
- if (DataBufferSP data_sp =
- ReadMemory(process_sp, strtab->d_ptr, strsz->d_val))
- return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
- } else {
- // We have an ELF file with no section headers or we didn't find the
- // .dynamic section. Try and find the .dynstr section.
- Address addr;
- if (addr.ResolveAddressUsingFileSections(strtab->d_ptr, GetSectionList())) {
- DataExtractor data;
- addr.GetSection()->GetSectionData(data);
- return DataExtractor(data,
- strtab->d_ptr - addr.GetSection()->GetFileAddress(),
- strsz->d_val);
- }
- }
- return std::nullopt;
+ return ReadDataFromDynamic(strtab, strsz->d_val, /*offset=*/0);
}
std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
@@ -3895,3 +3924,103 @@ std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
}
return std::nullopt;
}
+
+
+std::optional<DataExtractor>
+ObjectFileELF::GetDynsymDataFromDynamic(uint32_t &num_symbols) {
+ // Every ELF file which represents an executable or shared library has
+ // mandatory .dynamic entries. The DT_SYMTAB value contains a pointer to the
+ // symbol table, and DT_SYMENT contains the size of a symbol table entry.
+ // We then can use either the DT_HASH or DT_GNU_HASH to find the number of
+ // symbols in the symbol table as the symbol count is not stored in the
+ // .dynamic section as a key/value pair.
+ //
+ // When loading and ELF file from memory, only the program headers end up
+ // being mapped into memory, and we can find these values in the PT_DYNAMIC
+ // segment.
+ num_symbols = 0;
+ // Get the process in case this is an in memory ELF file.
+ ProcessSP process_sp(m_process_wp.lock());
+ const ELFDynamic *symtab = FindDynamicSymbol(DT_SYMTAB);
+ const ELFDynamic *syment = FindDynamicSymbol(DT_SYMENT);
+ const ELFDynamic *hash = FindDynamicSymbol(DT_HASH);
+ const ELFDynamic *gnu_hash = FindDynamicSymbol(DT_GNU_HASH);
+ // DT_SYMTAB and DT_SYMENT are mandatory.
+ if (symtab == nullptr || syment == nullptr)
+ return std::nullopt;
+ // We must have either a DT_HASH or a DT_GNU_HASH.
+ if (hash == nullptr && gnu_hash == nullptr)
+ return std::nullopt;
+ // The number of symbols in the symbol table is the number of entries in the
+ // symbol table divided by the size of each symbol table entry.
+ // We must figure out the number of symbols in the symbol table using the
+ // DT_HASH or the DT_GNU_HASH as the number of symbols isn't stored anywhere
+ // in the .dynamic section.
+
+ lldb::offset_t offset;
+ if (hash) {
+ // The DT_HASH header contains the number of symbols in the "nchain"
+ // member. The header looks like this:
+ // struct DT_HASH_HEADER {
+ // uint32_t nbucket;
+ // uint32_t nchain;
+ // };
+ if (auto data = ReadDataFromDynamic(hash, 8)) {
+ offset = 4;
+ num_symbols = data->GetU32(&offset);
+ }
+ }
+ if (num_symbols == 0 && gnu_hash) {
+ struct DT_GNU_HASH_HEADER {
+ uint32_t nbuckets = 0;
+ uint32_t symoffset = 0;
+ uint32_t bloom_size = 0;
+ uint32_t bloom_shift = 0;
+ };
+ if (auto data = ReadDataFromDynamic(gnu_hash, sizeof(DT_GNU_HASH_HEADER))) {
+ offset = 0;
+ DT_GNU_HASH_HEADER header;
+ header.nbuckets = data->GetU32(&offset);
+ header.symoffset = data->GetU32(&offset);
+ header.bloom_size = data->GetU32(&offset);
+ header.bloom_shift = data->GetU32(&offset);
+ const size_t addr_size = GetAddressByteSize();
+ const addr_t buckets_offset =
+ sizeof(DT_GNU_HASH_HEADER) + addr_size * header.bloom_size;
+ std::vector<uint32_t> buckets;
+ if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4, buckets_offset)) {
+ offset = 0;
+ for (uint32_t i = 0; i < header.nbuckets; ++i)
+ buckets.push_back(bucket_data->GetU32(&offset));
+ // Locate the chain that handles the largest index bucket.
+ uint32_t last_symbol = 0;
+ for (uint32_t bucket_value : buckets)
+ last_symbol = std::max(bucket_value, last_symbol);
+ if (last_symbol < header.symoffset) {
+ num_symbols = header.symoffset;
+ } else {
+ // Walk the bucket's chain to add the chain length to the total.
+ const addr_t chains_base_offset = buckets_offset + header.nbuckets * 4;
+ for (;;) {
+ if (auto chain_entry_data = ReadDataFromDynamic(gnu_hash, 4, chains_base_offset + (last_symbol - header.symoffset) * 4)) {
+ offset = 0;
+ uint32_t chain_entry = chain_entry_data->GetU32(&offset);
+ ++last_symbol;
+ // If the low bit is set, this entry is the end of the chain.
+ if (chain_entry & 1)
+ break;
+ } else {
+ break;
+ }
+ }
+ num_symbols = last_symbol;
+ }
+ }
+ }
+ if (num_symbols > 0)
+ ++num_symbols; // First symbol is always all zeros
+ }
+ if (num_symbols == 0)
+ return std::nullopt;
+ return ReadDataFromDynamic(symtab, syment->d_val * num_symbols);
+}
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
index aba3a5bfcbf5b6..34d9ae74fbb23f 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
@@ -435,6 +435,40 @@ class ObjectFileELF : public lldb_private::ObjectFile {
/// \return The bytes that represent the string table data or \c std::nullopt
/// if an error occured.
std::optional<lldb_private::DataExtractor> GetDynstrData();
+
+ /// Read the bytes pointed to by the \a dyn dynamic entry.
+ ///
+ /// ELFDynamic::d_ptr values contain file addresses if we load the ELF file
+ /// form a file on disk, or they contain load addresses if they were read
+ /// from memory. This function will correctly extract the data in both cases
+ /// if it is available.
+ ///
+ /// \param[in] dyn The dynamic entry to use to fetch the data from.
+ ///
+ /// \param[in] length The number of bytes to read.
+ ///
+ /// \param[in] offset The number of bytes to skip after the d_ptr value
+ /// before reading data.
+ ///
+ /// \return The bytes that represent the dynanic entries data or
+ /// \c std::nullopt if an error occured or the data is not available.
+ std::optional<lldb_private::DataExtractor>
+ ReadDataFromDynamic(const elf::ELFDynamic *dyn, uint64_t length,
+ uint64_t offset = 0);
+
+ /// Get the bytes that represent the dynamic symbol table from the .dynamic
+ /// section from process memory.
+ ///
+ /// This functon uses the DT_SYMTAB value from the .dynamic section to read
+ /// the symbols table data from process memory. The number of symbols in the
+ /// symbol table is calculated by looking at the DT_HASH or DT_GNU_HASH
+ /// values as the symbol count isn't stored in the .dynamic section.
+ ///
+ /// \return The bytes that represent the symbol table data from the .dynamic
+ /// section or section headers or \c std::nullopt if an error
+ /// occured or if there is no dynamic symbol data available.
+ std::optional<lldb_private::DataExtractor>
+ GetDynsymDataFromDynamic(uint32_t &num_symbols);
};
#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H
diff --git a/lldb/test/Shell/ObjectFile/ELF/elf-dynsym.yaml b/lldb/test/Shell/ObjectFile/ELF/elf-dynsym.yaml
new file mode 100644
index 00000000000000..2763aac1df4893
--- /dev/null
+++ b/lldb/test/Shell/ObjectFile/ELF/elf-dynsym.yaml
@@ -0,0 +1,631 @@
+## This test verifies that loading an ELF file that has no section headers can
+## load the dynamic symbol table using the DT_SYMTAB, DT_SYMENT, DT_HASH or
+## the DT_GNU_HASH .dynamic key/value pairs that are loaded via the PT_DYNAMIC
+## segment.
+##
+## This test will convert a shared library from yaml, strip its section headers,
+## and varify that LLDB can load the dynamic symbol table. We must manually
+## strip the section headers from a full shared library because our ELF YAML
+## support in obj2yaml/yaml2obj doesn't support ELF files with program headers
+## only, they must have sections or the file doesn't get recreated correctlty.
+
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-strip --strip-sections %t -o %t.noshdrs
+
+# RUN: %lldb -b \
+# RUN: -o "target create -d '%t.noshdrs'" \
+# RUN: -o "image dump objfile" \
+# RUN: | FileCheck %s --dump-input=always
+# CHECK: (lldb) image dump objfile
+# CHECK: Dumping headers for 1 module(s).
+# CHECK: ObjectFileELF, file =
+# CHECK: ELF Header
+# Make sure there are no section headers
+# CHECK: e_shnum = 0x00000000
+# Make sure we were able to load the symbols
+# CHECK: elf-dynsym.yaml.tmp.noshdrs, num_symbols = 9:
+# CHECK: [ 0] 1 Undefined 0x0000000000000000 0x0000000000000000 0x00000022 __cxa_finalize
+# CHECK: [ 1] 2 X Undefined 0x0000000000000000 0x0000000000000000 0x00000012 puts
+# CHECK: [ 2] 3 Undefined 0x0000000000000000 0x0000000000000000 0x00000020 _ITM_deregisterTMCloneTable
+# CHECK: [ 3] 4 Undefined 0x0000000000000000 0x0000000000000000 0x00000020 __gmon_start__
+# CHECK: [ 4] 5 Undefined 0x0000000000000000 0x0000000000000000 0x00000020 _ITM_registerTMCloneTable
+# CHECK: [ 5] 6 X Code 0x0000000000001135 0x0000000000000016 0x00000012 baz()
+# CHECK: [ 6] 7 X Code 0x000000000000111f 0x0000000000000016 0x00000012 bar()
+# CHECK: [ 7] 8 X Code 0x000000000000114b 0x0000000000000016 0x00000012 biz()
+# CHECK: [ 8] 9 X Code 0x0000000000001109 0x0000000000000016 0x00000012 foo()
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_DYN
+ Machine: EM_X86_64
+ Entry: 0x1050
+ProgramHeaders:
+ - Type: PT_LOAD
+ Flags: [ PF_R ]
+ FirstSec: .note.gnu.build-id
+ LastSec: .rela.plt
+ Align: 0x1000
+ Offset: 0x0
+ - Type: PT_LOAD
+ Flags: [ PF_X, PF_R ]
+ FirstSec: .init
+ LastSec: .fini
+ VAddr: 0x1000
+ Align: 0x1000
+ Offset: 0x1000
+ - Type: PT_LOAD
+ Flags: [ PF_R ]
+ FirstSec: .rodata
+ LastSec: .eh_frame
+ VAddr: 0x2000
+ Align: 0x1000
+ Offset: 0x2000
+ - Type: PT_LOAD
+ Flags: [ PF_W, PF_R ]
+ FirstSec: .init_array
+ LastSec: .bss
+ VAddr: 0x3DC8
+ Align: 0x1000
+ Offset: 0x2DC8
+ - Type: PT_DYNAMIC
+ Flags: [ PF_W, PF_R ]
+ FirstSec: .dynamic
+ LastSec: .dynamic
+ VAddr: 0x3DE0
+ Align: 0x8
+ Offset: 0x2DE0
+ - Type: PT_NOTE
+ Flags: [ PF_R ]
+ FirstSec: .note.gnu.build-id
+ LastSec: .note.gnu.build-id
+ VAddr: 0x238
+ Align: 0x4
+ Offset: 0x238
+ - Type: PT_GNU_EH_FRAME
+ Flags: [ PF_R ]
+ FirstSec: .eh_frame_hdr
+ LastSec: .eh_frame_hdr
+ VAddr: 0x202C
+ Align: 0x4
+ Offset: 0x202C
+ - Type: PT_GNU_STACK
+ Flags: [ PF_W, PF_R ]
+ Align: 0x10
+ Offset: 0x0
+ - Type: PT_GNU_RELRO
+ Flags: [ PF_R ]
+ FirstSec: .init_array
+ LastSec: .got
+ VAddr: 0x3DC8
+ Offset: 0x2DC8
+Sections:
+ - Name: .note.gnu.build-id
+ Type: SHT_NOTE
+ Flags: [ SHF_ALLOC ]
+ Address: 0x238
+ AddressAlign: 0x4
+ Notes:
+ - Name: GNU
+ Desc: E98A07D11FFBEC0C57492B71EEE529C65D183408
+ Type: NT_PRPSINFO
+ - Name: .gnu.hash
+ Type: SHT_GNU_HASH
+ Flags: [ SHF_ALLOC ]
+ Address: 0x260
+ Link: .dynsym
+ AddressAlign: 0x8
+ Header:
+ SymNdx: 0x6
+ Shift2: 0x6
+ BloomFilter: [ 0x3021080800001010 ]
+ HashBuckets: [ 0x0, 0x6, 0x0 ]
+ HashValues: [ 0x6A5EBD44, 0x6A5EBC3C, 0x6A5EDF4C, 0x6A6128EB ]
+ - Name: .dynsym
+ Type: SHT_DYNSYM
+ Flags: [ SHF_ALLOC ]
+ Address: 0x298
+ Link: .dynstr
+ AddressAlign: 0x8
+ - Name: .dynstr
+ Type: SHT_STRTAB
+ Flags: [ SHF_ALLOC ]
+ Address: 0x388
+ AddressAlign: 0x1
+ - Name: .gnu.version
+ Type: SHT_GNU_versym
+ Flags: [ SHF_ALLOC ]
+ Address: 0x44E
+ Link: .dynsym
+ AddressAlign: 0x2
+ Entries: [ 0, 2, 2, 0, 0, 0, 1, 1, 1, 1 ]
+ - Name: .gnu.version_r
+ Type: SHT_GNU_verneed
+ Flags: [ SHF_ALLOC ]
+ Address: 0x468
+ Link: .dynstr
+ AddressAlign: 0x8
+ Dependencies:
+ - Version: 1
+ File: libc.so.6
+ Entries:
+ - Name: GLIBC_2.2.5
+ Hash: 157882997
+ Flags: 0
+ Other: 2
+ - Name: .rela.dyn
+ Type: SHT_RELA
+ Flags: [ SHF_ALLOC ]
+ Address: 0x488
+ Link: .dynsym
+ AddressAlign: 0x8
+ Relocations:
+ - Offset: 0x3DC8
+ Type: R_X86_64_RELATIVE
+ Addend: 4352
+ - Offset: 0x3DD0
+ Type: R_X86_64_RELATIVE
+ Addend: 4288
+ - Offset: 0x3DD8
+ Type: R_X86_64_RELATIVE
+ Addend: 15832
+ - Offset: 0x3FE0
+ Symbol: __cxa_finalize
+ Type: R_X86_64_GLOB_DAT
+ - Offset: 0x3FE8
+ Symbol: _ITM_deregisterTMCloneTable
+ Type: R_X86_64_GLOB_DAT
+ - Offset: 0x3FF0
+ Symbol: __gmon_start__
+ Type: R_X86_64_GLOB_DAT
+ - Offset: 0x3FF8
+ Symbol: _ITM_registerTMCloneTable
+ Type: R_X86_64_GLOB_DAT
+ - Name: .rela.plt
+ Type: SHT_RELA
+ Flags: [ SHF_ALLOC, SHF_INFO_LINK ]
+ Address: 0x530
+ Link: .dynsym
+ AddressAlign: 0x8
+ Info: .got.plt
+ Relocations:
+ - Offset: 0x4018
+ Symbol: __cxa_finalize
+ Type: R_X86_64_JUMP_SLOT
+ - Offset: 0x4020
+ Symbol: puts
+ Type: R_X86_64_JUMP_SLOT
+ - Name: .init
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1000
+ AddressAlign: 0x4
+ Offset: 0x1000
+ Content: F30F1EFA4883EC08488B05E12F00004885C07402FFD04883C408C3
+ - Name: .plt
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1020
+ AddressAlign: 0x10
+ EntSize: 0x10
+ Content: FF35E22F0000FF25E42F00000F1F4000FF25E22F00006800000000E9E0FFFFFFFF25DA2F00006801000000E9D0FFFFFF
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1050
+ AddressAlign: 0x10
+ Content: 488D3DD12F0000488D05CA2F00004839F87415488B057E2F00004885C07409FFE00F1F8000000000C30F1F8000000000488D3DA12F0000488D359A2F00004829FE4889F048C1EE3F48C1F8034801C648D1FE7414488B054D2F00004885C07408FFE0660F1F440000C30F1F8000000000F30F1EFA803D5D2F000000752B5548833D0A2F0000004889E5740C488D3DF62C0000E849FFFFFFE864FFFFFFC605352F0000015DC30F1F00C30F1F8000000000F30F1EFAE977FFFFFF554889E5488D05EC0E00004889C7E824FFFFFF905DC3554889E5488D05E10E00004889C7E80EFFFFFF905DC3554889E5488D05D60E00004889C7E8F8FEFFFF905DC3554889E5488D05CB0E00004889C7E8E2FEFFFF905DC3
+ - Name: .fini
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1164
+ AddressAlign: 0x4
+ Content: F30F1EFA4883EC084883C408C3
+ - Name: ...
[truncated]
|
You can test this locally with the following command:git-clang-format --diff 0f8dbb2fac532e37a9859d52982f0e8994305a11 a174d9bb6bdd99d33d12f179e5c60de9a3258f64 --extensions h,cpp -- lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h View the diff from clang-format here.diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index c5c37aafe5..13b71068e5 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -22,6 +22,7 @@
#include "lldb/Host/LZMA.h"
#include "lldb/Symbol/DWARFCallFrameInfo.h"
#include "lldb/Symbol/SymbolContext.h"
+#include "lldb/Target/Process.h"
#include "lldb/Target/SectionLoadList.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/ArchSpec.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MipsABIFlags.h"
-#include "lldb/Target/Process.h"
#define CASE_AND_STREAM(s, def, width) \
case def: \
@@ -3013,7 +3013,7 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
if (symtab_data && strtab_data) {
auto [num_symbols_parsed, address_class_map] =
ParseSymbols(&lldb_symtab, symbol_id, section_list, num_symbols,
- symtab_data.value(), strtab_data.value());
+ symtab_data.value(), strtab_data.value());
symbol_id += num_symbols_parsed;
m_address_class_map.merge(address_class_map);
}
@@ -3847,8 +3847,7 @@ ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length,
return std::nullopt;
DataExtractor data;
addr.GetSection()->GetSectionData(data);
- return DataExtractor(data,
- d_ptr_addr - addr.GetSection()->GetFileAddress(),
+ return DataExtractor(data, d_ptr_addr - addr.GetSection()->GetFileAddress(),
length);
}
return std::nullopt;
@@ -3969,7 +3968,8 @@ std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicGnuHash() {
const addr_t buckets_offset =
sizeof(DtGnuHashHeader) + addr_size * header.bloom_size;
std::vector<uint32_t> buckets;
- if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4, buckets_offset)) {
+ if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4,
+ buckets_offset)) {
offset = 0;
for (uint32_t i = 0; i < header.nbuckets; ++i)
buckets.push_back(bucket_data->GetU32(&offset));
@@ -3983,7 +3983,9 @@ std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicGnuHash() {
// Walk the bucket's chain to add the chain length to the total.
const addr_t chains_base_offset = buckets_offset + header.nbuckets * 4;
for (;;) {
- if (auto chain_entry_data = ReadDataFromDynamic(gnu_hash, 4, chains_base_offset + (last_symbol - header.symoffset) * 4)) {
+ if (auto chain_entry_data = ReadDataFromDynamic(
+ gnu_hash, 4,
+ chains_base_offset + (last_symbol - header.symoffset) * 4)) {
offset = 0;
uint32_t chain_entry = chain_entry_data->GetU32(&offset);
++last_symbol;
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
index 16c216eb81..41b8ce189e 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
@@ -475,7 +475,6 @@ private:
/// Get the number of symbols from the DT_GNU_HASH dynamic entry.
std::optional<uint32_t> GetNumSymbolsFromDynamicGnuHash();
-
};
#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I left some comments. All code layout options and comment concerns, otherwise LGTM!
@@ -44,6 +44,8 @@ | |||
#include "llvm/Support/MathExtras.h" | |||
#include "llvm/Support/MemoryBuffer.h" | |||
#include "llvm/Support/MipsABIFlags.h" | |||
#include "lldb/Target/Process.h" | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: empty line
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
btw, this grouping of include blocks with empty lines is an lldb-ism (other llvm subprojects don't have that), and one not very strictly adhered to (it looks like this file does not put lldb includes in a separate block). One problem with adding empty lines like that is that it prevents clang-format from sorting the includes. Right now clang-format (correctly) wants to put this include next to the other lldb includes. If you added an empty line, it would leave it alone, thinking that is intentional (which I don't think it is).
auto [num_symbols, address_class_map] = | ||
ParseSymbolTable(&lldb_symtab, symbol_id, dynsym); | ||
symbol_id += num_symbols; | ||
m_address_class_map.merge(address_class_map); | ||
} | ||
} | ||
if (!found_dynsym) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We added this bool, but why can't this be the else to if (dynsym)
? We have two very similar sounding variables here. I think we can simplify this to
Section *dynsym = !symtab || GetSection()->... ? section_list->FindSection(..) : nullptr;
and then we have a simple if else
// symbols in the symbol table as the symbol count is not stored in the | ||
// .dynamic section as a key/value pair. | ||
// | ||
// When loading and ELF file from memory, only the program headers end up |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great comment, but I would rephrase that only the program headers are guaranteed to be mapped
// uint32_t nchain; | ||
// }; | ||
if (auto data = ReadDataFromDynamic(hash, 8)) { | ||
offset = 4; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we make a comment why we're ignoring the first field
@@ -44,6 +44,8 @@ | |||
#include "llvm/Support/MathExtras.h" | |||
#include "llvm/Support/MemoryBuffer.h" | |||
#include "llvm/Support/MipsABIFlags.h" | |||
#include "lldb/Target/Process.h" | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
btw, this grouping of include blocks with empty lines is an lldb-ism (other llvm subprojects don't have that), and one not very strictly adhered to (it looks like this file does not put lldb includes in a separate block). One problem with adding empty lines like that is that it prevents clang-format from sorting the includes. Right now clang-format (correctly) wants to put this include next to the other lldb includes. If you added an empty line, it would leave it alone, thinking that is intentional (which I don't think it is).
// DT_HASH or the DT_GNU_HASH as the number of symbols isn't stored anywhere | ||
// in the .dynamic section. | ||
|
||
lldb::offset_t offset; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Declare this where its being initialized (the offset
used for parsing DT_HASH and the one used for DT_GNU_HASH are conceptually unrelated and could be separate vars).
num_symbols = data->GetU32(&offset); | ||
} | ||
} | ||
if (num_symbols == 0 && gnu_hash) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code is rather involved, and it would be better to have it in a helper function. Then it could also use early returns for some of the paths. I'd recommend something like:
if (std::optional<uint32_t> syms = GetNumSymbolsFromHash())
num_symbols = *syms;
else if (std::optional<uint32_t> syms = GetNumSymbolsFromDtHash())
num_symbols = *syms;
...
} | ||
} | ||
if (num_symbols == 0 && gnu_hash) { | ||
struct DT_GNU_HASH_HEADER { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't look like there's an existing struct with this name (in any project) so I'd prefer something more similar to our existing naming convention -- (Dt)GnuHashHeader?
uint32_t bloom_size = 0; | ||
uint32_t bloom_shift = 0; | ||
}; | ||
if (auto data = ReadDataFromDynamic(gnu_hash, sizeof(DT_GNU_HASH_HEADER))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could use a short description (or a link to one) of the format of the section. Nothing too involved, just a couple of sentences to let the reader know what to expect..
# CHECK: [ 7] 8 X Code 0x000000000000114b 0x0000000000000016 0x00000012 biz() | ||
# CHECK: [ 8] 9 X Code 0x0000000000001109 0x0000000000000016 0x00000012 foo() | ||
|
||
--- !ELF |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This input contains a lot of data that's irrelevant for the test. Can you make it smaller? For example, something like llvm-mc -triple=x86_64-pc-linux -filetype=obj -o - - <<<".globl defined, undefined; defined:" | ld.lld /dev/stdin -o - --hash-style=gnu -export-dynamic
produces a much smaller file which still has a dynamic section, and you can control exactly which entries it contains (in practice you may want to expand the input slightly, just so that all the symbols don't end up with the same value).
You could yamlize that, or even just leave that as the input to the test itself (it should be deterministic enough). If you go with the second option, you can add -z nosectionheader
to the linker command to skip the llvm-strip
step.
I think think that another test with the DT_HASH hash style would be in order (just change the above command to --hash-style=sysv)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am happy to use the llvm-mc + ld.lld trick, bit I need:
- the output file to have a .dynamic section or PT_DYNAMIC (above example doesn't)
- it needs to have a dynamic symbol table (SHT_DYNSYM or .dynamic section needs to have
DT_SYMTAB
andDT_SYMENT
entries - Needs to have a DT_HASH or DT_GNU_HASH
I don't know much about the llvm-mc
tool, but if you can get it to generate a file that i can use I will use it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see the problem. I used the version of lld on my system, and it looks like the behavior has changed slightly for the version at HEAD. Sorry about that. I believe this has everything you need:
$ bin/llvm-mc -triple=x86_64-pc-linux -filetype=obj -o - - <<<".globl defined, undefined; defined:" | bin/ld.lld /dev/stdin -o - --hash-style=gnu -export-dynamic -shared -z nosectionheader | llvm-readelf - -d -l
Elf file type is DYN (Shared object file)
Entry point 0x0
There are 6 program headers, starting at offset 64
Program Headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x000188 0x000188 R 0x8
LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x000243 0x000243 R 0x1000
LOAD 0x000248 0x0000000000002248 0x0000000000002248 0x000060 0x000db8 RW 0x1000
DYNAMIC 0x000248 0x0000000000002248 0x0000000000002248 0x000060 0x000060 RW 0x8
GNU_RELRO 0x000248 0x0000000000002248 0x0000000000002248 0x000060 0x000db8 R 0x1
GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0x0
Section to Segment mapping:
Segment Sections...
00
01
02
03
04
05
Dynamic section at offset 0x248 contains 6 entries:
Tag Type Name/Value
0x0000000000000006 (SYMTAB) 0x1c8
0x000000000000000b (SYMENT) 24 (bytes)
0x0000000000000005 (STRTAB) 0x230
0x000000000000000a (STRSZ) 19 (bytes)
0x000000006ffffef5 (GNU_HASH) 0x210
0x0000000000000000 (NULL) 0x0
// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \ | ||
// RUN: -o - - <<<".globl defined, undefined; defined:" | \ | ||
// RUN: ld.lld /dev/stdin -o - --hash-style=gnu -export-dynamic -shared \ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \ | |
// RUN: -o - - <<<".globl defined, undefined; defined:" | \ | |
// RUN: ld.lld /dev/stdin -o - --hash-style=gnu -export-dynamic -shared \ | |
// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s -o %t.o \ | |
// RUN: ld.lld %t.o --hash-style=gnu -export-dynamic -shared \ |
.. and put the asm input into this file. I don't think <<<
and /dev/stdin
will work on windows. I've just used as a simple command for experimentation.
I would also recommend more unique names and thinking about whether there are any corner cases that are worth explicitly testing (e.g. deliberately creating some hash collisions?).
// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \ | ||
// RUN: -o - - <<<".globl defined, undefined; defined:" | \ | ||
// RUN: ld.lld /dev/stdin -o - --hash-style=sysv -export-dynamic -shared \ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \ | |
// RUN: -o - - <<<".globl defined, undefined; defined:" | \ | |
// RUN: ld.lld /dev/stdin -o - --hash-style=sysv -export-dynamic -shared \ | |
// RUN: ld.lld %t.o -o - --hash-style=sysv -export-dynamic -shared \ |
(we can reuse the object file)
// HASH: (lldb) image dump objfile | ||
// HASH: Dumping headers for 1 module(s). | ||
// HASH: ObjectFileELF, file = | ||
// HASH: ELF Header | ||
// HASH: e_type = 0x0003 ET_DYN | ||
// Make sure there are no section headers | ||
// HASH: e_shnum = 0x00000000 | ||
// Make sure we were able to load the symbols | ||
// HASH: Symtab, file = {{.*}}elf-dynsym.test.tmp.sysv, num_symbols = 2: | ||
// HASH-DAG: undefined | ||
// HASH-DAG: defined |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In cases like this where the two outputs are (nearly) identical, I think its better to have a one check prefix for the common parts of the output -- it's less code and it better emphasizes the difference. This can be achieved by passing different sets of prefixes to the FileCheck command:
// RUN: ... | FileCheck --check-prefixes=BOTH,GNU
// RUN: ... | FileCheck --check-prefixes=BOTH,SYSV
// BOTH: This line is common for both formats
// GNU: This is gnu-specific output
// SYSV: This is sysv-specific output
… and PT_DYNAMIC. Allow LLDB to parse the dynamic symbol table from an ELF file or memory image in an ELF file that has no section headers. This patch uses the ability to parse the PT_DYNAMIC segment and find the DT_SYMTAB, DT_SYMENT, DT_HASH or DT_GNU_HASH to find and parse the dynamic symbol table if the section headers are not present. It also adds a helper function to read data from a .dynamic key/value pair entry correctly from the file or from memory.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/10733 Here is the relevant piece of the build log for the reference
|
It seems like this patch is responsible for breaking greendragon https://green.lab.llvm.org/job/llvm.org/view/LLDB/job/as-lldb-cmake/15470/ |
…MIC (#112596)" This reverts commit a7b2e73. This patch broke the greendragon bot Failed Tests (10): lldb-api :: python_api/sbplatform/TestLocateModuleCallback.py lldb-unit :: Target/./TargetTests/LocateModuleCallbackTest/GetOrCreateModuleCallbackSuccessWithModuleAndSymbol lldb-unit :: Target/./TargetTests/LocateModuleCallbackTest/GetOrCreateModuleCallbackSuccessWithOnlySymbol lldb-unit :: Target/./TargetTests/LocateModuleCallbackTest/GetOrCreateModuleCallbackSuccessWithSymbolAsModule lldb-unit :: Target/./TargetTests/LocateModuleCallbackTest/GetOrCreateModuleCallbackSuccessWithSymbolAsModuleAndSymbol lldb-unit :: Target/./TargetTests/LocateModuleCallbackTest/GetOrCreateModuleCallbackSuccessWithSymbolByPlatformUUID lldb-unit :: Target/./TargetTests/LocateModuleCallbackTest/GetOrCreateModuleWithCachedModuleAndSymbol lldb-unit :: Target/./TargetTests/ModuleCacheTest/GetAndPut lldb-unit :: Target/./TargetTests/ModuleCacheTest/GetAndPutStrangeHostname lldb-unit :: Target/./TargetTests/ModuleCacheTest/GetAndPutUuidExists
I have reverted this change with f14e1a8 |
I found the issue and have a fix. |
…6689) Resubmissions of #112596 with buildbot fixes. Allow LLDB to parse the dynamic symbol table from an ELF file or memory image in an ELF file that has no section headers. This patch uses the ability to parse the PT_DYNAMIC segment and find the DT_SYMTAB, DT_SYMENT, DT_HASH or DT_GNU_HASH to find and parse the dynamic symbol table if the section headers are not present. It also adds a helper function to read data from a .dynamic key/value pair entry correctly from the file or from memory.
Allow LLDB to parse the dynamic symbol table from an ELF file or memory image in an ELF file that has no section headers. This patch uses the ability to parse the PT_DYNAMIC segment and find the DT_SYMTAB, DT_SYMENT, DT_HASH or DT_GNU_HASH to find and parse the dynamic symbol table if the section headers are not present. It also adds a helper function to read data from a .dynamic key/value pair entry correctly from the file or from memory.