From 06ee098147b9b63cbb84acabea22547b48a61e0f Mon Sep 17 00:00:00 2001 From: Benjamin Mourad Date: Sat, 23 Oct 2021 15:08:57 -0400 Subject: [PATCH 1/4] Update LLVM backend to work with version 12 This may also work with later versions, but I did not test them --- lib/bap_llvm/llvm_disasm.cpp | 25 ++++++++++++++++++++- lib/bap_llvm/llvm_elf_loader.hpp | 38 +++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/lib/bap_llvm/llvm_disasm.cpp b/lib/bap_llvm/llvm_disasm.cpp index 8bedd3956..315dc49c9 100644 --- a/lib/bap_llvm/llvm_disasm.cpp +++ b/lib/bap_llvm/llvm_disasm.cpp @@ -7,6 +7,10 @@ #include #include #include +#if LLVM_VERSION_MAJOR >= 12 +#include +#include +#endif #include #if LLVM_VERSION_MAJOR >= 10 @@ -627,8 +631,27 @@ struct create_llvm_disassembler : disasm_factory { } +static void parse_environment_options(const char *prog_name, const char *env_var) { +#if LLVM_VERSION_MAJOR >= 12 + llvm::Optional env_value = llvm::sys::Process::GetEnv(llvm::StringRef(env_var)); + if (!env_value) + return; + + llvm::SmallVector new_argv; + llvm::BumpPtrAllocator alloc; + llvm::StringSaver saver(alloc); + new_argv.push_back(saver.save(prog_name).data()); + + llvm::cl::TokenizeGNUCommandLine(*env_value, saver, new_argv); + int new_argc = static_cast(new_argv.size()); + llvm::cl::ParseCommandLineOptions(new_argc, &new_argv[0], llvm::StringRef("")); +#else + llvm::cl::ParseEnvironmentOptions(prog_name, env_var); +#endif +} + int disasm_llvm_init() { - llvm::cl::ParseEnvironmentOptions("bap", "BAP_LLVM_OPTIONS"); + parse_environment_options("bap", "BAP_LLVM_OPTIONS"); bap::initialize_llvm(); auto f = std::make_shared(); return bap::register_disassembler("llvm", f); diff --git a/lib/bap_llvm/llvm_elf_loader.hpp b/lib/bap_llvm/llvm_elf_loader.hpp index 31de988b9..917ab2652 100644 --- a/lib/bap_llvm/llvm_elf_loader.hpp +++ b/lib/bap_llvm/llvm_elf_loader.hpp @@ -19,7 +19,11 @@ using namespace llvm::object; template bool has_addresses(const ELFObjectFile &obj) { +#if LLVM_VERSION_MAJOR >= 12 + auto hdr = &obj.getELFFile().getHeader(); +#else auto hdr = obj.getELFFile()->getHeader(); +#endif return (hdr->e_type == ELF::ET_EXEC || hdr->e_type == ELF::ET_DYN || hdr->e_type == ELF::ET_CORE); @@ -28,7 +32,11 @@ bool has_addresses(const ELFObjectFile &obj) { template bool is_executable(const ELFObjectFile &obj) { +#if LLVM_VERSION_MAJOR >= 12 + auto hdr = &obj.getELFFile().getHeader(); +#else auto hdr = obj.getELFFile()->getHeader(); +#endif return (hdr->e_type == ELF::ET_EXEC || hdr->e_type == ELF::ET_DYN); } @@ -42,7 +50,11 @@ bool is_executable(const ELFObjectFile &obj) { template uint64_t base_address(const ELFObjectFile &obj) { uint64_t base = 0L; +#if LLVM_VERSION_MAJOR >= 12 + auto elf = obj.getELFFile(); +#else auto elf = *obj.getELFFile(); +#endif auto segs = prim::elf_program_headers(elf); auto code = segs.end(); @@ -58,7 +70,12 @@ uint64_t base_address(const ELFObjectFile &obj) { template uint64_t minimal_progbits_offset(const ELFObjectFile &obj) { auto smallest = std::numeric_limits::max(); - for (auto sec : prim::elf_sections(*obj.getELFFile())) { +#if LLVM_VERSION_MAJOR >= 12 + auto &elf_file = obj.getELFFile(); +#else + auto &elf_file = *obj.getELFFile(); +#endif + for (auto sec : prim::elf_sections(elf_file)) { if (sec.sh_type == ELF::SHT_PROGBITS && sec.sh_offset < smallest) { smallest = sec.sh_offset; } @@ -68,7 +85,11 @@ uint64_t minimal_progbits_offset(const ELFObjectFile &obj) { template void emit_entry_point(const ELFObjectFile &obj, ogre_doc &s) { +#if LLVM_VERSION_MAJOR >= 12 + auto hdr = &obj.getELFFile().getHeader(); +#else auto hdr = obj.getELFFile()->getHeader(); +#endif s.entry("llvm:entry-point") << hdr->e_entry; } @@ -80,7 +101,11 @@ std::string name_of_index(std::size_t i) { template void emit_program_headers(const ELFObjectFile &obj, ogre_doc &s) { +#if LLVM_VERSION_MAJOR >= 12 + auto hdrs = prim::elf_program_headers(obj.getELFFile()); +#else auto hdrs = prim::elf_program_headers(*obj.getELFFile()); +#endif for (auto it = hdrs.begin(); it != hdrs.end(); ++it) { bool ld = (it->p_type == ELF::PT_LOAD); bool r = static_cast(it->p_flags & ELF::PF_R); @@ -162,7 +187,14 @@ bool is_external(uint64_t addr, uint64_t offset, uint64_t size) { template void emit_symbol_entry(const ELFObjectFile &obj, const SymbolRef &sym, ogre_doc &s) { +#if LLVM_VERSION_MAJOR >= 12 + auto sym_elf_or_error = obj.getSymbol(sym.getRawDataRefImpl()); + if (!sym_elf_or_error) + return; + auto sym_elf = *sym_elf_or_error; +#else auto sym_elf = obj.getSymbol(sym.getRawDataRefImpl()); +#endif auto name = prim::symbol_name(sym); auto addr = prim::symbol_address(sym); auto off = symbol_file_offset(obj, sym); @@ -188,7 +220,11 @@ void emit_symbol_entries(const ELFObjectFile &obj, symbol_iterator begin, sym template void emit_symbol_entries(const ELFObjectFile &obj, ogre_doc &s) { +#if LLVM_VERSION_MAJOR >= 12 + auto elf = &obj.getELFFile(); +#else auto elf = obj.getELFFile(); +#endif emit_symbol_entries(obj, obj.symbol_begin(), obj.symbol_end(), s); auto secs = prim::elf_sections(*elf); emit_symbol_entries(obj, obj.dynamic_symbol_begin(), obj.dynamic_symbol_end(), s); From d96d508748253594cce83e9c37926a5342b248dd Mon Sep 17 00:00:00 2001 From: bmourad01 Date: Thu, 30 Dec 2021 09:50:31 -0500 Subject: [PATCH 2/4] Fixes ARM/Thumb `movt` semantics The lifter was incorrectly re-using the upper 16 bits of the destination register, when the manual states that it is completely overwritten by the source operand. --- lib/arm/arm_lifter.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/arm/arm_lifter.ml b/lib/arm/arm_lifter.ml index f1fd33a3d..286ea7feb 100644 --- a/lib/arm/arm_lifter.ml +++ b/lib/arm/arm_lifter.ml @@ -227,7 +227,8 @@ let lift_move ~encoding mem ops (insn : move_insn) : stmt list = | `MOVTi16, [|`Reg dest; _; src; cond; _wflag|] -> let dest = Env.of_reg dest in - [Bil.move dest Bil.(var dest lor exp_of_op src lsl int32 16)] |> + let src16 = Bil.(cast unsigned 32 (cast low 16 (var dest))) in + [Bil.move dest Bil.(src16 lor exp_of_op src lsl int32 16)] |> fun ins -> exec ins cond | insn,ops -> fail [%here] "ops %s doesn't match move insn %s" From 59a4889abaf955fc5691ced86df0d7c8b3e1ca58 Mon Sep 17 00:00:00 2001 From: Benjamin Mourad Date: Tue, 4 Jan 2022 15:54:32 -0500 Subject: [PATCH 3/4] Use bit-wise operations instead of casts Co-authored-by: Ivan Gotovchits --- lib/arm/arm_lifter.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/arm/arm_lifter.ml b/lib/arm/arm_lifter.ml index 286ea7feb..cf29c308e 100644 --- a/lib/arm/arm_lifter.ml +++ b/lib/arm/arm_lifter.ml @@ -227,8 +227,8 @@ let lift_move ~encoding mem ops (insn : move_insn) : stmt list = | `MOVTi16, [|`Reg dest; _; src; cond; _wflag|] -> let dest = Env.of_reg dest in - let src16 = Bil.(cast unsigned 32 (cast low 16 (var dest))) in - [Bil.move dest Bil.(src16 lor exp_of_op src lsl int32 16)] |> + let dest = Env.of_reg dest and src = exp_of_op src in + Bil.[dest := var dest land int32 0xFFFF lor src lsl int32 16] |> fun ins -> exec ins cond | insn,ops -> fail [%here] "ops %s doesn't match move insn %s" From 0dbce6e9ee73fcadf4faa9adddc5191d68f32ec1 Mon Sep 17 00:00:00 2001 From: Benjamin Mourad Date: Tue, 4 Jan 2022 17:03:30 -0500 Subject: [PATCH 4/4] Fix error Co-authored-by: Ivan Gotovchits --- lib/arm/arm_lifter.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/arm/arm_lifter.ml b/lib/arm/arm_lifter.ml index cf29c308e..c1cf8cc45 100644 --- a/lib/arm/arm_lifter.ml +++ b/lib/arm/arm_lifter.ml @@ -226,7 +226,6 @@ let lift_move ~encoding mem ops (insn : move_insn) : stmt list = exec [Bil.move (Env.of_reg dest) (exp_of_op src)] cond | `MOVTi16, [|`Reg dest; _; src; cond; _wflag|] -> - let dest = Env.of_reg dest in let dest = Env.of_reg dest and src = exp_of_op src in Bil.[dest := var dest land int32 0xFFFF lor src lsl int32 16] |> fun ins -> exec ins cond