diff --git a/common/goos/Reader.cpp b/common/goos/Reader.cpp index ec348f0957..fd42da1323 100644 --- a/common/goos/Reader.cpp +++ b/common/goos/Reader.cpp @@ -159,13 +159,13 @@ Object Reader::read_from_stdin(const std::string& prompt_name) { /*! * Read a string. */ -Object Reader::read_from_string(const std::string& str) { +Object Reader::read_from_string(const std::string& str, bool add_top_level) { // create text fragment and add to the DB auto textFrag = std::make_shared(str); db.insert(textFrag); // perform read - auto result = internal_read(textFrag); + auto result = internal_read(textFrag, add_top_level); db.link(result, textFrag, 0); return result; } @@ -185,7 +185,7 @@ Object Reader::read_from_file(const std::vector& file_path) { /*! * Common read for a SourceText */ -Object Reader::internal_read(std::shared_ptr text) { +Object Reader::internal_read(std::shared_ptr text, bool add_top_level) { // first create stream TextStream ts(text); @@ -194,7 +194,11 @@ Object Reader::internal_read(std::shared_ptr text) { // read list! auto objs = read_list(ts, false); + // add_top_level == false hands back the bare list of forms, with no (top-level ...) wrapper + if (!add_top_level) { + return objs; + } return PairObject::make_new(SymbolObject::make_new(symbolTable, "top-level"), objs); } /*! 
diff --git a/common/goos/Reader.h b/common/goos/Reader.h index f301be7d7e..0382ceb423 100644 --- a/common/goos/Reader.h +++ b/common/goos/Reader.h @@ -67,7 +67,7 @@ struct Token { class Reader { public: Reader(); - Object read_from_string(const std::string& str); + Object read_from_string(const std::string& str, bool add_top_level = true); Object read_from_stdin(const std::string& prompt_name); Object read_from_file(const std::vector& file_path); @@ -77,7 +77,7 @@ class Reader { TextDb db; private: - Object internal_read(std::shared_ptr text); + Object internal_read(std::shared_ptr text, bool add_top_level = true); Object read_list(TextStream& stream, bool expect_close_paren = true); bool read_object(Token& tok, TextStream& ts, Object& obj); bool read_array(TextStream& stream, Object& o); diff --git a/decompiler/Disasm/Register.h b/decompiler/Disasm/Register.h index 122faf18c4..881cfc723e 100644 --- a/decompiler/Disasm/Register.h +++ b/decompiler/Disasm/Register.h @@ -144,6 +144,8 @@ class Register { bool operator==(const Register& other) const; bool operator!=(const Register& other) const; + // Orders registers by id. const-qualified, like == and != above, so it also works on const operands. + bool operator<(const Register& other) const { return id < other.id; } struct hash { auto operator()(const Register& x) const { return std::hash()(x.id); } diff --git a/decompiler/Function/TypeAnalysis.cpp b/decompiler/Function/TypeAnalysis.cpp index 6aee375eb4..5a22d52663 100644 --- a/decompiler/Function/TypeAnalysis.cpp +++ b/decompiler/Function/TypeAnalysis.cpp @@ -142,10 +142,12 @@ bool Function::run_type_analysis_ir2(const TypeSpec& my_type, (void)file; // STEP 0 - set decompiler type system settings for this function. In config we can manually // specify some settings for type propagation to reduce the strictness of type propagation. 
- dts.type_prop_settings.reset(); - if (get_config().pair_functions_by_name.find(guessed_name.to_string()) != - get_config().pair_functions_by_name.end()) { - dts.type_prop_settings.allow_pair = true; + if (!dts.type_prop_settings.locked) { + dts.type_prop_settings.reset(); + const auto& pair_functions = get_config().pair_functions_by_name; + if (pair_functions.find(guessed_name.to_string()) != pair_functions.end()) { + dts.type_prop_settings.allow_pair = true; + } } if (guessed_name.kind == FunctionName::FunctionKind::METHOD) { diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h index 572ad6b2be..0ef2174393 100644 --- a/decompiler/IR2/AtomicOp.h +++ b/decompiler/IR2/AtomicOp.h @@ -199,7 +199,7 @@ class SimpleAtom { private: Kind m_kind = Kind::INVALID; std::string m_string; // for symbol ptr and symbol val - s64 m_int = 0; // for integer constant and static address label id + s64 m_int = -1; // for integer constant and static address label id Variable m_variable; }; diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp index cbf53ba643..475adb05a7 100644 --- a/decompiler/IR2/Form.cpp +++ b/decompiler/IR2/Form.cpp @@ -383,7 +383,7 @@ void CondWithElseElement::apply_form(const std::function& f) { // EmptyElement ///////////////////////////// -goos::Object EmptyElement::to_form(const Env& env) const { +goos::Object EmptyElement::to_form(const Env&) const { return pretty_print::build_list("empty"); } diff --git a/decompiler/IR2/variable_naming.cpp b/decompiler/IR2/variable_naming.cpp index a0a42f1bf0..2e0dbb69fa 100644 --- a/decompiler/IR2/variable_naming.cpp +++ b/decompiler/IR2/variable_naming.cpp @@ -60,19 +60,49 @@ int VarMapSSA::get_next_var_id(Register reg) { */ void VarMapSSA::merge(const VarSSA& var_a, const VarSSA& var_b) { auto& a = m_entries.at(var_a.m_entry_id); - auto& b = m_entries.at(var_b.m_entry_id); + auto b = m_entries.at(var_b.m_entry_id); assert(a.reg == b.reg); if (b.var_id == 0) { + // fmt::print("Merge {} <- {}\n", to_string(var_b), 
to_string(var_a)); + const auto a_var_id = a.var_id; // snapshot: a aliases an entry this loop rewrites, so a.var_id changes mid-scan + for (auto& entry : m_entries) { + if (entry.var_id == a_var_id && entry.reg == a.reg) { + entry.var_id = b.var_id; + } + } a.var_id = b.var_id; } else { + // fmt::print("Merge {} <- {}\n", to_string(var_a), to_string(var_b)); + + for (auto& entry : m_entries) { + if (entry.var_id == b.var_id && entry.reg == b.reg) { + entry.var_id = a.var_id; + } + } b.var_id = a.var_id; } } +/*! + * Make all Bs A. + */ void VarMapSSA::merge_to_first(const VarSSA& var_a, const VarSSA& var_b) { auto& a = m_entries.at(var_a.m_entry_id); - auto& b = m_entries.at(var_b.m_entry_id); + auto b = m_entries.at(var_b.m_entry_id); + + // fmt::print("Merge-to-first {} <- {}\n", to_string(var_a), to_string(var_b)); assert(a.reg == b.reg); + + // for (auto& entry : m_entries) { + for (size_t i = 0; i < m_entries.size(); i++) { + auto& entry = m_entries.at(i); + if (entry.var_id == b.var_id && entry.reg == b.reg) { + // fmt::print("remap extra {} var_id from {} to {}\n", i, entry.var_id, a.var_id); + entry.var_id = a.var_id; + } else { + // fmt::print("no remap at {} (prev is {} {})\n", i, entry.reg.to_charp(), entry.var_id); + } + } b.var_id = a.var_id; } @@ -117,6 +147,12 @@ void VarMapSSA::remap_reg(Register reg, const std::unordered_map& rema } } +void VarMapSSA::debug_print_map() const { + for (auto& entry : m_entries) { + fmt::print("[{:02d}] {} {}\n", entry.entry_id, entry.reg.to_charp(), entry.var_id); + } +} + std::string SSA::Phi::print(const VarMapSSA& var_map) const { std::string result = var_map.to_string(dest); result += " <- phi("; @@ -177,6 +213,7 @@ SSA::Phi& SSA::get_phi(int block, Register dest_reg) { auto& phi_map = blocks.at(block).phis; auto kv = phi_map.find(dest_reg); if (kv == phi_map.end()) { + // printf("Allocate new get_phi for %s B%d\n", dest_reg.to_charp(), block); auto dest_var = map.allocate_init_phi(dest_reg, block); phi_map.insert(std::make_pair(dest_reg, dest_var)); } @@ -385,7 +422,18 @@ void SSA::remap() { 
- std::unordered_map, Register::hash> used_vars; + // this keeps the order of variable assignments in the instruction order, not var_id order. + struct VarIdRecord { + std::unordered_set set; + std::vector order; + void insert(int x) { + // insert() returns {iterator, inserted}; record x only on its first appearance + if (set.insert(x).second) { + order.push_back(x); + } + } + }; + std::unordered_map used_vars; for (auto& block : blocks) { assert(block.phis.empty()); for (auto& instr : block.ins) { @@ -401,7 +449,7 @@ void SSA::remap() { for (auto& reg_vars : used_vars) { std::unordered_map var_remap; int i = 0; - for (auto var_id : reg_vars.second) { + for (auto var_id : reg_vars.second.order) { var_remap[var_id] = i++; } map.remap_reg(reg_vars.first, var_remap); @@ -547,10 +595,17 @@ std::optional run_variable_renaming(const Function& function, } // Merge phis to return to executable code. + if (debug_prints) { + ssa.map.debug_print_map(); + } + ssa.merge_all_phis(); if (debug_prints) { fmt::print("{}", ssa.print()); } + if (debug_prints) { + ssa.map.debug_print_map(); + } // merge same vars (decided this made things worse) diff --git a/decompiler/IR2/variable_naming.h b/decompiler/IR2/variable_naming.h index a4a4c8bddf..1c4abf3849 100644 --- a/decompiler/IR2/variable_naming.h +++ b/decompiler/IR2/variable_naming.h @@ -74,6 +74,7 @@ class VarMapSSA { bool same(const VarSSA& var_a, const VarSSA& var_b) const; int var_id(const VarSSA& var); void remap_reg(Register reg, const std::unordered_map& remap); + void debug_print_map() const; private: int get_next_var_id(Register reg); diff --git a/decompiler/ObjectFile/LinkedObjectFile.cpp b/decompiler/ObjectFile/LinkedObjectFile.cpp index 888d99e1ce..16d7059f03 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.cpp +++ b/decompiler/ObjectFile/LinkedObjectFile.cpp @@ -1065,4 +1065,13 @@ std::string LinkedObjectFile::get_goal_string_by_label(const DecompilerLabel& la assert(0 == (label.offset % 4)); return get_goal_string(label.target_segment, (label.offset / 4) - 1, false); } + 
+// Look up a label by name with a linear scan over labels. +// Returns the first match; throws std::runtime_error if no label has that name. +const DecompilerLabel& LinkedObjectFile::get_label_by_name(const std::string& name) const { + for (const auto& candidate : labels) { + if (candidate.name == name) return candidate; + } + throw std::runtime_error("Can't find label " + name); +} } // namespace decompiler \ No newline at end of file diff --git a/decompiler/ObjectFile/LinkedObjectFile.h b/decompiler/ObjectFile/LinkedObjectFile.h index 88c16cd143..3ecc68134f 100644 --- a/decompiler/ObjectFile/LinkedObjectFile.h +++ b/decompiler/ObjectFile/LinkedObjectFile.h @@ -62,6 +62,7 @@ class LinkedObjectFile { std::string print_asm_function_disassembly(const std::string& my_name); u32 read_data_word(const DecompilerLabel& label); + const DecompilerLabel& get_label_by_name(const std::string& name) const; std::string get_goal_string_by_label(const DecompilerLabel& label) const; std::string get_goal_string(int seg, int word_idx, bool with_quotes = true) const; bool is_string(int seg, int byte_idx) const; diff --git a/decompiler/util/DecompilerTypeSystem.h b/decompiler/util/DecompilerTypeSystem.h index 8c0e61bc17..e098e296da 100644 --- a/decompiler/util/DecompilerTypeSystem.h +++ b/decompiler/util/DecompilerTypeSystem.h @@ -43,6 +43,7 @@ class DecompilerTypeSystem { int get_format_arg_count(const std::string& str) const; int get_format_arg_count(const TP_Type& type) const; struct { + bool locked = false; bool allow_pair; std::string current_method_type; void reset() { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 652805070e..3061db2daf 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -19,6 +19,7 @@ add_executable(goalc-test test_zydis.cpp goalc/test_goal_kernel.cpp decompiler/test_AtomicOpBuilder.cpp + decompiler/test_FormRegression.cpp decompiler/test_InstructionParser.cpp ${GOALC_TEST_FRAMEWORK_SOURCES} ${GOALC_TEST_CASES}) diff --git a/test/decompiler/test_FormRegression.cpp b/test/decompiler/test_FormRegression.cpp new file mode 100644 index 0000000000..05a3218ce0 --- /dev/null +++ 
b/test/decompiler/test_FormRegression.cpp @@ -0,0 +1,1025 @@ +#include +#include "gtest/gtest.h" +#include "decompiler/Disasm/InstructionParser.h" +#include "decompiler/Disasm/DecompilerLabel.h" +#include "decompiler/Function/Function.h" +#include "decompiler/ObjectFile/ObjectFileDB.h" +#include "decompiler/IR2/variable_naming.h" +#include "decompiler/IR2/cfg_builder.h" +#include "common/goos/PrettyPrinter.h" + +using namespace decompiler; + +class DecompilerRegressionTest : public ::testing::Test { + protected: + static std::unique_ptr parser; + static std::unique_ptr dts; + + static void SetUpTestCase() { + parser = std::make_unique(); + dts = std::make_unique(); + dts->parse_type_defs({"decompiler", "config", "all-types.gc"}); + } + + static void TearDownTestCase() { + // drop the shared fixtures; each pointer needs resetting only once + parser.reset(); + dts.reset(); + } + + struct TestData { + explicit TestData(int instrs) : func(0, instrs) {} + Function func; + LinkedObjectFile file; + + void add_string_at_label(const std::string& label_name, const std::string& data) { + // first, align segment 1: + while (file.words_by_seg.at(1).size() % 4) { + file.words_by_seg.at(1).push_back(LinkedWord(0)); + } + + // add string type tag: + LinkedWord type_tag(0); + type_tag.kind = LinkedWord::Kind::TYPE_PTR; + type_tag.symbol_name = "string"; + file.words_by_seg.at(1).push_back(type_tag); + int string_start = 4 * int(file.words_by_seg.at(1).size()); + + // add size + file.words_by_seg.at(1).push_back(LinkedWord(int(data.length()))); + + // add string: + std::vector bytes; + bytes.resize(((data.size() + 1 + 3) / 4) * 4); + for (size_t i = 0; i < data.size(); i++) { + bytes[i] = data[i]; + } + for (size_t i = 0; i < bytes.size() / 4; i++) { + auto word = ((uint32_t*)bytes.data())[i]; + file.words_by_seg.at(1).push_back(LinkedWord(word)); + } + for (int i = 0; i < 3; i++) { + file.words_by_seg.at(1).push_back(LinkedWord(0)); + } + // will be already null terminated. 
+ + for (auto& label : file.labels) { + if (label.name == label_name) { + label.target_segment = 1; + label.offset = string_start; + return; + } + } + + EXPECT_TRUE(false); + } + }; + + std::unique_ptr make_function( + const std::string& code, + const TypeSpec& function_type, + bool allow_pairs = false, + const std::string& method_name = "", + const std::vector>& strings = {}) { + dts->type_prop_settings.locked = true; + dts->type_prop_settings.reset(); + dts->type_prop_settings.allow_pair = allow_pairs; + dts->type_prop_settings.current_method_type = method_name; + auto program = parser->parse_program(code); + // printf("prg:\n%s\n\n", program.print().c_str()); + auto test = std::make_unique(program.instructions.size()); + test->file.words_by_seg.resize(3); + test->file.labels = program.labels; + test->func.ir2.env.file = &test->file; + test->func.instructions = program.instructions; + test->func.guessed_name.set_as_global("test-function"); + + for (auto& str : strings) { + test->add_string_at_label(str.first, str.second); + } + + test->func.basic_blocks = find_blocks_in_function(test->file, 0, test->func); + test->func.analyze_prologue(test->file); + test->func.cfg = build_cfg(test->file, 0, test->func); + EXPECT_TRUE(test->func.cfg->is_fully_resolved()); + + auto ops = convert_function_to_atomic_ops(test->func, program.labels); + test->func.ir2.atomic_ops = std::make_shared(std::move(ops)); + test->func.ir2.atomic_ops_succeeded = true; + + if (test->func.run_type_analysis_ir2(function_type, *dts, test->file, {})) { + test->func.ir2.has_type_info = true; + } else { + EXPECT_TRUE(false); + } + + test->func.ir2.reg_use = analyze_ir2_register_usage(test->func); + test->func.ir2.has_reg_use = true; + + auto result = + run_variable_renaming(test->func, test->func.ir2.reg_use, *test->func.ir2.atomic_ops, *dts); + if (result.has_value()) { + test->func.ir2.env.set_local_vars(*result); + } else { + EXPECT_TRUE(false); + } + + build_initial_forms(test->func); + 
EXPECT_TRUE(test->func.ir2.top_form); + + return test; + } + + void test(const std::string& code, + const std::string& type, + const std::string& expected, + bool allow_pairs = false, + const std::string& method_name = "", + const std::vector>& strings = {}) { + auto ts = dts->parse_type_spec(type); + auto test = make_function(code, ts, allow_pairs, method_name, strings); + auto expected_form = + pretty_print::get_pretty_printer_reader().read_from_string(expected, false).as_pair()->car; + auto actual_form = + pretty_print::get_pretty_printer_reader() + .read_from_string(test->func.ir2.top_form->to_form(test->func.ir2.env).print(), false) + .as_pair() + ->car; + if (expected_form != actual_form) { + printf("Got:\n%s\n\nExpected\n%s\n", actual_form.print().c_str(), + expected_form.print().c_str()); + } + + EXPECT_TRUE(expected_form == actual_form); + } +}; + +std::unique_ptr DecompilerRegressionTest::parser; +std::unique_ptr DecompilerRegressionTest::dts; + +TEST_F(DecompilerRegressionTest, StringTest) { + std::string func = + " sll r0, r0, 0\n" + "L100:\n" + " or v0, a0, r0\n" + "L101:\n" + " jr ra\n" + " daddu sp, sp, r0"; + auto test = make_function(func, TypeSpec("function", {TypeSpec("none")}), false, "", + {{"L100", "testing-string"}, {"L101", "testing-string-2"}}); + + EXPECT_EQ(test->file.get_goal_string_by_label(test->file.get_label_by_name("L100")), + "testing-string"); + EXPECT_EQ(test->file.get_goal_string_by_label(test->file.get_label_by_name("L101")), + "testing-string-2"); +} + +TEST_F(DecompilerRegressionTest, SimplestTest) { + std::string func = + " sll r0, r0, 0\n" + " or v0, a0, r0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function object object)"; + std::string expected = "(set! 
v0-0 a0-0)"; + test(func, type, expected); +} + +TEST_F(DecompilerRegressionTest, FloatingPointBasic) { + std::string func = + " sll r0, r0, 0\n" + "L345:\n" + " daddiu sp, sp, -16\n" + " sd fp, 8(sp)\n" + " or fp, t9, r0\n" + " lwc1 f0, L345(fp)\n" + " mtc1 f1, a0\n" + " div.s f0, f0, f1\n" + " mfc1 v0, f0\n" + " ld fp, 8(sp)\n" + " jr ra\n" + " daddiu sp, sp, 16"; + std::string type = "(function float float)"; + std::string expected = + "(begin\n" + " (set! f0-0 (l.f L345))\n" + " (set! f1-0 (gpr->fpr a0-0))\n" + " (set! f0-1 (/.s f0-0 f1-0))\n" + " (set! v0-0 (fpr->gpr f0-1))\n" + " )"; + test(func, type, expected); +} + +TEST_F(DecompilerRegressionTest, Op3) { + std::string func = + " sll r0, r0, 0\n" + "L308:\n" + " mult3 v0, a0, a1\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(set! v0-0 (*.si a0-0 a1-0))"; + test(func, type, expected); +} + +TEST_F(DecompilerRegressionTest, Division) { + std::string func = + " sll r0, r0, 0\n" + "L307:\n" + " div a0, a1\n" + " mflo v0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(set! v0-0 (/.si a0-0 a1-0))"; + test(func, type, expected); +} + +TEST_F(DecompilerRegressionTest, Ash) { + std::string func = + " sll r0, r0, 0\n" + "L305:\n" + " or v1, a0, r0\n" + " bgezl a1, L306\n" + " dsllv v0, v1, a1\n" + + " dsubu a0, r0, a1\n" + " dsrav v0, v1, a0\n" + "L306:\n" + " jr ra\n" + " daddu sp, sp, r0\n" + " sll r0, r0, 0\n" + " sll r0, r0, 0"; + std::string type = "(function int int int)"; + std::string expected = "(begin (set! v1-0 a0-0) (set! v0-0 (ash.si v1-0 a1-0)))"; + test(func, type, expected); +} + +TEST_F(DecompilerRegressionTest, Abs) { + std::string func = + " sll r0, r0, 0\n" + "L301:\n" + " or v0, a0, r0\n" + " bltzl v0, L302\n" + " dsubu v0, r0, v0\n" + + "L302:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int)"; + std::string expected = "(begin (set! 
v0-0 a0-0) (set! v0-1 (abs v0-0)))"; + test(func, type, expected); +} + +TEST_F(DecompilerRegressionTest, Min) { + std::string func = + " sll r0, r0, 0\n" + " or v0, a0, r0\n" + " or v1, a1, r0 \n" + " slt a0, v0, v1\n" + " movz v0, v1, a0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(begin (set! v0-0 a0-0) (set! v1-0 a1-0) (set! v0-1 (min.si v0-0 v1-0)))"; + test(func, type, expected); +} + +TEST_F(DecompilerRegressionTest, Max) { + std::string func = + " sll r0, r0, 0\n" + "L299:\n" + " or v0, a0, r0\n" + " or v1, a1, r0\n" + " slt a0, v0, v1\n" + " movn v0, v1, a0\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function int int int)"; + std::string expected = "(begin (set! v0-0 a0-0) (set! v1-0 a1-0) (set! v0-1 (max.si v0-0 v1-0)))"; + test(func, type, expected); +} + +TEST_F(DecompilerRegressionTest, FormatString) { + std::string func = + " sll r0, r0, 0\n" + "L343:\n" + " daddiu sp, sp, -32\n" + " sd ra, 0(sp)\n" + " sd fp, 8(sp)\n" + " or fp, t9, r0\n" + " sq gp, 16(sp)\n" + + " or gp, a0, r0\n" + " lw t9, format(s7)\n" + " daddiu a0, s7, #t\n" + " daddiu a1, fp, L343\n" + " lwc1 f0, 0(gp)\n" + " mfc1 a2, f0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v0, gp, r0 \n" + " ld ra, 0(sp)\n" + " ld fp, 8(sp)\n" + " lq gp, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 32"; + std::string type = "(function bfloat bfloat)"; + std::string expected = + "(begin\n" + " (set! gp-0 a0-0)\n" + " (set! t9-0 format)\n" + " (set! a0-1 '#t)\n" + " (set! a1-0 L343)\n" + " (set! f0-0 (l.f gp-0))\n" + " (set! a2-0 (fpr->gpr f0-0))\n" + " (set! v0-0 (call!))\n" + " (set! 
v0-1 gp-0)\n" + " )"; + test(func, type, expected, false, "", {{"L343", "~f"}}); +} + +TEST_F(DecompilerRegressionTest, WhileLoop) { + std::string func = + " sll r0, r0, 0\n" + "L285:\n" + " lwu v1, -4(a0)\n" + " lw a0, object(s7)\n" + + "L286:\n" + " bne v1, a1, L287\n" + " or a2, s7, r0\n" + + " daddiu v1, s7, #t\n" + " or v0, v1, r0\n" + " beq r0, r0, L288\n" + " sll r0, r0, 0\n" + + " or v1, r0, r0\n" + "L287:\n" + " lwu v1, 4(v1)\n" + " bne v1, a0, L286\n" + " sll r0, r0, 0\n" + " or v0, s7, r0\n" + "L288:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function basic type symbol)"; + std::string expected = + "(begin\n" + " (set! v1-0 (l.wu (+ a0-0 -4)))\n" + " (set! a0-1 object)\n" + " (until\n" + " (begin (set! v1-0 (l.wu (+ v1-0 4))) (= v1-0 a0-1))\n" + " (if\n" + " (= v1-0 a1-0)\n" + " (return ((begin (set! v1-1 '#t) (set! v0-0 v1-1))) ((set! v1-0 0)))\n" + " )\n" + " )\n" + " (set! v0-1 '#f)\n" + " )"; + test(func, type, expected); +} + +// Note - this test looks weird because or's aren't fully processed at this point. +TEST_F(DecompilerRegressionTest, Or) { + std::string func = + " sll r0, r0, 0\n" + "L280:\n" + " lw v1, object(s7)\n" + + "L281:\n" + " bne a0, a1, L282\n" + " or a2, s7, r0\n" + + " daddiu v1, s7, #t\n" + " or v0, v1, r0\n" + " beq r0, r0, L284\n" + " sll r0, r0, 0\n" + + " or v1, r0, r0\n" + + "L282:\n" + " lwu a0, 4(a0)\n" + " dsubu a2, a0, v1\n" + " daddiu a3, s7, 8\n" + " movn a3, s7, a2\n" + " bnel s7, a3, L283\n" + " or a2, a3, r0\n" + + " daddiu a2, s7, 8\n" + " movn a2, s7, a0\n" + + "L283:\n" + " beq s7, a2, L281\n" + " sll r0, r0, 0\n" + + " or v0, s7, r0\n" + + "L284:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function type type symbol)"; + std::string expected = + "(begin\n" + " (set! v1-0 object)\n" + " (until\n" + " (begin\n" + " (or\n" + " (begin\n" + " (set! a0-0 (l.wu (+ a0-0 4)))\n" + " (set! a3-0 (= a0-0 v1-0))\n" + " (truthy a3-0)\n" // this sets a2-0, the unused result of the OR. 
it gets a separate + // variable because it's not used. + " )\n" + " (set! a2-1 (zero? a0-0))\n" // so this should be a2-1. + " )\n" + " (truthy a2-1)\n" + " )\n" + " (if\n" + " (= a0-0 a1-0)\n" + " (return ((begin (set! v1-1 '#t) (set! v0-0 v1-1))) ((set! v1-0 0)))\n" + " )\n" + " )\n" + " (set! v0-1 '#f)\n" + " )"; + test(func, type, expected); +} + +TEST_F(DecompilerRegressionTest, DynamicMethodAccess) { + std::string func = + " sll r0, r0, 0\n" + + "L275:\n" + " dsll v1, a1, 2\n" + " daddu v1, v1, a0\n" + " lwu v1, 16(v1)\n" + + "L276:\n" + " lw a2, object(s7)\n" + " bne a0, a2, L277\n" + " or a2, s7, r0\n" + + " lw v1, nothing(s7)\n" + " or v0, v1, r0\n" + " beq r0, r0, L279\n" + " sll r0, r0, 0\n" + + " or v1, r0, r0\n" + + "L277:\n" + " lwu a0, 4(a0)\n" + " dsll a2, a1, 2\n" + " daddu a2, a2, a0\n" + " lwu v0, 16(a2)\n" + " bne v0, r0, L278\n" + " or a2, s7, r0\n" + + " lw v1, nothing(s7)\n" + " or v0, v1, r0\n" + " beq r0, r0, L279\n" + " sll r0, r0, 0\n" + + " or v1, r0, r0\n" + + "L278:\n" + " beq v0, v1, L276\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + + "L279:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function type int function)"; + std::string expected = + "(begin\n" + " (set! v1-0 (sll a1-0 2))\n" + " (set! v1-1 (+ v1-0 a0-0))\n" + " (set! v1-2 (l.wu (+ v1-1 16)))\n" // get the method of the given type. + " (until\n" + " (!= v0-1 v1-2)\n" // actually goes after the body, so it's fine to refer to v0-1/v1-2 + " (if\n" + " (begin\n" + " (if\n" + " (begin (set! a2-0 object) (= a0-0 a2-0))\n" // if we reached the top + " (return ((begin (set! v1-3 nothing) (set! v0-0 v1-3))) ((set! v1-2 0)))\n" // return + // nothing. + " )\n" + " (set! a0-0 (l.wu (+ a0-0 4)))\n" // get next parent type + " (set! a2-2 (sll a1-0 2))\n" // fancy access + " (set! a2-3 (+ a2-2 a0-0))\n" + " (set! v0-1 (l.wu (+ a2-3 16)))\n" // get method (in v0-1, the same var as loop + // condition) + " (zero? v0-1)\n" // is it defined? 
+ " )\n" + " (return ((begin (set! v1-4 nothing) (set! v0-2 v1-4))) ((set! v1-2 0)))\n" // also + // return + // nothing. + " )\n" + " )\n" + " (set! v1-5 '#f)\n" + " )"; + test(func, type, expected); +} + +TEST_F(DecompilerRegressionTest, SimpleLoopMergeCheck) { + std::string func = + " sll r0, r0, 0\n" + + "L272:\n" + " addiu v1, r0, 0\n" + " beq r0, r0, L274\n" + " sll r0, r0, 0\n" + + "L273:\n" + " sll r0, r0, 0\n" + " sll r0, r0, 0\n" + " lw a0, 2(a0)\n" + " daddiu v1, v1, 1\n" + + "L274:\n" + " slt a2, v1, a1\n" + " bne a2, r0, L273\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + " lw v0, -2(a0)\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function pair int)"; + std::string expected = + "(begin\n" + " (set! v1-0 0)\n" + " (while\n" + " (<.si v1-0 a1-0)\n" + " (nop!)\n" + " (nop!)\n" + " (set! a0-0 (l.w (+ a0-0 2)))\n" // should have merged + " (set! v1-0 (+ v1-0 1))\n" // also should have merged + " )\n" + " (set! v1-1 '#f)\n" + " (set! v1-2 '#f)\n" + " (set! v0-0 (l.w (+ a0-0 -2)))\n" + " )"; + test(func, type, expected, true); +} + +TEST_F(DecompilerRegressionTest, And) { + std::string func = + " sll r0, r0, 0\n" + + "L266:\n" + " daddiu v1, s7, -10\n" + " bne a0, v1, L267\n" + " sll r0, r0, 0\n" + + " addiu v0, r0, 0\n" + " beq r0, r0, L271\n" + " sll r0, r0, 0\n" + + "L267:\n" + " lw v1, 2(a0)\n" + " addiu v0, r0, 1\n" + " beq r0, r0, L269\n" + " sll r0, r0, 0\n" + + "L268:\n" + " daddiu v0, v0, 1\n" + " lw v1, 2(v1)\n" + + "L269:\n" + " daddiu a0, s7, -10\n" + " dsubu a0, v1, a0\n" + " daddiu a1, s7, 8\n" + " movz a1, s7, a0\n" + " beql s7, a1, L270\n" + " or a0, a1, r0\n" + + " dsll32 a0, v1, 30\n" + " slt a1, a0, r0\n" + " daddiu a0, s7, 8\n" + " movz a0, s7, a1\n" + + "L270:\n" + " bne s7, a0, L268\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + + "L271:\n" + " jr ra\n" + " daddu sp, sp, r0"; + std::string type = "(function pair int)"; + std::string expected = + "(cond\n" + " ((begin (set! 
v1-0 '()) (= a0-0 v1-0)) (set! v0-0 0))\n" // should be a case, not a return + " (else\n" + " (set! v1-1 (l.w (+ a0-0 2)))\n" // v1-1 iteration. + " (set! v0-1 1)\n" // v0-1 count + " (while\n" + " (begin\n" + " (and\n" + " (begin (set! a0-1 '()) (set! a1-0 (!= v1-1 a0-1)) (truthy a1-0))\n" // check v1-1 + " (begin (set! a0-3 (sll v1-1 62)) (set! a0-2 (<0.si a0-3)))\n" // check v1-1 + " )\n" + " (truthy a0-2)\n" // this variable doesn't appear, but is set by the and. + " )\n" + " (set! v0-1 (+ v0-1 1))\n" // merged (and the result) + " (set! v1-1 (l.w (+ v1-1 2)))\n" // also merged. + " )\n" + " (set! v1-2 '#f)\n" // while's false, I think. + " )\n" + " )"; + test(func, type, expected, true); +} + +TEST_F(DecompilerRegressionTest, FunctionCall) { + // nmember + std::string func = + " sll r0, r0, 0\n" + + "L252:\n" + " daddiu sp, sp, -48\n" + " sd ra, 0(sp)\n" + " sq s5, 16(sp)\n" + " sq gp, 32(sp)\n" + + " or s5, a0, r0\n" + " or gp, a1, r0\n" + " beq r0, r0, L254\n" + " sll r0, r0, 0\n" + + "L253:\n" + " lw gp, 2(gp)\n" + + "L254:\n" + " daddiu v1, s7, -10\n" + " dsubu v1, gp, v1\n" + " daddiu a0, s7, 8\n" + " movn a0, s7, v1\n" + " bnel s7, a0, L255\n" + " or v1, a0, r0\n" + + " lw t9, name=(s7)\n" + " lw a0, -2(gp)\n" + " or a1, s5, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v1, v0, r0\n" + + "L255:\n" + " beq s7, v1, L253\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " daddiu v1, s7, -10\n" + " beq gp, v1, L256\n" + " or v0, s7, r0\n" + + " or v0, gp, r0\n" + + "L256:\n" + " ld ra, 0(sp)\n" + " lq gp, 32(sp)\n" + " lq s5, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 48"; + std::string type = "(function basic object object)"; + std::string expected = + "(if\n" // this if needs regrouping. + " (begin\n" + " (set! s5-0 a0-0)\n" // s5-0 is the thing to check + " (set! gp-0 a1-0)\n" // gp-0 is the list + " (while\n" + " (begin\n" + " (or\n" + " (begin (set! v1-0 '()) (set! a0-1 (= gp-0 v1-0)) (truthy a0-1))\n" // got empty list. + " (begin\n" + " (set! 
t9-0 name=)\n" + " (set! a0-2 (l.w (+ gp-0 -2)))\n" + " (set! a1-1 s5-0)\n" + " (set! v0-0 (call!))\n" + " (set! v1-1 v0-0)\n" // name match + " )\n" + " )\n" + " (not v1-1)\n" // no name match AND no empty list. + " )\n" + " (set! gp-0 (l.w (+ gp-0 2)))\n" // get next (merged) + " )\n" + " (set! v1-2 '#f)\n" // while loop thing + " (set! v1-3 '())\n" // + " (!= gp-0 v1-3)\n" // IF CONDITION + " )\n" + " (set! v0-2 gp-0)\n" // not empty, so return the result + " )"; // the (set! v0 #f) from the if is added later. + test(func, type, expected, true); +} + +TEST_F(DecompilerRegressionTest, NestedAndOr) { + std::string func = + " sll r0, r0, 0\n" + + "L200:\n" + " daddiu sp, sp, -112\n" + " sd ra, 0(sp)\n" + " sq s1, 16(sp)\n" + " sq s2, 32(sp)\n" + " sq s3, 48(sp)\n" + " sq s4, 64(sp)\n" + " sq s5, 80(sp)\n" + " sq gp, 96(sp)\n" + + " or gp, a0, r0\n" + " or s5, a1, r0\n" + " addiu s4, r0, -1\n" + " beq r0, r0, L208\n" + " sll r0, r0, 0\n" + + "L201:\n" + " addiu s4, r0, 0\n" + " or s3, gp, r0\n" + " beq r0, r0, L206\n" + " sll r0, r0, 0\n" + + "L202:\n" + " lw s2, -2(s3)\n" + " lw v1, 2(s3)\n" + " lw s1, -2(v1)\n" + " or t9, s5, r0\n" + " or a0, s2, r0\n" + " or a1, s1, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " or v1, v0, r0\n" + " beql s7, v1, L203\n" + " daddiu a0, s7, 8\n" + + " slt a1, r0, v1\n" + " daddiu a0, s7, 8\n" + " movz a0, s7, a1\n" + + "L203:\n" + " beql s7, a0, L204\n" + " or v1, a0, r0\n" + + " daddiu a0, s7, #t\n" + " dsubu a0, v1, a0\n" + " daddiu v1, s7, 8\n" + " movz v1, s7, a0\n" + + "L204:\n" + " beq s7, v1, L205\n" + " or v1, s7, r0\n" + + " daddiu s4, s4, 1\n" + " sw s1, -2(s3)\n" + " lw v1, 2(s3)\n" + " sw s2, -2(v1)\n" + " or v1, s2, r0\n" + + "L205:\n" + " lw s3, 2(s3)\n" + + "L206:\n" + " lw v1, 2(s3)\n" + " daddiu a0, s7, -10\n" + " dsubu v1, v1, a0\n" + " daddiu a0, s7, 8\n" + " movn a0, s7, v1\n" + " bnel s7, a0, L207\n" + " or v1, a0, r0\n" + + " lw v1, 2(s3)\n" + " dsll32 v1, v1, 30\n" + " slt a0, v1, r0\n" + " daddiu v1, 
s7, 8\n" + " movn v1, s7, a0\n" + + "L207:\n" + " beq s7, v1, L202\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v1, s7, r0\n" + + "L208:\n" + " bne s4, r0, L201\n" + " sll r0, r0, 0\n" + + " or v1, s7, r0\n" + " or v0, gp, r0\n" + " ld ra, 0(sp)\n" + " lq gp, 96(sp)\n" + " lq s5, 80(sp)\n" + " lq s4, 64(sp)\n" + " lq s3, 48(sp)\n" + " lq s2, 32(sp)\n" + " lq s1, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 112"; + std::string type = "(function object (function object object object) object)"; + std::string expected = + "(begin\n" + " (set! gp-0 a0-0)\n" // gp-0 = list + " (set! s5-0 a1-0)\n" // s5-0 = func + " (set! s4-0 -1)\n" // s4-0 = flag + " (while\n" + " (nonzero? s4-0)\n" // there is stuff to do... + " (set! s4-0 0)\n" // flag = 0 + " (set! s3-0 gp-0)\n" // s3 = list-iter + " (while\n" + " (begin\n" + " (or\n" + " (begin\n" + " (set! v1-6 (l.w (+ s3-0 2)))\n" // s3-0 = cdr + " (set! a0-4 '())\n" + " (set! a0-5 (= v1-6 a0-4))\n" + " (truthy a0-5)\n" // cdr = empty list (sets v1-7 secretly) + " )\n" + " (begin\n" + " (set! v1-8 (l.w (+ s3-0 2)))\n" + " (set! v1-9 (sll v1-8 62))\n" + " (set! v1-7 (>=0.si v1-9))\n" // car is not a list. + " )\n" + " )\n" + " (not v1-7)\n" // while we still have an iterable list... + " )\n" + " (when\n" + " (begin\n" + " (and\n" + " (begin\n" + " (or\n" + " (begin\n" + " (set! s2-0 (l.w (+ s3-0 -2)))\n" // s2 = car + " (set! v1-0 (l.w (+ s3-0 2)))\n" + " (set! s1-0 (l.w (+ v1-0 -2)))\n" // s1 = cadr + " (set! t9-0 s5-0)\n" // func + " (set! a0-1 s2-0)\n" // car + " (set! a1-1 s1-0)\n" // cadr + " (set! v0-0 (call!))\n" // compare! + " (set! v1-1 v0-0)\n" + " (not v1-1)\n" // result is false (secretly sets a0-2) + " )\n" + " (set! a0-2 (>0.si v1-1))\n" // >0 + " )\n" + " (truthy a0-2)\n" // false or >0 + " )\n" + " (begin (set! a0-3 '#t) (set! v1-2 (!= v1-2 a0-3)))\n" // not #t + " )\n" + " (truthy v1-2)\n" // (and (or false >0) (not #t)) + " )\n" + " (set! s4-0 (+ s4-0 1))\n" // increment, merge + " (s.w! 
(+ s3-0 -2) s1-0)\n" // set iter's car to cadr + " (set! v1-4 (l.w (+ s3-0 2)))\n" // current cdr + " (s.w! (+ v1-4 -2) s2-0)\n" // set cadr + " (set! v1-5 s2-0)\n" // iteration thing? + " )\n" + " (set! s3-0 (l.w (+ s3-0 2)))\n" // increment! + " )\n" + " (set! v1-10 '#f)\n" + " (set! v1-11 '#f)\n" + " )\n" + " (set! v1-12 '#f)\n" + " (set! v0-1 gp-0)\n" + " )"; + test(func, type, expected, true); +} + +TEST_F(DecompilerRegressionTest, NewMethod) { + // inline-array-class new + std::string func = + " sll r0, r0, 0\n" + + "L198:\n" + " daddiu sp, sp, -32\n" + " sd ra, 0(sp)\n" + " sq gp, 16(sp)\n" + + " or gp, a2, r0\n" + " lw v1, object(s7)\n" + " lwu t9, 16(v1)\n" + " or v1, a1, r0\n" + " lhu a2, 8(a1)\n" + " lhu a1, 12(a1)\n" + " multu3 a1, gp, a1\n" + " daddu a2, a2, a1\n" + " or a1, v1, r0\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " beq v0, r0, L199\n" + " or v1, s7, r0\n" + + " sw gp, 0(v0)\n" + " sw gp, 4(v0)\n" + + "L199:\n" + " ld ra, 0(sp)\n" + " lq gp, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 32"; + std::string type = "(function symbol type int inline-array-class)"; + std::string expected = + "(when\n" + " (begin\n" + " (set! gp-0 a2-0)\n" // gp-0 is size + " (set! v1-0 object)\n" + " (set! t9-0 (l.wu (+ v1-0 16)))\n" // object new + " (set! v1-1 a1-0)\n" // ? + " (set! a2-1 (l.hu (+ a1-0 8)))\n" // math + " (set! a1-1 (l.hu (+ a1-0 12)))\n" + " (set! a1-2 (*.ui gp-0 a1-1))\n" + " (set! a2-2 (+ a2-1 a1-2))\n" + " (set! a1-3 v1-1)\n" // size! + " (set! v0-0 (call!))\n" + " (nonzero? v0-0)\n" // only if we got memory... + " )\n" + " (s.w! v0-0 gp-0)\n" // store size + " (s.w! 
(+ v0-0 4) gp-0)\n" + " )"; + test(func, type, expected, false, "inline-array-class"); +} + +TEST_F(DecompilerRegressionTest, Recursive) { + std::string func = + " sll r0, r0, 0\n" + + "L65:\n" + " daddiu sp, sp, -32\n" + " sd ra, 0(sp)\n" + " sq gp, 16(sp)\n" + + " or gp, a0, r0\n" + " addiu v1, r0, 1\n" + " bne gp, v1, L66\n" + " sll r0, r0, 0\n" + + " addiu v0, r0, 1\n" + " beq r0, r0, L67\n" + " sll r0, r0, 0\n" + + "L66:\n" + " lw t9, fact(s7)\n" + " daddiu a0, gp, -1\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " mult3 v0, gp, v0\n" + "L67:\n" + " ld ra, 0(sp)\n" + " lq gp, 16(sp)\n" + " jr ra\n" + " daddiu sp, sp, 32"; + std::string type = "(function int int)"; + std::string expected = + "(cond\n" + " ((begin (set! gp-0 a0-0) (set! v1-0 1) (= gp-0 v1-0)) (set! v0-0 1))\n" // base + " (else\n" + " (set! t9-0 fact)\n" // recurse! + " (set! a0-1 (+ gp-0 -1))\n" + " (set! v0-1 (call!))\n" + " (set! v0-2 (*.si gp-0 v0-1))\n" // not quite a tail call... + " )\n" + " )"; + test(func, type, expected, false); +} + +TEST_F(DecompilerRegressionTest, TypeOf) { + std::string func = + " sll r0, r0, 0\n" + + "L63:\n" + " daddiu sp, sp, -16\n" + " sd ra, 0(sp)\n" + + " dsll32 v1, a0, 29\n" + " beql v1, r0, L64\n" + " lw v1, binteger(s7)\n" + + " bgtzl v1, L64\n" + " lw v1, pair(s7)\n" + + " lwu v1, -4(a0)\n" + + "L64:\n" + " lwu t9, 24(v1)\n" + " jalr ra, t9\n" + " sll v0, ra, 0\n" + + " ld ra, 0(sp)\n" + " jr ra\n" + " daddiu sp, sp, 16"; + std::string type = "(function object object)"; + std::string expected = + "(begin\n" + " (set! v1-1 (type-of a0-0))\n" + " (set! t9-0 (l.wu (+ v1-1 24)))\n" // print method. + " (set! v0-0 (call!))\n" + " )"; + test(func, type, expected, false); +} \ No newline at end of file